Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 5563 → Rev 5564

/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/Makefile
0,0 → 1,12
# Assembles the built-in library sources (*.asm) into C headers (*.asm.h)
# using envyas. ENVYAS may be overridden from the environment or the make
# command line to point at a specific binary.
ENVYAS ?= envyas

# "all" does not name a file; declaring it phony keeps a stray file called
# "all" from masking the target.
.PHONY: all
all: gf100.asm.h gk104.asm.h gk110.asm.h gm107.asm.h

# One static pattern rule per chip, because the envyas -m / -V options differ.
# Recipe lines must begin with a TAB character.
gf100.asm.h: %.asm.h: %.asm
	$(ENVYAS) -a -W -mnvc0 -Vnvc0 $< -o $@
gk104.asm.h: %.asm.h: %.asm
	$(ENVYAS) -a -W -mnvc0 -Vnve4 $< -o $@
gk110.asm.h: %.asm.h: %.asm
	$(ENVYAS) -a -W -mgk110 $< -o $@
gm107.asm.h: %.asm.h: %.asm
	$(ENVYAS) -a -W -mgm107 $< -o $@
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gf100.asm
0,0 → 1,107
.section #gf100_builtin_code
// DIV U32
//
// Unsigned 32-bit divide built-in: produces both quotient and remainder.
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1
// NOTE(review): the body below only writes $p0; $p1 appears unused - confirm.
// SIZE: 22 / 14 * 8 bytes
//
gf100_div_u32:
// Seed z in $r2: take the bfind result for the divisor, xor it with 31 and
// shift the constant 1 left by that (clamped) amount.
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
// $r1 = -b, so the products below are (-b) * z.
cvt u32 $r1 neg u32 $r1
// Five unrolled refinement steps: $r3 = (-b) * z, then z += hi32(z * $r3).
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
// Keep the dividend in $r3, form the quotient estimate $r0 = hi32(a * z),
// restore b into $r2 (negating -b) and compute the remainder estimate
// $r1 = (-b) * $r0 + a.
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
cvt u32 $r2 neg u32 $r1
add $r1 (mul u32 $r1 u32 $r0) $r3
// Up to two correction steps: if the remainder is still >= b, subtract b and
// increment the quotient. The second compare is itself predicated on $p0, so
// it is only evaluated when the first correction fired.
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
ret
 
// DIV S32, like DIV U32 after taking ABS(inputs)
//
// Signed 32-bit divide built-in: records the operand signs, divides the
// absolute values with the same sequence as gf100_div_u32, then re-applies
// the signs to quotient and remainder.
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
// NOTE(review): the body below writes $p0, $p2 and $p3 only; $p1 appears
// unused - confirm.
//
gf100_div_s32:
// $p2 = dividend is negative; $p3 = (divisor is negative) xor $p2, i.e. the
// operand signs differ.
set $p2 0x1 lt s32 $r0 0x0
set $p3 0x1 lt s32 $r1 0x0 xor $p2
// Continue with |a| and |b|.
cvt s32 $r0 abs s32 $r0
cvt s32 $r1 abs s32 $r1
// Seed z in $r2 from the bfind result of the divisor (xor 31, then 1 << n).
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
// $r1 = -b for the refinement products.
cvt u32 $r1 neg u32 $r1
// Five unrolled refinement steps: $r3 = (-b) * z, then z += hi32(z * $r3).
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
// Quotient estimate $r0 = hi32(a * z); $r2 = b; remainder estimate
// $r1 = (-b) * $r0 + a (dividend saved in $r3).
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
cvt u32 $r2 neg u32 $r1
add $r1 (mul u32 $r1 u32 $r0) $r3
// Up to two correction steps, the second only if the first one fired.
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
// Re-apply signs: negate the quotient when the operand signs differed ($p3)
// and the remainder when the dividend was negative ($p2).
$p3 cvt s32 $r0 neg s32 $r0
$p2 cvt s32 $r1 neg s32 $r1
ret
 
// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
//
// INPUT: $r0d (x)
// OUTPUT: $r0d (rcp(x))
// CLOBBER: $r2 - $r7
// SIZE: 9 * 8 bytes
//
// NOTE(review): the body below is only "nop; ret" - the Newton-Raphson
// iteration described above is not implemented here, so $r0d is returned
// unchanged and the CLOBBER/SIZE lines do not describe this body.
//
gf100_rcp_f64:
nop
ret
 
// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
//
// INPUT: $r0d (x)
// OUTPUT: $r0d (rsqrt(x))
// CLOBBER: $r2 - $r7
// SIZE: 14 * 8 bytes
//
// NOTE(review): the body below is only "nop; ret" - the Newton-Raphson
// iteration described above is not implemented here, so $r0d is returned
// unchanged and the CLOBBER/SIZE lines do not describe this body.
//
gf100_rsq_f64:
nop
ret
 
// Offset table: one 64-bit entry per built-in, holding the address of its
// label. The entry order (div_u32, div_s32, rcp_f64, rsq_f64) is the index
// by which consumers of this table must look the routines up.
.section #gf100_builtin_offsets
.b64 #gf100_div_u32
.b64 #gf100_div_s32
.b64 #gf100_rcp_f64
.b64 #gf100_rsq_f64
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gf100.asm.h
0,0 → 1,63
/*
 * Assembled machine code for the gf100 built-in routines, one 64-bit word per
 * array element. This is assembler output for gf100.asm: regenerate it from
 * that source instead of editing individual words. The offsets in the
 * comments below are byte offsets from the start of this array.
 */
uint64_t gf100_builtin_code[] = {
/* 0x0000: gf100_div_u32 */
0x7800000004009c03,
0x0010dd187c209cdd,
0x6000000008309c03,
0x0810dc2a05605c18,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x280000000000dde4,
0x5000000008001c43,
0x0010430d05609c18,
0x1b0e00000811dc03,
0x4800000008104103,
0x0800000004000002,
0x1b0e00000811c003,
0x4800000008104103,
0x90001dff040000ac,
/* 0x00b0: gf100_div_s32 */
0x188e0000fc05dc23,
0x18c40000fc17dc23,
0x07305e1803301e18,
0x7800000004009c03,
0x0010dd187c209cdd,
0x6000000008309c03,
0x0810dc2a05605c18,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x280000000000dde4,
0x5000000008001c43,
0x0010430d05609c18,
0x1b0e00000811dc03,
0x4800000008104103,
0x0800000004000002,
0x1b0e00000811c003,
0x4800000008104103,
0x01700e18040000ac,
0x90001dff05704a18,
/* 0x0180: gf100_rcp_f64 */
0x90001dff00001c08,
/* 0x0188: gf100_rsq_f64 */
0x90001dff00001c08,
};
 
/* Byte offset of each built-in routine inside gf100_builtin_code[]. */
uint64_t gf100_builtin_offsets[] = {
   0x0000, /* gf100_div_u32 */
   0x00b0, /* gf100_div_s32 */
   0x0180, /* gf100_rcp_f64 */
   0x0188, /* gf100_rsq_f64 */
};
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gk104.asm
0,0 → 1,711
.section #gk104_builtin_code
// DIV U32
//
// Unsigned 32-bit divide built-in: produces both quotient and remainder.
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1
// NOTE(review): the body below only writes $p0; $p1 appears unused - confirm.
// SIZE: 22 / 14 * 8 bytes
// NOTE(review): this routine is 26 instructions plus 4 "sched" lines; the
// SIZE line above may not account for the sched words - confirm.
//
// Each "sched" line carries 7 operands and is followed by 7 instruction
// slots; presumably one scheduling code per following instruction.
//
gk104_div_u32:
sched 0x28 0x4 0x28 0x4 0x28 0x28 0x28
// Seed z in $r2: bfind result of the divisor, xor 31, then 1 << that amount.
bfind u32 $r2 $r1
long xor b32 $r2 $r2 0x1f
long mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
// $r1 = -b, so the products below are (-b) * z.
long cvt u32 $r1 neg u32 $r1
// Five unrolled refinement steps: $r3 = (-b) * z, then z += hi32(z * $r3).
long mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
sched 0x4 0x28 0x4 0x28 0x28 0x2c 0x4
add $r2 (mul high u32 $r2 u32 $r3) $r2
// Keep the dividend in $r3, form the quotient estimate $r0 = hi32(a * z),
// restore b into $r2 and compute the remainder estimate
// $r1 = (-b) * $r0 + a.
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
long cvt u32 $r2 neg u32 $r1
long add $r1 (mul u32 $r1 u32 $r0) $r3
// Up to two correction steps: if the remainder is still >= b, subtract b and
// increment the quotient; the second compare only runs if the first fired.
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
sched 0x28 0x2c 0x4 0x20 0x2e 0x28 0x20
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
long ret
 
// DIV S32, like DIV U32 after taking ABS(inputs)
//
// Signed 32-bit divide built-in: records the operand signs, divides the
// absolute values with the same sequence as gk104_div_u32, then re-applies
// the signs to quotient and remainder.
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
// NOTE(review): the body below writes $p0, $p2 and $p3 only; $p1 appears
// unused - confirm.
//
// The first two instructions fill the last two slots of the sched group
// opened at the end of gk104_div_u32, which is why no "sched" line directly
// follows the label.
//
gk104_div_s32:
// $p2 = dividend is negative; $p3 = (divisor is negative) xor $p2, i.e. the
// operand signs differ.
set $p2 0x1 lt s32 $r0 0x0
set $p3 0x1 lt s32 $r1 0x0 xor $p2
sched 0x20 0x28 0x28 0x4 0x28 0x04 0x28
// Continue with |a| and |b|.
long cvt s32 $r0 abs s32 $r0
long cvt s32 $r1 abs s32 $r1
// Seed z in $r2 from the bfind result of the divisor (xor 31, then 1 << n).
bfind u32 $r2 $r1
long xor b32 $r2 $r2 0x1f
long mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
// $r1 = -b for the refinement products.
cvt u32 $r1 neg u32 $r1
sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
// Five unrolled refinement steps: $r3 = (-b) * z, then z += hi32(z * $r3).
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
sched 0x28 0x28 0x4 0x28 0x04 0x28 0x28
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
// Quotient estimate $r0 = hi32(a * z); $r2 = b; remainder estimate
// $r1 = (-b) * $r0 + a (dividend saved in $r3).
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
long cvt u32 $r2 neg u32 $r1
long add $r1 (mul u32 $r1 u32 $r0) $r3
sched 0x2c 0x04 0x28 0x2c 0x04 0x28 0x20
// Up to two correction steps, the second only if the first one fired.
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
long $p0 add b32 $r0 $r0 0x1
// Re-apply signs: negate the quotient when the operand signs differed ($p3)
// and the remainder when the dividend was negative ($p2).
long $p3 cvt s32 $r0 neg s32 $r0
sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c
$p2 cvt s32 $r1 neg s32 $r1
long ret
 
// SULDP [for each format]
// $r4d: address
// $r2: surface info (format)
// $p0: access predicate
// $p1, $p2: caching predicate (00: cv, 01: ca, 10: cg)
//
// Every format routine below opens with the same four-instruction pattern:
// a load predicated on $p1 (ca), "set $p1 0x1 $p1 xor not $p2" to recompute
// $p1, a load predicated on $p2 (cg) and a load predicated on the new $p1
// (cv). NOTE(review): presumably exactly one of the three loads executes for
// each of the encodings listed above - confirm against the caller.
// The routines have no labels; NOTE(review): how callers locate each format
// routine is not visible in this file - confirm.
//
// RGBA32
// Loads 128 bits into $r0q and returns them without conversion.
$p1 suldgb b128 $r0q ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b128 $r0q cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b128 $r0q cv zero u8 g[$r4d] $r2 $p0
long ret
// RGBA16_* and RG32_* routines. Each one issues the three predicated loads
// (ca / cg / cv, selected through $p1/$p2), unpacks the texel into
// $r0..$r3 and returns. The all-zero "sched" lines recur after every 7
// instructions regardless of routine boundaries, so they can fall before,
// inside or after a routine.
//
// RGBA16_UNORM
// NOTE(review): this variant loads b128 into $r0q, while the other RGBA16_*
// routines below load b64 into $r0d - confirm that b128 is intended.
// 0x37800074 is the f32 bit pattern of ~1.5259e-5 (about 1/65535).
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb b128 $r0q ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b128 $r0q cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b128 $r0q cv zero u8 g[$r4d] $r2 $p0
// Convert from the high registers down so that $r1 and $r0 are still intact
// when they are read as sources.
cvt rn f32 $r3 u16 1 $r1
cvt rn f32 $r2 u16 0 $r1
mul f32 $r3 $r3 0x37800074
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
cvt rn f32 $r1 u16 1 $r0
mul f32 $r2 $r2 0x37800074
cvt rn f32 $r0 u16 0 $r0
mul f32 $r1 $r1 0x37800074
mul f32 $r0 $r0 0x37800074
long ret
// RGBA16_SNORM
// 0x38000187 is the f32 bit pattern of ~3.0519e-5 (about 1/32767).
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
cvt rn f32 $r3 s16 1 $r1
cvt rn f32 $r2 s16 0 $r1
mul f32 $r3 $r3 0x38000187
cvt rn f32 $r1 s16 1 $r0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
mul f32 $r2 $r2 0x38000187
cvt rn f32 $r0 s16 0 $r0
mul f32 $r1 $r1 0x38000187
mul f32 $r0 $r0 0x38000187
long ret
// RGBA16_SINT
// Widens the four signed 16-bit halves of $r0d into $r0..$r3.
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
cvt s32 $r3 s16 1 $r1
cvt s32 $r2 s16 0 $r1
cvt s32 $r1 s16 1 $r0
cvt s32 $r0 s16 0 $r0
long ret
// RGBA16_UINT
// Widens the four unsigned 16-bit halves of $r0d into $r0..$r3.
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
cvt u32 $r3 u16 1 $r1
cvt u32 $r2 u16 0 $r1
cvt u32 $r1 u16 1 $r0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
cvt u32 $r0 u16 0 $r0
long ret
// RGBA16_FLOAT
// Converts the four f16 halves of $r0d to f32 in $r0..$r3.
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
cvt f32 $r3 f16 $r1 1
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
cvt f32 $r2 f16 $r1 0
cvt f32 $r1 f16 $r0 1
cvt f32 $r0 f16 $r0 0
long ret
// RG32_FLOAT
// Two loaded components stay in $r0/$r1; $r2 = 0 and $r3 = 0x3f800000 (1.0f).
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r2 0x00000000
long mov b32 $r3 0x3f800000
long ret
// RG32_xINT
// Two loaded components stay in $r0/$r1; $r2 = 0 and $r3 = integer 1.
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r2 0x00000000
long mov b32 $r3 0x00000001
long ret
// RGB10A2_* and RGBA8_* routines: 32 bits are loaded into $r0 with the usual
// three predicated loads (ca / cg / cv via $p1/$p2) and then unpacked into
// $r0..$r3. The all-zero "sched" lines recur after every 7 instructions
// regardless of routine boundaries.
//
// RGB10A2_UNORM
// "ext" operands 0x0a0a / 0x0a14 presumably encode width 10 at bit offsets
// 10 and 20 (TODO confirm encoding); the low field is masked with 0x3ff.
// 0x3a802007 is the f32 bit pattern of ~9.7752e-4 (about 1/1023).
// NOTE(review): $r3 is set to the constant 0x3f800000 (1.0f); the top two
// bits of the texel are not extracted - confirm this is intended.
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
ext u32 $r1 $r0 0x0a0a
long mov b32 $r3 0x3f800000
ext u32 $r2 $r0 0x0a14
long and b32 $r0 $r0 0x3ff
cvt rn f32 $r2 u16 0 $r2
cvt rn f32 $r1 u16 0 $r1
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
mul f32 $r2 $r2 0x3a802007
cvt rn f32 $r0 u16 0 $r0
mul f32 $r1 $r1 0x3a802007
mul f32 $r0 $r0 0x3a802007
long ret
// RGB10A2_UINT
// Same field extraction as RGB10A2_UNORM without the float conversion;
// $r3 is set to the constant integer 1.
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
ext u32 $r1 $r0 0x0a0a
long mov b32 $r3 0x00000001
ext u32 $r2 $r0 0x0a14
long and b32 $r0 $r0 0x3ff
long ret
// RGBA8_UNORM
// 0x3b808081 is the f32 bit pattern of ~0.0039216 (about 1/255).
// Bytes are converted from 3 down to 0 so $r0 is overwritten last.
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
cvt rn f32 $r3 u8 3 $r0
cvt rn f32 $r2 u8 2 $r0
mul f32 $r3 $r3 0x3b808081
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
cvt rn f32 $r1 u8 1 $r0
mul f32 $r2 $r2 0x3b808081
cvt rn f32 $r0 u8 0 $r0
mul f32 $r1 $r1 0x3b808081
mul f32 $r0 $r0 0x3b808081
long ret
// RGBA8_SNORM
// 0x3c010204 is the f32 bit pattern of ~0.0078740 (about 1/127).
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
cvt rn f32 $r3 s8 3 $r0
cvt rn f32 $r2 s8 2 $r0
mul f32 $r3 $r3 0x3c010204
cvt rn f32 $r1 s8 1 $r0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
mul f32 $r2 $r2 0x3c010204
cvt rn f32 $r0 s8 0 $r0
mul f32 $r1 $r1 0x3c010204
mul f32 $r0 $r0 0x3c010204
long ret
// RGBA8_SINT
// Widens the four signed bytes of $r0 into $r0..$r3.
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
cvt s32 $r3 s8 3 $r0
cvt s32 $r2 s8 2 $r0
cvt s32 $r1 s8 1 $r0
cvt s32 $r0 s8 0 $r0
long ret
// RGBA8_UINT
// Widens the four unsigned bytes of $r0 into $r0..$r3.
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
cvt u32 $r3 u8 3 $r0
cvt u32 $r2 u8 2 $r0
cvt u32 $r1 u8 1 $r0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
cvt u32 $r0 u8 0 $r0
long ret
// Packed 16-bit UNORM routines: 16 bits are loaded into $r0 with the usual
// three predicated loads (ca / cg / cv via $p1/$p2), the fields are split
// into $r0..$r2 and scaled to floats, and $r3 is set to 0x3f800000 (1.0f).
// The all-zero "sched" lines recur after every 7 instructions regardless of
// routine boundaries.
//
// R5G6B5_UNORM
// "ext" operands 0x0605 / 0x050b presumably encode a 6-bit field at offset 5
// and a 5-bit field at offset 11 (TODO confirm encoding); the low field is
// masked with 0x1f.
// 0x3d042108 is the f32 bit pattern of ~0.032258 (about 1/31) and
// 0x3c820821 of ~0.015873 (about 1/63), used for the field extracted into $r1.
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0
ext u32 $r1 $r0 0x0605
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
long mov b32 $r3 0x3f800000
ext u32 $r2 $r0 0x050b
long and b32 $r0 $r0 0x1f
cvt rn f32 $r2 u8 0 $r2
cvt rn f32 $r1 u8 0 $r1
mul f32 $r2 $r2 0x3d042108
cvt rn f32 $r0 u8 0 $r0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
mul f32 $r1 $r1 0x3c820821
mul f32 $r0 $r0 0x3d042108
long ret
// R5G5B5X1_UNORM
// "ext" operands 0x0505 / 0x050a presumably encode 5-bit fields at offsets 5
// and 10 (TODO confirm encoding); all three fields are scaled by
// 0x3d042108 (about 1/31).
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
ext u32 $r1 $r0 0x0505
ext u32 $r2 $r0 0x050a
long and b32 $r0 $r0 0x1f
long mov b32 $r3 0x3f800000
cvt rn f32 $r2 u8 0 $r2
cvt rn f32 $r1 u8 0 $r1
cvt rn f32 $r0 u8 0 $r0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
mul f32 $r2 $r2 0x3d042108
mul f32 $r1 $r1 0x3d042108
mul f32 $r0 $r0 0x3d042108
long ret
// RG16_* and R32_* routines: 32 bits are loaded into $r0 with the usual
// three predicated loads (ca / cg / cv via $p1/$p2). Components that the
// format does not carry are filled with constants: 0 for the missing colour
// channels and, in $r3, 0x3f800000 (1.0f) for float/normalized formats or
// integer 1 for integer formats. The all-zero "sched" lines recur after
// every 7 instructions regardless of routine boundaries.
//
// RG16_UNORM
// 0x37800074 is the f32 bit pattern of ~1.5259e-5 (about 1/65535).
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
cvt rn f32 $r1 u16 1 $r0
cvt rn f32 $r0 u16 0 $r0
mul f32 $r1 $r1 0x37800074
mul f32 $r0 $r0 0x37800074
long mov b32 $r2 0x00000000
long mov b32 $r3 0x3f800000
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
long ret
// RG16_SNORM
// 0x38000187 is the f32 bit pattern of ~3.0519e-5 (about 1/32767).
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
mov b32 $r3 0x3f800000
cvt rn f32 $r1 s16 1 $r0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
mov b32 $r2 0x00000000
cvt rn f32 $r0 s16 0 $r0
mul f32 $r1 $r1 0x38000187
mul f32 $r0 $r0 0x38000187
long ret
// RG16_SINT
// Widens the two signed 16-bit halves of $r0 into $r0/$r1.
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
mov b32 $r3 0x00000001
cvt s32 $r1 s16 1 $r0
mov b32 $r2 0x00000000
cvt s32 $r0 s16 0 $r0
long ret
// RG16_UINT
// Widens the two unsigned 16-bit halves of $r0 into $r0/$r1.
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
mov b32 $r3 0x00000001
cvt u32 $r1 u16 1 $r0
mov b32 $r2 0x00000000
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
cvt u32 $r0 u16 0 $r0
long ret
// RG16_FLOAT
// Converts the two f16 halves of $r0 to f32 in $r0/$r1.
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
mov b32 $r3 0x3f800000
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
cvt f32 $r1 f16 $r0 1
mov b32 $r2 0x00000000
cvt f32 $r0 f16 $r0 0
long ret
// R32_FLOAT
// The loaded word stays in $r0 unconverted; $r1 = $r2 = 0, $r3 = 1.0f.
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r3 0x3f800000
long mov b32 $r2 0x00000000
long mov b32 $r1 0x00000000
long ret
// R32_xINT
// The loaded word stays in $r0 unconverted; $r1 = $r2 = 0, $r3 = integer 1.
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r3 0x00000001
long mov b32 $r2 0x00000000
long mov b32 $r1 0x00000000
long ret
// RG8_* routines: 16 bits are loaded into $r0 with the usual three
// predicated loads (ca / cg / cv via $p1/$p2), the two bytes are unpacked
// into $r0/$r1, $r2 is set to 0 and $r3 to 0x3f800000 (1.0f) for normalized
// formats or integer 1 for integer formats. The all-zero "sched" lines
// recur after every 7 instructions regardless of routine boundaries.
//
// RG8_UNORM
// 0x3b808081 is the f32 bit pattern of ~0.0039216 (about 1/255).
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
set $p1 0x1 $p1 xor not $p2
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0
mov b32 $r3 0x3f800000
cvt rn f32 $r1 u8 1 $r0
mov b32 $r2 0x00000000
cvt rn f32 $r0 u8 0 $r0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
mul f32 $r1 $r1 0x3b808081
mul f32 $r0 $r0 0x3b808081
long ret
// RG8_SNORM
// 0x3c010204 is the f32 bit pattern of ~0.0078740 (about 1/127).
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
long mov b32 $r3 0x3f800000
cvt rn f32 $r1 s8 1 $r0
long mov b32 $r2 0x00000000
cvt rn f32 $r0 s8 0 $r0
mul f32 $r1 $r1 0x3c010204
mul f32 $r0 $r0 0x3c010204
long ret
// RG8_UINT
// Widens the two unsigned bytes of $r0 into $r0/$r1.
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r3 0x00000001
cvt u32 $r1 u8 1 $r0
long mov b32 $r2 0x00000000
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
cvt u32 $r0 u8 0 $r0
long ret
// RG8_SINT
// Widens the two signed bytes of $r0 into $r0/$r1.
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r3 0x00000001
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
cvt s32 $r1 s8 1 $r0
long mov b32 $r2 0x00000000
cvt s32 $r0 s8 0 $r0
long ret
// R16_* routines: 16 bits are loaded into $r0 with the usual three
// predicated loads (ca / cg / cv via $p1/$p2); $r1 and $r2 are set to 0 and
// $r3 to 0x3f800000 (1.0f) for float/normalized formats or integer 1 for
// integer formats. The all-zero "sched" lines recur after every 7
// instructions regardless of routine boundaries.
//
// R16_UNORM
// 0x37800074 is the f32 bit pattern of ~1.5259e-5 (about 1/65535).
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r3 0x3f800000
cvt rn f32 $r0 u16 0 $r0
long mov b32 $r2 0x00000000
long mov b32 $r1 0x00000000
mul f32 $r0 $r0 0x37800074
long ret
// R16_SNORM
// 0x38000187 is the f32 bit pattern of ~3.0519e-5 (about 1/32767).
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0
mov b32 $r3 0x3f800000
cvt rn f32 $r0 s16 0 $r0
long mov b32 $r2 0x00000000
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
long mov b32 $r1 0x00000000
mul f32 $r0 $r0 0x38000187
long ret
// R16_SINT
// Uses an s16 load and no cvt afterwards; presumably the load itself
// sign-extends into $r0 (TODO confirm).
$p1 suldgb s16 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb s16 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb s16 $r0 cv zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
long mov b32 $r3 0x00000001
long mov b32 $r2 0x00000000
long mov b32 $r1 0x00000000
long ret
// R16_UINT
// Uses a u16 load and no cvt afterwards.
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r3 0x00000001
long mov b32 $r2 0x00000000
long mov b32 $r1 0x00000000
long ret
// R16_FLOAT
// Converts the low f16 half of $r0 to f32 in place.
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r3 0x3f800000
long mov b32 $r2 0x00000000
cvt f32 $r0 f16 $r0 0
mov b32 $r1 0x00000000
long ret
// R8_* routines and the R11G11B10_FLOAT placeholder. For R8_*, 8 bits are
// loaded into $r0 with the usual three predicated loads (ca / cg / cv via
// $p1/$p2); $r1 and $r2 are set to 0 and $r3 to 0x3f800000 (1.0f) for
// normalized formats or integer 1 for integer formats. The all-zero "sched"
// lines recur after every 7 instructions regardless of routine boundaries.
//
// R8_UNORM
// 0x3b808081 is the f32 bit pattern of ~0.0039216 (about 1/255).
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0
mov b32 $r3 0x3f800000
cvt rn f32 $r0 u8 0 $r0
mov b32 $r2 0x00000000
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
mul f32 $r0 $r0 0x3b808081
mov b32 $r1 0x00000000
long ret
// R8_SNORM
// Loaded as u8, then converted from the low byte as s8.
// 0x3c010204 is the f32 bit pattern of ~0.0078740 (about 1/127).
$p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
mov b32 $r3 0x3f800000
cvt rn f32 $r0 s8 0 $r0
mov b32 $r2 0x00000000
mul f32 $r0 $r0 0x3c010204
mov b32 $r1 0x00000000
long ret
// R8_SINT
// Uses an s8 load and no cvt afterwards; presumably the load itself
// sign-extends into $r0 (TODO confirm).
$p1 suldgb s8 $r0 ca zero u8 g[$r4d] $r2 $p0
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
set $p1 0x1 $p1 xor not $p2
$p2 suldgb s8 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb s8 $r0 cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r3 0x00000001
long mov b32 $r2 0x00000000
long mov b32 $r1 0x00000000
long ret
// R8_UINT
// Uses a u8 load and no cvt afterwards.
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
$p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r3 0x00000001
long mov b32 $r2 0x00000000
long mov b32 $r1 0x00000000
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00
long ret
// R11G11B10_FLOAT TODO
// NOTE(review): placeholder only. The texel is loaded into $r3, which is
// then overwritten with 0x3f800000 (1.0f); $r0..$r2 are never written, so no
// colour channels are decoded here.
$p1 suldgb b32 $r3 ca zero u8 g[$r4d] $r2 $p0
set $p1 0x1 $p1 xor not $p2
$p2 suldgb b32 $r3 cg zero u8 g[$r4d] $r2 $p0
$p1 suldgb b32 $r3 cv zero u8 g[$r4d] $r2 $p0
long mov b32 $r3 0x3f800000
long nop
long ret
 
 
// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
//
// INPUT: $r0d (x)
// OUTPUT: $r0d (rcp(x))
// CLOBBER: $r2 - $r7
// SIZE: 9 * 8 bytes
//
// NOTE(review): the body below is only "nop; ret" - the Newton-Raphson
// iteration described above is not implemented here, so $r0d is returned
// unchanged and the CLOBBER/SIZE lines do not describe this body.
//
gk104_rcp_f64:
long nop
long ret
 
// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
//
// INPUT: $r0d (x)
// OUTPUT: $r0d (rsqrt(x))
// CLOBBER: $r2 - $r7
// SIZE: 14 * 8 bytes
//
// NOTE(review): the body below is only "nop; ret" - the Newton-Raphson
// iteration described above is not implemented here, so $r0d is returned
// unchanged and the CLOBBER/SIZE lines do not describe this body.
//
gk104_rsq_f64:
long nop
long ret
 
//
// Trap handler.
// Requires at least 4 GPRs and 32 bytes of l[] memory to temporarily save GPRs.
// Low 32 bytes of l[] memory shouldn't be used if resumability is required.
//
// Flow: save $r0q and $flags, decide whether this warp caused the trap, and
// either resume it (end_cont) or dump its state to the trap info buffer and
// terminate (end_exit).
//
// NOTE(review): unlike the routines earlier in this file, this section has
// no "sched" lines and no entry label, and it is not listed in
// #gk104_builtin_offsets - confirm how (and whether) it is assembled and
// reached.
//
// Trap info:
// 0x000: mutex
// 0x004: PC
// 0x008: trapstat
// 0x00c: warperr
// 0x010: tidx
// 0x014: tidy
// 0x018: tidz
// 0x01c: ctaidx
// 0x020: ctaidy
// 0x024: ctaidz
// 0x030: $r0q
// 0x130: $flags
// 0x140: s[]
//
// Free up $r0..$r3 as scratch by parking them in local memory.
st b128 wb l[0x00] $r0q
// check state of the warp and continue if it didn't cause the trap
long mov b32 $r1 $trapstat
long mov b32 $r3 $warperr
// Snapshot $flags into $r2 before the "and" below updates $c.
mov $r2 $flags mask 0xffff
and b32 0 $c $r1 $r3
e $c bra #end_cont
// spill control flow stack to l[]
// (16 preret pushes, counted down in $r3)
long mov b32 $r3 16
spill_cfstack:
preret #end_exit
sub b32 $r3 $c $r3 0x1
lg $c bra #spill_cfstack
// retrieve pointer to trap info
mov b32 $r0 c0[0x1900]
mov b32 $r1 c0[0x1904]
// we only let a single faulting thread store its state
// (atomic exchange of 1 into the mutex word at offset 0x000; the thread
// tested by $p0 below is the one that read back 1)
mov b32 $r3 0x1
exch b32 $r3 g[$r0d] $r3
joinat #end_exit
set $p0 0x1 eq u32 $r3 0x1
join $p0 nop
// store $c and $p registers
st b32 wb g[$r0d+0x130] $r2
// store $trapstat and $warperr
long mov b32 $r2 $trapstat
long mov b32 $r3 $warperr
st b64 wb g[$r0d+0x8] $r2d
// store registers
// ($r4..$r63 directly; $r0..$r3 are copied from their l[] save slot below)
st b128 wb g[$r0d+0x40] $r4q
st b128 wb g[$r0d+0x50] $r8q
st b128 wb g[$r0d+0x60] $r12q
st b128 wb g[$r0d+0x70] $r16q
st b128 wb g[$r0d+0x80] $r20q
st b128 wb g[$r0d+0x90] $r24q
st b128 wb g[$r0d+0xa0] $r28q
st b128 wb g[$r0d+0xb0] $r32q
st b128 wb g[$r0d+0xc0] $r36q
st b128 wb g[$r0d+0xd0] $r40q
st b128 wb g[$r0d+0xe0] $r44q
st b128 wb g[$r0d+0xf0] $r48q
st b128 wb g[$r0d+0x100] $r52q
st b128 wb g[$r0d+0x110] $r56q
st b128 wb g[$r0d+0x120] $r60q
ld b64 $r2d cs l[0x0]
st b64 wb g[$r0d+0x30] $r2d
ld b64 $r2d cs l[0x8]
st b64 wb g[$r0d+0x38] $r2d
// store thread id
long mov b32 $r2 $tidx
long mov b32 $r3 $tidy
st b64 wb g[$r0d+0x10] $r2d
long mov b32 $r2 $tidz
long mov b32 $r3 $ctaidx
st b64 wb g[$r0d+0x18] $r2d
long mov b32 $r2 $ctaidy
long mov b32 $r3 $ctaidz
st b64 wb g[$r0d+0x20] $r2d
// store shared memory (in reverse order so $r0d is base again at the end)
// $r3 starts at $smemsz - 4; the 64-bit pointer $r0d is advanced by $r3
// (with carry into $r1) and both are stepped down by 4 per word copied.
long mov b32 $r3 $smemsz
sub b32 $r3 $c $r3 0x4
s $c bra #shared_done
add b32 $r0 $c $r0 $r3
add b32 $r1 $r1 0x0 $c
shared_loop:
long ld b32 $r2 s[$r3]
long st b32 wb g[$r0d+0x140] $r2
sub b32 $r0 $c $r0 0x4
sub b32 $r1 $r1 0x0 $c
sub b32 $r3 $c $r3 0x4
lg $c bra #shared_loop
shared_done:
// search the stack for trap entry to retrieve PC
mov b32 $r0 c0[0x1908]
mov b32 $r1 c0[0x190c]
membar sys
// invalidate caches so we can read stack entries via g[]
cctl ivall 0 l[0]
cctl ivall 0 g[$r0d]
// get offsets
mov b32 $r2 $physid
ext u32 $r3 $r2 0x0814 // MP id
ext u32 $r2 $r2 0x0608 // warp id
mul $r2 u32 $r2 u32 c0[0x1914] // warp offset
mul $r3 u32 $r3 u32 c0[0x1910] // MP offset
add b32 $r2 $r2 $r3 // MP + warp offset
add b32 $r0 $c $r0 $r2
add b32 $r1 $r1 0x0 $c
// Scan 16-byte entries for one whose byte at +0x8 equals 0xa.
// NOTE(review): the cstack size is reloaded into $r3 at the top of every
// iteration, so the "sub ... 0x10" at the bottom never accumulates across
// iterations; the load probably belongs above the label - confirm.
search_cstack:
mov b32 $r3 c0[0x1918] // cstack size
ld u8 $r2 cv g[$r0d+0x8]
set $p0 0x1 eq u32 $r2 0xa
$p0 bra #entry_found
add b32 $r0 $c $r0 0x10
add b32 $r1 $r1 0x0 $c
sub b32 $r3 $c $r3 0x10
lg $c bra #search_cstack
bra #end_exit
entry_found:
// load PC (may be unaligned and spread out)
ld b32 $r2 cv g[$r0d]
// Reload the trap info pointer and store the value at offset 0x4 (PC).
mov b32 $r0 c0[0x1900]
mov b32 $r1 c0[0x1904]
st b32 wb g[$r0d+0x4] $r2
join nop
// invalidate caches and exit
end_exit:
cctl ivall 0 g[0]
bpt pause 0x0
rtt terminate
// Resume path for warps that did not cause the trap: restore $flags and
// $r0q from the values saved at entry, then return from the trap.
end_cont:
bpt pause 0x0
mov $flags $r2 mask 0xffff
ld b128 $r0q cs l[0x00]
rtt
 
// Offset table: one 64-bit entry per labelled built-in, holding the address
// of its label. The entry order (div_u32, div_s32, rcp_f64, rsq_f64) is the
// index by which consumers of this table must look the routines up. The
// unlabelled SULDP routines and the trap handler have no entry here.
.section #gk104_builtin_offsets
.b64 #gk104_div_u32
.b64 #gk104_div_s32
.b64 #gk104_rcp_f64
.b64 #gk104_rsq_f64
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gk104.asm.h
0,0 → 1,598
uint64_t gk104_builtin_code[] = {
/* 0x0000: gk104_div_u32 */
0x2282828042804287,
0x7800000004009c03,
0x380000007c209c82,
0x180000000400dde2,
0x6000000008309c03,
0x1c00000005205d04,
0x500000000810dc03,
0x200400000c209c43,
0x2282828282828287,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x2042c28280428047,
0x200400000c209c43,
0x280000000000dde4,
0x5000000008001c43,
0x1c00000005209d04,
0x2006000000105c03,
0x1b0e00000811dc03,
0x4800000008104103,
0x220282e20042c287,
0x0800000004000002,
0x1b0e00000811c003,
0x4800000008104103,
0x0800000004000002,
0x9000000000001de7,
/* 0x00f0: gk104_div_s32 */
0x188e0000fc05dc23,
0x18c40000fc17dc23,
0x2280428042828207,
0x1c00000001201ec4,
0x1c00000005205ec4,
0x7800000004009c03,
0x380000007c209c82,
0x180000000400dde2,
0x6000000008309c03,
0x1c00000005205d04,
0x2282828282828287,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x500000000810dc03,
0x2282804280428287,
0x200400000c209c43,
0x500000000810dc03,
0x200400000c209c43,
0x280000000000dde4,
0x5000000008001c43,
0x1c00000005209d04,
0x2006000000105c03,
0x22028042c28042c7,
0x1b0e00000811dc03,
0x4800000008104103,
0x0800000004000002,
0x1b0e00000811c003,
0x4800000008104103,
0x0800000004000002,
0x1c00000001200f84,
0x22c200428042e047,
0x1c00000005204b84,
0x9000000000001de7,
0xd4004000084004c5,
0x0c5400000013dc04,
0xd4004000084009c5,
0xd4004000084007c5,
0x9000000000001de7,
0x2000000000000007,
0xd4004000084004c5,
0x0c5400000013dc04,
0xd4004000084009c5,
0xd4004000084007c5,
0x1900000004a0dc04,
0x1800000004a09c04,
0x30de0001d030dc02,
0x2000000000000007,
0x1900000000a05c04,
0x30de0001d0209c02,
0x1800000000a01c04,
0x30de0001d0105c02,
0x30de0001d0001c02,
0x9000000000001de7,
0xd4004000084004a5,
0x2000000000000007,
0x0c5400000013dc04,
0xd4004000084009a5,
0xd4004000084007a5,
0x1900000004a0de04,
0x1800000004a09e04,
0x30e000061c30dc02,
0x1900000000a05e04,
0x2000000000000007,
0x30e000061c209c02,
0x1800000000a01e04,
0x30e000061c105c02,
0x30e000061c001c02,
0x9000000000001de7,
0xd4004000084004a5,
0x0c5400000013dc04,
0x2000000000000007,
0xd4004000084009a5,
0xd4004000084007a5,
0x1d00000004a0de84,
0x1c00000004a09e84,
0x1d00000000a05e84,
0x1c00000000a01e84,
0x9000000000001de7,
0x2000000000000007,
0xd4004000084004a5,
0x0c5400000013dc04,
0xd4004000084009a5,
0xd4004000084007a5,
0x1d00000004a0dc04,
0x1c00000004a09c04,
0x1d00000000a05c04,
0x2000000000000007,
0x1c00000000a01c04,
0x9000000000001de7,
0xd4004000084004a5,
0x0c5400000013dc04,
0xd4004000084009a5,
0xd4004000084007a5,
0x1100000004a0dc04,
0x2000000000000007,
0x1000000004a09c04,
0x1100000000a05c04,
0x1000000000a01c04,
0x9000000000001de7,
0xd4004000084004a5,
0x0c5400000013dc04,
0xd4004000084009a5,
0x2000000000000007,
0xd4004000084007a5,
0x1800000000009de2,
0x18fe00000000dde2,
0x9000000000001de7,
0xd4004000084004a5,
0x0c5400000013dc04,
0xd4004000084009a5,
0x2000000000000007,
0xd4004000084007a5,
0x1800000000009de2,
0x180000000400dde2,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0x2000000000000007,
0xd400400008400785,
0x7000c02828005c03,
0x18fe00000000dde2,
0x7000c02850009c03,
0x3800000ffc001c02,
0x1800000008a09c04,
0x1800000004a05c04,
0x2000000000000007,
0x30ea00801c209c02,
0x1800000000a01c04,
0x30ea00801c105c02,
0x30ea00801c001c02,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0x2000000000000007,
0xd400400008400985,
0xd400400008400785,
0x7000c02828005c03,
0x180000000400dde2,
0x7000c02850009c03,
0x3800000ffc001c02,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0xd400400008400785,
0x198000000020dc04,
0x1900000000209c04,
0x30ee02020430dc02,
0x2000000000000007,
0x1880000000205c04,
0x30ee020204209c02,
0x1800000000201c04,
0x30ee020204105c02,
0x30ee020204001c02,
0x9000000000001de7,
0xd400400008400485,
0x2000000000000007,
0x0c5400000013dc04,
0xd400400008400985,
0xd400400008400785,
0x198000000020de04,
0x1900000000209e04,
0x30f004081030dc02,
0x1880000000205e04,
0x2000000000000007,
0x30f0040810209c02,
0x1800000000201e04,
0x30f0040810105c02,
0x30f0040810001c02,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0x2000000000000007,
0xd400400008400985,
0xd400400008400785,
0x1d8000000020de84,
0x1d00000000209e84,
0x1c80000000205e84,
0x1c00000000201e84,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0xd400400008400785,
0x1d8000000020dc04,
0x1d00000000209c04,
0x1c80000000205c04,
0x2000000000000007,
0x1c00000000201c04,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x7000c01814005c03,
0x2000000000000007,
0x18fe00000000dde2,
0x7000c0142c009c03,
0x380000007c001c02,
0x1800000008209c04,
0x1800000004205c04,
0x30f4108420209c02,
0x1800000000201c04,
0x2000000000000007,
0x30f2082084105c02,
0x30f4108420001c02,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x2000000000000007,
0x7000c01414005c03,
0x7000c01428009c03,
0x380000007c001c02,
0x18fe00000000dde2,
0x1800000008209c04,
0x1800000004205c04,
0x1800000000201c04,
0x2000000000000007,
0x30f4108420209c02,
0x30f4108420105c02,
0x30f4108420001c02,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0x2000000000000007,
0xd400400008400785,
0x1900000000a05c04,
0x1800000000a01c04,
0x30de0001d0105c02,
0x30de0001d0001c02,
0x1800000000009de2,
0x18fe00000000dde2,
0x2000000000000007,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0xd400400008400785,
0x18fe00000000dde2,
0x1900000000a05e04,
0x2000000000000007,
0x1800000000009de2,
0x1800000000a01e04,
0x30e000061c105c02,
0x30e000061c001c02,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0x2000000000000007,
0xd400400008400985,
0xd400400008400785,
0x180000000400dde2,
0x1d00000000a05e84,
0x1800000000009de2,
0x1c00000000a01e84,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0xd400400008400785,
0x180000000400dde2,
0x1d00000000a05c04,
0x1800000000009de2,
0x2000000000000007,
0x1c00000000a01c04,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0xd400400008400785,
0x18fe00000000dde2,
0x2000000000000007,
0x1100000000a05c04,
0x1800000000009de2,
0x1000000000a01c04,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0xd400400008400985,
0x2000000000000007,
0xd400400008400785,
0x18fe00000000dde2,
0x1800000000009de2,
0x1800000000005de2,
0x9000000000001de7,
0xd400400008400485,
0x0c5400000013dc04,
0x2000000000000007,
0xd400400008400985,
0xd400400008400785,
0x180000000400dde2,
0x1800000000009de2,
0x1800000000005de2,
0x9000000000001de7,
0xd400400008400445,
0x2000000000000007,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x18fe00000000dde2,
0x1880000000205c04,
0x1800000000009de2,
0x1800000000201c04,
0x2000000000000007,
0x30ee020204105c02,
0x30ee020204001c02,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x2000000000000007,
0x18fe00000000dde2,
0x1880000000205e04,
0x1800000000009de2,
0x1800000000201e04,
0x30f0040810105c02,
0x30f0040810001c02,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x180000000400dde2,
0x1c80000000205c04,
0x1800000000009de2,
0x2000000000000007,
0x1c00000000201c04,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x180000000400dde2,
0x2000000000000007,
0x1c80000000205e84,
0x1800000000009de2,
0x1c00000000201e84,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0x2000000000000007,
0xd400400008400745,
0x18fe00000000dde2,
0x1800000000a01c04,
0x1800000000009de2,
0x1800000000005de2,
0x30de0001d0001c02,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0xd400400008400745,
0x18fe00000000dde2,
0x1800000000a01e04,
0x1800000000009de2,
0x2000000000000007,
0x1800000000005de2,
0x30e000061c001c02,
0x9000000000001de7,
0xd400400008400465,
0x0c5400000013dc04,
0xd400400008400965,
0xd400400008400765,
0x2000000000000007,
0x180000000400dde2,
0x1800000000009de2,
0x1800000000005de2,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0xd400400008400945,
0x2000000000000007,
0xd400400008400745,
0x180000000400dde2,
0x1800000000009de2,
0x1800000000005de2,
0x9000000000001de7,
0xd400400008400445,
0x0c5400000013dc04,
0x2000000000000007,
0xd400400008400945,
0xd400400008400745,
0x18fe00000000dde2,
0x1800000000009de2,
0x1000000000a01c04,
0x1800000000005de2,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400405,
0x0c5400000013dc04,
0xd400400008400905,
0xd400400008400705,
0x18fe00000000dde2,
0x1800000000201c04,
0x1800000000009de2,
0x2000000000000007,
0x30ee020204001c02,
0x1800000000005de2,
0x9000000000001de7,
0xd400400008400405,
0x0c5400000013dc04,
0xd400400008400905,
0xd400400008400705,
0x2000000000000007,
0x18fe00000000dde2,
0x1800000000201e04,
0x1800000000009de2,
0x30f0040810001c02,
0x1800000000005de2,
0x9000000000001de7,
0xd400400008400425,
0x2000000000000007,
0x0c5400000013dc04,
0xd400400008400925,
0xd400400008400725,
0x180000000400dde2,
0x1800000000009de2,
0x1800000000005de2,
0x9000000000001de7,
0x2000000000000007,
0xd400400008400405,
0x0c5400000013dc04,
0xd400400008400905,
0xd400400008400705,
0x180000000400dde2,
0x1800000000009de2,
0x1800000000005de2,
0x2000000000000007,
0x9000000000001de7,
0xd40040000840c485,
0x0c5400000013dc04,
0xd40040000840c985,
0xd40040000840c785,
0x18fe00000000dde2,
0x4000000000001de4,
0x9000000000001de7,
/* 0x0f08: gk104_rcp_f64 */
0x4000000000001de4,
0x9000000000001de7,
/* 0x0f18: gk104_rsq_f64 */
0x4000000000001de4,
0x9000000000001de7,
0xc800000003f01cc5,
0x2c00000100005c04,
0x2c0000010800dc04,
0x3000c3fffff09c04,
0x680100000c1fdc03,
0x4000000a60001c47,
0x180000004000dde2,
/* 0x0f60: spill_cfstack */
0x78000009c0000007,
0x0c0000000430dd02,
0x4003ffffa0001ca7,
0x2800406400001de4,
0x2800406410005de4,
0x180000000400dde2,
0x547e18000000dd05,
0x60000008e0000007,
0x190ec0000431dc03,
0x40000000000001f4,
0x94000004c0009c85,
0x2c00000100009c04,
0x2c0000010800dc04,
0x9400000020009ca5,
0x9400000100011cc5,
0x9400000140021cc5,
0x9400000180031cc5,
0x94000001c0041cc5,
0x9400000200051cc5,
0x9400000240061cc5,
0x9400000280071cc5,
0x94000002c0081cc5,
0x9400000300091cc5,
0x94000003400a1cc5,
0x94000003800b1cc5,
0x94000003c00c1cc5,
0x94000004000d1cc5,
0x94000004400e1cc5,
0x94000004800f1cc5,
0xc000000003f09ea5,
0x94000000c0009ca5,
0xc000000023f09ea5,
0x94000000e0009ca5,
0x2c00000084009c04,
0x2c0000008800dc04,
0x9400000040009ca5,
0x2c0000008c009c04,
0x2c0000009400dc04,
0x9400000060009ca5,
0x2c00000098009c04,
0x2c0000009c00dc04,
0x9400000080009ca5,
0x2c000000c800dc04,
0x0c0000001030dd02,
0x4000000100001ea7,
0x480100000c001c03,
0x0800000000105c42,
/* 0x10d8: shared_loop */
0xc100000000309c85,
0x9400000500009c85,
0x0c00000010001d02,
0x0800000000105d42,
0x0c0000001030dd02,
0x4003ffff40001ca7,
/* 0x1108: shared_done */
0x2800406420001de4,
0x2800406430005de4,
0xe000000000001c45,
0xd000000003ffdcc5,
0x9c000000000fdcc5,
0x2c0000000c009c04,
0x7000c0205020dc03,
0x7000c01820209c03,
0x5000406450209c03,
0x500040644030dc03,
0x480000000c209c03,
0x4801000008001c03,
0x0800000000105c42,
/* 0x1170: search_cstack */
0x280040646000dde4,
0x8400000020009f05,
0x190ec0002821dc03,
0x40000000800001e7,
0x0c00000040001c02,
0x0800000000105c42,
0x0c0000004030dd02,
0x00029dff0ffc5cbf,
/* 0x11b0: entry_found */
0x8400000000009f85,
0x2800406400001de4,
0x2800406410005de4,
0x9400000010009c85,
0x4000000000001df4,
/* 0x11d8: end_exit */
0x9800000003ffdcc5,
0xd000000000008007,
0xa000000000004007,
/* 0x11f0: end_cont */
0xd000000000008007,
0x3400c3fffc201c04,
0xc000000003f01ec5,
0xa000000000000007,
};
 
/*
 * Byte offsets of the builtin routines inside gk104_builtin_code[].
 * The last two match the label comments in that array (gk104_rcp_f64 and
 * gk104_rsq_f64); the first two are presumably div_u32 and div_s32, in the
 * same order as the other chips' offset tables -- confirm against gk104.asm.
 */
uint64_t gk104_builtin_offsets[] = {
0x0000000000000000,
0x00000000000000f0,
0x0000000000000f08,
0x0000000000000f18,
};
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gk110.asm
0,0 → 1,98
.section #gk110_builtin_code
// DIV U32
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1
// (only $p0 is written by the code below)
// SIZE: 30 * 8 bytes (26 instructions + 4 sched words); this matches the
// 0x00f0 offset of gk110_div_s32 in gk110.asm.h
//
// Each sched line carries one value for each of the 7 instructions that
// follow it. The last sched line here has only 5 instructions left in this
// routine, so its remaining 2 values apply to the first two instructions of
// gk110_div_s32, which must therefore stay directly behind this routine.
//
gk110_div_u32:
sched 0x28 0x04 0x28 0x04 0x28 0x28 0x28
// initial estimate: z = 1 << (31 - index of the highest set bit of b)
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
// $r1 = -b, so each step below computes z += hi(z * (-b * z))
cvt u32 $r1 neg u32 $r1
// refinement step 1 of 5
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
// refinement steps 2 to 5
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
sched 0x04 0x28 0x04 0x28 0x28 0x2c 0x04
add $r2 (mul high u32 $r2 u32 $r3) $r2
// keep the dividend in $r3, then q = hi(a * z)
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
// $r2 = b again (negation of -b); $r1 = a - q * b is the remainder estimate
cvt u32 $r2 neg u32 $r1
add $r1 (mul u32 $r1 u32 $r0) $r3
// up to two corrections: while remainder >= b, subtract b and bump q
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
sched 0x28 0x2c 0x04 0x20 0x2e 0x28 0x20
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
ret
 
// DIV S32, like DIV U32 after taking ABS(inputs)
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
//
// $p2 records "dividend is negative", $p3 records "signs differ"; they are
// applied to the remainder and the quotient respectively at the very end.
// The two instructions before the first sched line below get their
// scheduling values from the last sched line of the preceding routine
// (gk110_div_u32 in this file).
//
gk110_div_s32:
set $p2 0x1 lt s32 $r0 0x0
set $p3 0x1 lt s32 $r1 0x0 xor $p2
sched 0x20 0x28 0x28 0x04 0x28 0x04 0x28
// work on magnitudes from here on
cvt s32 $r0 abs s32 $r0
cvt s32 $r1 abs s32 $r1
// initial estimate: z = 1 << (31 - index of the highest set bit of |b|)
bfind u32 $r2 $r1
xor b32 $r2 $r2 0x1f
mov b32 $r3 0x1
shl b32 $r2 $r3 clamp $r2
// $r1 = -|b| for the refinement steps
cvt u32 $r1 neg u32 $r1
sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28
// five refinement steps: z += hi(z * (-|b| * z))
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
sched 0x28 0x28 0x04 0x28 0x04 0x28 0x28
add $r2 (mul high u32 $r2 u32 $r3) $r2
mul $r3 u32 $r1 u32 $r2
add $r2 (mul high u32 $r2 u32 $r3) $r2
// keep |a| in $r3, then q = hi(|a| * z)
mov b32 $r3 $r0
mul high $r0 u32 $r0 u32 $r2
// $r2 = |b|; $r1 = |a| - q * |b| is the remainder estimate
cvt u32 $r2 neg u32 $r1
add $r1 (mul u32 $r1 u32 $r0) $r3
sched 0x2c 0x04 0x28 0x2c 0x04 0x28 0x20
// up to two corrections: while remainder >= |b|, subtract |b| and bump q
set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
$p0 set $p0 0x1 ge u32 $r1 $r2
$p0 sub b32 $r1 $r1 $r2
$p0 add b32 $r0 $r0 0x1
// restore signs: quotient negated when signs differed, remainder follows
// the sign of the dividend
$p3 cvt s32 $r0 neg s32 $r0
sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c
$p2 cvt s32 $r1 neg s32 $r1
ret
 
// Both f64 entry points share this single ret and do no work; they resolve
// to the same offset (0x0218 in gk110.asm.h).
gk110_rcp_f64:
gk110_rsq_f64:
ret
 
// Table of routine start offsets, one 64-bit entry per builtin, emitted as
// gk110_builtin_offsets[] in gk110.asm.h in this order.
.section #gk110_builtin_offsets
.b64 #gk110_div_u32
.b64 #gk110_div_s32
.b64 #gk110_rcp_f64
.b64 #gk110_rsq_f64
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gk110.asm.h
0,0 → 1,81
/*
 * Assembled form of gk110.asm (built by the lib/Makefile rule that runs
 * envyas with -mgk110). One 64-bit word per instruction or sched line; the
 * label comments give byte offsets into this array.
 */
uint64_t gk110_builtin_code[] = {
/* 0x0000: gk110_div_u32 */
0x08a0a0a010a010a0,
0xe1800000009c000a,
0x220000000f9c0808,
0x74000000009fc00e,
0xe2400000011c0c0a,
0xe6010000009c2806,
0xe1c00000011c040e,
0xd2000800019c080a,
0x08a0a0a0a0a0a0a0,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe1c00000011c040e,
0x0810b0a0a010a010,
0xd2000800019c080a,
0xe4c03c00001c000e,
0xe1c00400011c0002,
0xe6010000009c280a,
0xd0000c00001c0406,
0xdb601c00011c041e,
0xe088000001000406,
0x0880a0b88010b0a0,
0x4000000000800001,
0xdb601c000100041e,
0xe088000001000406,
0x4000000000800001,
0x19000000001c003c,
/* 0x00f0: gk110_div_s32 */
0xdb181c007f9c005e,
0xdb1a08007f9c047e,
0x08a010a010a0a080,
0xe6100000001ce802,
0xe6100000009ce806,
0xe1800000009c000a,
0x220000000f9c0808,
0x74000000009fc00e,
0xe2400000011c0c0a,
0xe6010000009c2806,
0x08a0a0a0a0a0a0a0,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe1c00000011c040e,
0x08a0a010a010a0a0,
0xd2000800019c080a,
0xe1c00000011c040e,
0xd2000800019c080a,
0xe4c03c00001c000e,
0xe1c00400011c0002,
0xe6010000009c280a,
0xd0000c00001c0406,
0x0880a010b0a010b0,
0xdb601c00011c041e,
0xe088000001000406,
0x4000000000800001,
0xdb601c000100041e,
0xe088000001000406,
0x4000000000800001,
0xe6010000000ce802,
0x08b08010a010b810,
0xe60100000088e806,
0x19000000001c003c,
/* 0x0218: gk110_rcp_f64 */
/* 0x0218: gk110_rsq_f64 */
0x19000000001c003c,
};
 
/*
 * Byte offsets into gk110_builtin_code[], in the order listed by the
 * #gk110_builtin_offsets section of gk110.asm: div_u32, div_s32, rcp_f64,
 * rsq_f64. The last two are equal because both labels sit on the same ret.
 */
uint64_t gk110_builtin_offsets[] = {
0x0000000000000000,
0x00000000000000f0,
0x0000000000000218,
0x0000000000000218,
};
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gm107.asm
0,0 → 1,115
.section #gm107_builtin_code
// DIV U32
//
// UNR recurrence (q = a / b):
// look for z such that 2^32 - b <= b * z < 2^32
// then q - 1 <= (a * z) / 2^32 <= q
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p1
// (only $p0 is written by the code below)
// SIZE: 36 * 8 bytes (27 instructions + 9 sched words); this matches the
// 0x0120 offset of gm107_div_s32 in gm107.asm.h
//
// Each sched line carries one value for each of the 3 instructions that
// follow it.
//
gm107_div_u32:
sched 0x7e0 0x7e0 0x7e0
// initial estimate: z = 1 << (31 - index of the highest set bit of b)
flo u32 $r2 $r1
lop xor 1 $r2 $r2 0x1f
mov $r3 0x1 0xf
sched 0x7e0 0x7e0 0x7e0
shl $r2 $r3 $r2
// $r1 = -b, so each step below computes z += hi(z * (-b * z))
i2i u32 u32 $r1 neg $r1
// five refinement steps (imul + imad hi pairs)
imul u32 u32 $r3 $r1 $r2
sched 0x7e0 0x7e0 0x7e0
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
sched 0x7e0 0x7e0 0x7e0
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
sched 0x7e0 0x7e0 0x7e0
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
sched 0x7e0 0x7e0 0x7e0
// keep the dividend in $r3, then q = hi(a * z)
mov $r3 $r0 0xf
imul u32 u32 hi $r0 $r0 $r2
// $r2 = b again (negation of -b)
i2i u32 u32 $r2 neg $r1
sched 0x7e0 0x7e0 0x7e0
// $r1 = a - q * b is the remainder estimate
imad u32 u32 $r1 $r1 $r0 $r3
// up to two corrections: while remainder >= b, subtract b and bump q
isetp ge u32 and $p0 1 $r1 $r2 1
$p0 iadd $r1 $r1 neg $r2
sched 0x7e0 0x7e0 0x7e0
$p0 iadd $r0 $r0 0x1
$p0 isetp ge u32 and $p0 1 $r1 $r2 1
$p0 iadd $r1 $r1 neg $r2
sched 0x7e0 0x7e0 0x7e0
$p0 iadd $r0 $r0 0x1
ret
// presumably padding so the final sched group has its 3 instructions
nop 0
 
// DIV S32, like DIV U32 after taking ABS(inputs)
//
// INPUT: $r0: dividend, $r1: divisor
// OUTPUT: $r0: result, $r1: modulus
// CLOBBER: $r2 - $r3, $p0 - $p3
//
// $p2 records "dividend is negative", $p3 records "signs differ"; they are
// applied to the remainder and the quotient respectively at the very end.
// Each sched line carries one value for each of the 3 instructions that
// follow it.
//
gm107_div_s32:
sched 0x7e0 0x7e0 0x7e0
isetp lt and $p2 0x1 $r0 0 1
isetp lt xor $p3 1 $r1 0 $p2
// work on magnitudes from here on
i2i s32 s32 $r0 abs $r0
sched 0x7e0 0x7e0 0x7e0
i2i s32 s32 $r1 abs $r1
// initial estimate: z = 1 << (31 - index of the highest set bit of |b|)
flo u32 $r2 $r1
lop xor 1 $r2 $r2 0x1f
sched 0x7e0 0x7e0 0x7e0
mov $r3 0x1 0xf
shl $r2 $r3 $r2
// $r1 = -|b| for the refinement steps
i2i u32 u32 $r1 neg $r1
sched 0x7e0 0x7e0 0x7e0
// five refinement steps: z += hi(z * (-|b| * z))
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
sched 0x7e0 0x7e0 0x7e0
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
sched 0x7e0 0x7e0 0x7e0
imul u32 u32 $r3 $r1 $r2
imad u32 u32 hi $r2 $r2 $r3 $r2
imul u32 u32 $r3 $r1 $r2
sched 0x7e0 0x7e0 0x7e0
imad u32 u32 hi $r2 $r2 $r3 $r2
// keep |a| in $r3, then q = hi(|a| * z)
mov $r3 $r0 0xf
imul u32 u32 hi $r0 $r0 $r2
sched 0x7e0 0x7e0 0x7e0
// $r2 = |b|; $r1 = |a| - q * |b| is the remainder estimate
i2i u32 u32 $r2 neg $r1
imad u32 u32 $r1 $r1 $r0 $r3
// up to two corrections: while remainder >= |b|, subtract |b| and bump q
isetp ge u32 and $p0 1 $r1 $r2 1
sched 0x7e0 0x7e0 0x7e0
$p0 iadd $r1 $r1 neg $r2
$p0 iadd $r0 $r0 0x1
$p0 isetp ge u32 and $p0 1 $r1 $r2 1
sched 0x7e0 0x7e0 0x7e0
$p0 iadd $r1 $r1 neg $r2
$p0 iadd $r0 $r0 0x1
// restore signs: quotient negated when signs differed, remainder follows
// the sign of the dividend
$p3 i2i s32 s32 $r0 neg $r0
sched 0x7e0 0x7e0 0x7e0
$p2 i2i s32 s32 $r1 neg $r1
ret
// presumably padding so the final sched group has its 3 instructions
nop 0
 
// STUB
// Both f64 entry points share this code, which returns immediately; they
// resolve to the same offset (0x0280 in gm107.asm.h). The two nops
// presumably pad the sched group out to its 3 instructions.
gm107_rcp_f64:
gm107_rsq_f64:
sched 0x7e0 0x7e0 0x7e0
ret
nop 0
nop 0
 
// Table of routine start offsets, one 64-bit entry per builtin, emitted as
// gm107_builtin_offsets[] in gm107.asm.h in this order.
.section #gm107_builtin_offsets
.b64 #gm107_div_u32
.b64 #gm107_div_s32
.b64 #gm107_rcp_f64
.b64 #gm107_rsq_f64
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h
0,0 → 1,97
/*
 * Assembled form of gm107.asm (built by the lib/Makefile rule that runs
 * envyas with -mgm107). One 64-bit word per instruction or sched line, so
 * every fourth word starting at index 0 is a sched word; the label comments
 * give byte offsets into this array.
 */
uint64_t gm107_builtin_code[] = {
/* 0x0000: gm107_div_u32 */
0x001f8000fc0007e0,
0x5c30000000170002,
0x3847040001f70202,
0x3898078000170003,
0x001f8000fc0007e0,
0x5c48000000270302,
0x5ce0200000170a01,
0x5c38000000270103,
0x001f8000fc0007e0,
0x5a40010000370202,
0x5c38000000270103,
0x5a40010000370202,
0x001f8000fc0007e0,
0x5c38000000270103,
0x5a40010000370202,
0x5c38000000270103,
0x001f8000fc0007e0,
0x5a40010000370202,
0x5c38000000270103,
0x5a40010000370202,
0x001f8000fc0007e0,
0x5c98078000070003,
0x5c38008000270000,
0x5ce0200000170a02,
0x001f8000fc0007e0,
0x5a00018000070101,
0x5b6c038000270107,
0x5c11000000200101,
0x001f8000fc0007e0,
0x3810000000100000,
0x5b6c038000200107,
0x5c11000000200101,
0x001f8000fc0007e0,
0x3810000000100000,
0xe32000000007000f,
0x50b0000000070f00,
/* 0x0120: gm107_div_s32 */
0x001f8000fc0007e0,
0x5b6303800ff70017,
0x5b6341000ff7011f,
0x5ce2000000073a00,
0x001f8000fc0007e0,
0x5ce2000000173a01,
0x5c30000000170002,
0x3847040001f70202,
0x001f8000fc0007e0,
0x3898078000170003,
0x5c48000000270302,
0x5ce0200000170a01,
0x001f8000fc0007e0,
0x5c38000000270103,
0x5a40010000370202,
0x5c38000000270103,
0x001f8000fc0007e0,
0x5a40010000370202,
0x5c38000000270103,
0x5a40010000370202,
0x001f8000fc0007e0,
0x5c38000000270103,
0x5a40010000370202,
0x5c38000000270103,
0x001f8000fc0007e0,
0x5a40010000370202,
0x5c98078000070003,
0x5c38008000270000,
0x001f8000fc0007e0,
0x5ce0200000170a02,
0x5a00018000070101,
0x5b6c038000270107,
0x001f8000fc0007e0,
0x5c11000000200101,
0x3810000000100000,
0x5b6c038000200107,
0x001f8000fc0007e0,
0x5c11000000200101,
0x3810000000100000,
0x5ce0200000033a00,
0x001f8000fc0007e0,
0x5ce0200000123a01,
0xe32000000007000f,
0x50b0000000070f00,
/* 0x0280: gm107_rcp_f64 */
/* 0x0280: gm107_rsq_f64 */
0x001f8000fc0007e0,
0xe32000000007000f,
0x50b0000000070f00,
0x50b0000000070f00,
};
 
/*
 * Byte offsets into gm107_builtin_code[], in the order listed by the
 * #gm107_builtin_offsets section of gm107.asm: div_u32, div_s32, rcp_f64,
 * rsq_f64. The last two are equal because both labels share one stub.
 */
uint64_t gm107_builtin_offsets[] = {
0x0000000000000000,
0x0000000000000120,
0x0000000000000280,
0x0000000000000280,
};
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
0,0 → 1,1238
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_target.h"
#include "codegen/nv50_ir_driver.h"
 
extern "C" {
#include "nouveau_debug.h"
#include "nv50/nv50_program.h"
}
 
namespace nv50_ir {
 
// Build the modifier that corresponds to a unary opcode. Only NEG, ABS, SAT
// and NOT map to a modifier bit; every other opcode yields an empty modifier.
Modifier::Modifier(operation op)
{
   if (op == OP_NEG)
      bits = NV50_IR_MOD_NEG;
   else if (op == OP_ABS)
      bits = NV50_IR_MOD_ABS;
   else if (op == OP_SAT)
      bits = NV50_IR_MOD_SAT;
   else if (op == OP_NOT)
      bits = NV50_IR_MOD_NOT;
   else
      bits = 0;
}
 
// Compose two modifiers. NOT and NEG toggle (xor), ABS and SAT accumulate
// (or). If this modifier already has ABS, a NEG in @m is discarded before
// the toggle.
Modifier Modifier::operator*(const Modifier m) const
{
   unsigned int rhs = m.bits;
   if (bits & NV50_IR_MOD_ABS)
      rhs &= ~NV50_IR_MOD_NEG;

   const unsigned int toggled =
      (bits ^ rhs) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
   const unsigned int sticky =
      (bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);

   return Modifier(toggled | sticky);
}
 
// Create a reference to @v that is not yet attached to an instruction and
// has no indirect-address sources.
ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
{
   usedAsPtr = false;
   for (int dim = 0; dim < 2; ++dim)
      indirect[dim] = -1;
   set(v);
}
 
// Copy constructor: shares the owning instruction of @ref and copies its
// value, modifier, indirect indices and pointer-use flag.
ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
{
   usedAsPtr = ref.usedAsPtr;
   set(ref);
}
 
// Detach from the referenced value so its use list does not keep a pointer
// to this dying reference.
ValueRef::~ValueRef()
{
   set(NULL);
}
 
// Try to resolve this reference to an immediate, looking through chains of
// OP_MOV. On success @imm receives a copy of the immediate with all
// modifiers met on the way applied and true is returned. Gives up (false)
// when a modified reference belongs to an instruction whose source type
// differs from the starting one, or when the value is not uniquely defined
// by a MOV.
bool ValueRef::getImmediate(ImmediateValue &imm) const
{
   const DataType type = insn->sType;
   Modifier accum;

   for (const ValueRef *cur = this; ; ) {
      if (cur->mod) {
         if (cur->insn->sType != type)
            return false;
         accum *= cur->mod;
      }

      if (cur->getFile() == FILE_IMMEDIATE) {
         imm = *(cur->value->asImm());
         // The immediate's type isn't required to match its use, it's
         // more of a hint; applying a modifier makes use of that hint.
         imm.reg.type = type;
         accum.applyTo(imm);
         return true;
      }

      Instruction *def = cur->value->getUniqueInsn();
      if (!def || def->op != OP_MOV)
         return false;

      cur = &def->src(0);
      if (cur->mod)
         WARN("OP_MOV with modifier encountered !\n");
   }
}
 
// Create a definition of @v that is not yet attached to an instruction.
ValueDef::ValueDef(Value *v)
   : value(NULL),
     insn(NULL)
{
   set(v);
}
 
// Copy constructor: defines the same value as @def, but the owning
// instruction is deliberately left unset.
ValueDef::ValueDef(const ValueDef& def)
   : value(NULL),
     insn(NULL)
{
   Value *const defined = def.get();
   set(defined);
}
 
// Detach from the defined value so its def list does not keep a pointer to
// this dying definition.
ValueDef::~ValueDef()
{
   set(NULL);
}
 
// Make this reference point at the same value as @ref and take over its
// modifier and indirect-source indices. The owning instruction and the
// usedAsPtr flag are left untouched.
void
ValueRef::set(const ValueRef &ref)
{
   set(ref.get());

   mod = ref.mod;
   for (int dim = 0; dim < 2; ++dim)
      indirect[dim] = ref.indirect[dim];
}
 
// Retarget this reference to @refVal (may be NULL), keeping the use sets of
// the old and the new value in sync.
void
ValueRef::set(Value *refVal)
{
   if (refVal != value) {
      if (value)
         value->uses.erase(this);
      if (refVal)
         refVal->uses.insert(this);
      value = refVal;
   }
}
 
// Retarget this definition to @defVal (may be NULL), keeping the def lists
// of the old and the new value in sync.
void
ValueDef::set(Value *defVal)
{
   if (defVal != value) {
      if (value)
         value->defs.remove(this);
      if (defVal)
         defVal->defs.push_back(this);
      value = defVal;
   }
}
 
// Check if we can replace this definition's value by the value in @rep,
// including the source modifiers, i.e. make sure that all uses support
// @rep.mod.
bool
ValueDef::mayReplace(const ValueRef &rep)
{
if (!rep.mod)
return true;
 
if (!insn || !insn->bb) // Unbound instruction ?
return false;
 
const Target *target = insn->bb->getProgram()->getTarget();
 
for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
++it) {
Instruction *insn = (*it)->getInsn();
int s = -1;
 
for (int i = 0; insn->srcExists(i); ++i) {
if (insn->src(i).get() == value) {
// If there are multiple references to us we'd have to check if the
// combination of mods is still supported, but just bail for now.
if (&insn->src(i) != (*it))
return false;
s = i;
}
}
assert(s >= 0); // integrity of uses list
 
if (!target->isModSupported(insn, s, rep.mod))
return false;
}
return true;
}
 
void
ValueDef::replace(const ValueRef &repVal, bool doSet)
{
assert(mayReplace(repVal));
 
if (value == repVal.get())
return;
 
while (!value->uses.empty()) {
ValueRef *ref = *value->uses.begin();
ref->set(repVal.get());
ref->mod *= repVal.mod;
}
 
if (doSet)
set(repVal.get());
}
 
// A fresh value has zeroed storage info, a default size of 4 and is its own
// join representative.
Value::Value()
{
   memset(&reg, 0, sizeof(reg));
   reg.size = 4;
   join = this;
}
 
// Create an unassigned (id -1) lvalue in @file and register it with @fn.
// Predicates get size 1, everything else size 4.
LValue::LValue(Function *fn, DataFile file)
{
   reg.file = file;
   reg.data.id = -1;
   reg.size = (file == FILE_PREDICATE) ? 1 : 4;

   ssa = 0;
   fixedReg = 0;
   noSpill = 0;
   compound = 0;
   compMask = 0;

   fn->add(this, this->id);
}
 
// Create an unassigned (id -1) lvalue with the same file and size as @lval
// and register it with @fn. No other property of @lval is copied.
LValue::LValue(Function *fn, LValue *lval)
{
   assert(lval);

   reg.data.id = -1;
   reg.file = lval->reg.file;
   reg.size = lval->reg.size;

   ssa = 0;
   fixedReg = 0;
   noSpill = 0;
   compound = 0;
   compMask = 0;

   fn->add(this, this->id);
}
 
// Create a copy of this lvalue in the policy's target function, record the
// original-to-copy mapping in @pol and return the copy.
LValue *
LValue::clone(ClonePolicy<Function>& pol) const
{
   LValue *copy = new_LValue(pol.context(), reg.file);
   pol.set<Value>(this, copy);

   copy->reg.data = reg.data;
   copy->reg.type = reg.type;
   copy->reg.size = reg.size;
   return copy;
}
 
// Conservative uniformity test: true only when the value has at most one
// definition, the defining instruction has exactly one source, and that
// source is itself uniform.
//
// A value without a defining instruction, or one defined by an instruction
// that has no source 0, is reported as non-uniform instead of being
// dereferenced.
bool
LValue::isUniform() const
{
   if (defs.size() > 1)
      return false;

   Instruction *insn = getInsn();
   if (!insn)
      return false; // no definition to inspect
   if (!insn->srcExists(0))
      return false; // nothing to derive uniformity from

   // let's not try too hard here for now ...
   return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
}
 
// Create a symbol at offset 0 of file @f, index @fidx, with no base symbol,
// and register it with @prog.
Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
{
   reg.file = f;
   reg.fileIndex = fidx;
   reg.data.offset = 0;

   baseSym = NULL;

   prog->add(this, this->id);
}
 
// Create a copy of this symbol in the program owning the policy's target
// function, record the mapping in @pol and return the copy. The base symbol
// pointer is shared, not cloned.
Symbol *
Symbol::clone(ClonePolicy<Function>& pol) const
{
   Symbol *copy =
      new_Symbol(pol.context()->getProgram(), reg.file, reg.fileIndex);
   pol.set<Value>(this, copy);

   copy->baseSym = baseSym;

   copy->reg.data = reg.data;
   copy->reg.type = reg.type;
   copy->reg.size = reg.size;
   return copy;
}
 
// A symbol counts as uniform unless it lives in the system-value, local
// memory or shader-input file.
bool
Symbol::isUniform() const
{
   switch (reg.file) {
   case FILE_SYSTEM_VALUE:
   case FILE_MEMORY_LOCAL:
   case FILE_SHADER_INPUT:
      return false;
   default:
      return true;
   }
}
 
// Create a 4-byte U32 immediate holding @uval and register it with @prog.
ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
{
   // Start from all-zero storage so unused bytes of the data union are 0.
   memset(&reg, 0, sizeof(reg));

   reg.file = FILE_IMMEDIATE;
   reg.type = TYPE_U32;
   reg.size = 4;
   reg.data.u32 = uval;

   prog->add(this, this->id);
}
 
// Create a 4-byte F32 immediate holding @fval and register it with @prog.
ImmediateValue::ImmediateValue(Program *prog, float fval)
{
   // Start from all-zero storage so unused bytes of the data union are 0.
   memset(&reg, 0, sizeof(reg));

   reg.file = FILE_IMMEDIATE;
   reg.type = TYPE_F32;
   reg.size = 4;
   reg.data.f32 = fval;

   prog->add(this, this->id);
}
 
// Create an 8-byte F64 immediate holding @dval and register it with @prog.
ImmediateValue::ImmediateValue(Program *prog, double dval)
{
   memset(&reg, 0, sizeof(reg));

   reg.file = FILE_IMMEDIATE;
   reg.type = TYPE_F64;
   reg.size = 8;
   reg.data.f64 = dval;

   prog->add(this, this->id);
}
 
// Create an immediate with the same storage bits as @proto, relabelled as
// type @ty (size adjusted to match). It is not registered with a program
// here.
ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
{
   reg = proto->reg;

   reg.size = typeSizeof(ty);
   reg.type = ty;
}
 
// Create a copy of this immediate in the program owning the policy's target
// function, record the mapping in @pol and return the copy.
ImmediateValue *
ImmediateValue::clone(ClonePolicy<Function>& pol) const
{
   // Built as a zero U32 first; size, type and data are overwritten below.
   ImmediateValue *copy =
      new_ImmediateValue(pol.context()->getProgram(), 0u);
   pol.set<Value>(this, copy);

   copy->reg.data = reg.data;
   copy->reg.type = reg.type;
   copy->reg.size = reg.size;
   return copy;
}
 
// Return whether the immediate, read according to its type, equals the
// integer @i. 32-bit values are compared as signed regardless of
// signedness; unknown types never match.
bool
ImmediateValue::isInteger(const int i) const
{
   bool match = false;

   switch (reg.type) {
   case TYPE_S8:
      match = (reg.data.s8 == i);
      break;
   case TYPE_U8:
      match = (reg.data.u8 == i);
      break;
   case TYPE_S16:
      match = (reg.data.s16 == i);
      break;
   case TYPE_U16:
      match = (reg.data.u16 == i);
      break;
   case TYPE_S32:
   case TYPE_U32:
      match = (reg.data.s32 == i); // as if ...
      break;
   case TYPE_F32:
      match = (reg.data.f32 == static_cast<float>(i));
      break;
   case TYPE_F64:
      match = (reg.data.f64 == static_cast<double>(i));
      break;
   default:
      break;
   }
   return match;
}
 
// Return whether the immediate has its sign bit set when read according to
// its type. U32 is deliberately tested like S32; floats are tested on the
// raw sign bit (so -0.0 counts as negative). Types not listed are never
// negative.
bool
ImmediateValue::isNegative() const
{
   switch (reg.type) {
   case TYPE_S8: return reg.data.s8 < 0;
   case TYPE_S16: return reg.data.s16 < 0;
   case TYPE_S32:
   case TYPE_U32: return reg.data.s32 < 0;
   // Unsigned literal: shifting a signed 1 into bit 31 is not portable.
   case TYPE_F32: return (reg.data.u32 & (1u << 31)) != 0;
   case TYPE_F64: return (reg.data.u64 & (1ULL << 63)) != 0;
   default:
      return false;
   }
}
 
// Power-of-two test, only meaningful for the unsigned integer types; every
// other type yields false. The test itself is delegated to
// util_is_power_of_two() on the 32-bit view of the data.
bool
ImmediateValue::isPow2() const
{
   const bool unsignedInt = reg.type == TYPE_U8 ||
                            reg.type == TYPE_U16 ||
                            reg.type == TYPE_U32;
   if (!unsignedInt)
      return false;
   return util_is_power_of_two(reg.data.u32);
}
 
// Replace the immediate by its base-2 logarithm, in place. Integer types
// use util_logbase2() on the 32-bit view (signed ones must not be negative),
// floats use log2f()/log2(). Any other type is a programming error.
void
ImmediateValue::applyLog2()
{
   switch (reg.type) {
   case TYPE_F32:
      reg.data.f32 = log2f(reg.data.f32);
      return;
   case TYPE_F64:
      reg.data.f64 = log2(reg.data.f64);
      return;
   case TYPE_S8:
   case TYPE_S16:
   case TYPE_S32:
      assert(!this->isNegative());
      reg.data.u32 = util_logbase2(reg.data.u32);
      return;
   case TYPE_U8:
   case TYPE_U16:
   case TYPE_U32:
      reg.data.u32 = util_logbase2(reg.data.u32);
      return;
   default:
      assert(0);
      return;
   }
}
 
// Evaluate "<this> cc fval" for an F32 immediate.
//
// Only the low three bits of @cc select the comparison (TR, FL, LT, LE, GT,
// GE, EQ, NE). A non-F32 immediate is reported through ERROR() but the
// comparison is still carried out on the f32 view of the data.
bool
ImmediateValue::compare(CondCode cc, float fval) const
{
   // Newline-terminated like the other diagnostics in this file, so the
   // next log line does not get glued onto this one.
   if (reg.type != TYPE_F32)
      ERROR("immediate value is not of type f32\n");

   switch (static_cast<CondCode>(cc & 7)) {
   case CC_TR: return true;
   case CC_FL: return false;
   case CC_LT: return reg.data.f32 < fval;
   case CC_LE: return reg.data.f32 <= fval;
   case CC_GT: return reg.data.f32 > fval;
   case CC_GE: return reg.data.f32 >= fval;
   case CC_EQ: return reg.data.f32 == fval;
   case CC_NE: return reg.data.f32 != fval;
   default:
      assert(0);
      return false;
   }
}
 
// Assignment copies only the storage description (file, type, size, data).
ImmediateValue&
ImmediateValue::operator=(const ImmediateValue &that)
{
   reg = that.reg;
   return *this;
}
 
bool
Value::interfers(const Value *that) const
{
uint32_t idA, idB;
 
if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
return false;
if (this->asImm())
return false;
 
if (this->asSym()) {
idA = this->join->reg.data.offset;
idB = that->join->reg.data.offset;
} else {
idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
}
 
if (idA < idB)
return (idA + this->reg.size > idB);
else
if (idA > idB)
return (idB + that->reg.size > idA);
else
return (idA == idB);
}
 
bool
Value::equals(const Value *that, bool strict) const
{
if (strict)
return this == that;
 
if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
return false;
if (that->reg.size != this->reg.size)
return false;
 
if (that->reg.data.id != this->reg.data.id)
return false;
 
return true;
}
 
bool
ImmediateValue::equals(const Value *that, bool strict) const
{
const ImmediateValue *imm = that->asImm();
if (!imm)
return false;
return reg.data.u64 == imm->reg.data.u64;
}
 
bool
Symbol::equals(const Value *that, bool strict) const
{
if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
return false;
assert(that->asSym());
 
if (this->baseSym != that->asSym()->baseSym)
return false;
 
if (reg.file == FILE_SYSTEM_VALUE)
return (this->reg.data.sv.sv == that->reg.data.sv.sv &&
this->reg.data.sv.index == that->reg.data.sv.index);
return this->reg.data.offset == that->reg.data.offset;
}
 
void Instruction::init()
{
next = prev = 0;
 
cc = CC_ALWAYS;
rnd = ROUND_N;
cache = CACHE_CA;
subOp = 0;
 
saturate = 0;
join = 0;
exit = 0;
terminator = 0;
ftz = 0;
dnz = 0;
perPatch = 0;
fixed = 0;
encSize = 0;
ipa = 0;
mask = 0;
 
lanes = 0xf;
 
postFactor = 0;
 
predSrc = -1;
flagsDef = -1;
flagsSrc = -1;
}
 
// Default-construct a detached F32 NOP: no id, no basic block.
Instruction::Instruction()
{
   init();

   id = -1;
   bb = 0;

   op = OP_NOP;
   sType = TYPE_F32;
   dType = TYPE_F32;
}
 
// Construct an instruction with opcode @opr whose source and destination
// types are both @ty, and register it with @fn (which assigns the id).
//
// The instruction starts out outside any basic block. bb is cleared
// explicitly, as the default constructor does, because init() leaves it
// alone and the destructor and ValueDef::mayReplace() both test it.
Instruction::Instruction(Function *fn, operation opr, DataType ty)
{
   init();

   op = opr;
   dType = sType = ty;

   bb = 0;

   fn->add(this, id);
}
 
// Unlink the instruction from its basic block and function (if inserted)
// and drop every source and definition so no Value keeps a pointer into it.
Instruction::~Instruction()
{
   if (bb) {
      // Fetch the function first; remove() detaches us from the block.
      Function *owner = bb->getFunction();
      bb->remove(this);
      owner->allInsns.remove(id);
   }

   int n;
   for (n = 0; srcExists(n); ++n)
      setSrc(n, NULL);
   // must unlink defs too since the list pointers will get deallocated
   for (n = 0; defExists(n); ++n)
      setDef(n, NULL);
}
 
// Set definition slot @i to @val, growing the def array if needed. Newly
// created slots are bound to this instruction.
void
Instruction::setDef(int i, Value *val)
{
   const int oldSize = defs.size();
   if (i >= oldSize) {
      defs.resize(i + 1);
      for (int n = oldSize; n <= i; ++n)
         defs[n].setInsn(this);
   }
   defs[i].set(val);
}
 
// Set source slot @s to @val, growing the source array if needed. Newly
// created slots are bound to this instruction.
void
Instruction::setSrc(int s, Value *val)
{
   const int oldSize = srcs.size();
   if (s >= oldSize) {
      srcs.resize(s + 1);
      for (int n = oldSize; n <= s; ++n)
         srcs[n].setInsn(this);
   }
   srcs[s].set(val);
}
 
void
Instruction::setSrc(int s, const ValueRef& ref)
{
setSrc(s, ref.get());
srcs[s].mod = ref.mod;
}
 
void
Instruction::swapSources(int a, int b)
{
Value *value = srcs[a].get();
Modifier m = srcs[a].mod;
 
setSrc(a, srcs[b]);
 
srcs[b].set(value);
srcs[b].mod = m;
}
 
// Fix up a stored source index after the sources [@s, last] have been
// shifted by @delta: indices at or beyond @s move along, indices that fell
// into the window erased by a negative @delta become -1, all others stay.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index >= s) {
      index += delta;
      return;
   }

   const bool erased = (delta < 0) && (index >= (s + delta));
   if (erased)
      index = -1;
}
 
// Moves sources [@s,last_source] by @delta.
// If @delta < 0, sources [@s - abs(@delta), @s) are erased.
//
// Every stored source index (indirect indices of each source, predSrc,
// flagsSrc and, for texture instructions, the r/s indirect indices) is
// adjusted first, then the source slots themselves are shifted.
void
Instruction::moveSources(const int s, const int delta)
{
if (delta == 0)
return;
assert(s + delta >= 0);

int k;

// Fix up indices; on exit k is the number of existing sources.
for (k = 0; srcExists(k); ++k) {
for (int i = 0; i < 2; ++i)
moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
}
moveSourcesAdjustIndex(predSrc, s, delta);
moveSourcesAdjustIndex(flagsSrc, s, delta);
if (asTex()) {
TexInstruction *tex = asTex();
moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
}

if (delta > 0) {
// Shift up: copy from the last source downwards so nothing is
// overwritten before it has been moved.
--k;
for (int p = k + delta; k >= s; --k, --p)
setSrc(p, src(k));
} else {
// Shift down: copy from @s upwards, then clear the slots left over
// at the tail.
int p;
for (p = s; p < k; ++p)
setSrc(p + delta, src(p));
for (; (p + delta) < k; ++p)
setSrc(p + delta, NULL);
}
}
 
void
Instruction::takeExtraSources(int s, Value *values[3])
{
values[0] = getIndirect(s, 0);
if (values[0])
setIndirect(s, 0, NULL);
 
values[1] = getIndirect(s, 1);
if (values[1])
setIndirect(s, 1, NULL);
 
values[2] = getPredicate();
if (values[2])
setPredicate(cc, NULL);
}
 
// Counterpart of takeExtraSources(): re-attach the non-NULL entries of
// @values as indirect sources of @s ([0], [1]) and as predicate ([2]).
void
Instruction::putExtraSources(int s, Value *values[3])
{
   for (int dim = 0; dim < 2; ++dim) {
      if (values[dim])
         setIndirect(s, dim, values[dim]);
   }

   if (values[2])
      setPredicate(cc, values[2]);
}
 
// Copy this instruction into @i, or into a freshly allocated instruction in
// the policy's target function when @i is NULL. Operands are mapped through
// @pol. Returns the copy. Note that terminator, fixed and encSize are not
// copied.
Instruction *
Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
{
   if (i == NULL)
      i = new_Instruction(pol.context(), op, dType);
#ifndef NDEBUG // non-conformant assert, so this is required
   assert(typeid(*i) == typeid(*this));
#endif

   pol.set<Instruction>(this, i);

   // types and operation details
   i->sType = sType;
   i->rnd = rnd;
   i->cache = cache;
   i->subOp = subOp;
   i->postFactor = postFactor;

   // flags
   i->saturate = saturate;
   i->join = join;
   i->exit = exit;
   i->mask = mask;
   i->ftz = ftz;
   i->dnz = dnz;
   i->ipa = ipa;
   i->lanes = lanes;
   i->perPatch = perPatch;

   // operands, translated through the clone policy
   for (int d = 0; defExists(d); ++d) {
      Value *mapped = pol.get(getDef(d));
      i->setDef(d, mapped);
   }
   for (int s = 0; srcExists(s); ++s) {
      Value *mapped = pol.get(getSrc(s));
      i->setSrc(s, mapped);
      i->src(s).mod = src(s).mod;
   }

   // predicate and flags bookkeeping refer to the slots copied above
   i->cc = cc;
   i->predSrc = predSrc;
   i->flagsDef = flagsDef;
   i->flagsSrc = flagsSrc;

   return i;
}
 
unsigned int
Instruction::defCount(unsigned int mask, bool singleFile) const
{
unsigned int i, n;
 
if (singleFile) {
unsigned int d = ffs(mask);
if (!d)
return 0;
for (i = d--; defExists(i); ++i)
if (getDef(i)->reg.file != getDef(d)->reg.file)
mask &= ~(1 << i);
}
 
for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
n += mask & 1;
return n;
}
 
unsigned int
Instruction::srcCount(unsigned int mask, bool singleFile) const
{
unsigned int i, n;
 
if (singleFile) {
unsigned int s = ffs(mask);
if (!s)
return 0;
for (i = s--; srcExists(i); ++i)
if (getSrc(i)->reg.file != getSrc(s)->reg.file)
mask &= ~(1 << i);
}
 
for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
n += mask & 1;
return n;
}
 
// Attach @value as the indirect-address source for dimension @dim of source
// @s, or detach it when @value is NULL. An already assigned slot is reused;
// otherwise the first slot after the last existing source is taken. Always
// returns true.
bool
Instruction::setIndirect(int s, int dim, Value *value)
{
   assert(this->srcExists(s));

   int slot = srcs[s].indirect[dim];
   if (slot < 0) {
      // Nothing attached and nothing to attach.
      if (!value)
         return true;
      // Skip trailing empty slots left at the end of the array.
      for (slot = srcs.size(); slot > 0 && !srcExists(slot - 1); --slot)
         ;
   }

   setSrc(slot, value);
   srcs[slot].usedAsPtr = (value != 0);
   srcs[s].indirect[dim] = value ? slot : -1;
   return true;
}
 
// Set the condition code to @ccode and the predicate source to @value. A
// NULL @value removes an existing predicate source. A new predicate goes
// into the first slot after the last existing source. Always returns true.
bool
Instruction::setPredicate(CondCode ccode, Value *value)
{
   cc = ccode;

   if (value) {
      if (predSrc < 0) {
         // Skip trailing empty slots left at the end of the array.
         for (predSrc = srcs.size();
              predSrc > 0 && !srcExists(predSrc - 1); --predSrc)
            ;
      }
      setSrc(predSrc, value);
      return true;
   }

   if (predSrc >= 0) {
      srcs[predSrc].set(NULL);
      predSrc = -1;
   }
   return true;
}
 
// Return whether any definition of this instruction lives in the predicate
// or the flags file.
bool
Instruction::writesPredicate() const
{
   for (int d = 0; defExists(d); ++d) {
      const bool inPred = getDef(d)->inFile(FILE_PREDICATE);
      if (inPred || getDef(d)->inFile(FILE_FLAGS))
         return true;
   }
   return false;
}
 
// Return true when no definition of @a overlaps any source of @b.
static bool
insnCheckCommutationDefSrc(const Instruction *a, const Instruction *b)
{
   for (int d = 0; a->defExists(d); ++d) {
      for (int s = 0; b->srcExists(s); ++s) {
         const bool clash = a->getDef(d)->interfers(b->getSrc(s));
         if (clash)
            return false;
      }
   }
   return true;
}
 
// Return true when no definition of @a overlaps any definition of @b.
static bool
insnCheckCommutationDefDef(const Instruction *a, const Instruction *b)
{
   for (int d = 0; a->defExists(d); ++d) {
      for (int c = 0; b->defExists(c); ++c) {
         const bool clash = a->getDef(d)->interfers(b->getDef(c));
         if (clash)
            return false;
      }
   }
   return true;
}
 
// Two instructions may swap places when neither writes something the other
// one writes or reads.
bool
Instruction::isCommutationLegal(const Instruction *i) const
{
   if (!insnCheckCommutationDefDef(this, i))
      return false;
   if (!insnCheckCommutationDefSrc(this, i))
      return false;
   return insnCheckCommutationDefSrc(i, this);
}
 
// Construct an F32 texture instruction with zeroed texture state and no
// indirect resource/sampler sources.
TexInstruction::TexInstruction(Function *fn, operation op)
   : Instruction(fn, op, TYPE_F32)
{
   memset(&tex, 0, sizeof(tex));

   tex.sIndirectSrc = -1;
   tex.rIndirectSrc = -1;
}
 
// Release the derivative and offset references that live outside the
// regular source array.
TexInstruction::~TexInstruction()
{
   int c, n;

   for (c = 0; c < 3; ++c) {
      dPdx[c].set(NULL);
      dPdy[c].set(NULL);
   }

   for (n = 0; n < 4; ++n) {
      for (c = 0; c < 3; ++c)
         offset[n][c].set(NULL);
   }
}
 
// Copy this texture instruction into @i (or a new instruction when @i is
// NULL): the generic part via Instruction::clone(), then the texture state,
// the derivatives (TXD only) and the used offsets.
TexInstruction *
TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
{
   TexInstruction *copy;
   if (i)
      copy = static_cast<TexInstruction *>(i);
   else
      copy = new_TexInstruction(pol.context(), op);

   Instruction::clone(pol, copy);

   copy->tex = this->tex;

   if (op == OP_TXD) {
      for (unsigned int c = 0; c < copy->tex.target.getDim(); ++c) {
         copy->dPdx[c].set(dPdx[c]);
         copy->dPdy[c].set(dPdy[c]);
      }
   }

   for (int n = 0; n < copy->tex.useOffsets; ++n) {
      for (int c = 0; c < 3; ++c)
         copy->offset[n][c].set(offset[n][c]);
   }

   return copy;
}
 
// Static description of every texture target.  The table is indexed by
// enum TexTarget (Target::getName(), getDim(), ... use descTable[target]),
// so the row order must stay identical to that enum.
// Columns follow struct Target::Desc:
//   name, dim, argc, array, cube, shadow
const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
{
{ "1D", 1, 1, false, false, false },
{ "2D", 2, 2, false, false, false },
{ "2D_MS", 2, 3, false, false, false },
{ "3D", 3, 3, false, false, false },
{ "CUBE", 2, 3, false, true, false },
{ "1D_SHADOW", 1, 1, false, false, true },
{ "2D_SHADOW", 2, 2, false, false, true },
{ "CUBE_SHADOW", 2, 3, false, true, true },
{ "1D_ARRAY", 1, 2, true, false, false },
{ "2D_ARRAY", 2, 3, true, false, false },
{ "2D_MS_ARRAY", 2, 4, true, false, false },
{ "CUBE_ARRAY", 2, 4, true, true, false },
{ "1D_ARRAY_SHADOW", 1, 2, true, false, true },
{ "2D_ARRAY_SHADOW", 2, 3, true, false, true },
{ "RECT", 2, 2, false, false, false },
{ "RECT_SHADOW", 2, 2, false, false, true },
{ "CUBE_ARRAY_SHADOW", 2, 4, true, true, true },
{ "BUFFER", 1, 1, false, false, false },
};
 
// Set (or replace) the value used as indirect "r" index.  A new source slot
// is appended only when none is assigned yet and @v is non-NULL; with no
// slot and a NULL @v nothing happens.
void
TexInstruction::setIndirectR(Value *v)
{
   int slot = tex.rIndirectSrc;
   if (slot < 0) {
      if (!v)
         return;
      slot = srcs.size();
   }

   tex.rIndirectSrc = slot;
   setSrc(slot, v);
   srcs[slot].usedAsPtr = (v != NULL);
}
 
// Set (or replace) the value used as indirect "s" index.  A new source slot
// is appended only when none is assigned yet and @v is non-NULL; with no
// slot and a NULL @v nothing happens.
void
TexInstruction::setIndirectS(Value *v)
{
   int slot = tex.sIndirectSrc;
   if (slot < 0) {
      if (!v)
         return;
      slot = srcs.size();
   }

   tex.sIndirectSrc = slot;
   setSrc(slot, v);
   srcs[slot].usedAsPtr = (v != NULL);
}
 
// Construct a compare instruction of type F32 whose condition defaults to
// CC_ALWAYS.
CmpInstruction::CmpInstruction(Function *fn, operation op)
   : Instruction(fn, op, TYPE_F32)
{
   setCondition(CC_ALWAYS);
}
 
// Duplicate this compare instruction.  When @i is non-NULL it is used as the
// destination (cast without a check); otherwise a new instruction is
// obtained via new_CmpInstruction().
CmpInstruction *
CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
{
   CmpInstruction *copy;
   if (i)
      copy = static_cast<CmpInstruction *>(i);
   else
      copy = new_CmpInstruction(pol.context(), op);

   // dType is assigned before the base-class clone runs, as in the original
   // statement order.
   copy->dType = dType;
   Instruction::clone(pol, copy);
   copy->setCond = setCond;

   return copy;
}
 
// Construct a control-flow instruction.  @targ is interpreted as a
// Function * for OP_CALL and as a BasicBlock * for every other opcode.
FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
   : Instruction(fn, op, TYPE_NONE)
{
   if (op != OP_CALL)
      target.bb = reinterpret_cast<BasicBlock *>(targ);
   else
      target.fn = reinterpret_cast<Function *>(targ);

   switch (op) {
   case OP_BRA:
   case OP_CONT:
   case OP_BREAK:
   case OP_RET:
   case OP_EXIT:
      // these always end their basic block
      terminator = 1;
      break;
   case OP_JOIN:
      // a JOIN only terminates the block when it has a target
      terminator = targ ? 1 : 0;
      break;
   default:
      break;
   }

   allWarp = 0;
   absolute = 0;
   limit = 0;
   builtin = 0;
   indirect = 0;
}
 
// Duplicate this flow instruction.  When @i is non-NULL it is used as the
// destination (cast without a check); otherwise a new instruction with a
// NULL target is obtained via new_FlowInstruction().
//
// All flow flags are copied, including `indirect` (target in src(0)), which
// was previously left at whatever value the destination already had.
//
// Target handling:
//  - builtin calls copy the builtin id,
//  - OP_CALL copies the Function pointer unchanged,
//  - any other op maps a non-NULL BasicBlock through the clone policy.
FlowInstruction *
FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
{
   FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
                            new_FlowInstruction(pol.context(), op, NULL));

   Instruction::clone(pol, flow);
   flow->allWarp = allWarp;
   flow->absolute = absolute;
   flow->limit = limit;
   flow->builtin = builtin;
   flow->indirect = indirect;

   if (builtin)
      flow->target.builtin = target.builtin;
   else
   if (op == OP_CALL)
      flow->target.fn = target.fn;
   else
   if (target.bb)
      flow->target.bb = pol.get<BasicBlock>(target.bb);

   return flow;
}
 
// Create an empty program of the given type for target @arch.
//
// Sets up the per-class memory pools, creates the "MAIN" function and
// registers its call-graph node.  All scalar state starts out zeroed
// (maxGPR at -1).  tlsSize is initialised as well: nv50_ir_generate_code()
// copies it to the driver info even when translation fails early, so it
// must never be left indeterminate.
Program::Program(Type type, Target *arch)
   : progType(type),
     target(arch),
     mem_Instruction(sizeof(Instruction), 6),
     mem_CmpInstruction(sizeof(CmpInstruction), 4),
     mem_TexInstruction(sizeof(TexInstruction), 4),
     mem_FlowInstruction(sizeof(FlowInstruction), 4),
     mem_LValue(sizeof(LValue), 8),
     mem_Symbol(sizeof(Symbol), 7),
     mem_ImmediateValue(sizeof(ImmediateValue), 7)
{
   code = NULL;
   binSize = 0;

   maxGPR = -1;
   tlsSize = 0;

   main = new Function(this, "MAIN", ~0);
   calls.insert(&main->call);

   dbgFlags = 0;
   optLevel = 0;

   targetPriv = NULL;
}
 
// Destroy all functions first, then hand every registered value back via
// releaseValue().
Program::~Program()
{
   ArrayList::Iterator funcIt = allFuncs.iterator();
   while (!funcIt.end()) {
      Function *const fn = reinterpret_cast<Function *>(funcIt.get());
      delete fn;
      funcIt.next();
   }

   ArrayList::Iterator valIt = allRValues.iterator();
   while (!valIt.end()) {
      Value *const val = reinterpret_cast<Value *>(valIt.get());
      releaseValue(val);
      valIt.next();
   }
}
 
// Destroy @insn and return its storage to the memory pool matching its
// concrete class (Cmp, Tex, Flow or plain Instruction).
void Program::releaseInstruction(Instruction *insn)
{
   // TODO: make this not suck so much

   // Classify before destroying: asCmp()/asTex()/asFlow() are member
   // functions of *insn and must not be called once its lifetime has ended.
   const bool isCmp = insn->asCmp() != NULL;
   const bool isTex = !isCmp && insn->asTex() != NULL;
   const bool isFlow = !isCmp && !isTex && insn->asFlow() != NULL;

   insn->~Instruction();

   if (isCmp)
      mem_CmpInstruction.release(insn);
   else
   if (isTex)
      mem_TexInstruction.release(insn);
   else
   if (isFlow)
      mem_FlowInstruction.release(insn);
   else
      mem_Instruction.release(insn);
}
 
// Destroy @value and return its storage to the memory pool matching its
// concrete class (LValue, ImmediateValue or Symbol).  A value matching none
// of these is destroyed but not returned to any pool, as before.
void Program::releaseValue(Value *value)
{
   // Classify before destroying: asLValue()/asImm()/asSym() are member
   // functions of *value and must not be called once its lifetime has ended.
   const bool isLValue = value->asLValue() != NULL;
   const bool isImm = !isLValue && value->asImm() != NULL;
   const bool isSym = !isLValue && !isImm && value->asSym() != NULL;

   value->~Value();

   if (isLValue)
      mem_LValue.release(value);
   else
   if (isImm)
      mem_ImmediateValue.release(value);
   else
   if (isSym)
      mem_Symbol.release(value);
}
 
 
} // namespace nv50_ir
 
extern "C" {
 
// Reset the output fields of @info to their pre-compilation defaults:
// stage-specific properties first, then all special I/O slot indices.
static void
nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
{
#if defined(PIPE_SHADER_HULL) && defined(PIPE_SHADER_DOMAIN)
   // tessellation stages: primitive kinds start out as PIPE_PRIM_MAX
   if (info->type == PIPE_SHADER_HULL || info->type == PIPE_SHADER_DOMAIN) {
      info->prop.tp.domain = PIPE_PRIM_MAX;
      info->prop.tp.outputPrim = PIPE_PRIM_MAX;
   }
#endif

   if (info->type == PIPE_SHADER_GEOMETRY) {
      info->prop.gp.instanceCount = 1;
      info->prop.gp.maxVertices = 1;
   }

   // Every special I/O slot starts out as 0xff
   // (presumably meaning "not present" - confirm against the driver).
   info->io.clipDistance = 0xff;
   info->io.pointSize = 0xff;
   info->io.instanceId = 0xff;
   info->io.vertexId = 0xff;
   info->io.edgeFlagIn = 0xff;
   info->io.edgeFlagOut = 0xff;
   info->io.fragDepth = 0xff;
   info->io.sampleMask = 0xff;
   info->io.backFaceColor[1] = 0xff;
   info->io.backFaceColor[0] = 0xff;
}
 
int
nv50_ir_generate_code(struct nv50_ir_prog_info *info)
{
int ret = 0;
 
nv50_ir::Program::Type type;
 
nv50_ir_init_prog_info(info);
 
#define PROG_TYPE_CASE(a, b) \
case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
 
switch (info->type) {
PROG_TYPE_CASE(VERTEX, VERTEX);
// PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL);
// PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL);
PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
PROG_TYPE_CASE(COMPUTE, COMPUTE);
default:
type = nv50_ir::Program::TYPE_COMPUTE;
break;
}
INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
 
nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
if (!targ)
return -1;
 
nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
if (!prog)
return -1;
prog->driver = info;
prog->dbgFlags = info->dbgFlags;
prog->optLevel = info->optLevel;
 
switch (info->bin.sourceRep) {
#if 0
case PIPE_IR_LLVM:
case PIPE_IR_GLSL:
return -1;
case PIPE_IR_SM4:
ret = prog->makeFromSM4(info) ? 0 : -2;
break;
case PIPE_IR_TGSI:
#endif
default:
ret = prog->makeFromTGSI(info) ? 0 : -2;
break;
}
if (ret < 0)
goto out;
if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
prog->print();
 
targ->parseDriverInfo(info);
prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
 
prog->convertToSSA();
 
if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
prog->print();
 
prog->optimizeSSA(info->optLevel);
prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
 
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
prog->print();
 
if (!prog->registerAllocation()) {
ret = -4;
goto out;
}
prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
 
prog->optimizePostRA(info->optLevel);
 
if (!prog->emitBinary(info)) {
ret = -5;
goto out;
}
 
out:
INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
 
info->bin.maxGPR = prog->maxGPR;
info->bin.code = prog->code;
info->bin.codeSize = prog->binSize;
info->bin.tlsSpace = prog->tlsSize;
 
delete prog;
nv50_ir::Target::destroy(targ);
 
return ret;
}
 
} // extern "C"
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir.h
0,0 → 1,1227
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#ifndef __NV50_IR_H__
#define __NV50_IR_H__
 
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <deque>
#include <list>
#include <vector>
#include <tr1/unordered_set>
 
#include "codegen/nv50_ir_util.h"
#include "codegen/nv50_ir_graph.h"
 
#include "codegen/nv50_ir_driver.h"
 
namespace nv50_ir {
 
// IR opcodes.
// The order is significant: Instruction::isPseudo() treats every opcode
// that sorts before OP_MOV as a pseudo-op, and OP_LAST must stay the final
// enumerator.
enum operation
{
OP_NOP = 0,
OP_PHI,
OP_UNION, // unify a new definition and several source values
OP_SPLIT, // $r0d -> { $r0, $r1 } ($r0d and $r0/$r1 will be coalesced)
OP_MERGE, // opposite of split, e.g. combine 2 32 bit into a 64 bit value
OP_CONSTRAINT, // copy values into consecutive registers
OP_MOV, // simple copy, no modifiers allowed
OP_LOAD,
OP_STORE,
OP_ADD, // NOTE: add u64 + u32 is legal for targets w/o 64-bit integer adds
OP_SUB,
OP_MUL,
OP_DIV,
OP_MOD,
OP_MAD,
OP_FMA,
OP_SAD, // abs(src0 - src1) + src2
OP_ABS,
OP_NEG,
OP_NOT,
OP_AND,
OP_OR,
OP_XOR,
OP_SHL,
OP_SHR,
OP_MAX,
OP_MIN,
OP_SAT, // CLAMP(f32, 0.0, 1.0)
OP_CEIL,
OP_FLOOR,
OP_TRUNC,
OP_CVT,
OP_SET_AND, // dst = (src0 CMP src1) & src2
OP_SET_OR,
OP_SET_XOR,
OP_SET,
OP_SELP, // dst = src2 ? src0 : src1
OP_SLCT, // dst = (src2 CMP 0) ? src0 : src1
OP_RCP,
OP_RSQ,
OP_LG2,
OP_SIN,
OP_COS,
OP_EX2,
OP_EXP, // exponential (base M_E)
OP_LOG, // natural logarithm
OP_PRESIN,
OP_PREEX2,
OP_SQRT,
OP_POW,
OP_BRA,
OP_CALL,
OP_RET,
OP_CONT,
OP_BREAK,
OP_PRERET,
OP_PRECONT,
OP_PREBREAK,
OP_BRKPT, // breakpoint (not related to loops)
OP_JOINAT, // push control flow convergence point
OP_JOIN, // converge
OP_DISCARD,
OP_EXIT,
OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
OP_EXPORT,
OP_LINTERP,
OP_PINTERP,
OP_EMIT, // emit vertex
OP_RESTART, // restart primitive
OP_TEX,
OP_TXB, // texture bias
OP_TXL, // texture lod
OP_TXF, // texel fetch
OP_TXQ, // texture size query
OP_TXD, // texture derivatives
OP_TXG, // texture gather
OP_TXLQ, // texture query lod
OP_TEXCSAA, // texture op for coverage sampling
OP_TEXPREP, // turn cube map array into 2d array coordinates
OP_SULDB, // surface load (raw)
OP_SULDP, // surface load (formatted)
OP_SUSTB, // surface store (raw)
OP_SUSTP, // surface store (formatted)
OP_SUREDB,
OP_SUREDP, // surface reduction (atomic op)
OP_SULEA, // surface load effective address
OP_SUBFM, // surface bitfield manipulation
OP_SUCLAMP, // clamp surface coordinates
OP_SUEAU, // surface effective address
OP_MADSP, // special integer multiply-add
OP_TEXBAR, // texture dependency barrier
OP_DFDX,
OP_DFDY,
OP_RDSV, // read system value
OP_WRSV, // write system value
OP_PIXLD, // get info about raster object or surfaces
OP_QUADOP,
OP_QUADON,
OP_QUADPOP,
OP_POPCNT, // bitcount(src0 & src1)
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
OP_BFIND, // find highest/lowest set bit
OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
OP_ATOM,
OP_BAR, // execution barrier, sources = { id, thread count, predicate }
OP_VADD, // byte/word vector operations
OP_VAVG,
OP_VMIN,
OP_VMAX,
OP_VSAD,
OP_VSET,
OP_VSHR,
OP_VSHL,
OP_VSEL,
OP_CCTL, // cache control
OP_SHFL, // warp shuffle
OP_LAST
};
 
// various instruction-specific modifier definitions Instruction::subOp
// MOV_FINAL marks a MOV originating from an EXPORT (used for placing TEXBARs)
//
// The same numeric value is reused by different opcodes; a sub-op is only
// meaningful together with the opcode named in the macro.
// Function-like macros fully parenthesize their arguments so that compound
// expressions can be passed safely.
#define NV50_IR_SUBOP_MUL_HIGH 1
#define NV50_IR_SUBOP_EMIT_RESTART 1
#define NV50_IR_SUBOP_LDC_IL 1
#define NV50_IR_SUBOP_LDC_IS 2
#define NV50_IR_SUBOP_LDC_ISL 3
#define NV50_IR_SUBOP_SHIFT_WRAP 1
#define NV50_IR_SUBOP_EMU_PRERET 1
#define NV50_IR_SUBOP_TEXBAR(n) (n)
#define NV50_IR_SUBOP_MOV_FINAL 1
#define NV50_IR_SUBOP_EXTBF_REV 1
#define NV50_IR_SUBOP_BFIND_SAMT 1
#define NV50_IR_SUBOP_RCPRSQ_64H 1
#define NV50_IR_SUBOP_PERMT_F4E 1
#define NV50_IR_SUBOP_PERMT_B4E 2
#define NV50_IR_SUBOP_PERMT_RC8 3
#define NV50_IR_SUBOP_PERMT_ECL 4
#define NV50_IR_SUBOP_PERMT_ECR 5
#define NV50_IR_SUBOP_PERMT_RC16 6
#define NV50_IR_SUBOP_BAR_SYNC 0
#define NV50_IR_SUBOP_BAR_ARRIVE 1
#define NV50_IR_SUBOP_BAR_RED_AND 2
#define NV50_IR_SUBOP_BAR_RED_OR 3
#define NV50_IR_SUBOP_BAR_RED_POPC 4
// MEMBAR sub-op = direction (bits 0-1) | scope (bits 2 and up)
#define NV50_IR_SUBOP_MEMBAR_L 1
#define NV50_IR_SUBOP_MEMBAR_S 2
#define NV50_IR_SUBOP_MEMBAR_M 3
#define NV50_IR_SUBOP_MEMBAR_CTA (0 << 2)
#define NV50_IR_SUBOP_MEMBAR_GL (1 << 2)
#define NV50_IR_SUBOP_MEMBAR_SYS (2 << 2)
#define NV50_IR_SUBOP_MEMBAR_DIR(m) ((m) & 0x3)
#define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3)
#define NV50_IR_SUBOP_MEMBAR(d,s) \
   (NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s)
#define NV50_IR_SUBOP_ATOM_ADD 0
#define NV50_IR_SUBOP_ATOM_MIN 1
#define NV50_IR_SUBOP_ATOM_MAX 2
#define NV50_IR_SUBOP_ATOM_INC 3
#define NV50_IR_SUBOP_ATOM_DEC 4
#define NV50_IR_SUBOP_ATOM_AND 5
#define NV50_IR_SUBOP_ATOM_OR 6
#define NV50_IR_SUBOP_ATOM_XOR 7
#define NV50_IR_SUBOP_ATOM_CAS 8
#define NV50_IR_SUBOP_ATOM_EXCH 9
#define NV50_IR_SUBOP_CCTL_IV 5
#define NV50_IR_SUBOP_CCTL_IVALL 6
#define NV50_IR_SUBOP_SUST_IGN 0
#define NV50_IR_SUBOP_SUST_TRAP 1
#define NV50_IR_SUBOP_SUST_SDCL 3
#define NV50_IR_SUBOP_SULD_ZERO 0
#define NV50_IR_SUBOP_SULD_TRAP 1
#define NV50_IR_SUBOP_SULD_SDCL 3
#define NV50_IR_SUBOP_SUBFM_3D 1
// SUCLAMP sub-op = mode base (0, 5 or 10) + r, with bit 4 set when d == 2
#define NV50_IR_SUBOP_SUCLAMP_2D 0x10
#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_PIXLD_COUNT 0
#define NV50_IR_SUBOP_PIXLD_COVMASK 1
#define NV50_IR_SUBOP_PIXLD_COVERED 2
#define NV50_IR_SUBOP_PIXLD_OFFSET 3
#define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4
#define NV50_IR_SUBOP_PIXLD_SAMPLEID 5
#define NV50_IR_SUBOP_SHFL_IDX 0
#define NV50_IR_SUBOP_SHFL_UP 1
#define NV50_IR_SUBOP_SHFL_DOWN 2
#define NV50_IR_SUBOP_SHFL_BFLY 3
#define NV50_IR_SUBOP_MADSP_SD 0xffff
// Yes, we could represent those with DataType.
// Or put the type into operation and have a couple 1000 values in that enum.
// This will have to do for now.
// The bitfields are supposed to correspond to nve4 ISA.
#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a))
// Vector sub-ops: bits 14-15 hold the selector returned by _Vn()
// (0 for V1, 1 for V2, 2 for V4).
#define NV50_IR_SUBOP_V1(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x0000)
#define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000)
#define NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000)
#define NV50_IR_SUBOP_Vn(n) ((n) >> 14)
 
// Operand / result data types, used for Instruction::dType and sType and
// for Storage::type.
enum DataType
{
TYPE_NONE,
TYPE_U8,
TYPE_S8,
TYPE_U16,
TYPE_S16,
TYPE_U32,
TYPE_S32,
TYPE_U64, // 64 bit operations are only lowered after register allocation
TYPE_S64,
TYPE_F16,
TYPE_F32,
TYPE_F64,
TYPE_B96,
TYPE_B128
};
 
// Condition codes (see Instruction::cc and CmpInstruction::setCond).
// Several enumerators are deliberate aliases of one value; which name
// applies depends on the register file being tested, as noted per entry.
// Values 9..14 are the codes 1..6 with bit 3 (CC_U) set.
// NOTE(review): the *U names presumably denote the "or unordered" variants
// of float comparisons - confirm against the code emitters.
enum CondCode
{
CC_FL = 0,
CC_NEVER = CC_FL, // when used with FILE_FLAGS
CC_LT = 1,
CC_EQ = 2,
CC_NOT_P = CC_EQ, // when used with FILE_PREDICATE
CC_LE = 3,
CC_GT = 4,
CC_NE = 5,
CC_P = CC_NE,
CC_GE = 6,
CC_TR = 7,
CC_ALWAYS = CC_TR,
CC_U = 8,
CC_LTU = 9,
CC_EQU = 10,
CC_LEU = 11,
CC_GTU = 12,
CC_NEU = 13,
CC_GEU = 14,
CC_NO = 0x10,
CC_NC = 0x11,
CC_NS = 0x12,
CC_NA = 0x13,
CC_A = 0x14,
CC_S = 0x15,
CC_C = 0x16,
CC_O = 0x17
};
 
// Rounding modes (see Instruction::rnd).  The first four round the value
// itself; the *I variants round to an integer.
enum RoundMode
{
ROUND_N, // nearest
ROUND_M, // towards -inf
ROUND_Z, // towards 0
ROUND_P, // towards +inf
ROUND_NI, // nearest integer
ROUND_MI, // to integer towards -inf
ROUND_ZI, // to integer towards 0
ROUND_PI, // to integer towards +inf
};
 
// Cache modes (see Instruction::cache).  CACHE_WB and CACHE_WT are aliases
// sharing the values of CACHE_CA and CACHE_CV respectively.
enum CacheMode
{
CACHE_CA, // cache at all levels
CACHE_WB = CACHE_CA, // cache write back
CACHE_CG, // cache at global level
CACHE_CS, // cache streaming
CACHE_CV, // cache as volatile
CACHE_WT = CACHE_CV // cache write-through
};
 
// Register files and memory spaces a Value can live in (Storage::file).
// LAST_REGISTER_FILE aliases FILE_ADDRESS, i.e. FILE_GPR..FILE_ADDRESS are
// the register files; DATA_FILE_COUNT is the number of enumerators.
enum DataFile
{
FILE_NULL = 0,
FILE_GPR,
FILE_PREDICATE, // boolean predicate
FILE_FLAGS, // zero/sign/carry/overflow bits
FILE_ADDRESS,
LAST_REGISTER_FILE = FILE_ADDRESS,
FILE_IMMEDIATE,
FILE_MEMORY_CONST,
FILE_SHADER_INPUT,
FILE_SHADER_OUTPUT,
FILE_MEMORY_GLOBAL,
FILE_MEMORY_SHARED,
FILE_MEMORY_LOCAL,
FILE_SYSTEM_VALUE,
DATA_FILE_COUNT
};
 
// Texture target kinds.
// The enumerator values index TexInstruction::Target::descTable, which is
// sized by TEX_TARGET_COUNT, so the order here and the row order of that
// table must stay in sync.
enum TexTarget
{
TEX_TARGET_1D,
TEX_TARGET_2D,
TEX_TARGET_2D_MS,
TEX_TARGET_3D,
TEX_TARGET_CUBE,
TEX_TARGET_1D_SHADOW,
TEX_TARGET_2D_SHADOW,
TEX_TARGET_CUBE_SHADOW,
TEX_TARGET_1D_ARRAY,
TEX_TARGET_2D_ARRAY,
TEX_TARGET_2D_MS_ARRAY,
TEX_TARGET_CUBE_ARRAY,
TEX_TARGET_1D_ARRAY_SHADOW,
TEX_TARGET_2D_ARRAY_SHADOW,
TEX_TARGET_RECT,
TEX_TARGET_RECT_SHADOW,
TEX_TARGET_CUBE_ARRAY_SHADOW,
TEX_TARGET_BUFFER,
TEX_TARGET_COUNT
};
 
// System value semantics; stored in Storage::data.sv.sv and set through
// Symbol::setSV().  SV_LAST must stay the final enumerator.
enum SVSemantic
{
SV_POSITION, // WPOS
SV_VERTEX_ID,
SV_INSTANCE_ID,
SV_INVOCATION_ID,
SV_PRIMITIVE_ID,
SV_VERTEX_COUNT, // gl_PatchVerticesIn
SV_LAYER,
SV_VIEWPORT_INDEX,
SV_YDIR,
SV_FACE,
SV_POINT_SIZE,
SV_POINT_COORD,
SV_CLIP_DISTANCE,
SV_SAMPLE_INDEX,
SV_SAMPLE_POS,
SV_SAMPLE_MASK,
SV_TESS_FACTOR,
SV_TESS_COORD,
SV_TID,
SV_CTAID,
SV_NTID,
SV_GRIDID,
SV_NCTAID,
SV_LANEID,
SV_PHYSID,
SV_NPHYSID,
SV_CLOCK,
SV_LBASE,
SV_SBASE,
SV_VERTEX_STRIDE,
SV_INVOCATION_INFO,
SV_UNDEFINED,
SV_LAST
};
 
class Program;
class Function;
class BasicBlock;
 
class Target;
 
class Instruction;
class CmpInstruction;
class TexInstruction;
class FlowInstruction;
 
class Value;
class LValue;
class Symbol;
class ImmediateValue;
 
// Describes where a Value is stored (Value::reg): its file, file index,
// size and type, plus a union whose active member depends on the file -
// an immediate, a memory offset, a register id or a system value.
struct Storage
{
DataFile file;
int8_t fileIndex; // signed, may be indirect for CONST[]
uint8_t size; // this should match the Instruction type's size
DataType type; // mainly for pretty printing
union {
uint64_t u64; // immediate values
uint32_t u32;
uint16_t u16;
uint8_t u8;
int64_t s64;
int32_t s32;
int16_t s16;
int8_t s8;
float f32;
double f64;
int32_t offset; // offset from 0 (base of address space)
int32_t id; // register id (< 0 if virtual/unassigned, in units <= 4)
struct {
SVSemantic sv; // which system value
int index; // index within that system value
} sv;
} data;
};
 
// Source modifier bits, stored in Modifier::bits.
// precedence: NOT after SAT after NEG after ABS
#define NV50_IR_MOD_ABS (1 << 0)
#define NV50_IR_MOD_NEG (1 << 1)
#define NV50_IR_MOD_SAT (1 << 2)
#define NV50_IR_MOD_NOT (1 << 3)
#define NV50_IR_MOD_NEG_ABS (NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)

// Interpolation settings, stored in Instruction::ipa: bits 0-1 select the
// interpolation mode (Instruction::getInterpMode()), bits 2-3 the sample
// mode (Instruction::getSampleMode()).
#define NV50_IR_INTERP_MODE_MASK 0x3
#define NV50_IR_INTERP_LINEAR (0 << 0)
#define NV50_IR_INTERP_PERSPECTIVE (1 << 0)
#define NV50_IR_INTERP_FLAT (2 << 0)
#define NV50_IR_INTERP_SC (3 << 0) // what exactly is that ?
#define NV50_IR_INTERP_SAMPLE_MASK 0xc
#define NV50_IR_INTERP_DEFAULT (0 << 2)
#define NV50_IR_INTERP_CENTROID (1 << 2)
#define NV50_IR_INTERP_OFFSET (2 << 2)
#define NV50_IR_INTERP_SAMPLEID (3 << 2)
 
// do we really want this to be a class ?
class Modifier
{
public:
Modifier() : bits(0) { }
Modifier(unsigned int m) : bits(m) { }
Modifier(operation op);
 
// @return new Modifier applying a after b (asserts if unrepresentable)
Modifier operator*(const Modifier) const;
Modifier operator*=(const Modifier m) { *this = *this * m; return *this; }
Modifier operator==(const Modifier m) const { return m.bits == bits; }
Modifier operator!=(const Modifier m) const { return m.bits != bits; }
 
inline Modifier operator&(const Modifier m) const { return bits & m.bits; }
inline Modifier operator|(const Modifier m) const { return bits | m.bits; }
inline Modifier operator^(const Modifier m) const { return bits ^ m.bits; }
 
operation getOp() const;
 
inline int neg() const { return (bits & NV50_IR_MOD_NEG) ? 1 : 0; }
inline int abs() const { return (bits & NV50_IR_MOD_ABS) ? 1 : 0; }
 
inline operator bool() const { return bits ? true : false; }
 
void applyTo(ImmediateValue &imm) const;
 
int print(char *buf, size_t size) const;
 
private:
uint8_t bits;
};
 
// A use of a Value as source operand: holds the referenced Value, the
// Instruction the reference belongs to, a source Modifier and the
// indirect-addressing information for that operand.
class ValueRef
{
public:
ValueRef(Value * = NULL);
ValueRef(const ValueRef&);
~ValueRef();

// true if a value is currently referenced
inline bool exists() const { return value != NULL; }

void set(Value *);
void set(const ValueRef&);
inline Value *get() const { return value; }
inline Value *rep() const;

inline Instruction *getInsn() const { return insn; }
inline void setInsn(Instruction *inst) { insn = inst; }

// true if dimension @dim of this operand is addressed indirectly
inline bool isIndirect(int dim) const { return indirect[dim] >= 0; }
inline const ValueRef *getIndirect(int dim) const;

inline DataFile getFile() const;
inline unsigned getSize() const;

// SSA: return eventual (traverse MOVs) literal value, if it exists
bool getImmediate(ImmediateValue&) const;

public:
Modifier mod;
int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src(indirect[i])
uint8_t swizzle;

bool usedAsPtr; // for printing

private:
Value *value; // referenced value, NULL if unset
Instruction *insn; // instruction this reference belongs to
};
 
// A definition (result operand) of an instruction: holds the defined Value,
// the defining Instruction and the pre-SSA value it originated from.
class ValueDef
{
public:
ValueDef(Value * = NULL);
ValueDef(const ValueDef&);
~ValueDef();

// true if a value is currently defined
inline bool exists() const { return value != NULL; }

inline Value *get() const { return value; }
inline Value *rep() const;
void set(Value *);
bool mayReplace(const ValueRef &);
void replace(const ValueRef &, bool doSet); // replace all uses of the old value

inline Instruction *getInsn() const { return insn; }
inline void setInsn(Instruction *inst) { insn = inst; }

inline DataFile getFile() const;
inline unsigned getSize() const;

inline void setSSA(LValue *);
inline const LValue *preSSA() const;

private:
Value *value; // should make this LValue * ...
LValue *origin; // pre SSA value
Instruction *insn; // instruction this definition belongs to
};
 
class Value
{
public:
Value();
virtual ~Value() { }
 
virtual Value *clone(ClonePolicy<Function>&) const = 0;
 
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const = 0;
 
virtual bool equals(const Value *, bool strict = false) const;
virtual bool interfers(const Value *) const;
virtual bool isUniform() const { return true; }
 
inline Value *rep() const { return join; }
 
inline Instruction *getUniqueInsn() const;
inline Instruction *getInsn() const; // use when uniqueness is certain
 
inline int refCount() { return uses.size(); }
 
inline LValue *asLValue();
inline Symbol *asSym();
inline ImmediateValue *asImm();
inline const Symbol *asSym() const;
inline const ImmediateValue *asImm() const;
 
inline bool inFile(DataFile f) { return reg.file == f; }
 
static inline Value *get(Iterator&);
 
std::tr1::unordered_set<ValueRef *> uses;
std::list<ValueDef *> defs;
typedef std::tr1::unordered_set<ValueRef *>::iterator UseIterator;
typedef std::tr1::unordered_set<ValueRef *>::const_iterator UseCIterator;
typedef std::list<ValueDef *>::iterator DefIterator;
typedef std::list<ValueDef *>::const_iterator DefCIterator;
 
int id;
Storage reg;
 
// TODO: these should be in LValue:
Interval livei;
Value *join;
};
 
// A Value created for a Function in a given DataFile; carries the
// per-value flags used by SSA construction and register allocation.
class LValue : public Value
{
public:
LValue(Function *, DataFile file);
LValue(Function *, LValue *);
~LValue() { }

virtual bool isUniform() const;

virtual LValue *clone(ClonePolicy<Function>&) const;

virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;

public:
unsigned compMask : 8; // compound/component mask
unsigned compound : 1; // used by RA, value involved in split/merge
unsigned ssa : 1;
unsigned fixedReg : 1; // set & used by RA, earlier just use (id < 0)
unsigned noSpill : 1; // do not spill (e.g. if spill temporary already)
};
 
// A Value identified by a file and file index (FILE_MEMORY_CONST by
// default).  A Symbol may refer to a base Symbol when it represents an
// element of an array.
class Symbol : public Value
{
public:
Symbol(Program *, DataFile file = FILE_MEMORY_CONST, ubyte fileIdx = 0);
~Symbol() { }

virtual Symbol *clone(ClonePolicy<Function>&) const;

virtual bool equals(const Value *that, bool strict) const;

virtual bool isUniform() const;

virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;

// print with indirect values
int print(char *, size_t, Value *, Value *, DataType ty = TYPE_NONE) const;

// change the file and file index this symbol lives in
inline void setFile(DataFile file, ubyte fileIndex = 0)
{
reg.file = file;
reg.fileIndex = fileIndex;
}

inline void setOffset(int32_t offset);
inline void setAddress(Symbol *base, int32_t offset);
inline void setSV(SVSemantic sv, uint32_t idx = 0);

inline const Symbol *getBase() const { return baseSym; }

private:
Symbol *baseSym; // array base for Symbols representing array elements
};
 
// A literal constant.  Offers type queries and the arithmetic operators
// used for constant folding.
class ImmediateValue : public Value
{
public:
ImmediateValue() { }
ImmediateValue(Program *, uint32_t);
ImmediateValue(Program *, float);
ImmediateValue(Program *, double);
// NOTE: not added to program with this constructor
// (presumably because it receives no Program * - confirm)
ImmediateValue(const ImmediateValue *, DataType ty);
~ImmediateValue() { };

virtual ImmediateValue *clone(ClonePolicy<Function>&) const;

virtual bool equals(const Value *that, bool strict) const;

// these only work if 'type' is valid (we mostly use untyped literals):
bool isInteger(const int ival) const; // ival is cast to this' type
bool isNegative() const;
bool isPow2() const;

void applyLog2();

// for constant folding:
ImmediateValue operator+(const ImmediateValue&) const;
ImmediateValue operator-(const ImmediateValue&) const;
ImmediateValue operator*(const ImmediateValue&) const;
ImmediateValue operator/(const ImmediateValue&) const;

ImmediateValue& operator=(const ImmediateValue&); // only sets value !

bool compare(CondCode cc, float fval) const;

virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
};
 
// Base class of all IR instructions.
// An instruction has an opcode, destination/source types, a list of
// definitions and a list of sources (both without gaps), and a set of
// per-instruction flags.  Instructions are linked through next/prev and
// belong to a BasicBlock (bb).  Cmp, Tex and Flow instructions derive from
// this class; the as*() helpers give access to those subclasses.
class Instruction
{
public:
Instruction();
Instruction(Function *, operation, DataType);
virtual ~Instruction();

virtual Instruction *clone(ClonePolicy<Function>&,
Instruction * = NULL) const;

void setDef(int i, Value *);
void setSrc(int s, Value *);
void setSrc(int s, const ValueRef&);
void swapSources(int a, int b);
void moveSources(int s, int delta);
bool setIndirect(int s, int dim, Value *);

// unchecked access to the s-th source / definition
inline ValueRef& src(int s) { return srcs[s]; }
inline ValueDef& def(int s) { return defs[s]; }
inline const ValueRef& src(int s) const { return srcs[s]; }
inline const ValueDef& def(int s) const { return defs[s]; }

inline Value *getDef(int d) const { return defs[d].get(); }
inline Value *getSrc(int s) const { return srcs[s].get(); }
inline Value *getIndirect(int s, int dim) const;

// bounds-checked: true if slot d exists and holds a value
inline bool defExists(unsigned d) const
{
return d < defs.size() && defs[d].exists();
}
// bounds-checked: true if slot s exists and holds a value
inline bool srcExists(unsigned s) const
{
return s < srcs.size() && srcs[s].exists();
}

inline bool constrainedDefs() const;

bool setPredicate(CondCode ccode, Value *);
inline Value *getPredicate() const;
bool writesPredicate() const;
inline bool isPredicated() const { return predSrc >= 0; }

inline void setFlagsSrc(int s, Value *);
inline void setFlagsDef(int d, Value *);
inline bool usesFlags() const { return flagsSrc >= 0; }

// the parameterless overloads return the number of slots
unsigned int defCount() const { return defs.size(); };
unsigned int defCount(unsigned int mask, bool singleFile = false) const;
unsigned int srcCount() const { return srcs.size(); };
unsigned int srcCount(unsigned int mask, bool singleFile = false) const;

// save & remove / set indirect[0,1] and predicate source
void takeExtraSources(int s, Value *[3]);
void putExtraSources(int s, Value *[3]);

// set destination and source type to the same type
inline void setType(DataType type) { dType = sType = type; }

inline void setType(DataType dtype, DataType stype)
{
dType = dtype;
sType = stype;
}

// every opcode that sorts before OP_MOV is a pseudo instruction
inline bool isPseudo() const { return op < OP_MOV; }
bool isDead() const;
bool isNop() const;
bool isCommutationLegal(const Instruction *) const; // must be adjacent !
bool isActionEqual(const Instruction *) const;
bool isResultEqual(const Instruction *) const;

void print() const;

inline CmpInstruction *asCmp();
inline TexInstruction *asTex();
inline FlowInstruction *asFlow();
inline const TexInstruction *asTex() const;
inline const CmpInstruction *asCmp() const;
inline const FlowInstruction *asFlow() const;

public:
Instruction *next;
Instruction *prev;
int id;
int serial; // CFG order

operation op;
DataType dType; // destination or defining type
DataType sType; // source or secondary type
CondCode cc;
RoundMode rnd;
CacheMode cache;

uint16_t subOp; // quadop, 1 for mul-high, etc.

unsigned encSize : 4; // encoding size in bytes
unsigned saturate : 1; // to [0.0f, 1.0f]
unsigned join : 1; // converge control flow (use OP_JOIN until end)
unsigned fixed : 1; // prevent dead code elimination
unsigned terminator : 1; // end of basic block
unsigned ftz : 1; // flush denormal to zero
unsigned dnz : 1; // denormals, NaN are zero
unsigned ipa : 4; // interpolation mode
unsigned lanes : 4;
unsigned perPatch : 1;
unsigned exit : 1; // terminate program after insn
unsigned mask : 4; // for vector ops

int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor

int8_t predSrc; // index of the predicate source, < 0 if not predicated
int8_t flagsDef;
int8_t flagsSrc; // index of the flags source, < 0 if flags are not used

uint32_t sched; // scheduling data (NOTE: maybe move to separate storage)

BasicBlock *bb;

protected:
std::deque<ValueDef> defs; // no gaps !
std::deque<ValueRef> srcs; // no gaps !

// instruction specific methods:
// (don't want to subclass, would need more constructors and memory pools)
public:
inline void setInterpolate(unsigned int mode) { ipa = mode; }

// low two bits of ipa: NV50_IR_INTERP_MODE_MASK
unsigned int getInterpMode() const { return ipa & 0x3; }
// bits 2-3 of ipa: NV50_IR_INTERP_SAMPLE_MASK
unsigned int getSampleMode() const { return ipa & 0xc; }

private:
void init();
};
 
// Kinds of texture query; stored in TexInstruction::tex.query.
enum TexQuery
{
TXQ_DIMS,
TXQ_TYPE,
TXQ_SAMPLE_POSITION,
TXQ_FILTER,
TXQ_LOD,
TXQ_WRAP,
TXQ_BORDER_COLOUR
};
 
class TexInstruction : public Instruction
{
public:
class Target
{
public:
Target(TexTarget targ = TEX_TARGET_2D) : target(targ) { }
 
const char *getName() const { return descTable[target].name; }
unsigned int getArgCount() const { return descTable[target].argc; }
unsigned int getDim() const { return descTable[target].dim; }
int isArray() const { return descTable[target].array ? 1 : 0; }
int isCube() const { return descTable[target].cube ? 1 : 0; }
int isShadow() const { return descTable[target].shadow ? 1 : 0; }
int isMS() const {
return target == TEX_TARGET_2D_MS || target == TEX_TARGET_2D_MS_ARRAY; }
void clearMS() {
if (isMS()) {
if (isArray())
target = TEX_TARGET_2D_ARRAY;
else
target = TEX_TARGET_2D;
}
}
 
Target& operator=(TexTarget targ)
{
assert(targ < TEX_TARGET_COUNT);
target = targ;
return *this;
}
 
inline bool operator==(TexTarget targ) const { return target == targ; }
inline bool operator!=(TexTarget targ) const { return target != targ; }
 
enum TexTarget getEnum() const { return target; }
 
private:
struct Desc
{
char name[19];
uint8_t dim;
uint8_t argc;
bool array;
bool cube;
bool shadow;
};
 
static const struct Desc descTable[TEX_TARGET_COUNT];
 
private:
enum TexTarget target;
};
 
public:
TexInstruction(Function *, operation);
virtual ~TexInstruction();
 
virtual TexInstruction *clone(ClonePolicy<Function>&,
Instruction * = NULL) const;
 
inline void setTexture(Target targ, uint8_t r, uint8_t s)
{
tex.r = r;
tex.s = s;
tex.target = targ;
}
 
void setIndirectR(Value *);
void setIndirectS(Value *);
inline Value *getIndirectR() const;
inline Value *getIndirectS() const;
 
public:
struct {
Target target;
 
uint16_t r;
uint16_t s;
int8_t rIndirectSrc;
int8_t sIndirectSrc;
 
uint8_t mask;
uint8_t gatherComp;
 
bool liveOnly; // only execute on live pixels of a quad (optimization)
bool levelZero;
bool derivAll;
 
int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets
int8_t offset[3]; // only used on nv50
 
enum TexQuery query;
} tex;
 
ValueRef dPdx[3];
ValueRef dPdy[3];
ValueRef offset[4][3];
};
 
// Compare instruction: an Instruction that additionally carries the
// condition code of the comparison.
class CmpInstruction : public Instruction
{
public:
CmpInstruction(Function *, operation);

virtual CmpInstruction *clone(ClonePolicy<Function>&,
Instruction * = NULL) const;

void setCondition(CondCode cond) { setCond = cond; }
CondCode getCondition() const { return setCond; }

public:
CondCode setCond; // comparison condition
};
 
// Control-flow instruction.  The constructor's `target` argument is stored
// as a Function * for OP_CALL and as a BasicBlock * for every other opcode.
class FlowInstruction : public Instruction
{
public:
FlowInstruction(Function *, operation, void *target);

virtual FlowInstruction *clone(ClonePolicy<Function>&,
Instruction * = NULL) const;

public:
unsigned allWarp : 1;
unsigned absolute : 1;
unsigned limit : 1;
unsigned builtin : 1; // true for calls to emulation code
unsigned indirect : 1; // target in src(0)

// Which member is valid depends on the instruction: `builtin` when the
// builtin flag is set, `fn` for OP_CALL, `bb` otherwise.
union {
BasicBlock *bb;
int builtin;
Function *fn;
} target;
};
 
// A basic block of a Function: a list of instructions (phi instructions
// first, then the regular ones) plus its nodes in the control-flow graph
// and the dominator tree.
class BasicBlock
{
public:
BasicBlock(Function *);
~BasicBlock();

BasicBlock *clone(ClonePolicy<Function>&) const;

inline int getId() const { return id; }
inline unsigned int getInsnCount() const { return numInsns; }
// true if the last instruction is flagged as terminator
inline bool isTerminated() const { return exit && exit->terminator; }

bool dominatedBy(BasicBlock *bb);
inline bool reachableBy(const BasicBlock *by, const BasicBlock *term);

// returns mask of conditional out blocks
// e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF
unsigned int initiatesSimpleConditional() const;

public:
Function *getFunction() const { return func; }
Program *getProgram() const { return program; }

Instruction *getEntry() const { return entry; } // first non-phi instruction
Instruction *getPhi() const { return phi; }
// first instruction of the block: the first phi if any, else the entry
Instruction *getFirst() const { return phi ? phi : entry; }
Instruction *getExit() const { return exit; }

void insertHead(Instruction *);
void insertTail(Instruction *);
void insertBefore(Instruction *, Instruction *);
void insertAfter(Instruction *, Instruction *);
void remove(Instruction *);
void permuteAdjacent(Instruction *, Instruction *);

BasicBlock *idom() const;

// NOTE: currently does not rebuild the dominator tree
BasicBlock *splitBefore(Instruction *, bool attach = true);
BasicBlock *splitAfter(Instruction *, bool attach = true);

DLList& getDF() { return df; }
DLList::Iterator iterDF() { return df.iterator(); }

static inline BasicBlock *get(Iterator&);
static inline BasicBlock *get(Graph::Node *);

public:
Graph::Node cfg; // first edge is branch *taken* (the ELSE branch)
Graph::Node dom;

BitSet liveSet;
BitSet defSet;

uint32_t binPos;
uint32_t binSize;

Instruction *joinAt; // for quick reference

bool explicitCont; // loop headers: true if loop contains continue stmts

private:
int id;
DLList df; // exposed through getDF() / iterDF()

Instruction *phi; // first phi instruction, NULL if there is none
Instruction *entry; // first non-phi instruction
Instruction *exit; // last instruction

unsigned int numInsns;

private:
Function *func;
Program *program;

void splitCommon(Instruction *, BasicBlock *, bool attach);
};
 
// A function of a Program: owns its basic blocks, instructions and
// LValues (see the ArrayLists below) together with the control flow graph.
class Function
{
public:
Function(Program *, const char *name, uint32_t label);
~Function();
 
static inline Function *get(Graph::Node *node);
 
inline Program *getProgram() const { return prog; }
inline const char *getName() const { return name; }
inline int getId() const { return id; }
inline uint32_t getLabel() const { return label; }
 
void print();
void printLiveIntervals() const;
void printCFGraph(const char *filePath);
 
// Both return false (and change nothing) if an entry / exit is already set.
bool setEntry(BasicBlock *);
bool setExit(BasicBlock *);
 
unsigned int orderInstructions(ArrayList&);
 
// Register an object in the matching list; id is handed to
// ArrayList::insert by reference (presumably receives the assigned index).
inline void add(BasicBlock *bb, int& id) { allBBlocks.insert(bb, id); }
inline void add(Instruction *insn, int& id) { allInsns.insert(insn, id); }
inline void add(LValue *lval, int& id) { allLValues.insert(lval, id); }
 
inline LValue *getLValue(int id);
 
void buildLiveSets();
void buildDefSets();
bool convertToSSA();
 
public:
std::deque<ValueDef> ins;
std::deque<ValueRef> outs;
std::deque<Value *> clobbers;
 
Graph cfg;
Graph::Node *cfgExit;
Graph *domTree;
Graph::Node call; // node in the call graph
 
BasicBlock **bbArray; // BBs in emission order
int bbCount;
 
unsigned int loopNestingBound;
int regClobberMax;
 
uint32_t binPos;
uint32_t binSize;
 
Value *stackPtr;
 
uint32_t tlsBase; // base address for l[] space (if no stack pointer is used)
uint32_t tlsSize;
 
ArrayList allBBlocks;
ArrayList allInsns;
ArrayList allLValues;
 
private:
void buildLiveSetsPreSSA(BasicBlock *, const int sequence);
void buildDefSetsPreSSA(BasicBlock *bb, const int seq);
 
private:
uint32_t label;
int id;
const char *const name; // pointer is stored as given, the string is not copied
Program *prog;
};
 
// Stages of code generation, in declaration order: before SSA form, in SSA
// form, and after register allocation (RA).
enum CGStage
{
CG_STAGE_PRE_SSA,
CG_STAGE_SSA, // expected directly before register allocation
CG_STAGE_POST_RA
};
 
// A whole shader program: the list of its functions, the call graph, the
// memory pools its IR objects are allocated from, and the generated binary.
class Program
{
public:
// Kind of program being compiled.
enum Type
{
TYPE_VERTEX,
TYPE_TESSELLATION_CONTROL,
TYPE_TESSELLATION_EVAL,
TYPE_GEOMETRY,
TYPE_FRAGMENT,
TYPE_COMPUTE
};
 
Program(Type type, Target *targ);
~Program();
 
void print();
 
Type getType() const { return progType; }
 
// Register / unregister objects in the lists below. Note that del() only
// uses id; the fn argument is ignored.
inline void add(Function *fn, int& id) { allFuncs.insert(fn, id); }
inline void del(Function *fn, int& id) { allFuncs.remove(id); }
inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }
 
// Compilation steps; each reports success as a bool.
bool makeFromTGSI(struct nv50_ir_prog_info *);
bool makeFromSM4(struct nv50_ir_prog_info *);
bool convertToSSA();
bool optimizeSSA(int level);
bool optimizePostRA(int level);
bool registerAllocation();
bool emitBinary(struct nv50_ir_prog_info *);
 
const Target *getTarget() const { return target; }
 
private:
void emitSymbolTable(struct nv50_ir_prog_info *);
 
Type progType;
Target *target;
 
public:
Function *main;
Graph calls;
 
ArrayList allFuncs;
ArrayList allRValues;
 
uint32_t *code;
uint32_t binSize;
uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL
 
int maxGPR;
 
// One pool per IR object type.
MemoryPool mem_Instruction;
MemoryPool mem_CmpInstruction;
MemoryPool mem_TexInstruction;
MemoryPool mem_FlowInstruction;
MemoryPool mem_LValue;
MemoryPool mem_Symbol;
MemoryPool mem_ImmediateValue;
 
uint32_t dbgFlags;
uint8_t optLevel;
 
void *targetPriv; // e.g. to carry information between passes
 
const struct nv50_ir_prog_info *driver; // for driver configuration
 
void releaseInstruction(Instruction *);
void releaseValue(Value *);
};
 
// TODO: add const version
// Base class for passes that walk a Program or a Function: subclasses
// override the visit() hooks, callers start the walk with run().
class Pass
{
public:
// ordered selects CFG order instead of DFS order for the basic blocks;
// skipPhi starts each block at its first non-phi instruction.
// Both return false if a visit(Function *) hook returned false or if the
// err flag was set during the walk.
bool run(Program *, bool ordered = false, bool skipPhi = false);
bool run(Function *, bool ordered = false, bool skipPhi = false);
 
private:
// return false to continue with next entity on next higher level
virtual bool visit(Function *) { return true; }
virtual bool visit(BasicBlock *) { return true; }
virtual bool visit(Instruction *) { return false; }
 
bool doRun(Program *, bool ordered, bool skipPhi);
bool doRun(Function *, bool ordered, bool skipPhi);
 
protected:
bool err; // reset by run(); a set flag makes run() return false
Function *func; // function currently being visited
Program *prog;
};
 
// =============================================================================
 
#include "codegen/nv50_ir_inlines.h"
 
} // namespace nv50_ir
 
#endif // __NV50_IR_H__
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
0,0 → 1,550
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir.h"
 
namespace nv50_ir {
 
// Create an empty function named fnName with the given label and register
// it with program p (which assigns this->id).
Function::Function(Program *p, const char *fnName, uint32_t label)
   : call(this),
     label(label),
     name(fnName),
     prog(p)
{
   // no graph information yet
   domTree = NULL;
   cfgExit = NULL;

   // no emission order yet
   bbCount = 0;
   bbArray = NULL;

   regClobberMax = 0;
   loopNestingBound = 0;

   // nothing emitted yet
   binSize = 0;
   binPos = 0;

   // no stack pointer / local storage yet
   tlsSize = 0;
   tlsBase = 0;
   stackPtr = NULL;

   prog->add(this, id);
}
 
// Unregister the function from its program and destroy everything it owns:
// dominator tree, block array, instructions, LValues and basic blocks.
Function::~Function()
{
   prog->del(this, id);

   // deleting a null pointer is a no-op, so no explicit checks are needed
   delete domTree;
   delete[] bbArray;

   // clear value refs and defs
   ins.clear();
   outs.clear();

   ArrayList::Iterator insnIt = allInsns.iterator();
   while (!insnIt.end()) {
      delete_Instruction(prog, reinterpret_cast<Instruction *>(insnIt.get()));
      insnIt.next();
   }

   ArrayList::Iterator lvalIt = allLValues.iterator();
   while (!lvalIt.end()) {
      delete_Value(prog, reinterpret_cast<LValue *>(lvalIt.get()));
      lvalIt.next();
   }

   ArrayList::Iterator blockIt = allBBlocks.iterator();
   while (!blockIt.end()) {
      delete reinterpret_cast<BasicBlock *>(blockIt.get());
      blockIt.next();
   }
}
 
// Create an empty basic block inside fn and register it with fn (which
// assigns this->id).
BasicBlock::BasicBlock(Function *fn) : cfg(this), dom(this), func(fn)
{
   program = func->getProgram();

   // empty instruction list
   phi = NULL;
   entry = NULL;
   exit = NULL;
   joinAt = NULL;
   numInsns = 0;

   binSize = 0;
   binPos = 0;

   explicitCont = false;

   func->add(this, this->id);
}
 
// The destructor has no work of its own to do so far.
BasicBlock::~BasicBlock()
{
   // nothing yet
}
 
// Create a copy of this block in the function given by pol.context():
// clones every instruction, inserts the copy into that function's CFG and
// re-creates the outgoing edges towards the policy's mapping of each
// successor.
BasicBlock *
BasicBlock::clone(ClonePolicy<Function>& pol) const
{
   BasicBlock *copy = new BasicBlock(pol.context());

   // register the mapping first so clones can refer to the new block
   pol.set(this, copy);

   Instruction *insn = getFirst();
   while (insn) {
      copy->insertTail(insn->clone(pol));
      insn = insn->next;
   }

   pol.context()->cfg.insert(&copy->cfg);

   Graph::EdgeIterator edge = cfg.outgoing();
   while (!edge.end()) {
      BasicBlock *succ = BasicBlock::get(edge.getNode());
      copy->cfg.attach(&pol.get(succ)->cfg, edge.getType());
      edge.next();
   }

   return copy;
}
 
// Return the block owning the parent of this block's dominator tree node,
// or NULL if that node has no parent.
BasicBlock *
BasicBlock::idom() const
{
   Graph::Node *parentNode = dom.parent();
   if (!parentNode)
      return NULL;
   return BasicBlock::get(parentNode);
}
 
// Insert an unlinked instruction at the head of its section of the block:
// a phi goes in front of the existing phis, any other instruction goes in
// front of the first non-phi instruction.
void
BasicBlock::insertHead(Instruction *inst)
{
   assert(inst->next == 0 && inst->prev == 0);

   const bool isPhi = (inst->op == OP_PHI);

   if (isPhi && phi) {
      insertBefore(phi, inst);
      return;
   }
   if (entry) {
      insertBefore(entry, inst);
      return;
   }
   if (!isPhi && phi) {
      insertAfter(exit, inst); // after last phi
      return;
   }

   // the block holds no instruction at all: inst becomes its only one
   assert(!exit);
   if (isPhi)
      phi = inst;
   else
      entry = inst;
   exit = inst;
   inst->bb = this;
   ++numInsns;
}
 
// Insert an unlinked instruction at the tail of its section of the block:
// a phi goes after the last phi (i.e. right before the first non-phi
// instruction), any other instruction goes at the very end.
void
BasicBlock::insertTail(Instruction *inst)
{
   assert(inst->next == 0 && inst->prev == 0);

   const bool isPhi = (inst->op == OP_PHI);

   if (isPhi && entry) {
      insertBefore(entry, inst);
      return;
   }
   if (exit) {
      // a phi can only get here if the block consists of phis exclusively
      assert(!isPhi || phi);
      insertAfter(exit, inst);
      return;
   }

   // the block holds no instruction at all: inst becomes its only one
   assert(!phi);
   if (isPhi)
      phi = inst;
   else
      entry = inst;
   exit = inst;
   inst->bb = this;
   ++numInsns;
}
 
// Link the unlinked instruction p directly in front of q, which must
// already be part of this block, and update the phi / entry markers.
void
BasicBlock::insertBefore(Instruction *q, Instruction *p)
{
   assert(p && q);

   assert(p->next == 0 && p->prev == 0);

   const bool isPhi = (p->op == OP_PHI);

   if (q == entry) {
      if (!isPhi)
         entry = p;      // new first non-phi instruction
      else if (!phi)
         phi = p;        // first phi of the block
   } else if (q == phi) {
      assert(isPhi);
      phi = p;
   }

   // splice p between q's old predecessor and q
   Instruction *const before = q->prev;
   p->prev = before;
   p->next = q;
   if (before)
      before->next = p;
   q->prev = p;

   ++numInsns;
   p->bb = this;
}
 
// Link the unlinked instruction q directly behind p, which must already be
// part of this block, and update the entry / exit markers. A phi may only
// be placed behind another phi.
void
BasicBlock::insertAfter(Instruction *p, Instruction *q)
{
   assert(p && q);
   assert(q->op != OP_PHI || p->op == OP_PHI);

   assert(q->next == 0 && q->prev == 0);

   if (exit == p)
      exit = q;
   // a non-phi placed right behind a phi becomes the entry instruction
   if (q->op != OP_PHI && p->op == OP_PHI)
      entry = q;

   // splice q between p and p's old successor
   Instruction *const after = p->next;
   q->next = after;
   q->prev = p;
   if (after)
      after->prev = q;
   p->next = q;

   ++numInsns;
   q->bb = this;
}
 
// Unlink insn from this block, fixing up the phi / entry / exit markers and
// the instruction count. The instruction itself is not destroyed.
void
BasicBlock::remove(Instruction *insn)
{
   assert(insn->bb == this);

   Instruction *const before = insn->prev;
   Instruction *const after = insn->next;

   // bridge the neighbours over insn
   if (before)
      before->next = after;
   if (after)
      after->prev = before;
   else
      exit = before;

   if (entry == insn) {
      if (after)
         entry = after;
      else if (before && before->op != OP_PHI)
         entry = before;
      else
         entry = NULL;
   }

   if (phi == insn) {
      // the next phi, if there is one, becomes the first phi
      if (after && after->op == OP_PHI)
         phi = after;
      else
         phi = 0;
   }

   --numInsns;
   insn->prev = NULL;
   insn->next = NULL;
   insn->bb = NULL;
}
 
// Swap the positions of two adjacent non-phi instructions of the same
// block. The two may be passed in either order; entry and exit are updated
// if one of them was the first non-phi or the last instruction.
void BasicBlock::permuteAdjacent(Instruction *a, Instruction *b)
{
   assert(a->bb == b->bb);

   // normalize the arguments so that a directly precedes b
   if (a->next != b) {
      Instruction *i = a;
      a = b;
      b = i;
   }
   assert(a->next == b);
   assert(a->op != OP_PHI && b->op != OP_PHI);

   if (b == exit)
      exit = a;
   if (a == entry)
      entry = b;

   // relink the pair so that b comes first
   b->prev = a->prev;
   a->next = b->next;
   b->next = a;
   a->prev = b;

   // fix the back links of the outer neighbours, if any
   if (b->prev)
      b->prev->next = b;
   // a->prev is b at this point and never NULL, so the successor itself has
   // to be checked: it is NULL when b used to be the last instruction.
   if (a->next)
      a->next->prev = a;
}
 
// Shared implementation of splitBefore() / splitAfter(): moves insn and all
// instructions following it into bb, moves all outgoing CFG edges of this
// block over to bb and, if attach is set, connects this block to bb with a
// TREE edge. insn may be NULL, in which case no instruction is moved.
void
BasicBlock::splitCommon(Instruction *insn, BasicBlock *bb, bool attach)
{
bb->entry = insn;
 
// cut the instruction list in front of insn
if (insn) {
exit = insn->prev;
insn->prev = NULL;
}
 
if (exit)
exit->next = NULL;
else
entry = NULL;
 
// hand every outgoing edge over to the new block
while (!cfg.outgoing(true).end()) {
Graph::Edge *e = cfg.outgoing(true).getEdge();
bb->cfg.attach(e->getTarget(), e->getType());
this->cfg.detach(e->getTarget());
}
 
// re-parent the moved instructions; the last one visited ends up as bb->exit
for (; insn; insn = insn->next) {
this->numInsns--;
bb->numInsns++;
insn->bb = bb;
bb->exit = insn;
}
if (attach)
this->cfg.attach(&bb->cfg, Graph::Edge::TREE);
}
 
// Split this block in front of insn (which must not be a phi): insn and
// everything after it move to a newly created block, which is returned.
BasicBlock *
BasicBlock::splitBefore(Instruction *insn, bool attach)
{
   BasicBlock *tail = new BasicBlock(func);
   assert(!insn || insn->op != OP_PHI);

   splitCommon(insn, tail, attach);

   return tail;
}
 
// Split this block behind insn (which must not be a phi): everything after
// insn moves to a newly created block, which also takes over joinAt and is
// returned. With insn == NULL no instruction is moved.
BasicBlock *
BasicBlock::splitAfter(Instruction *insn, bool attach)
{
   BasicBlock *tail = new BasicBlock(func);
   assert(!insn || insn->op != OP_PHI);

   // the join reference travels with the tail block
   tail->joinAt = this->joinAt;
   this->joinAt = NULL;

   Instruction *firstMoved = NULL;
   if (insn)
      firstMoved = insn->next;

   splitCommon(firstMoved, tail, attach);
   return tail;
}
 
// Walk up the dominator tree from this block; returns true if 'that' is
// this block itself or is met on the way to the root.
bool
BasicBlock::dominatedBy(BasicBlock *that)
{
   Graph::Node *const wanted = &that->dom;

   for (Graph::Node *node = &this->dom; node; node = node->parent()) {
      if (node == wanted)
         return true;
   }
   return false;
}
 
unsigned int
BasicBlock::initiatesSimpleConditional() const
{
Graph::Node *out[2];
int n;
Graph::Edge::Type eR;
 
if (cfg.outgoingCount() != 2) // -> if and -> else/endif
return false;
 
n = 0;
for (Graph::EdgeIterator ei = cfg.outgoing(); !ei.end(); ei.next())
out[n++] = ei.getNode();
eR = out[1]->outgoing().getType();
 
// IF block is out edge to the right
if (eR == Graph::Edge::CROSS || eR == Graph::Edge::BACK)
return 0x2;
 
if (out[1]->outgoingCount() != 1) // 0 is IF { RET; }, >1 is more divergence
return 0x0;
// do they reconverge immediately ?
if (out[1]->outgoing().getNode() == out[0])
return 0x1;
if (out[0]->outgoingCount() == 1)
if (out[0]->outgoing().getNode() == out[1]->outgoing().getNode())
return 0x3;
 
return 0x0;
}
 
// Insert bb into the CFG if the graph has no root yet. Returns false and
// leaves the graph untouched if a root already exists.
bool
Function::setEntry(BasicBlock *bb)
{
   const bool hasRoot = cfg.getRoot() != NULL;
   if (!hasRoot)
      cfg.insert(&bb->cfg);
   return !hasRoot;
}
 
// Remember bb's CFG node as the exit node. Returns false and changes
// nothing if an exit node was already set.
bool
Function::setExit(BasicBlock *bb)
{
   const bool hasExit = cfgExit != NULL;
   if (!hasExit)
      cfgExit = &bb->cfg;
   return !hasExit;
}
 
// Refill result with all instructions of the function, visiting the blocks
// in CFG iteration order; each instruction's serial is handed to
// ArrayList::insert. Returns the resulting size of the list.
unsigned int
Function::orderInstructions(ArrayList &result)
{
   result.clear();

   IteratorRef blocks = cfg.iteratorCFG();
   while (!blocks->end()) {
      Graph::Node *node = reinterpret_cast<Graph::Node *>(blocks->get());
      BasicBlock *block = BasicBlock::get(node);

      Instruction *insn = block->getFirst();
      while (insn) {
         result.insert(insn, insn->serial);
         insn = insn->next;
      }
      blocks->next();
   }

   return result.getSize();
}
 
void
Function::buildLiveSets()
{
for (unsigned i = 0; i <= loopNestingBound; ++i)
buildLiveSetsPreSSA(BasicBlock::get(cfg.getRoot()), cfg.nextSequence());
 
for (ArrayList::Iterator bi = allBBlocks.iterator(); !bi.end(); bi.next())
BasicBlock::get(bi)->liveSet.marker = false;
}
 
void
Function::buildDefSets()
{
for (unsigned i = 0; i <= loopNestingBound; ++i)
buildDefSetsPreSSA(BasicBlock::get(cfgExit), cfg.nextSequence());
 
for (ArrayList::Iterator bi = allBBlocks.iterator(); !bi.end(); bi.next())
BasicBlock::get(bi)->liveSet.marker = false;
}
 
// Entry point for running the pass over a whole program: remembers the
// program, resets the error flag and starts the walk.
bool
Pass::run(Program *prog, bool ordered, bool skipPhi)
{
   err = false;
   this->prog = prog;

   const bool ok = doRun(prog, ordered, skipPhi);
   return ok;
}
 
// Run the pass on every function reached by a DFS over the program's call
// graph. Stops and returns false as soon as one function run returns
// false; otherwise the result is the inverted error flag.
bool
Pass::doRun(Program *prog, bool ordered, bool skipPhi)
{
   IteratorRef calls = prog->calls.iteratorDFS(false);
   while (!calls->end()) {
      Graph::Node *node = reinterpret_cast<Graph::Node *>(calls->get());
      const bool ok = doRun(Function::get(node), ordered, skipPhi);
      if (!ok)
         return false;
      calls->next();
   }
   return !err;
}
 
// Entry point for running the pass over a single function: remembers the
// function's program, resets the error flag and starts the walk.
bool
Pass::run(Function *func, bool ordered, bool skipPhi)
{
   err = false;
   prog = func->getProgram();

   const bool ok = doRun(func, ordered, skipPhi);
   return ok;
}
 
// Visit a function, then its basic blocks (in CFG order if 'ordered' is
// set, DFS order otherwise) and the instructions of each block (skipping
// the phis if skipPhi is set). A false result of visit(Function *) aborts
// with false, a false result of visit(BasicBlock *) ends the block walk, a
// false result of visit(Instruction *) ends the walk of the current block.
bool
Pass::doRun(Function *func, bool ordered, bool skipPhi)
{
   IteratorRef blocks;

   this->func = func;
   if (!visit(func))
      return false;

   blocks = ordered ? func->cfg.iteratorCFG() : func->cfg.iteratorDFS();

   while (!blocks->end()) {
      BasicBlock *block =
         BasicBlock::get(reinterpret_cast<Graph::Node *>(blocks->get()));
      if (!visit(block))
         break;

      Instruction *cur = skipPhi ? block->getEntry() : block->getFirst();
      while (cur != NULL) {
         // the successor is read before the visit (presumably because the
         // visit may unlink or replace the current instruction)
         Instruction *const following = cur->next;
         if (!visit(cur))
            break;
         cur = following;
      }
      blocks->next();
   }

   return !err;
}
 
void
Function::printCFGraph(const char *filePath)
{
FILE *out = fopen(filePath, "a");
if (!out) {
ERROR("failed to open file: %s\n", filePath);
return;
}
INFO("printing control flow graph to: %s\n", filePath);
 
fprintf(out, "digraph G {\n");
 
for (IteratorRef it = cfg.iteratorDFS(); !it->end(); it->next()) {
BasicBlock *bb = BasicBlock::get(
reinterpret_cast<Graph::Node *>(it->get()));
int idA = bb->getId();
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
int idB = BasicBlock::get(ei.getNode())->getId();
switch (ei.getType()) {
case Graph::Edge::TREE:
fprintf(out, "\t%i -> %i;\n", idA, idB);
break;
case Graph::Edge::FORWARD:
fprintf(out, "\t%i -> %i [color=green];\n", idA, idB);
break;
case Graph::Edge::CROSS:
fprintf(out, "\t%i -> %i [color=red];\n", idA, idB);
break;
case Graph::Edge::BACK:
fprintf(out, "\t%i -> %i;\n", idA, idB);
break;
case Graph::Edge::DUMMY:
fprintf(out, "\t%i -> %i [style=dotted];\n", idA, idB);
break;
default:
assert(0);
break;
}
}
}
 
fprintf(out, "}\n");
fclose(out);
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
0,0 → 1,615
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_build_util.h"
 
namespace nv50_ir {
 
// Construct a builder that is not bound to any program yet.
BuildUtil::BuildUtil()
{
   Program *const none = NULL;
   init(none);
}
 
// Construct a builder bound to the given program.
BuildUtil::BuildUtil(Program *prog)
{
   this->init(prog);
}
 
// Bind the builder to prog and reset its state: no current function, block
// or insert position, and an empty immediate table.
void
BuildUtil::init(Program *prog)
{
   this->prog = prog;

   // no insertion point selected
   pos = NULL;
   bb = NULL;
   func = NULL;

   // empty immediate table
   immCount = 0;
   memset(imms, 0, sizeof(imms));
}
 
// Enter imm into the immediate table, keyed by the hash of its 32-bit
// value and probing linearly for a free slot. Nothing is added once the
// table holds more than 3/4 of NV50_IR_BUILD_IMM_HT_SIZE entries.
void
BuildUtil::addImmediate(ImmediateValue *imm)
{
   const bool tooFull = immCount > (NV50_IR_BUILD_IMM_HT_SIZE * 3) / 4;
   if (tooFull)
      return;

   unsigned int slot;
   for (slot = u32Hash(imm->reg.data.u32); imms[slot];
        slot = (slot + 1) % NV50_IR_BUILD_IMM_HT_SIZE) {
      // occupied, try the next slot
   }

   imms[slot] = imm;
   ++immCount;
}
 
// Create an instruction of the given operation and type with one
// definition and one source, insert it at the current position and
// return it.
Instruction *
BuildUtil::mkOp1(operation op, DataType ty, Value *dst, Value *src)
{
   Instruction *unary = new_Instruction(func, op, ty);

   unary->setDef(0, dst);
   unary->setSrc(0, src);
   insert(unary);

   return unary;
}
 
// Create an instruction of the given operation and type with one
// definition and two sources, insert it at the current position and
// return it.
Instruction *
BuildUtil::mkOp2(operation op, DataType ty, Value *dst,
                 Value *src0, Value *src1)
{
   Instruction *binary = new_Instruction(func, op, ty);

   binary->setDef(0, dst);
   binary->setSrc(0, src0);
   binary->setSrc(1, src1);
   insert(binary);

   return binary;
}
 
// Create an instruction of the given operation and type with one
// definition and three sources, insert it at the current position and
// return it.
Instruction *
BuildUtil::mkOp3(operation op, DataType ty, Value *dst,
                 Value *src0, Value *src1, Value *src2)
{
   Instruction *ternary = new_Instruction(func, op, ty);

   ternary->setDef(0, dst);
   ternary->setSrc(0, src0);
   ternary->setSrc(1, src1);
   ternary->setSrc(2, src2);
   insert(ternary);

   return ternary;
}
 
// Create and insert an OP_LOAD of type ty that reads the symbol mem into
// dst. If ptr is non-NULL it is set as the indirect of source 0.
Instruction *
BuildUtil::mkLoad(DataType ty, Value *dst, Symbol *mem, Value *ptr)
{
   Instruction *load = new_Instruction(func, OP_LOAD, ty);

   load->setDef(0, dst);
   load->setSrc(0, mem);
   if (ptr != NULL)
      load->setIndirect(0, 0, ptr);
   insert(load);

   return load;
}
 
// Create and insert a store instruction (operation op, type ty) with the
// symbol mem as source 0 and the stored value as source 1. If ptr is
// non-NULL it is set as the indirect of source 0.
Instruction *
BuildUtil::mkStore(operation op, DataType ty, Symbol *mem, Value *ptr,
                   Value *stVal)
{
   Instruction *store = new_Instruction(func, op, ty);

   store->setSrc(0, mem);
   store->setSrc(1, stVal);
   if (ptr != NULL)
      store->setIndirect(0, 0, ptr);
   insert(store);

   return store;
}
 
// Create an OP_VFETCH of type ty from a new symbol in 'file' at 'offset'
// into dst; attrRel and primRel are set as indirects 0 and 1 of source 0.
Instruction *
BuildUtil::mkFetch(Value *dst, DataType ty, DataFile file, int32_t offset,
                   Value *attrRel, Value *primRel)
{
   Symbol *source = mkSymbol(file, 0, ty, offset);

   // mkOp1 has already inserted the instruction
   Instruction *fetch = mkOp1(OP_VFETCH, ty, dst, source);

   fetch->setIndirect(0, 0, attrRel);
   fetch->setIndirect(0, 1, primRel);

   return fetch;
}
 
// Create an interpolation instruction reading the shader input at 'offset'
// into dst. The mode bits select the variant: flat uses OP_LINTERP with
// TYPE_U32, perspective uses OP_PINTERP with TYPE_F32, everything else
// OP_LINTERP with TYPE_F32. rel is set as the indirect of source 0.
Instruction *
BuildUtil::mkInterp(unsigned mode, Value *dst, int32_t offset, Value *rel)
{
   const unsigned interpMode = mode & NV50_IR_INTERP_MODE_MASK;

   DataType ty = TYPE_F32;
   operation op = OP_LINTERP;

   if (interpMode == NV50_IR_INTERP_FLAT) {
      ty = TYPE_U32;
   } else if (interpMode == NV50_IR_INTERP_PERSPECTIVE) {
      op = OP_PINTERP;
   }

   Symbol *input = mkSymbol(FILE_SHADER_INPUT, 0, ty, offset);

   Instruction *interp = mkOp1(op, ty, dst, input);
   interp->setIndirect(0, 0, rel);
   interp->setInterpolate(mode);

   return interp;
}
 
// Create and insert an OP_MOV of type ty from src to dst.
Instruction *
BuildUtil::mkMov(Value *dst, Value *src, DataType ty)
{
   Instruction *mov = new_Instruction(func, OP_MOV, ty);

   mov->setDef(0, dst);
   mov->setSrc(0, src);
   insert(mov);

   return mov;
}
 
// Create and insert an OP_MOV from src into a new FILE_GPR LValue whose
// register id is set to 'id'. The type is derived from the size of src.
Instruction *
BuildUtil::mkMovToReg(int id, Value *src)
{
   const DataType ty = typeOfSize(src->reg.size);
   Instruction *mov = new_Instruction(func, OP_MOV, ty);

   LValue *target = new_LValue(func, FILE_GPR);
   mov->setDef(0, target);
   mov->getDef(0)->reg.data.id = id;
   mov->setSrc(0, src);
   insert(mov);

   return mov;
}
 
// Create and insert an OP_MOV into dst from a new FILE_GPR LValue whose
// register id is set to 'id'. The type is derived from the size of dst.
Instruction *
BuildUtil::mkMovFromReg(Value *dst, int id)
{
   const DataType ty = typeOfSize(dst->reg.size);
   Instruction *mov = new_Instruction(func, OP_MOV, ty);

   mov->setDef(0, dst);
   LValue *source = new_LValue(func, FILE_GPR);
   mov->setSrc(0, source);
   mov->getSrc(0)->reg.data.id = id;
   insert(mov);

   return mov;
}
 
// Create and insert a conversion-style instruction 'op' from src (of type
// srcTy) to dst (of type dstTy).
Instruction *
BuildUtil::mkCvt(operation op,
                 DataType dstTy, Value *dst, DataType srcTy, Value *src)
{
   Instruction *cvt = new_Instruction(func, op, dstTy);

   // destination and source types differ, set both explicitly
   cvt->setType(dstTy, srcTy);
   cvt->setDef(0, dst);
   cvt->setSrc(0, src);
   insert(cvt);

   return cvt;
}
 
// Create and insert a comparison instruction with condition code cc.
// If dst lives in the predicate or flags file the destination type is
// forced to TYPE_U8, otherwise dstTy is used. src2 is optional. For a
// flags destination, flagsDef is set to 0.
CmpInstruction *
BuildUtil::mkCmp(operation op, CondCode cc, DataType dstTy, Value *dst,
                 DataType srcTy, Value *src0, Value *src1, Value *src2)
{
   CmpInstruction *cmp = new_CmpInstruction(func, op);

   const bool toFlagLike = dst->reg.file == FILE_PREDICATE ||
                           dst->reg.file == FILE_FLAGS;
   cmp->setType(toFlagLike ? TYPE_U8 : dstTy, srcTy);

   cmp->setCondition(cc);
   cmp->setDef(0, dst);
   cmp->setSrc(0, src0);
   cmp->setSrc(1, src1);
   if (src2 != NULL)
      cmp->setSrc(2, src2);

   if (dst->reg.file == FILE_FLAGS)
      cmp->flagsDef = 0;

   insert(cmp);
   return cmp;
}
 
// Create and insert a texture instruction. Definitions and sources are
// taken from the vectors in order, stopping at the first NULL entry of
// each; targ, tic and tsc are passed on to setTexture().
TexInstruction *
BuildUtil::mkTex(operation op, TexTarget targ,
                 uint16_t tic, uint16_t tsc,
                 const std::vector<Value *> &def,
                 const std::vector<Value *> &src)
{
   TexInstruction *tex = new_TexInstruction(func, op);

   size_t d = 0;
   while (d < def.size() && def[d]) {
      tex->setDef(d, def[d]);
      ++d;
   }

   size_t s = 0;
   while (s < src.size() && src[s]) {
      tex->setSrc(s, src[s]);
      ++s;
   }

   tex->setTexture(targ, tic, tsc);

   insert(tex);
   return tex;
}
 
// Create an OP_QUADOP of type TYPE_F32 on src0 and src1 with sub-operation
// q and lane mask l.
Instruction *
BuildUtil::mkQuadop(uint8_t q, Value *def, uint8_t l, Value *src0, Value *src1)
{
   Instruction *insn = mkOp2(OP_QUADOP, TYPE_F32, def, src0, src1);

   insn->lanes = l;
   insn->subOp = q;

   return insn;
}
 
// Select between trSrc and flSrc depending on pred: two moves into fresh
// SSA values, predicated on pred and on its negation, are merged into dst
// by an OP_UNION, which is returned.
Instruction *
BuildUtil::mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc)
{
   LValue *whenTrue = getSSA();
   LValue *whenFalse = getSSA();

   Instruction *movTrue = mkMov(whenTrue, trSrc);
   movTrue->setPredicate(CC_P, pred);

   Instruction *movFalse = mkMov(whenFalse, flSrc);
   movFalse->setPredicate(CC_NOT_P, pred);

   const DataType ty = typeOfSize(dst->reg.size);
   return mkOp2(OP_UNION, ty, dst, whenTrue, whenFalse);
}
 
// Split val into two halves of halfSize bytes each, returned in h[0] and
// h[1]. Immediates are first moved into a fresh SSA value. For values in a
// memory file the halves are shallow clones with adjusted size and offset
// and no instruction is created (NULL is returned); otherwise an OP_SPLIT
// defining both halves is emitted and returned.
Instruction *
BuildUtil::mkSplit(Value *h[2], uint8_t halfSize, Value *val)
{
   const DataType fTy = typeOfSize(halfSize * 2);

   if (val->reg.file == FILE_IMMEDIATE)
      val = mkMov(getSSA(halfSize * 2), val, fTy)->getDef(0);

   if (isMemoryFile(val->reg.file)) {
      // address the two halves directly, no instruction needed
      h[0] = cloneShallow(getFunction(), val);
      h[1] = cloneShallow(getFunction(), val);
      h[0]->reg.size = halfSize;
      h[1]->reg.size = halfSize;
      h[1]->reg.data.offset += halfSize;
      return NULL;
   }

   h[0] = getSSA(halfSize, val->reg.file);
   h[1] = getSSA(halfSize, val->reg.file);

   Instruction *split = mkOp1(OP_SPLIT, fTy, h[0], val);
   split->setDef(1, h[1]);
   return split;
}
 
// Create and insert a flow instruction 'op' towards targ. If pred is
// non-NULL the instruction is predicated on it with condition code cc.
FlowInstruction *
BuildUtil::mkFlow(operation op, void *targ, CondCode cc, Value *pred)
{
   FlowInstruction *flow = new_FlowInstruction(func, op, targ);

   if (pred != NULL)
      flow->setPredicate(cc, pred);
   insert(flow);

   return flow;
}
 
// Emit OP_NOP instructions whose definitions cover the registers of file f
// selected by rMask (bit i set = register i). The mask is processed four
// registers at a time; each non-empty group of four yields one NOP with one
// or two register definitions. Register sizes are given in units of
// (1 << unit) bytes.
void
BuildUtil::mkClobber(DataFile f, uint32_t rMask, int unit)
{
// For each 4-bit mask value: up to two (base, size) register ranges, packed
// as nibbles. Bits 0-3: base1, bits 4-7: size1, bits 8-11: base2,
// bits 12-15: size2 (0 if there is no second range).
// E.g. mask 0x5 -> 0x1210: one register at 0 and one register at 2.
static const uint16_t baseSize2[16] =
{
0x0000, 0x0010, 0x0011, 0x0020, 0x0012, 0x1210, 0x1211, 0x1220,
0x0013, 0x1310, 0x1311, 0x1320, 0x0022, 0x2210, 0x2211, 0x0040,
};
 
int base = 0;
 
// consume the mask one nibble (four registers) per iteration
for (; rMask; rMask >>= 4, base += 4) {
const uint32_t mask = rMask & 0xf;
if (!mask)
continue;
int base1 = (baseSize2[mask] >> 0) & 0xf;
int size1 = (baseSize2[mask] >> 4) & 0xf;
int base2 = (baseSize2[mask] >> 8) & 0xf;
int size2 = (baseSize2[mask] >> 12) & 0xf;
Instruction *insn = mkOp(OP_NOP, TYPE_NONE, NULL);
if (1) { // size1 can't be 0
LValue *reg = new_LValue(func, f);
reg->reg.size = size1 << unit;
reg->reg.data.id = base + base1;
insn->setDef(0, reg);
}
if (size2) {
LValue *reg = new_LValue(func, f);
reg->reg.size = size2 << unit;
reg->reg.data.id = base + base2;
insn->setDef(1, reg);
}
}
}
 
// Return an immediate with the 32-bit value u. The immediate table is
// probed first (linear probing from the hash of u); if no entry with that
// value is found a new immediate is created and offered to the table.
ImmediateValue *
BuildUtil::mkImm(uint32_t u)
{
   unsigned int slot = u32Hash(u);

   // stop at the first free slot or at an entry holding the same value
   for (;;) {
      ImmediateValue *cached = imms[slot];
      if (!cached || cached->reg.data.u32 == u)
         break;
      slot = (slot + 1) % NV50_IR_BUILD_IMM_HT_SIZE;
   }

   ImmediateValue *result = imms[slot];
   if (result)
      return result;

   result = new_ImmediateValue(prog, u);
   addImmediate(result);
   return result;
}
 
// Return a new 8-byte TYPE_U64 immediate holding u. These immediates are
// always freshly created; the immediate table is not consulted.
ImmediateValue *
BuildUtil::mkImm(uint64_t u)
{
   // start from a 32-bit zero immediate and widen it
   ImmediateValue *wide = new_ImmediateValue(prog, (uint32_t)0);

   wide->reg.size = 8;
   wide->reg.type = TYPE_U64;
   wide->reg.data.u64 = u;

   return wide;
}
 
// Return an immediate for the float f, identified by the bit pattern of f
// reinterpreted as a 32-bit unsigned value.
ImmediateValue *
BuildUtil::mkImm(float f)
{
   union {
      float f32;
      uint32_t u32;
   } bits;

   bits.f32 = f;
   const uint32_t pattern = bits.u32;

   return mkImm(pattern);
}
 
// Move the float immediate f into dst, or into a scratch value if dst is
// NULL; returns the result of mkOp1v().
Value *
BuildUtil::loadImm(Value *dst, float f)
{
   if (!dst)
      dst = getScratch();
   return mkOp1v(OP_MOV, TYPE_F32, dst, mkImm(f));
}
 
// Move the 32-bit immediate u into dst, or into a scratch value if dst is
// NULL; returns the result of mkOp1v().
Value *
BuildUtil::loadImm(Value *dst, uint32_t u)
{
   if (!dst)
      dst = getScratch();
   return mkOp1v(OP_MOV, TYPE_U32, dst, mkImm(u));
}
 
// Move the 64-bit immediate u into dst, or into an 8-byte scratch value if
// dst is NULL; returns the result of mkOp1v().
Value *
BuildUtil::loadImm(Value *dst, uint64_t u)
{
   if (!dst)
      dst = getScratch(8);
   return mkOp1v(OP_MOV, TYPE_U64, dst, mkImm(u));
}
 
// Create a symbol in the given file / file index at offset baseAddr, with
// type ty and the size that belongs to ty.
Symbol *
BuildUtil::mkSymbol(DataFile file, int8_t fileIndex, DataType ty,
                    uint32_t baseAddr)
{
   Symbol *symbol = new_Symbol(prog, file, fileIndex);

   symbol->setOffset(baseAddr);
   symbol->reg.size = typeSizeof(ty);
   symbol->reg.type = ty;

   return symbol;
}
 
// Create a FILE_SYSTEM_VALUE symbol for system value svName, component or
// element svIndex. Position, face, ydir, point size, point coord, clip
// distance and tess factor are typed TYPE_F32, everything else TYPE_U32.
// svIndex must be below 4 except for clip distances and tess factors.
Symbol *
BuildUtil::mkSysVal(SVSemantic svName, uint32_t svIndex)
{
   Symbol *sym = new_Symbol(prog, FILE_SYSTEM_VALUE, 0);

   assert(svIndex < 4 ||
          (svName == SV_CLIP_DISTANCE || svName == SV_TESS_FACTOR));

   bool isFloat;
   switch (svName) {
   case SV_POSITION:
   case SV_FACE:
   case SV_YDIR:
   case SV_POINT_SIZE:
   case SV_POINT_COORD:
   case SV_CLIP_DISTANCE:
   case SV_TESS_FACTOR:
      isFloat = true;
      break;
   default:
      isFloat = false;
      break;
   }

   if (isFloat)
      sym->reg.type = TYPE_F32;
   else
      sym->reg.type = TYPE_U32;
   sym->reg.size = typeSizeof(sym->reg.type);

   sym->reg.data.sv.index = svIndex;
   sym->reg.data.sv.sv = svName;

   return sym;
}
 
void
BuildUtil::DataArray::setup(unsigned array, unsigned arrayIdx,
uint32_t base, int len, int vecDim, int eltSize,
DataFile file, int8_t fileIdx)
{
this->array = array;
this->arrayIdx = arrayIdx;
this->baseAddr = base;
this->arrayLen = len;
this->vecDim = vecDim;
this->eltSize = eltSize;
this->file = file;
this->regOnly = !isMemoryFile(file);
 
if (!regOnly) {
baseSym = new_Symbol(up->getProgram(), file, fileIdx);
baseSym->setOffset(baseAddr);
baseSym->reg.size = eltSize;
} else {
baseSym = NULL;
}
}
 
// Return a value to write element (i, c) into: for register-only arrays
// the value recorded in m (created and recorded on first use), otherwise a
// scratch value from the builder.
Value *
BuildUtil::DataArray::acquire(ValueMap &m, int i, int c)
{
   if (!regOnly)
      return up->getScratch();

   Value *slot = lookup(m, i, c);
   if (slot)
      return slot;

   return insert(m, i, c, new_LValue(up->getFunction(), file));
}
 
// Read element (i, c). For register-only arrays this is the value recorded
// in m (created and recorded on first use). For memory arrays the symbol of
// the element is looked up or created in m and a load from it, indirectly
// addressed by ptr, is emitted.
Value *
BuildUtil::DataArray::load(ValueMap &m, int i, int c, Value *ptr)
{
   Value *entry = lookup(m, i, c);

   if (regOnly) {
      if (!entry)
         entry = insert(m, i, c, new_LValue(up->getFunction(), file));
      return entry;
   }

   if (!entry)
      entry = insert(m, i, c, mkSymbol(i, c));

   Symbol *elementSym = static_cast<Symbol *>(entry);
   return up->mkLoadv(typeOfSize(eltSize), elementSym, ptr);
}
 
// Write value to element (i, c). For register-only arrays (no indirect
// addressing allowed) the value is recorded in m unless an entry exists
// already. For memory arrays the symbol of the element is looked up or
// created in m and an OP_STORE, indirectly addressed by ptr, is emitted
// with a type derived from the size of value.
void
BuildUtil::DataArray::store(ValueMap &m, int i, int c, Value *ptr, Value *value)
{
   if (!regOnly) {
      Value *entry = lookup(m, i, c);
      if (!entry)
         entry = insert(m, i, c, mkSymbol(i, c));

      const DataType stTy = typeOfSize(value->reg.size);
      Symbol *elementSym = static_cast<Symbol *>(entry);

      up->mkStore(OP_STORE, stTy, elementSym, ptr, value);
      return;
   }

   assert(!ptr);
   if (!lookup(m, i, c))
      insert(m, i, c, value);

   assert(lookup(m, i, c) == value);
}
 
// Create the symbol for element (i, c): size and type follow eltSize, the
// address is baseSym plus baseAddr plus the element's byte offset
// ((i * vecDim + c) * eltSize).
Symbol *
BuildUtil::DataArray::mkSymbol(int i, int c)
{
   const unsigned int idx = i * vecDim + c;
   Symbol *element = new_Symbol(up->getProgram(), file, 0);

   assert(baseSym || (idx < arrayLen && c < vecDim));

   element->reg.type = typeOfSize(eltSize);
   element->reg.size = eltSize;
   element->setAddress(baseSym, baseAddr + idx * eltSize);

   return element;
}
 
 
// Split a 64-bit instruction into two 32-bit instructions operating on the
// low and high halves. i itself becomes the low part; a clone of it,
// inserted directly after i, becomes the high part and is returned.
// Only TYPE_U64 / TYPE_S64 and the operations MOV, ADD and SUB are handled
// (ADD / SUB only if a carry value is supplied); in all other cases NULL is
// returned and i is left untouched.
// zero is used as the high-part source wherever a source of i is narrower
// than 8 bytes; carry links the two halves of an ADD / SUB.
Instruction *
BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
Value *zero,
Value *carry)
{
DataType hTy;
int srcNr;
 
// type of each half
switch (i->dType) {
case TYPE_U64: hTy = TYPE_U32; break;
case TYPE_S64: hTy = TYPE_S32; break;
default:
return NULL;
}
 
// number of sources that have to be split
switch (i->op) {
case OP_MOV: srcNr = 1; break;
case OP_ADD:
case OP_SUB:
if (!carry)
return NULL;
srcNr = 2;
break;
default:
// TODO when needed
return NULL;
}
 
// turn i into the low half, writing a 4-byte clone of its destination
i->setType(hTy);
i->setDef(0, cloneShallow(fn, i->getDef(0)));
i->getDef(0)->reg.size = 4;
Instruction *lo = i;
Instruction *hi = cloneForward(fn, i);
lo->bb->insertAfter(lo, hi);
 
// the high half writes the register following the low half's destination
hi->getDef(0)->reg.data.id++;
 
for (int s = 0; s < srcNr; ++s) {
if (lo->getSrc(s)->reg.size < 8) {
hi->setSrc(s, zero);
} else {
// do not modify a value that has other references, work on a clone
if (lo->getSrc(s)->refCount() > 1)
lo->setSrc(s, cloneShallow(fn, lo->getSrc(s)));
lo->getSrc(s)->reg.size /= 2;
hi->setSrc(s, cloneShallow(fn, lo->getSrc(s)));
 
// make the high source refer to the upper 32 bits
switch (hi->src(s).getFile()) {
case FILE_IMMEDIATE:
hi->getSrc(s)->reg.data.u64 >>= 32;
break;
case FILE_MEMORY_CONST:
case FILE_MEMORY_SHARED:
case FILE_SHADER_INPUT:
hi->getSrc(s)->reg.data.offset += 4;
break;
default:
assert(hi->src(s).getFile() == FILE_GPR);
hi->getSrc(s)->reg.data.id++;
break;
}
}
}
if (srcNr == 2) {
// ADD / SUB: the low half defines the carry, the high half reads it as
// its flags source
lo->setDef(1, carry);
hi->setFlagsSrc(hi->srcCount(), carry);
}
return hi;
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
0,0 → 1,324
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#ifndef __NV50_IR_BUILD_UTIL__
#define __NV50_IR_BUILD_UTIL__
 
namespace nv50_ir {
 
// Helper for emitting IR at a tracked insertion point.
//
// The object remembers a current program, function and basic block plus an
// optional anchor instruction (pos). insert() places an instruction relative
// to that position, and mkOp() (defined below the class) creates an
// instruction and inserts it. Most other mk*() helpers are only declared
// here; their definitions live outside this header.
class BuildUtil
{
public:
BuildUtil();
BuildUtil(Program *);

inline void setProgram(Program *);
inline Program *getProgram() const { return prog; }
inline Function *getFunction() const { return func; }

// keeps inserting at head/tail of block
inline void setPosition(BasicBlock *, bool tail);
// position advances only if @after is true
inline void setPosition(Instruction *, bool after);

inline BasicBlock *getBB() { return bb; }

inline void insert(Instruction *);
// Removes @i from the current block; asserts that @i belongs to it.
inline void remove(Instruction *i) { assert(i->bb == bb); bb->remove(i); }

// New LValue of @size bytes in the given register file (defaults: 4, GPR).
inline LValue *getScratch(int size = 4, DataFile = FILE_GPR);
// scratch value for a single assignment:
inline LValue *getSSA(int size = 4, DataFile = FILE_GPR);

// mkOpN: N is the number of source operands following the destination.
inline Instruction *mkOp(operation, DataType, Value *);
Instruction *mkOp1(operation, DataType, Value *, Value *);
Instruction *mkOp2(operation, DataType, Value *, Value *, Value *);
Instruction *mkOp3(operation, DataType, Value *, Value *, Value *, Value *);

// mkOpNv: same as mkOpN but returns the destination as an LValue.
LValue *mkOp1v(operation, DataType, Value *, Value *);
LValue *mkOp2v(operation, DataType, Value *, Value *, Value *);
LValue *mkOp3v(operation, DataType, Value *, Value *, Value *, Value *);

Instruction *mkLoad(DataType, Value *dst, Symbol *, Value *ptr);
Instruction *mkStore(operation, DataType, Symbol *, Value *ptr, Value *val);

// Loads into a fresh scratch value and returns that value.
LValue *mkLoadv(DataType, Symbol *, Value *ptr);

Instruction *mkMov(Value *, Value *, DataType = TYPE_U32);
Instruction *mkMovToReg(int id, Value *);
Instruction *mkMovFromReg(Value *, int id);

Instruction *mkInterp(unsigned mode, Value *, int32_t offset, Value *rel);
Instruction *mkFetch(Value *, DataType, DataFile, int32_t offset,
Value *attrRel, Value *primRel);

Instruction *mkCvt(operation, DataType, Value *, DataType, Value *);
CmpInstruction *mkCmp(operation, CondCode, DataType,
Value *,
DataType, Value *, Value *, Value * = NULL);
TexInstruction *mkTex(operation, TexTarget,
uint16_t tic, uint16_t tsc,
const std::vector<Value *> &def,
const std::vector<Value *> &src);
Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *);

FlowInstruction *mkFlow(operation, void *target, CondCode, Value *pred);

Instruction *mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc);

Instruction *mkSplit(Value *half[2], uint8_t halfSize, Value *);

void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2);

ImmediateValue *mkImm(float);
ImmediateValue *mkImm(uint32_t);
ImmediateValue *mkImm(uint64_t);

// Convenience overload: forwards to the uint32_t version.
ImmediateValue *mkImm(int i) { return mkImm((uint32_t)i); }

Value *loadImm(Value *dst, float);
Value *loadImm(Value *dst, uint32_t);
Value *loadImm(Value *dst, uint64_t);

// Convenience overload: forwards to the uint32_t version.
Value *loadImm(Value *dst, int i) { return loadImm(dst, (uint32_t)i); }

// returns high part of the operation
static Instruction *split64BitOpPostRA(Function *, Instruction *,
Value *zero, Value *carry);

// Key identifying one component: (array, arrayIdx, element i, component c).
struct Location
{
Location(unsigned array, unsigned arrayIdx, unsigned i, unsigned c)
: array(array), arrayIdx(arrayIdx), i(i), c(c) { }
Location(const Location &l)
: array(l.array), arrayIdx(l.arrayIdx), i(l.i), c(l.c) { }

bool operator==(const Location &l) const
{
return
array == l.array && arrayIdx == l.arrayIdx && i == l.i && c == l.c;
}

// Lexicographic order over (array, arrayIdx, i, c).
bool operator<(const Location &l) const
{
return array != l.array ? array < l.array :
arrayIdx != l.arrayIdx ? arrayIdx < l.arrayIdx :
i != l.i ? i < l.i :
c != l.c ? c < l.c :
false;
}

unsigned array, arrayIdx, i, c;
};

// Two-way map between a Location and the Value bound to it.
typedef bimap<Location, Value *> ValueMap;

// Accessor for one array of vector elements, addressed by element index i
// and component index c. Values are tracked through a caller-supplied
// ValueMap; load/store/acquire are defined outside this header.
class DataArray
{
public:
DataArray(BuildUtil *bld) : up(bld) { }

void setup(unsigned array, unsigned arrayIdx,
uint32_t base, int len, int vecDim, int eltSize,
DataFile file, int8_t fileIdx);

inline bool exists(ValueMap&, unsigned int i, unsigned int c);

Value *load(ValueMap&, int i, int c, Value *ptr);
void store(ValueMap&, int i, int c, Value *ptr, Value *value);
Value *acquire(ValueMap&, int i, int c);

private:
inline Value *lookup(ValueMap&, unsigned i, unsigned c);
inline Value *insert(ValueMap&, unsigned i, unsigned c, Value *v);

Symbol *mkSymbol(int i, int c);

private:
BuildUtil *up; // owning builder, set by the constructor
unsigned array, arrayIdx; // used as the first two fields of Location keys

uint32_t baseAddr;
uint32_t arrayLen; // exists() asserts i < arrayLen
Symbol *baseSym;

uint8_t vecDim; // exists() asserts c < vecDim
uint8_t eltSize; // in bytes

DataFile file;
bool regOnly; // when false, exists() reports every element as present
};

Symbol *mkSymbol(DataFile file, int8_t fileIndex,
DataType ty, uint32_t baseAddress);

Symbol *mkSysVal(SVSemantic svName, uint32_t svIndex);

private:
void init(Program *);
void addImmediate(ImmediateValue *);
inline unsigned int u32Hash(uint32_t);

protected:
Program *prog;
Function *func;
Instruction *pos; // anchor instruction, or NULL for block head/tail mode
BasicBlock *bb;
bool tail; // with pos: insert after it; without pos: insert at block tail

#define NV50_IR_BUILD_IMM_HT_SIZE 256

// Table of immediates with NV50_IR_BUILD_IMM_HT_SIZE slots; presumably
// indexed via u32Hash() by addImmediate()/mkImm() - confirm in the .cpp.
ImmediateValue *imms[NV50_IR_BUILD_IMM_HT_SIZE];
unsigned int immCount;
};
 
// Maps a 32-bit value to a slot index below NV50_IR_BUILD_IMM_HT_SIZE.
unsigned int BuildUtil::u32Hash(uint32_t u)
{
   // Fold modulo 273 first, then reduce into the table range.
   const uint32_t folded = u % 273;
   return folded % NV50_IR_BUILD_IMM_HT_SIZE;
}
 
// Replaces the program this builder refers to; the current function, block
// and position are left untouched.
void BuildUtil::setProgram(Program *program)
{
   this->prog = program;
}
 
// Selects @block as the insertion target with no anchor instruction, so that
// insert() adds at the block's tail (atTail == true) or head (false).
// The program and function are refreshed from the block.
void
BuildUtil::setPosition(BasicBlock *block, bool atTail)
{
   this->bb = block;
   this->prog = block->getProgram();
   this->func = block->getFunction();

   // No anchor: insert() falls back to head/tail insertion.
   this->pos = NULL;
   this->tail = atTail;
}
 
// Anchors the insertion position at instruction @i.
//
// New instructions are inserted after @i when @after is true (the anchor
// then advances to each inserted instruction, see insert()), otherwise
// before @i. The program and function are refreshed from @i's block.
// @i must already belong to a basic block.
void
BuildUtil::setPosition(Instruction *i, bool after)
{
   bb = i->bb;
   // Check before the block is dereferenced below; previously the assert ran
   // only after bb->getProgram()/getFunction() had already used the pointer.
   assert(bb);
   prog = bb->getProgram();
   func = bb->getFunction();
   pos = i;
   tail = after;
}
 
// Creates a new LValue in register file @f for the current function and sets
// its register size to @size.
LValue *
BuildUtil::getScratch(int size, DataFile f)
{
   LValue *scratch = new_LValue(func, f);

   scratch->reg.size = size;
   return scratch;
}
 
// Like getScratch(), but additionally marks the new value with ssa = 1.
LValue *
BuildUtil::getSSA(int size, DataFile f)
{
   LValue *value = new_LValue(func, f);

   value->ssa = 1;
   value->reg.size = size;
   return value;
}
 
// Inserts @i at the current position.
//
// Without an anchor instruction the block's tail or head is used. With an
// anchor, @i goes after it (and becomes the new anchor) when tail is set,
// otherwise before it (anchor unchanged).
void BuildUtil::insert(Instruction *i)
{
   if (pos) {
      if (!tail) {
         bb->insertBefore(pos, i);
         return;
      }
      bb->insertAfter(pos, i);
      pos = i; // keep appending behind the most recently inserted instruction
      return;
   }

   if (tail)
      bb->insertTail(i);
   else
      bb->insertHead(i);
}
 
// Creates an instruction @op of type @ty with destination @dst and no
// sources, inserts it at the current position and returns it.
// A fixed set of operations gets the instruction's "fixed" flag set.
Instruction *
BuildUtil::mkOp(operation op, DataType ty, Value *dst)
{
   Instruction *insn = new_Instruction(func, op, ty);

   insn->setDef(0, dst);
   insert(insn);

   switch (op) {
   case OP_DISCARD:
   case OP_EXIT:
   case OP_JOIN:
   case OP_QUADON:
   case OP_QUADPOP:
   case OP_EMIT:
   case OP_RESTART:
      insn->fixed = 1;
      break;
   default:
      break;
   }
   return insn;
}
 
// Emits a one-source operation via mkOp1() and returns its destination as
// an LValue.
inline LValue *
BuildUtil::mkOp1v(operation op, DataType ty, Value *dst, Value *src)
{
   (void)mkOp1(op, ty, dst, src);

   LValue *result = dst->asLValue();
   return result;
}
 
// Emits a two-source operation via mkOp2() and returns its destination as
// an LValue.
inline LValue *
BuildUtil::mkOp2v(operation op, DataType ty, Value *dst,
                  Value *src0, Value *src1)
{
   (void)mkOp2(op, ty, dst, src0, src1);

   LValue *result = dst->asLValue();
   return result;
}
 
// Emits a three-source operation via mkOp3() and returns its destination as
// an LValue.
inline LValue *
BuildUtil::mkOp3v(operation op, DataType ty, Value *dst,
                  Value *src0, Value *src1, Value *src2)
{
   (void)mkOp3(op, ty, dst, src0, src1, src2);

   LValue *result = dst->asLValue();
   return result;
}
 
// Emits a load of type @ty from @mem (with address @ptr) into a freshly
// created default scratch value and returns that value.
inline LValue *
BuildUtil::mkLoadv(DataType ty, Symbol *mem, Value *ptr)
{
   LValue *loaded = getScratch();

   (void)mkLoad(ty, loaded, mem, ptr);
   return loaded;
}
 
// Reports whether component (@i, @c) is available. If the array is not
// register-only the answer is always true; otherwise a value must already be
// recorded for that location in @m.
bool
BuildUtil::DataArray::exists(ValueMap &m, unsigned int i, unsigned int c)
{
   assert(i < arrayLen && c < vecDim);

   if (!regOnly)
      return true;
   return m.r.count(Location(array, arrayIdx, i, c)) != 0;
}
 
// Returns the value recorded in @m for component (@i, @c) of this array, or
// NULL when nothing is recorded.
Value *
BuildUtil::DataArray::lookup(ValueMap &m, unsigned i, unsigned c)
{
   ValueMap::r_iterator entry = m.r.find(Location(array, arrayIdx, i, c));

   if (entry == m.r.end())
      return NULL;
   return entry->second;
}
 
// Records @v in @m as the value of component (@i, @c) of this array and
// hands @v back to the caller.
Value *
BuildUtil::DataArray::insert(ValueMap &m, unsigned i, unsigned c, Value *v)
{
   m.insert(Location(this->array, this->arrayIdx, i, c), v);

   return v;
}
 
} // namespace nv50_ir
 
#endif // __NV50_IR_BUILD_UTIL__
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
0,0 → 1,221
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#ifndef __NV50_IR_DRIVER_H__
#define __NV50_IR_DRIVER_H__
 
#include "pipe/p_shader_tokens.h"
 
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
 
/*
* This struct constitutes linkage information in TGSI terminology.
*
* It is created by the code generator and handed to the pipe driver
* for input/output slot assignment.
*
* One instance describes a single vec4 register: a native slot per
* component, a set of 1-bit interpolation/usage flags, and the TGSI
* identification (register index, semantic name, semantic index).
*/
struct nv50_ir_varying
{
uint8_t slot[4]; /* native slots for xyzw (addresses in 32-bit words) */

unsigned mask : 4; /* vec4 mask */
unsigned linear : 1; /* linearly interpolated if true (and not flat) */
unsigned flat : 1; /* NOTE(review): presumably flat (no interpolation) - confirm */
unsigned sc : 1; /* special colour interpolation mode (SHADE_MODEL) */
unsigned centroid : 1; /* NOTE(review): presumably centroid sampling - confirm */
unsigned patch : 1; /* patch constant value */
unsigned regular : 1; /* driver-specific meaning (e.g. input in sreg) */
unsigned input : 1; /* indicates direction of system values */
unsigned oread : 1; /* true if output is read from parallel TCP */

ubyte id; /* TGSI register index */
ubyte sn; /* TGSI semantic name */
ubyte si; /* TGSI semantic index */
};
 
/* Source representation identifiers (see the "NV50_PROGRAM_IR" note on
* nv50_ir_prog_info::bin.sourceRep). */
#define NV50_PROGRAM_IR_TGSI 0
#define NV50_PROGRAM_IR_SM4 1
#define NV50_PROGRAM_IR_GLSL 2
#define NV50_PROGRAM_IR_LLVM 3

/* Debug flag bits. All of them evaluate to 0 unless DEBUG is defined, so
* tests against them are always false in non-debug builds.
* Note that (2 << 0) is the value 2, i.e. the same as (1 << 1). */
#ifdef DEBUG
# define NV50_IR_DEBUG_BASIC (1 << 0)
# define NV50_IR_DEBUG_VERBOSE (2 << 0)
# define NV50_IR_DEBUG_REG_ALLOC (1 << 2)
#else
# define NV50_IR_DEBUG_BASIC 0
# define NV50_IR_DEBUG_VERBOSE 0
# define NV50_IR_DEBUG_REG_ALLOC 0
#endif

/* Additional semantic names, numbered from TGSI_SEMANTIC_COUNT upwards so
* they do not overlap the TGSI ones. */
#define NV50_SEMANTIC_CLIPDISTANCE (TGSI_SEMANTIC_COUNT + 0)
#define NV50_SEMANTIC_TESSFACTOR (TGSI_SEMANTIC_COUNT + 7)
#define NV50_SEMANTIC_TESSCOORD (TGSI_SEMANTIC_COUNT + 8)
#define NV50_SEMANTIC_COUNT (TGSI_SEMANTIC_COUNT + 10)

/* Tessellation partitioning modes. */
#define NV50_TESS_PART_FRACT_ODD 0
#define NV50_TESS_PART_FRACT_EVEN 1
#define NV50_TESS_PART_POW2 2
#define NV50_TESS_PART_INTEGER 3

/* Patch primitive type, placed at PIPE_PRIM_MAX. */
#define NV50_PRIM_PATCHES PIPE_PRIM_MAX
 
/* A (label, offset) pair; an array of these is exposed through
* nv50_ir_prog_info::bin.syms. NOTE(review): offset is presumably a position
* inside the generated code - confirm against the code generator. */
struct nv50_ir_prog_symbol
{
uint32_t label;
uint32_t offset;
};
 
/* Chipset identifiers, comparable against nv50_ir_prog_info::target. */
#define NVISA_GF100_CHIPSET_C0 0xc0
#define NVISA_GF100_CHIPSET_D0 0xd0
#define NVISA_GK104_CHIPSET 0xe0
#define NVISA_GK20A_CHIPSET 0xea
#define NVISA_GM107_CHIPSET 0x110
 
/*
* Description of one shader program as passed to nv50_ir_generate_code().
*
* It carries the compilation target and options, the program source, the
* generated binary, the varying tables, per-stage properties and assorted
* I/O layout details. NOTE(review): which fields are inputs and which are
* filled in by the code generator is not visible from this header.
*/
struct nv50_ir_prog_info
{
uint16_t target; /* chipset (0x50, 0x84, 0xc0, ...) */

uint8_t type; /* PIPE_SHADER */

uint8_t optLevel; /* optimization level (0 to 3) */
uint8_t dbgFlags; /* NOTE(review): presumably NV50_IR_DEBUG_* bits - confirm */

/* program source and generated binary */
struct {
int16_t maxGPR; /* may be -1 if none used */
int16_t maxOutput;
uint32_t tlsSpace; /* required local memory per thread */
uint32_t *code;
uint32_t codeSize;
uint8_t sourceRep; /* NV50_PROGRAM_IR */
const void *source;
void *relocData;
struct nv50_ir_prog_symbol *syms;
uint16_t numSyms;
} bin;

/* system values, inputs and outputs */
struct nv50_ir_varying sv[PIPE_MAX_SHADER_INPUTS];
struct nv50_ir_varying in[PIPE_MAX_SHADER_INPUTS];
struct nv50_ir_varying out[PIPE_MAX_SHADER_OUTPUTS];
uint8_t numInputs;
uint8_t numOutputs;
uint8_t numPatchConstants; /* also included in numInputs/numOutputs */
uint8_t numSysVals;

/* immediates */
struct {
uint32_t *buf; /* for IMMEDIATE_ARRAY */
uint16_t bufSize; /* size of immediate array */
uint16_t count; /* count of inline immediates */
uint32_t *data; /* inline immediate data */
uint8_t *type; /* for each vec4 (128 bit) */
} immd;

/* per-stage properties; the members share storage, so only the one
* matching the program type is meaningful */
union {
struct {
uint32_t inputMask[4]; /* mask of attributes read (1 bit per scalar) */
} vp;
struct {
uint8_t inputPatchSize;
uint8_t outputPatchSize;
uint8_t partitioning; /* PIPE_TESS_PART */
int8_t winding; /* +1 (clockwise) / -1 (counter-clockwise) */
uint8_t domain; /* PIPE_PRIM_{QUADS,TRIANGLES,LINES} */
uint8_t outputPrim; /* PIPE_PRIM_{TRIANGLES,LINES,POINTS} */
} tp;
struct {
uint8_t inputPrim;
uint8_t outputPrim;
unsigned instanceCount;
unsigned maxVertices;
} gp;
struct {
unsigned numColourResults;
boolean writesDepth;
boolean earlyFragTests;
boolean separateFragData;
boolean usesDiscard;
} fp;
struct {
uint32_t inputOffset; /* base address for user args */
uint32_t sharedOffset; /* reserved space in s[] */
uint32_t gridInfoBase; /* base address for NTID,NCTAID */
} cp;
} prop;

uint8_t numBarriers;

/* special inputs/outputs and driver-provided memory layout */
struct {
uint8_t clipDistance; /* index of first clip distance output */
uint8_t clipDistanceMask; /* mask of clip distances defined */
uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */
int8_t genUserClip; /* request user clip planes for ClipVertex */
uint16_t ucpBase; /* base address for UCPs */
uint8_t ucpCBSlot; /* constant buffer index of UCP data */
uint8_t pointSize; /* output index for PointSize */
uint8_t instanceId; /* system value index of InstanceID */
uint8_t vertexId; /* system value index of VertexID */
uint8_t edgeFlagIn;
uint8_t edgeFlagOut;
int8_t viewportId; /* output index of ViewportIndex */
uint8_t fragDepth; /* output index of FragDepth */
uint8_t sampleMask; /* output index of SampleMask */
boolean sampleInterp; /* perform sample interp on all fp inputs */
uint8_t backFaceColor[2]; /* input/output indices of back face colour */
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
boolean fp64; /* program uses fp64 math */
boolean nv50styleSurfaces; /* generate gX[] access for raw buffers */
uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */
uint16_t texBindBase; /* base address for tex handles (nve4) */
uint16_t suInfoBase; /* base address for surface info (nve4) */
uint16_t sampleInfoBase; /* base address for sample positions */
uint8_t msInfoCBSlot; /* cX[] used for multisample info */
uint16_t msInfoBase; /* base address for multisample info */
} io;

/* driver callback to assign input/output locations */
int (*assignSlots)(struct nv50_ir_prog_info *);

void *driverPriv; /* opaque pointer for the driver's own use */
};
 
/* The entry points below use C linkage so that they can be called from C. */
#ifdef __cplusplus
extern "C" {
#endif

/* Generates code for the program described by the given info structure.
* NOTE(review): the meaning of the int return value is not visible here;
* presumably 0 on success - confirm against the implementation. */
extern int nv50_ir_generate_code(struct nv50_ir_prog_info *);

/* Applies the relocations described by relocData (see bin.relocData) to
* code, given the final positions of the code, library and data. */
extern void nv50_ir_relocate_code(void *relocData, uint32_t *code,
uint32_t codePos,
uint32_t libPos,
uint32_t dataPos);

/* obtain code that will be shared among programs */
extern void nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size);

#ifdef __cplusplus
}
#endif
 
#endif // __NV50_IR_DRIVER_H__
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
0,0 → 1,1913
/*
* Copyright 2012 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir_target_nvc0.h"
 
// CodeEmitter for GK110 encoding of the Fermi/Kepler ISA.
 
namespace nv50_ir {
 
// Code emitter producing the GK110 binary encoding.
//
// Each emitXXX() helper writes one instruction into the 32-bit words code[0]
// and code[1]. The code pointer itself is not declared in this class;
// presumably it is inherited from CodeEmitter - confirm in the base class.
class CodeEmitterGK110 : public CodeEmitter
{
public:
CodeEmitterGK110(const TargetNVC0 *);

virtual bool emitInstruction(Instruction *);
virtual uint32_t getMinEncodingSize(const Instruction *) const;
virtual void prepareEmission(Function *);

inline void setProgramType(Program::Type pType) { progType = pType; }

private:
const TargetNVC0 *targNVC0;

Program::Type progType;

const bool writeIssueDelays;

private:
// Common operand layouts shared by many instructions.
void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);
void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);
void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier);

void emitPredicate(const Instruction *);

// Operand field encoders.
void setCAddress14(const ValueRef&);
void setShortImmediate(const Instruction *, const int s);
void setImmediate32(const Instruction *, const int s, Modifier);

void modNegAbsF32_3b(const Instruction *, const int s);

void emitCondCode(CondCode cc, int pos, uint8_t mask);
void emitInterpMode(const Instruction *);
void emitLoadStoreType(DataType ty, const int pos);
void emitCachingMode(CacheMode c, const int pos);

inline uint8_t getSRegEncoding(const ValueRef&);

// Rounding mode encoders; pos is a bit position counted across code[].
void emitRoundMode(RoundMode, const int pos, const int rintPos);
void emitRoundModeF(RoundMode, const int pos);
void emitRoundModeI(RoundMode, const int pos);

void emitNegAbs12(const Instruction *);

// Per-operation emitters.
void emitNOP(const Instruction *);

void emitLOAD(const Instruction *);
void emitSTORE(const Instruction *);
void emitMOV(const Instruction *);

void emitINTERP(const Instruction *);
void emitPFETCH(const Instruction *);
void emitVFETCH(const Instruction *);
void emitEXPORT(const Instruction *);
void emitOUT(const Instruction *);

void emitUADD(const Instruction *);
void emitFADD(const Instruction *);
void emitDADD(const Instruction *);
void emitIMUL(const Instruction *);
void emitFMUL(const Instruction *);
void emitDMUL(const Instruction *);
void emitIMAD(const Instruction *);
void emitISAD(const Instruction *);
void emitFMAD(const Instruction *);
void emitDMAD(const Instruction *);

void emitNOT(const Instruction *);
void emitLogicOp(const Instruction *, uint8_t subOp);
void emitPOPC(const Instruction *);
void emitINSBF(const Instruction *);
void emitEXTBF(const Instruction *);
void emitBFIND(const Instruction *);
void emitShift(const Instruction *);

void emitSFnOp(const Instruction *, uint8_t subOp);

void emitCVT(const Instruction *);
void emitMINMAX(const Instruction *);
void emitPreOp(const Instruction *);

void emitSET(const CmpInstruction *);
void emitSLCT(const CmpInstruction *);
void emitSELP(const Instruction *);

void emitTEXBAR(const Instruction *);
void emitTEX(const TexInstruction *);
void emitTEXCSAA(const TexInstruction *);
void emitTXQ(const TexInstruction *);

void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);

void emitPIXLD(const Instruction *);

void emitFlow(const Instruction *);

// Register id encoders: OR a register id into code[] at bit position pos.
inline void defId(const ValueDef&, const int pos);
inline void srcId(const ValueRef&, const int pos);
inline void srcId(const ValueRef *, const int pos);
inline void srcId(const Instruction *, int s, const int pos);

inline void srcAddr32(const ValueRef&, const int pos); // address / 4

// True if the immediate does not fit the short immediate form.
inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false);
};
 
// Register id emitted when an operand is absent.
#define GK110_GPR_ZERO 255

// Flag-setting helpers for the emit functions. The first argument is a bit
// position written as bare hex digits (the macro prepends 0x); the bit is
// counted across the code[] words, i.e. word = b / 32, bit = b % 32.
// All of them expect `i` (the instruction) and `code` to be in scope, and
// each expands to an unbraced `if`, so take care when using one directly
// in front of an `else`.

// Set bit b if source s carries the neg / abs modifier.
#define NEG_(b, s) \
if (i->src(s).mod.neg()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
#define ABS_(b, s) \
if (i->src(s).mod.abs()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

// Set bit b if source s carries the NOT modifier.
#define NOT_(b, s) if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) \
code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

// Set bit b if the instruction's ftz / dnz flag is set.
#define FTZ_(b) if (i->ftz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
#define DNZ_(b) if (i->dnz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

// Set bit b if the instruction saturates.
#define SAT_(b) if (i->saturate) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

// Emit the rounding mode at bit b using emitRoundMode<t> (t is F or I).
#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b)

// Register data of the representative of a source / destination operand.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
 
// ORs the register id of @src into code[] at bit position @pos; an empty
// reference is encoded as GK110_GPR_ZERO.
void CodeEmitterGK110::srcId(const ValueRef& src, const int pos)
{
   const int word = pos / 32;
   const int shift = pos % 32;

   code[word] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << shift;
}
 
// Pointer flavour: ORs the register id of *@src into code[] at bit position
// @pos; a NULL pointer is encoded as GK110_GPR_ZERO.
void CodeEmitterGK110::srcId(const ValueRef *src, const int pos)
{
   const int word = pos / 32;
   const int shift = pos % 32;

   code[word] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << shift;
}
 
// ORs the register id of source @s of @insn into code[] at bit position
// @pos; a missing source is encoded as GK110_GPR_ZERO.
void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos)
{
   int reg = GK110_GPR_ZERO;

   if (insn->srcExists(s))
      reg = SDATA(insn->src(s)).id;

   code[pos / 32] |= reg << (pos % 32);
}
 
// ORs the offset of @src, divided by 4 via a right shift, into code[] at bit
// position @pos.
void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos)
{
   const int word = pos / 32;
   const int shift = pos % 32;

   code[word] |= (SDATA(src).offset >> 2) << shift;
}
 
// ORs the register id of destination @def into code[] at bit position @pos;
// an empty definition is encoded as GK110_GPR_ZERO.
void CodeEmitterGK110::defId(const ValueDef& def, const int pos)
{
   const int word = pos / 32;
   const int shift = pos % 32;

   code[word] |= (def.get() ? DDATA(def).id : GK110_GPR_ZERO) << shift;
}
 
bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod)
{
const ImmediateValue *imm = ref.get()->asImm();
 
return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
}
 
// Encodes rounding mode @rnd as a 2-bit value at bit position @pos
// (N = 0, M = 1, P = 2, Z = 3). The round-to-integer variants (*I) use the
// same value and additionally set the bit at @rintPos, unless @rintPos is
// negative.
void
CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos)
{
   const bool rint = rnd == ROUND_MI || rnd == ROUND_PI ||
                     rnd == ROUND_ZI || rnd == ROUND_NI;
   uint8_t n = 0;

   switch (rnd) {
   case ROUND_M:
   case ROUND_MI:
      n = 1;
      break;
   case ROUND_P:
   case ROUND_PI:
      n = 2;
      break;
   case ROUND_Z:
   case ROUND_ZI:
      n = 3;
      break;
   default:
      assert(rnd == ROUND_N || rnd == ROUND_NI);
      break;
   }

   code[pos / 32] |= n << (pos % 32);
   if (rint && rintPos >= 0)
      code[rintPos / 32] |= 1 << (rintPos % 32);
}
 
// Encodes a float rounding mode as a 2-bit value at bit position @pos
// (N = 0, M = 1, P = 2, Z = 3). Any other mode trips the assertion and is
// encoded as 0.
void
CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos)
{
   uint8_t n = 0;

   if (rnd == ROUND_M) {
      n = 1;
   } else if (rnd == ROUND_P) {
      n = 2;
   } else if (rnd == ROUND_Z) {
      n = 3;
   } else {
      assert(rnd == ROUND_N);
   }

   code[pos / 32] |= n << (pos % 32);
}
 
// Encodes a round-to-integer mode as a 2-bit value at bit position @pos
// (NI = 0, MI = 1, PI = 2, ZI = 3). Any other mode trips the assertion and
// is encoded as 0.
void
CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos)
{
   uint8_t n = 0;

   if (rnd == ROUND_MI) {
      n = 1;
   } else if (rnd == ROUND_PI) {
      n = 2;
   } else if (rnd == ROUND_ZI) {
      n = 3;
   } else {
      assert(rnd == ROUND_NI);
   }

   code[pos / 32] |= n << (pos % 32);
}
 
// Translates condition code @cc into its numeric encoding, keeps only the
// bits selected by @mask and ORs the result into code[] at bit position
// @pos. An unknown condition code asserts and is encoded as 0.
void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask)
{
uint8_t n;

// Encoding table; the values are part of the binary format.
switch (cc) {
case CC_FL: n = 0x00; break;
case CC_LT: n = 0x01; break;
case CC_EQ: n = 0x02; break;
case CC_LE: n = 0x03; break;
case CC_GT: n = 0x04; break;
case CC_NE: n = 0x05; break;
case CC_GE: n = 0x06; break;
case CC_LTU: n = 0x09; break;
case CC_EQU: n = 0x0a; break;
case CC_LEU: n = 0x0b; break;
case CC_GTU: n = 0x0c; break;
case CC_NEU: n = 0x0d; break;
case CC_GEU: n = 0x0e; break;
case CC_TR: n = 0x0f; break;
case CC_NO: n = 0x10; break;
case CC_NC: n = 0x11; break;
case CC_NS: n = 0x12; break;
case CC_NA: n = 0x13; break;
case CC_A: n = 0x14; break;
case CC_S: n = 0x15; break;
case CC_C: n = 0x16; break;
case CC_O: n = 0x17; break;
default:
n = 0;
assert(!"invalid condition code");
break;
}
code[pos / 32] |= (n & mask) << (pos % 32);
}
 
// Encodes the guard predicate of @i starting at bit 18 of code[0].
// Without a predicate source the field is set to 7; with one, the
// predicate's id is written and the value 8 << 18 is ORed in when the
// condition is CC_NOT_P (negated predicate).
void
CodeEmitterGK110::emitPredicate(const Instruction *i)
{
   if (i->predSrc < 0) {
      code[0] |= 7 << 18;
      return;
   }

   srcId(i->src(i->predSrc), 18);
   if (i->cc == CC_NOT_P)
      code[0] |= 8 << 18; // negate
   assert(i->getPredicate()->reg.file == FILE_PREDICATE);
}
 
void
CodeEmitterGK110::setCAddress14(const ValueRef& src)
{
const Storage& res = src.get()->asSym()->reg;
const int32_t addr = res.data.offset / 4;
 
code[0] |= (addr & 0x01ff) << 23;
code[1] |= (addr & 0x3e00) >> 9;
code[1] |= res.fileIndex << 5;
}
 
void
CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s)
{
const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64;
 
if (i->sType == TYPE_F32) {
assert(!(u32 & 0x00000fff));
code[0] |= ((u32 & 0x001ff000) >> 12) << 23;
code[1] |= ((u32 & 0x7fe00000) >> 21);
code[1] |= ((u32 & 0x80000000) >> 4);
} else
if (i->sType == TYPE_F64) {
assert(!(u64 & 0x00000fffffffffffULL));
code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23;
code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53);
code[1] |= ((u64 & 0x8000000000000000ULL) >> 36);
} else {
assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
code[0] |= (u32 & 0x001ff) << 23;
code[1] |= (u32 & 0x7fe00) >> 9;
code[1] |= (u32 & 0x80000) << 8;
}
}
 
void
CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,
Modifier mod)
{
uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
 
if (mod) {
ImmediateValue imm(i->getSrc(s)->asImm(), i->sType);
mod.applyTo(imm);
u32 = imm.reg.data.u32;
}
 
code[0] |= u32 << 23;
code[1] |= u32 >> 9;
}
 
void
CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
Modifier mod)
{
code[0] = ctg;
code[1] = opc << 20;
 
emitPredicate(i);
 
defId(i->def(0), 2);
 
for (int s = 0; s < 3 && i->srcExists(s); ++s) {
switch (i->src(s).getFile()) {
case FILE_GPR:
srcId(i->src(s), s ? 42 : 10);
break;
case FILE_IMMEDIATE:
setImmediate32(i, s, mod);
break;
default:
break;
}
}
}
 
 
void
CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg)
{
code[0] = ctg;
code[1] = opc << 20;
 
emitPredicate(i);
 
defId(i->def(0), 2);
 
switch (i->src(0).getFile()) {
case FILE_MEMORY_CONST:
code[1] |= 0x4 << 28;
setCAddress14(i->src(0));
break;
case FILE_GPR:
code[1] |= 0xc << 28;
srcId(i->src(0), 23);
break;
default:
assert(0);
break;
}
}
 
// 0x2 for GPR, c[] and 0x1 for short immediate
//
// Emits the common two/three-source form. When source 1 is an immediate,
// code[0] starts as 0x1 and @opc1 is used; otherwise code[0] starts as 0x2
// and @opc2 is used together with the operand selector 0xc in the top nibble
// of code[1]. Callers test (code[0] & 0x1) afterwards to find out which
// variant was emitted.
void
CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
uint32_t opc1)
{
const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE;

// Bit position for a GPR in source 1: it moves from 23 to 42 when source 2
// is a constant-buffer operand.
int s1 = 23;
if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST)
s1 = 42;

if (imm) {
code[0] = 0x1;
code[1] = opc1 << 20;
} else {
code[0] = 0x2;
code[1] = (0xc << 28) | (opc2 << 20);
}

emitPredicate(i);

defId(i->def(0), 2);

for (int s = 0; s < 3 && i->srcExists(s); ++s) {
switch (i->src(s).getFile()) {
case FILE_MEMORY_CONST:
// Clear one selector bit: c[] in source 2 leaves 0x8, c[] in any other
// source leaves 0x4 (see the table below).
code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28);
setCAddress14(i->src(s));
break;
case FILE_IMMEDIATE:
setShortImmediate(i, s);
break;
case FILE_GPR:
// Source 0 at bit 10, source 2 at bit 42, source 1 at s1.
srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
break;
default:
// ignore here, can be predicate or flags, but must not be address
break;
}
}
// 0x0 = invalid
// 0xc = rrr
// 0x8 = rrc
// 0x4 = rcr
assert(imm || (code[1] & (0xc << 28)));
}
 
// Applies the abs/neg modifiers of source @s to bit 27 of code[1]: abs
// clears the bit, then neg toggles it. NOTE(review): bit 27 is where
// setShortImmediate() stores the top bit of a short immediate, so this looks
// like folding the modifiers into the immediate's sign - confirm.
inline void
CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s)
{
   const uint32_t signBit = 1 << 27;

   if (i->src(s).mod.abs())
      code[1] &= ~signBit;
   if (i->src(s).mod.neg())
      code[1] ^= signBit;
}
 
void
CodeEmitterGK110::emitNOP(const Instruction *i)
{
code[0] = 0x00003c02;
code[1] = 0x85800000;
 
if (i)
emitPredicate(i);
else
code[0] = 0x001c3c02;
}
 
void
CodeEmitterGK110::emitFMAD(const Instruction *i)
{
assert(!isLIMM(i->src(1), TYPE_F32));
 
emitForm_21(i, 0x0c0, 0x940);
 
NEG_(34, 2);
SAT_(35);
RND_(36, F);
FTZ_(38);
DNZ_(39);
 
bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
 
if (code[0] & 0x1) {
if (neg1)
code[1] ^= 1 << 27;
} else
if (neg1) {
code[1] |= 1 << 19;
}
}
 
void
CodeEmitterGK110::emitDMAD(const Instruction *i)
{
assert(!i->saturate);
assert(!i->ftz);
 
emitForm_21(i, 0x1b8, 0xb38);
 
NEG_(34, 2);
RND_(36, F);
 
bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
 
if (code[0] & 0x1) {
if (neg1)
code[1] ^= 1 << 27;
} else
if (neg1) {
code[1] |= 1 << 19;
}
}
 
void
CodeEmitterGK110::emitFMUL(const Instruction *i)
{
bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
 
assert(i->postFactor >= -3 && i->postFactor <= 3);
 
if (isLIMM(i->src(1), TYPE_F32)) {
emitForm_L(i, 0x200, 0x2, Modifier(0));
 
FTZ_(38);
DNZ_(39);
SAT_(3a);
if (neg)
code[1] ^= 1 << 22;
 
assert(i->postFactor == 0);
} else {
emitForm_21(i, 0x234, 0xc34);
code[1] |= ((i->postFactor > 0) ?
(7 - i->postFactor) : (0 - i->postFactor)) << 12;
 
RND_(2a, F);
FTZ_(2f);
DNZ_(30);
SAT_(35);
 
if (code[0] & 0x1) {
if (neg)
code[1] ^= 1 << 27;
} else
if (neg) {
code[1] |= 1 << 19;
}
}
}
 
void
CodeEmitterGK110::emitDMUL(const Instruction *i)
{
bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
 
assert(!i->postFactor);
assert(!i->saturate);
assert(!i->ftz);
assert(!i->dnz);
 
emitForm_21(i, 0x240, 0xc40);
 
RND_(2a, F);
 
if (code[0] & 0x1) {
if (neg)
code[1] ^= 1 << 27;
} else
if (neg) {
code[1] |= 1 << 19;
}
}
 
void
CodeEmitterGK110::emitIMUL(const Instruction *i)
{
assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
 
if (isLIMM(i->src(1), TYPE_S32)) {
emitForm_L(i, 0x280, 2, Modifier(0));
 
assert(i->subOp != NV50_IR_SUBOP_MUL_HIGH);
 
if (i->sType == TYPE_S32)
code[1] |= 3 << 25;
} else {
emitForm_21(i, 0x21c, 0xc1c);
 
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
code[1] |= 1 << 10;
if (i->sType == TYPE_S32)
code[1] |= 3 << 11;
}
}
 
void
CodeEmitterGK110::emitFADD(const Instruction *i)
{
if (isLIMM(i->src(1), TYPE_F32)) {
assert(i->rnd == ROUND_N);
assert(!i->saturate);
 
Modifier mod = i->src(1).mod ^
Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0);
 
emitForm_L(i, 0x400, 0, mod);
 
FTZ_(3a);
NEG_(3b, 0);
ABS_(39, 0);
} else {
emitForm_21(i, 0x22c, 0xc2c);
 
FTZ_(2f);
RND_(2a, F);
ABS_(31, 0);
NEG_(33, 0);
SAT_(35);
 
if (code[0] & 0x1) {
modNegAbsF32_3b(i, 1);
if (i->op == OP_SUB) code[1] ^= 1 << 27;
} else {
ABS_(34, 1);
NEG_(30, 1);
if (i->op == OP_SUB) code[1] ^= 1 << 16;
}
}
}
 
void
CodeEmitterGK110::emitDADD(const Instruction *i)
{
assert(!i->saturate);
assert(!i->ftz);
 
emitForm_21(i, 0x238, 0xc38);
RND_(2a, F);
ABS_(31, 0);
NEG_(33, 0);
if (code[0] & 0x1) {
modNegAbsF32_3b(i, 1);
if (i->op == OP_SUB) code[1] ^= 1 << 27;
} else {
NEG_(30, 1);
ABS_(34, 1);
if (i->op == OP_SUB) code[1] ^= 1 << 16;
}
}
 
void
CodeEmitterGK110::emitUADD(const Instruction *i)
{
uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg();
 
if (i->op == OP_SUB)
addOp ^= 1;
 
assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
 
if (isLIMM(i->src(1), TYPE_S32)) {
emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0));
 
if (addOp & 2)
code[1] |= 1 << 27;
 
assert(!i->defExists(1));
assert(i->flagsSrc < 0);
 
SAT_(39);
} else {
emitForm_21(i, 0x208, 0xc08);
 
assert(addOp != 3); // would be add-plus-one
 
code[1] |= addOp << 19;
 
if (i->defExists(1))
code[1] |= 1 << 18; // write carry
if (i->flagsSrc >= 0)
code[1] |= 1 << 14; // add carry
 
SAT_(35);
}
}
 
// TODO: shl-add
void
CodeEmitterGK110::emitIMAD(const Instruction *i)
{
uint8_t addOp =
(i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg());
 
emitForm_21(i, 0x100, 0xa00);
 
assert(addOp != 3);
code[1] |= addOp << 26;
 
if (i->sType == TYPE_S32)
code[1] |= (1 << 19) | (1 << 24);
 
if (code[0] & 0x1) {
assert(!i->subOp);
SAT_(39);
} else {
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
code[1] |= 1 << 25;
SAT_(35);
}
}
 
void
CodeEmitterGK110::emitISAD(const Instruction *i)
{
assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
 
emitForm_21(i, 0x1f4, 0xb74);
 
if (i->dType == TYPE_S32)
code[1] |= 1 << 19;
}
 
void
CodeEmitterGK110::emitNOT(const Instruction *i)
{
code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src
code[1] = 0x22003800;
 
emitPredicate(i);
 
defId(i->def(0), 2);
 
switch (i->src(0).getFile()) {
case FILE_GPR:
code[1] |= 0xc << 28;
srcId(i->src(0), 23);
break;
case FILE_MEMORY_CONST:
code[1] |= 0x4 << 28;
setCAddress14(i->src(1));
break;
default:
assert(0);
break;
}
}
 
// Emits a two-source logic operation selected by @subOp. With a long
// immediate in source 1 the long form is used (source-1 modifier folded into
// the immediate, subOp at bit 24 of code[1]); otherwise the regular form is
// used with subOp at bit 12 and NOT flags for both sources.
void
CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)
{
   if (!isLIMM(i->src(1), TYPE_S32)) {
      emitForm_21(i, 0x220, 0xc20);
      code[1] |= subOp << 12;
      NOT_(2a, 0);
      NOT_(2b, 1);
      return;
   }

   emitForm_L(i, 0x200, 0, i->src(1).mod);
   code[1] |= subOp << 24;
   NOT_(3a, 0);
}
 
void
CodeEmitterGK110::emitPOPC(const Instruction *i)
{
assert(!isLIMM(i->src(1), TYPE_S32, true));
 
emitForm_21(i, 0x204, 0xc04);
 
NOT_(2a, 0);
if (!(code[0] & 0x1))
NOT_(2b, 1);
}
 
// Emits INSBF; it needs nothing beyond the common operand form.
void
CodeEmitterGK110::emitINSBF(const Instruction *i)
{
   const uint32_t opRegForm = 0x1f8; // used when source 1 is not an immediate
   const uint32_t opImmForm = 0xb78; // used with a short immediate

   emitForm_21(i, opRegForm, opImmForm);
}
 
void
CodeEmitterGK110::emitEXTBF(const Instruction *i)
{
emitForm_21(i, 0x600, 0xc00);
 
if (i->dType == TYPE_S32)
code[1] |= 0x80000;
if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
code[1] |= 0x800;
}
 
void
CodeEmitterGK110::emitBFIND(const Instruction *i)
{
emitForm_C(i, 0x218, 0x2);
 
if (i->dType == TYPE_S32)
code[1] |= 0x80000;
if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
code[1] |= 0x800;
if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
code[1] |= 0x1000;
}
 
void
CodeEmitterGK110::emitShift(const Instruction *i)
{
if (i->op == OP_SHR) {
emitForm_21(i, 0x214, 0xc14);
if (isSignedType(i->dType))
code[1] |= 1 << 19;
} else {
emitForm_21(i, 0x224, 0xc24);
}
 
if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
code[1] |= 1 << 10;
}
 
void
CodeEmitterGK110::emitPreOp(const Instruction *i)
{
emitForm_C(i, 0x248, 0x2);
 
if (i->op == OP_PREEX2)
code[1] |= 1 << 10;
 
NEG_(30, 0);
ABS_(34, 0);
}
 
void
CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp)
{
code[0] = 0x00000002 | (subOp << 23);
code[1] = 0x84000000;
 
emitPredicate(i);
 
defId(i->def(0), 2);
srcId(i->src(0), 10);
 
NEG_(33, 0);
ABS_(31, 0);
SAT_(35);
}
 
void
CodeEmitterGK110::emitMINMAX(const Instruction *i)
{
uint32_t op2, op1;
 
switch (i->dType) {
case TYPE_U32:
case TYPE_S32:
op2 = 0x210;
op1 = 0xc10;
break;
case TYPE_F32:
op2 = 0x230;
op1 = 0xc30;
break;
case TYPE_F64:
op2 = 0x228;
op1 = 0xc28;
break;
default:
assert(0);
op2 = 0;
op1 = 0;
break;
}
emitForm_21(i, op2, op1);
 
if (i->dType == TYPE_S32)
code[1] |= 1 << 19;
code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt
 
FTZ_(2f);
ABS_(31, 0);
NEG_(33, 0);
if (code[0] & 0x1) {
modNegAbsF32_3b(i, 1);
} else {
ABS_(34, 1);
NEG_(30, 1);
}
}
 
void
CodeEmitterGK110::emitCVT(const Instruction *i)
{
const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType);
const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType);
 
bool sat = i->saturate;
bool abs = i->src(0).mod.abs();
bool neg = i->src(0).mod.neg();
 
RoundMode rnd = i->rnd;
 
switch (i->op) {
case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
case OP_SAT: sat = true; break;
case OP_NEG: neg = !neg; break;
case OP_ABS: abs = true; neg = false; break;
default:
break;
}
 
DataType dType;
 
if (i->op == OP_NEG && i->dType == TYPE_U32)
dType = TYPE_S32;
else
dType = i->dType;
 
 
uint32_t op;
 
if (f2f) op = 0x254;
else if (f2i) op = 0x258;
else if (i2f) op = 0x25c;
else op = 0x260;
 
emitForm_C(i, op, 0x2);
 
FTZ_(2f);
if (neg) code[1] |= 1 << 16;
if (abs) code[1] |= 1 << 20;
if (sat) code[1] |= 1 << 21;
 
emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1);
 
code[0] |= typeSizeofLog2(dType) << 10;
code[0] |= typeSizeofLog2(i->sType) << 12;
 
if (isSignedIntType(dType))
code[0] |= 0x4000;
if (isSignedIntType(i->sType))
code[0] |= 0x8000;
}
 
// Encode OP_SET / OP_SET_AND / OP_SET_OR / OP_SET_XOR.
//
// Two encodings exist, chosen by the register file of def(0): one that
// writes a predicate and one that writes a general register. In both the
// opcode pair depends on the source type (F32, F64, everything else).
// After the form-specific part, the signedness bit, the optional logic
// combine with src(2), and the condition code are added.
void
CodeEmitterGK110::emitSET(const CmpInstruction *i)
{
uint16_t op1, op2;

if (i->def(0).getFile() == FILE_PREDICATE) {
// predicate-destination form
switch (i->sType) {
case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break;
case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break;
default:
op2 = 0x1b0;
op1 = 0xb30;
break;
}
emitForm_21(i, op2, op1);

NEG_(2e, 0);
ABS_(9, 0);
// NOTE(review): the low bit of code[0] is left by emitForm_21 and selects
// where the src(1) modifiers go — presumably it marks the immediate form;
// confirm against emitForm_21.
if (!(code[0] & 0x1)) {
NEG_(8, 1);
ABS_(2f, 1);
} else {
modNegAbsF32_3b(i, 1);
}
FTZ_(32);

// normal DST field is negated predicate result
// Move the 3-bit value from bits 2..4 up to bits 5..7 and clear bits 2..7
// first; bits 2..4 then receive def(1), or 0x1c (all ones) when absent.
code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
if (i->defExists(1))
defId(i->def(1), 2);
else
code[0] |= 0x1c;
} else {
// general-register-destination form
switch (i->sType) {
case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
case TYPE_F64: op2 = 0x080; op1 = 0x900; break;
default:
op2 = 0x1a8;
op1 = 0xb28;
break;
}
emitForm_21(i, op2, op1);

NEG_(2e, 0);
ABS_(39, 0);
if (!(code[0] & 0x1)) {
NEG_(38, 1);
ABS_(2f, 1);
} else {
modNegAbsF32_3b(i, 1);
}
FTZ_(3a);

// bit 23 of code[1] is set when the result type is F32
if (i->dType == TYPE_F32)
code[1] |= 1 << 23;
}
if (i->sType == TYPE_S32)
code[1] |= 1 << 19;

if (i->op != OP_SET) {
// combining variants: 2-bit logic op at bit 16 of code[1], src(2) at 0x2a
switch (i->op) {
case OP_SET_AND: code[1] |= 0x0 << 16; break;
case OP_SET_OR: code[1] |= 0x1 << 16; break;
case OP_SET_XOR: code[1] |= 0x2 << 16; break;
default:
assert(0);
break;
}
srcId(i->src(2), 0x2a);
} else {
// plain SET: fill the field at bits 10..12 of code[1] with 7
code[1] |= 0x7 << 10;
}
// float compares use a 4-bit condition field at 0x33, others 3 bits at 0x34
emitCondCode(i->setCond,
isFloatType(i->sType) ? 0x33 : 0x34,
isFloatType(i->sType) ? 0xf : 0x7);
}
 
void
CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
{
CondCode cc = i->setCond;
if (i->src(2).mod.neg())
cc = reverseCondCode(cc);
 
if (i->dType == TYPE_F32) {
emitForm_21(i, 0x1d0, 0xb50);
FTZ_(32);
emitCondCode(cc, 0x33, 0xf);
} else {
emitForm_21(i, 0x1a0, 0xb20);
emitCondCode(cc, 0x34, 0x7);
}
}
 
void CodeEmitterGK110::emitSELP(const Instruction *i)
{
emitForm_21(i, 0x250, 0x050);
 
if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT)))
code[1] |= 1 << 13;
}
 
void CodeEmitterGK110::emitTEXBAR(const Instruction *i)
{
code[0] = 0x0000003e | (i->subOp << 23);
code[1] = 0x77000000;
 
emitPredicate(i);
}
 
void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i)
{
code[0] = 0x00000002;
code[1] = 0x76c00000;
 
code[1] |= i->tex.r << 9;
// code[1] |= i->tex.s << (9 + 8);
 
if (i->tex.liveOnly)
code[0] |= 0x80000000;
 
defId(i->def(0), 2);
srcId(i->src(0), 10);
}
 
static inline bool
isNextIndependentTex(const TexInstruction *i)
{
if (!i->next || !isTextureOp(i->next->op))
return false;
if (i->getDef(0)->interfers(i->next->getSrc(0)))
return false;
return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
}
 
// Encode the texture operations that emitInstruction routes here
// (TEX, TXB, TXL, TXD, TXF, TXG, TXLQ).
//
// The base opcode depends on the op and on whether the texture handle is
// indirect (tex.rIndirectSrc >= 0). In the direct case tex.r is embedded in
// code[1] at an op-specific bit position. Afterwards the common fields are
// added: t/p mode, live-only, LOD mode, predicate, write mask, registers,
// target description and offset flags.
void
CodeEmitterGK110::emitTEX(const TexInstruction *i)
{
const bool ind = i->tex.rIndirectSrc >= 0;

if (ind) {
// indirect handle: opcode only, no embedded tex.r
code[0] = 0x00000002;
switch (i->op) {
case OP_TXD:
code[1] = 0x7e000000;
break;
case OP_TXLQ:
code[1] = 0x7e800000;
break;
case OP_TXF:
code[1] = 0x78000000;
break;
case OP_TXG:
code[1] = 0x7dc00000;
break;
default:
code[1] = 0x7d800000;
break;
}
} else {
// direct handle: opcode plus tex.r at a per-op bit position
switch (i->op) {
case OP_TXD:
code[0] = 0x00000002;
code[1] = 0x76000000;
code[1] |= i->tex.r << 9;
break;
case OP_TXLQ:
code[0] = 0x00000002;
code[1] = 0x76800000;
code[1] |= i->tex.r << 9;
break;
case OP_TXF:
code[0] = 0x00000002;
code[1] = 0x70000000;
code[1] |= i->tex.r << 13;
break;
case OP_TXG:
code[0] = 0x00000001;
code[1] = 0x70000000;
code[1] |= i->tex.r << 15;
break;
default:
code[0] = 0x00000001;
code[1] = 0x60000000;
code[1] |= i->tex.r << 15;
break;
}
}

code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode

if (i->tex.liveOnly)
code[0] |= 0x80000000;

// LOD mode bits for TXB / TXL; the other ops add nothing here
switch (i->op) {
case OP_TEX: break;
case OP_TXB: code[1] |= 0x2000; break;
case OP_TXL: code[1] |= 0x3000; break;
case OP_TXF: break;
case OP_TXG: break;
case OP_TXD: break;
case OP_TXLQ: break;
default:
assert(!"invalid texture op");
break;
}

// Bit 0x1000 has opposite polarity for TXF: it is set when levelZero is
// NOT requested, whereas for every other op it is set when it is.
if (i->op == OP_TXF) {
if (!i->tex.levelZero)
code[1] |= 0x1000;
} else
if (i->tex.levelZero) {
code[1] |= 0x1000;
}

if (i->op != OP_TXD && i->tex.derivAll)
code[1] |= 0x200;

emitPredicate(i);

code[1] |= i->tex.mask << 2;

const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)

defId(i->def(0), 2);
srcId(i->src(0), 10);
srcId(i, src1, 23);

if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13;

// texture target:
code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7;
if (i->tex.target.isArray())
code[1] |= 0x40;
if (i->tex.target.isShadow())
code[1] |= 0x400;
if (i->tex.target == TEX_TARGET_2D_MS ||
i->tex.target == TEX_TARGET_2D_MS_ARRAY)
code[1] |= 0x800;

// NOTE(review): placeholder — an immediate second source currently gets no
// special treatment.
if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
// ?
}

// offset flag: the bit used depends on the op
if (i->tex.useOffsets == 1) {
switch (i->op) {
case OP_TXF: code[1] |= 0x200; break;
case OP_TXD: code[1] |= 0x00400000; break;
default: code[1] |= 0x800; break;
}
}
if (i->tex.useOffsets == 4)
code[1] |= 0x1000;
}
 
void
CodeEmitterGK110::emitTXQ(const TexInstruction *i)
{
code[0] = 0x00000002;
code[1] = 0x75400001;
 
switch (i->tex.query) {
case TXQ_DIMS: code[0] |= 0x01 << 25; break;
case TXQ_TYPE: code[0] |= 0x02 << 25; break;
case TXQ_SAMPLE_POSITION: code[0] |= 0x05 << 25; break;
case TXQ_FILTER: code[0] |= 0x10 << 25; break;
case TXQ_LOD: code[0] |= 0x12 << 25; break;
case TXQ_BORDER_COLOUR: code[0] |= 0x16 << 25; break;
default:
assert(!"invalid texture query");
break;
}
 
code[1] |= i->tex.mask << 2;
code[1] |= i->tex.r << 9;
if (/*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0)
code[1] |= 0x08000000;
 
defId(i->def(0), 2);
srcId(i->src(0), 10);
 
emitPredicate(i);
}
 
void
CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
{
code[0] = 0x00000002 | ((qOp & 1) << 31);
code[1] = 0x7fc00000 | (qOp >> 1) | (laneMask << 12);
 
defId(i->def(0), 2);
srcId(i->src(0), 10);
srcId(i->srcExists(1) ? i->src(1) : i->src(0), 23);
 
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
code[1] |= 1 << 9; // dall
 
emitPredicate(i);
}
 
// Encode OP_PIXLD: form-L layout with the subOp at bit 2 of code[1] and a
// fixed 0x00070000 field.
void
CodeEmitterGK110::emitPIXLD(const Instruction *i)
{
   emitForm_L(i, 0x7f4, 2, Modifier(0));

   code[1] |= 0x00070000;
   code[1] |= i->subOp << 2;
}
 
// Encode a control-flow instruction.
//
// Each op selects an opcode in code[1] and a two-bit mask describing which
// extra fields it takes: bit 0 = predicate, bit 1 = branch target.
// Call targets are resolved either through a builtin relocation (absolute)
// or as a PC-relative offset to the callee function; other targeted ops use
// a PC-relative offset to the target basic block.
void
CodeEmitterGK110::emitFlow(const Instruction *i)
{
const FlowInstruction *f = i->asFlow();

unsigned mask; // bit 0: predicate, bit 1: target

code[0] = 0x00000000;

switch (i->op) {
case OP_BRA:
code[1] = f->absolute ? 0x10800000 : 0x12000000;
// bit 7 of code[0] flags a target supplied through a constant buffer
if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
code[0] |= 0x80;
mask = 3;
break;
case OP_CALL:
code[1] = f->absolute ? 0x11000000 : 0x13000000;
if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
code[0] |= 0x80;
mask = 2;
break;

case OP_EXIT: code[1] = 0x18000000; mask = 1; break;
case OP_RET: code[1] = 0x19000000; mask = 1; break;
case OP_DISCARD: code[1] = 0x19800000; mask = 1; break;
case OP_BREAK: code[1] = 0x1a000000; mask = 1; break;
case OP_CONT: code[1] = 0x1a800000; mask = 1; break;

case OP_JOINAT: code[1] = 0x14800000; mask = 2; break;
case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break;
case OP_PRECONT: code[1] = 0x15800000; mask = 2; break;
case OP_PRERET: code[1] = 0x13800000; mask = 2; break;

case OP_QUADON: code[1] = 0x1b800000; mask = 0; break;
case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break;
case OP_BRKPT: code[1] = 0x00000000; mask = 0; break;
default:
assert(!"invalid flow operation");
return;
}

if (mask & 1) {
emitPredicate(i);
// without a flags source, fill the field at bits 2..5 of code[0]
if (i->flagsSrc < 0)
code[0] |= 0x3c;
}

// Not a FlowInstruction: nothing target-related to encode.
if (!f)
return;

if (f->allWarp)
code[0] |= 1 << 9;
if (f->limit)
code[0] |= 1 << 8;

if (f->op == OP_CALL) {
if (f->builtin) {
// Builtin address is patched in later via two relocations, one per word.
assert(f->absolute);
uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23);
addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9);
} else {
// Offset is relative to the end of this 8-byte instruction; the low 9 bits
// go to the top of code[0], the rest to the bottom of code[1].
assert(!f->absolute);
int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
code[0] |= (pcRel & 0x1ff) << 23;
code[1] |= (pcRel >> 9) & 0x7fff;
}
} else
if (mask & 2) {
int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
// currently we don't want absolute branches
assert(!f->absolute);
code[0] |= (pcRel & 0x1ff) << 23;
code[1] |= (pcRel >> 9) & 0x7fff;
}
}
 
void
CodeEmitterGK110::emitPFETCH(const Instruction *i)
{
uint32_t prim = i->src(0).get()->reg.data.u32;
 
code[0] = 0x00000002 | ((prim & 0xff) << 23);
code[1] = 0x7f800000;
 
emitPredicate(i);
 
const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
 
defId(i->def(0), 2);
srcId(i, src1, 10);
}
 
void
CodeEmitterGK110::emitVFETCH(const Instruction *i)
{
unsigned int size = typeSizeof(i->dType);
uint32_t offset = i->src(0).get()->reg.data.offset;
 
code[0] = 0x00000002 | (offset << 23);
code[1] = 0x7ec00000 | (offset >> 9);
code[1] |= (size / 4 - 1) << 18;
 
if (i->perPatch)
code[1] |= 0x4;
if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads
 
emitPredicate(i);
 
defId(i->def(0), 2);
srcId(i->src(0).getIndirect(0), 10);
srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address
}
 
void
CodeEmitterGK110::emitEXPORT(const Instruction *i)
{
unsigned int size = typeSizeof(i->dType);
uint32_t offset = i->src(0).get()->reg.data.offset;
 
code[0] = 0x00000002 | (offset << 23);
code[1] = 0x7f000000 | (offset >> 9);
code[1] |= (size / 4 - 1) << 18;
 
if (i->perPatch)
code[1] |= 0x4;
 
emitPredicate(i);
 
assert(i->src(1).getFile() == FILE_GPR);
 
srcId(i->src(0).getIndirect(0), 10);
srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address
srcId(i->src(1), 2);
}
 
void
CodeEmitterGK110::emitOUT(const Instruction *i)
{
assert(i->src(0).getFile() == FILE_GPR);
 
emitForm_21(i, 0x1f0, 0xb70);
 
if (i->op == OP_EMIT)
code[1] |= 1 << 10;
if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
code[1] |= 1 << 11;
}
 
// Write the interpolation mode taken from i->ipa into code[1]:
// bits 0..1 of ipa go to bits 21..22, bits 2..3 of ipa go to bits 19..20.
void
CodeEmitterGK110::emitInterpMode(const Instruction *i)
{
   // TODO: INTERP_SAMPLEID
   code[1] |= (i->ipa & 0x3) << 21;
   code[1] |= (i->ipa & 0xc) << 17;
}
 
void
CodeEmitterGK110::emitINTERP(const Instruction *i)
{
const uint32_t base = i->getSrc(0)->reg.data.offset;
 
code[0] = 0x00000002 | (base << 31);
code[1] = 0x74800000 | (base >> 1);
 
if (i->saturate)
code[1] |= 1 << 18;
 
if (i->op == OP_PINTERP)
srcId(i->src(1), 23);
else
code[0] |= 0xff << 23;
 
srcId(i->src(0).getIndirect(0), 10);
emitInterpMode(i);
 
emitPredicate(i);
defId(i->def(0), 2);
 
if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10);
else
code[1] |= 0xff << 10;
}
 
// OR the load/store type code for @ty into the instruction at absolute bit
// position @pos (bits 0..31 are code[0], 32..63 are code[1]).
// Codes: U8=0, S8=1, U16=2, S16=3, any 32-bit type=4, any 64-bit type=5,
// B128=6. Unknown types assert and encode as 0.
void
CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos)
{
   uint8_t typeCode = 0;

   switch (ty) {
   case TYPE_U8:   typeCode = 0; break;
   case TYPE_S8:   typeCode = 1; break;
   case TYPE_U16:  typeCode = 2; break;
   case TYPE_S16:  typeCode = 3; break;
   case TYPE_F32:
   case TYPE_U32:
   case TYPE_S32:  typeCode = 4; break;
   case TYPE_F64:
   case TYPE_U64:
   case TYPE_S64:  typeCode = 5; break;
   case TYPE_B128: typeCode = 6; break;
   default:
      typeCode = 0;
      assert(!"invalid ld/st type");
      break;
   }

   const int word = pos / 32;
   const int shift = pos % 32;
   code[word] |= typeCode << shift;
}
 
// OR the caching-mode code for @c into the instruction at absolute bit
// position @pos. Codes: CA=0, CG=1, CS=2, CV=3 (the commented-out WB/WT
// cases would share the CA/CV codes). Unknown modes assert and encode as 0.
void
CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos)
{
   uint8_t modeCode = 0;

   switch (c) {
   case CACHE_CA: // case CACHE_WB:
      modeCode = 0;
      break;
   case CACHE_CG:
      modeCode = 1;
      break;
   case CACHE_CS:
      modeCode = 2;
      break;
   case CACHE_CV: // case CACHE_WT:
      modeCode = 3;
      break;
   default:
      modeCode = 0;
      assert(!"invalid caching mode");
      break;
   }

   const int word = pos / 32;
   const int shift = pos % 32;
   code[word] |= modeCode << shift;
}
 
void
CodeEmitterGK110::emitSTORE(const Instruction *i)
{
int32_t offset = SDATA(i->src(0)).offset;
 
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break;
case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break;
case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break;
default:
assert(!"invalid memory file");
break;
}
 
if (i->src(0).getFile() != FILE_MEMORY_GLOBAL)
offset &= 0xffffff;
 
if (code[0] & 0x2) {
emitLoadStoreType(i->dType, 0x33);
if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
emitCachingMode(i->cache, 0x2f);
} else {
emitLoadStoreType(i->dType, 0x38);
emitCachingMode(i->cache, 0x3b);
}
code[0] |= offset << 23;
code[1] |= offset >> 9;
 
emitPredicate(i);
 
srcId(i->src(1), 2);
srcId(i->src(0).getIndirect(0), 10);
}
 
void
CodeEmitterGK110::emitLOAD(const Instruction *i)
{
int32_t offset = SDATA(i->src(0)).offset;
 
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;
case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break;
case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break;
case FILE_MEMORY_CONST:
if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
emitMOV(i);
return;
}
offset &= 0xffff;
code[0] = 0x00000002;
code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7);
code[1] |= i->subOp << 15;
break;
default:
assert(!"invalid memory file");
break;
}
 
if (code[0] & 0x2) {
offset &= 0xffffff;
emitLoadStoreType(i->dType, 0x33);
if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
emitCachingMode(i->cache, 0x2f);
} else {
emitLoadStoreType(i->dType, 0x38);
emitCachingMode(i->cache, 0x3b);
}
code[0] |= offset << 23;
code[1] |= offset >> 9;
 
emitPredicate(i);
 
defId(i->def(0), 2);
srcId(i->src(0).getIndirect(0), 10);
}
 
// Map the system value referenced by @ref to its special-register number.
// Indexed values (TID, CTAID, NTID, NCTAID, CLOCK) add sv.index to their
// base. Values without a register assert and return 0.
uint8_t
CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
{
   const int idx = SDATA(ref).sv.index;

   switch (SDATA(ref).sv.sv) {
   // non-indexed registers
   case SV_LANEID:        return 0x00;
   case SV_PHYSID:        return 0x03;
   case SV_VERTEX_COUNT:  return 0x10;
   case SV_INVOCATION_ID: return 0x11;
   case SV_YDIR:          return 0x12;
   case SV_GRIDID:        return 0x2c;
   case SV_SBASE:         return 0x30;
   case SV_LBASE:         return 0x34;
   // indexed registers
   case SV_TID:           return 0x21 + idx;
   case SV_CTAID:         return 0x25 + idx;
   case SV_NTID:          return 0x29 + idx;
   case SV_NCTAID:        return 0x2d + idx;
   case SV_CLOCK:         return 0x50 + idx;
   default:
      break;
   }

   assert(!"no sreg for system value");
   return 0;
}
 
// Encode a move. The encoding depends on the register file of source 0:
// system value, 32-bit immediate, predicate, or anything else (form C).
void
CodeEmitterGK110::emitMOV(const Instruction *i)
{
   switch (i->src(0).getFile()) {
   case FILE_SYSTEM_VALUE:
      code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23);
      code[1] = 0x86400000;
      emitPredicate(i);
      defId(i->def(0), 2);
      break;

   case FILE_IMMEDIATE:
      code[0] = 0x00000002 | (i->lanes << 14);
      code[1] = 0x74000000;
      emitPredicate(i);
      defId(i->def(0), 2);
      setImmediate32(i, 0, Modifier(0));
      break;

   case FILE_PREDICATE:
      code[0] = 0x00000002;
      code[1] = 0x84401c07;
      emitPredicate(i);
      defId(i->def(0), 2);
      srcId(i->src(0), 14);
      break;

   default:
      emitForm_C(i, 0x24c, 2);
      code[1] |= i->lanes << 10;
      break;
   }
}
 
// Emit one instruction into the output buffer and advance code/codeSize.
//
// Returns false (emitting nothing for the instruction itself) when the
// instruction cannot be encoded in 8 bytes, when the buffer is too small,
// or when the operation is one that should have been eliminated/lowered
// earlier or is unknown.
//
// When writeIssueDelays is set, every 64-byte group starts with an extra
// 8-byte scheduling word; each of the following seven instructions ORs its
// `sched` value into its own slot of that word.
//
// Changes from the previous version: the "should have been eliminated"
// diagnostic now ends with a newline like the other messages, and the
// fallback diagnostic is spelled correctly and reports the opcode.
bool
CodeEmitterGK110::emitInstruction(Instruction *insn)
{
   // A scheduling word is due when we sit on a 64-byte boundary.
   const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8;

   if (insn->encSize != 8) {
      ERROR("skipping unencodable instruction: ");
      insn->print();
      return false;
   } else
   if (codeSize + size > codeSizeLimit) {
      ERROR("code emitter output buffer too small\n");
      return false;
   }

   if (writeIssueDelays) {
      // Index of this instruction within its group, -1 on the boundary.
      int id = (codeSize & 0x3f) / 8 - 1;
      if (id < 0) {
         id += 1;
         code[0] = 0x00000000; // cf issue delay "instruction"
         code[1] = 0x08000000;
         code += 2;
         codeSize += 8;
      }
      // Step back to the scheduling word of the current group.
      uint32_t *data = code - (id * 2 + 2);

      switch (id) {
      case 0: data[0] |= insn->sched << 2; break;
      case 1: data[0] |= insn->sched << 10; break;
      case 2: data[0] |= insn->sched << 18; break;
      // slot 3 straddles the two 32-bit words
      case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break;
      case 4: data[1] |= insn->sched << 2; break;
      case 5: data[1] |= insn->sched << 10; break;
      case 6: data[1] |= insn->sched << 18; break;
      default:
         assert(0);
         break;
      }
   }

   // assert that instructions with multiple defs don't corrupt registers
   for (int d = 0; insn->defExists(d); ++d)
      assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);

   switch (insn->op) {
   case OP_MOV:
   case OP_RDSV:
      emitMOV(insn);
      break;
   case OP_NOP:
      break;
   case OP_LOAD:
      emitLOAD(insn);
      break;
   case OP_STORE:
      emitSTORE(insn);
      break;
   case OP_LINTERP:
   case OP_PINTERP:
      emitINTERP(insn);
      break;
   case OP_VFETCH:
      emitVFETCH(insn);
      break;
   case OP_EXPORT:
      emitEXPORT(insn);
      break;
   case OP_PFETCH:
      emitPFETCH(insn);
      break;
   case OP_EMIT:
   case OP_RESTART:
      emitOUT(insn);
      break;
   case OP_ADD:
   case OP_SUB:
      if (insn->dType == TYPE_F64)
         emitDADD(insn);
      else if (isFloatType(insn->dType))
         emitFADD(insn);
      else
         emitUADD(insn);
      break;
   case OP_MUL:
      if (insn->dType == TYPE_F64)
         emitDMUL(insn);
      else if (isFloatType(insn->dType))
         emitFMUL(insn);
      else
         emitIMUL(insn);
      break;
   case OP_MAD:
   case OP_FMA:
      if (insn->dType == TYPE_F64)
         emitDMAD(insn);
      else if (isFloatType(insn->dType))
         emitFMAD(insn);
      else
         emitIMAD(insn);
      break;
   case OP_SAD:
      emitISAD(insn);
      break;
   case OP_NOT:
      emitNOT(insn);
      break;
   case OP_AND:
      emitLogicOp(insn, 0);
      break;
   case OP_OR:
      emitLogicOp(insn, 1);
      break;
   case OP_XOR:
      emitLogicOp(insn, 2);
      break;
   case OP_SHL:
   case OP_SHR:
      emitShift(insn);
      break;
   case OP_SET:
   case OP_SET_AND:
   case OP_SET_OR:
   case OP_SET_XOR:
      emitSET(insn->asCmp());
      break;
   case OP_SELP:
      emitSELP(insn);
      break;
   case OP_SLCT:
      emitSLCT(insn->asCmp());
      break;
   case OP_MIN:
   case OP_MAX:
      emitMINMAX(insn);
      break;
   case OP_ABS:
   case OP_NEG:
   case OP_CEIL:
   case OP_FLOOR:
   case OP_TRUNC:
   case OP_CVT:
   case OP_SAT:
      emitCVT(insn);
      break;
   case OP_RSQ:
      emitSFnOp(insn, 5 + 2 * insn->subOp);
      break;
   case OP_RCP:
      emitSFnOp(insn, 4 + 2 * insn->subOp);
      break;
   case OP_LG2:
      emitSFnOp(insn, 3);
      break;
   case OP_EX2:
      emitSFnOp(insn, 2);
      break;
   case OP_SIN:
      emitSFnOp(insn, 1);
      break;
   case OP_COS:
      emitSFnOp(insn, 0);
      break;
   case OP_PRESIN:
   case OP_PREEX2:
      emitPreOp(insn);
      break;
   case OP_TEX:
   case OP_TXB:
   case OP_TXL:
   case OP_TXD:
   case OP_TXF:
   case OP_TXG:
   case OP_TXLQ:
      emitTEX(insn->asTex());
      break;
   case OP_TXQ:
      emitTXQ(insn->asTex());
      break;
   case OP_TEXBAR:
      emitTEXBAR(insn);
      break;
   case OP_PIXLD:
      emitPIXLD(insn);
      break;
   case OP_BRA:
   case OP_CALL:
   case OP_PRERET:
   case OP_RET:
   case OP_DISCARD:
   case OP_EXIT:
   case OP_PRECONT:
   case OP_CONT:
   case OP_PREBREAK:
   case OP_BREAK:
   case OP_JOINAT:
   case OP_BRKPT:
   case OP_QUADON:
   case OP_QUADPOP:
      emitFlow(insn);
      break;
   case OP_QUADOP:
      emitQUADOP(insn, insn->subOp, insn->lanes);
      break;
   case OP_DFDX:
      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
      break;
   case OP_DFDY:
      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
      break;
   case OP_POPCNT:
      emitPOPC(insn);
      break;
   case OP_INSBF:
      emitINSBF(insn);
      break;
   case OP_EXTBF:
      emitEXTBF(insn);
      break;
   case OP_BFIND:
      emitBFIND(insn);
      break;
   case OP_JOIN:
      // A join is a NOP carrying the join flag, which is applied below.
      emitNOP(insn);
      insn->join = 1;
      break;
   case OP_PHI:
   case OP_UNION:
   case OP_CONSTRAINT:
      ERROR("operation should have been eliminated\n");
      return false;
   case OP_EXP:
   case OP_LOG:
   case OP_SQRT:
   case OP_POW:
      ERROR("operation should have been lowered\n");
      return false;
   default:
      ERROR("unknown op: %u\n", insn->op);
      return false;
   }

   if (insn->join)
      code[0] |= 1 << 22;

   code += 2;
   codeSize += 8;
   return true;
}
 
// Every instruction uses the 8-byte encoding; there is no short form, so
// the answer does not depend on the instruction.
uint32_t
CodeEmitterGK110::getMinEncodingSize(const Instruction *) const
{
   const uint32_t encodingBytes = 8;
   return encodingBytes;
}
 
// Run the generic per-function preparation, then compute scheduling data
// when the target uses software scheduling.
void
CodeEmitterGK110::prepareEmission(Function *func)
{
   const Target *targ = func->getProgram()->getTarget();

   CodeEmitter::prepareEmission(func);

   if (!targ->hasSWSched)
      return;
   calculateSchedDataNVC0(targ, func);
}
 
// Construct an emitter for @target with no output buffer attached yet.
// Issue-delay words are written when the target uses software scheduling.
CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target)
   : CodeEmitter(target),
     targNVC0(target),
     writeIssueDelays(target->hasSWSched)
{
   code = NULL;
   codeSizeLimit = 0;
   codeSize = 0;
   relocInfo = NULL;
}
 
// Allocate a GK110 code emitter bound to this target and configured for the
// given program type. The caller receives ownership of the new object.
CodeEmitter *
TargetNVC0::createCodeEmitterGK110(Program::Type type)
{
   CodeEmitterGK110 *const emitter = new CodeEmitterGK110(this);

   emitter->setProgramType(type);
   return emitter;
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
0,0 → 1,2910
/*
* Copyright 2014 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Ben Skeggs <bskeggs@redhat.com>
*/
 
#include "codegen/nv50_ir_target_gm107.h"
 
namespace nv50_ir {
 
// Machine-code emitter for the GM107 target.
//
// The private helpers fall into three groups: generic field/operand
// writers (emitField, emitGPR, emitPRED, emitADDR, ...), modifier writers
// (emitSAT, emitABS, emitNEG, emitRND, ...) and one emit function per
// hardware instruction.
class CodeEmitterGM107 : public CodeEmitter
{
public:
CodeEmitterGM107(const TargetGM107 *);

virtual bool emitInstruction(Instruction *);
virtual uint32_t getMinEncodingSize(const Instruction *) const;

virtual void prepareEmission(Program *);
virtual void prepareEmission(Function *);

inline void setProgramType(Program::Type pType) { progType = pType; }

private:
const TargetGM107 *targGM107;

Program::Type progType;

// instruction currently being encoded; read by the emit helpers
const Instruction *insn;
const bool writeIssueDelays;
uint32_t *data;

private:
// Generic bit-field writer: (words, bit position, width, value). The
// three-argument overload targets the current instruction words (code).
inline void emitField(uint32_t *, int, int, uint32_t);
inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }

// Start an instruction with the given high word; the bool selects whether
// the predicate field is written as well (default: yes).
inline void emitInsn(uint32_t, bool);
inline void emitInsn(uint32_t o) { emitInsn(o, true); }
inline void emitPred();
// Register operand writers. Passing no value (or a NULL one) encodes the
// "no register" id.
inline void emitGPR(int, const Value *);
inline void emitGPR(int pos) {
emitGPR(pos, (const Value *)NULL);
}
inline void emitGPR(int pos, const ValueRef &ref) {
emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
}
inline void emitGPR(int pos, const ValueRef *ref) {
emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
}
inline void emitGPR(int pos, const ValueDef &def) {
emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
}
inline void emitSYS(int, const Value *);
inline void emitSYS(int pos, const ValueRef &ref) {
emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
}
inline void emitPRED(int, const Value *);
inline void emitPRED(int pos) {
emitPRED(pos, (const Value *)NULL);
}
inline void emitPRED(int pos, const ValueRef &ref) {
emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
}
inline void emitPRED(int pos, const ValueDef &def) {
emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
}
inline void emitADDR(int, int, int, int, const ValueRef &);
inline void emitCBUF(int, int, int, int, int, const ValueRef &);
inline bool longIMMD(const ValueRef &);
inline void emitIMMD(int, int, const ValueRef &);

// modifier / flag writers
void emitCond3(int, CondCode);
void emitCond4(int, CondCode);
void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
inline void emitO(int);
inline void emitP(int);
inline void emitSAT(int);
inline void emitCC(int);
inline void emitX(int);
inline void emitABS(int, const ValueRef &);
inline void emitNEG(int, const ValueRef &);
inline void emitNEG2(int, const ValueRef &, const ValueRef &);
inline void emitFMZ(int, int);
inline void emitRND(int, RoundMode, int);
inline void emitRND(int pos) {
emitRND(pos, insn->rnd, -1);
}
inline void emitPDIV(int);
inline void emitINV(int, const ValueRef &);

// control flow
void emitEXIT();
void emitBRA();
void emitCAL();
void emitPCNT();
void emitCONT();
void emitPBK();
void emitBRK();
void emitPRET();
void emitRET();
void emitSSY();
void emitSYNC();
void emitSAM();
void emitRAM();

// moves and conversions
void emitMOV();
void emitS2R();
void emitF2F();
void emitF2I();
void emitI2F();
void emitI2I();
void emitSHFL();

// double-precision arithmetic
void emitDADD();
void emitDMUL();
void emitDFMA();
void emitDMNMX();
void emitDSET();
void emitDSETP();

// single-precision arithmetic
void emitFADD();
void emitFMUL();
void emitFFMA();
void emitMUFU();
void emitFMNMX();
void emitRRO();
void emitFCMP();
void emitFSET();
void emitFSETP();
void emitFSWZADD();

// integer and logic operations
void emitLOP();
void emitNOT();
void emitIADD();
void emitIMUL();
void emitIMAD();
void emitIMNMX();
void emitICMP();
void emitISET();
void emitISETP();
void emitSHL();
void emitSHR();
void emitPOPC();
void emitBFI();
void emitBFE();
void emitFLO();

// memory and attribute access
void emitLDSTs(int, DataType);
void emitLDSTc(int);
void emitLDC();
void emitLDL();
void emitLDS();
void emitLD();
void emitSTL();
void emitSTS();
void emitST();
void emitALD();
void emitAST();
void emitISBERD();
void emitIPA();

void emitPIXLD();

// texturing
void emitTEXs(int);
void emitTEX();
void emitTLD();
void emitTLD4();
void emitTXD();
void emitTXQ();
void emitTMML();
void emitDEPBAR();

// miscellaneous
void emitNOP();
void emitKIL();
void emitOUT();
};
 
/*******************************************************************************
* general instruction layout/fields
******************************************************************************/
 
// OR the low @s bits of @v into the 64-bit instruction held in
// data[0] (low word) / data[1] (high word), starting at bit @b.
// A negative @b means "field not present" and writes nothing.
// The assert accepts values that fit in @s bits either unsigned or
// sign-extended.
void
CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
{
   if (b < 0)
      return;

   const uint32_t mask = ((1ULL << s) - 1);
   assert(!(v & ~mask) || (v & ~mask) == ~mask);

   const uint64_t bits = (uint64_t)(v & mask) << b;
   data[0] |= bits;
   data[1] |= bits >> 32;
}
 
// Write the guard predicate: register id in bits 16..18 and the negate
// flag in bit 19. Without a predicate source, id 7 is encoded.
void
CodeEmitterGM107::emitPred()
{
   if (insn->predSrc < 0) {
      emitField(16, 3, 7);
      return;
   }

   emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
   emitField(19, 1, insn->cc == CC_NOT_P);
}
 
// Begin a new instruction: clear the low word, store @hi as the high word
// and, if requested, add the guard predicate.
void
CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
{
   code[1] = hi;
   code[0] = 0x00000000;

   if (!pred)
      return;
   emitPred();
}
 
// Write an 8-bit general register id at bit @pos; a NULL value encodes 255.
void
CodeEmitterGM107::emitGPR(int pos, const Value *val)
{
   if (val)
      emitField(pos, 8, val->reg.data.id);
   else
      emitField(pos, 8, 255);
}
 
void
CodeEmitterGM107::emitSYS(int pos, const Value *val)
{
int id = val ? val->reg.data.id : -1;
 
switch (id) {
case SV_LANEID : id = 0x00; break;
case SV_VERTEX_COUNT : id = 0x10; break;
case SV_INVOCATION_ID : id = 0x11; break;
case SV_INVOCATION_INFO: id = 0x1d; break;
default:
assert(!"invalid system value");
id = 0;
break;
}
 
emitField(pos, 8, id);
}
 
// Write a 3-bit predicate register id at bit @pos; a NULL value encodes 7.
void
CodeEmitterGM107::emitPRED(int pos, const Value *val)
{
   if (val)
      emitField(pos, 3, val->reg.data.id);
   else
      emitField(pos, 3, 7);
}
 
void
CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
const ValueRef &ref)
{
const Value *v = ref.get();
assert(!(v->reg.data.offset & ((1 << shr) - 1)));
if (gpr >= 0)
emitGPR(gpr, ref.getIndirect(0));
emitField(off, len, v->reg.data.offset >> shr);
}
 
void
CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
const ValueRef &ref)
{
const Value *v = ref.get();
const Symbol *s = v->asSym();
 
assert(!(s->reg.data.offset & ((1 << shr) - 1)));
 
emitField(buf, 5, v->reg.fileIndex);
if (gpr >= 0)
emitGPR(gpr, ref.getIndirect(0));
emitField(off, 16, s->reg.data.offset >> shr);
}
 
// Returns true when @ref is an immediate that does not fit the short
// immediate form: for float source types the low 12 bits must be zero; for
// other types the top 12 bits must be all zeros or all ones.
bool
CodeEmitterGM107::longIMMD(const ValueRef &ref)
{
   if (ref.getFile() != FILE_IMMEDIATE)
      return false;

   const ImmediateValue *imm = ref.get()->asImm();
   const uint32_t bits = imm->reg.data.u32;

   if (isFloatType(insn->sType))
      return (bits & 0x00000fff) != 0x00000000;

   const uint32_t high = bits & 0xfff00000;
   return high != 0x00000000 && high != 0xfff00000;
}
 
void
CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
{
const ImmediateValue *imm = ref.get()->asImm();
uint32_t val = imm->reg.data.u32;
 
if (len == 19) {
if (isFloatType(insn->sType)) {
assert(!(val & 0x00000fff));
val >>= 12;
}
assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
emitField( 56, 1, (val & 0x80000) >> 19);
emitField(pos, len, (val & 0x7ffff));
} else {
emitField(pos, len, val);
}
}
 
/*******************************************************************************
* modifiers
******************************************************************************/
 
void
CodeEmitterGM107::emitCond3(int pos, CondCode code)
{
int data = 0;
 
switch (code) {
case CC_FL : data = 0x00; break;
case CC_LTU:
case CC_LT : data = 0x01; break;
case CC_EQU:
case CC_EQ : data = 0x02; break;
case CC_LEU:
case CC_LE : data = 0x03; break;
case CC_GTU:
case CC_GT : data = 0x04; break;
case CC_NEU:
case CC_NE : data = 0x05; break;
case CC_GEU:
case CC_GE : data = 0x06; break;
case CC_TR : data = 0x07; break;
default:
assert(!"invalid cond3");
break;
}
 
emitField(pos, 3, data);
}
 
void
CodeEmitterGM107::emitCond4(int pos, CondCode code)
{
int data = 0;
 
switch (code) {
case CC_FL: data = 0x00; break;
case CC_LT: data = 0x01; break;
case CC_EQ: data = 0x02; break;
case CC_LE: data = 0x03; break;
case CC_GT: data = 0x04; break;
case CC_NE: data = 0x05; break;
case CC_GE: data = 0x06; break;
// case CC_NUM: data = 0x07; break;
// case CC_NAN: data = 0x08; break;
case CC_LTU: data = 0x09; break;
case CC_EQU: data = 0x0a; break;
case CC_LEU: data = 0x0b; break;
case CC_GTU: data = 0x0c; break;
case CC_NEU: data = 0x0d; break;
case CC_GEU: data = 0x0e; break;
case CC_TR: data = 0x0f; break;
default:
assert(!"invalid cond4");
break;
}
 
emitField(pos, 4, data);
}
 
// Set the bit at @pos when source 0 lives in the shader-output file.
void
CodeEmitterGM107::emitO(int pos)
{
   const bool readsOutput = insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT;

   emitField(pos, 1, readsOutput);
}
 
void
CodeEmitterGM107::emitP(int pos)
{
emitField(pos, 1, insn->perPatch);
}
 
void
CodeEmitterGM107::emitSAT(int pos)
{
emitField(pos, 1, insn->saturate);
}
 
// Set the bit at @pos when the current instruction has a second definition.
void
CodeEmitterGM107::emitCC(int pos)
{
   const bool hasSecondDef = insn->defExists(1);

   emitField(pos, 1, hasSecondDef);
}
 
// Set the bit at @pos when the current instruction has a flags source.
void
CodeEmitterGM107::emitX(int pos)
{
   const bool hasFlagsSrc = insn->flagsSrc >= 0;

   emitField(pos, 1, hasFlagsSrc);
}
 
void
CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
{
emitField(pos, 1, ref.mod.abs());
}
 
void
CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
{
emitField(pos, 1, ref.mod.neg());
}
 
void
CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
{
emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
}
 
void
CodeEmitterGM107::emitFMZ(int pos, int len)
{
emitField(pos, len, insn->dnz << 1 | insn->ftz);
}
 
// Write rounding mode @rnd: a 2-bit mode at bit @rmp (N=0, M=1, P=2, Z=3)
// and a 1-bit "integer" flag at bit @rip for the *I variants. An invalid
// mode asserts and encodes mode 0 with the flag clear.
void
CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
{
   int mode = 0;
   int toInteger = 0;

   switch (rnd) {
   case ROUND_N:  mode = 0; break;
   case ROUND_NI: mode = 0; toInteger = 1; break;
   case ROUND_M:  mode = 1; break;
   case ROUND_MI: mode = 1; toInteger = 1; break;
   case ROUND_P:  mode = 2; break;
   case ROUND_PI: mode = 2; toInteger = 1; break;
   case ROUND_Z:  mode = 3; break;
   case ROUND_ZI: mode = 3; toInteger = 1; break;
   default:
      assert(!"invalid round mode");
      break;
   }

   emitField(rip, 1, toInteger);
   emitField(rmp, 2, mode);
}
 
// Write the post-factor of the current instruction as a 3-bit field at
// @pos. The factor must lie in [-3, 3]; positive values encode as
// 7 - factor, zero and negative values as their magnitude.
void
CodeEmitterGM107::emitPDIV(int pos)
{
   assert(insn->postFactor >= -3 && insn->postFactor <= 3);

   if (insn->postFactor <= 0)
      emitField(pos, 3, 0 - insn->postFactor);
   else
      emitField(pos, 3, 7 - insn->postFactor);
}
 
// Set the bit at @pos when @ref carries the NOT modifier.
void
CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
{
   const bool inverted = !!(ref.mod & Modifier(NV50_IR_MOD_NOT));

   emitField(pos, 1, inverted);
}
 
/*******************************************************************************
* control flow
******************************************************************************/
 
// Encode EXIT with guard predicate and an always-true condition code.
void
CodeEmitterGM107::emitEXIT()
{
   emitInsn(0xe3000000, true);
   emitCond5(0x00, CC_TR);
}
 
void
CodeEmitterGM107::emitBRA()
{
const FlowInstruction *insn = this->insn->asFlow();
int gpr = -1;
 
if (insn->indirect) {
if (insn->absolute)
emitInsn(0xe2000000); // JMX
else
emitInsn(0xe2500000); // BRX
gpr = 0x08;
} else {
if (insn->absolute)
emitInsn(0xe2100000); // JMP
else
emitInsn(0xe2400000); // BRA
emitField(0x07, 1, insn->allWarp);
}
 
emitField(0x06, 1, insn->limit);
emitCond5(0x00, CC_TR);
 
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
if (!insn->absolute)
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
else
emitField(0x14, 32, insn->target.bb->binPos);
} else {
emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
emitField(0x05, 1, 1);
}
}
 
void
CodeEmitterGM107::emitCAL()
{
const FlowInstruction *insn = this->insn->asFlow();
 
if (insn->absolute) {
emitInsn(0xe2200000, 0); // JCAL
} else {
emitInsn(0xe2600000, 0); // CAL
}
 
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
if (!insn->absolute)
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
else {
if (insn->builtin) {
int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
} else {
emitField(0x14, 32, insn->target.bb->binPos);
}
}
} else {
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
emitField(0x05, 1, 1);
}
}
 
void
CodeEmitterGM107::emitPCNT()
{
const FlowInstruction *insn = this->insn->asFlow();
 
emitInsn(0xe2b00000, 0);
 
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
} else {
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
emitField(0x05, 1, 1);
}
}
 
/* Encode CONT: opcode word 0xe3500000 with the CC_TR condition code
 * emitted at bit 0x00.
 */
void
CodeEmitterGM107::emitCONT()
{
   emitInsn (0xe3500000);
   emitCond5(0x00, CC_TR);
}
 
void
CodeEmitterGM107::emitPBK()
{
const FlowInstruction *insn = this->insn->asFlow();
 
emitInsn(0xe2a00000, 0);
 
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
} else {
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
emitField(0x05, 1, 1);
}
}
 
/* Encode BRK: opcode word 0xe3400000 with the CC_TR condition code
 * emitted at bit 0x00.
 */
void
CodeEmitterGM107::emitBRK()
{
   emitInsn (0xe3400000);
   emitCond5(0x00, CC_TR);
}
 
void
CodeEmitterGM107::emitPRET()
{
const FlowInstruction *insn = this->insn->asFlow();
 
emitInsn(0xe2700000, 0);
 
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
} else {
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
emitField(0x05, 1, 1);
}
}
 
/* Encode RET: opcode word 0xe3200000 with the CC_TR condition code
 * emitted at bit 0x00.
 */
void
CodeEmitterGM107::emitRET()
{
   emitInsn (0xe3200000);
   emitCond5(0x00, CC_TR);
}
 
void
CodeEmitterGM107::emitSSY()
{
const FlowInstruction *insn = this->insn->asFlow();
 
emitInsn(0xe2900000, 0);
 
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
} else {
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
emitField(0x05, 1, 1);
}
}
 
/* Encode SYNC: opcode word 0xf0f80000 with the CC_TR condition code
 * emitted at bit 0x00.
 */
void
CodeEmitterGM107::emitSYNC()
{
   emitInsn (0xf0f80000);
   emitCond5(0x00, CC_TR);
}
 
/* Encode SAM: opcode word 0xe3700000, no operands.
 * NOTE(review): the second emitInsn argument (0) presumably suppresses
 * predicate encoding - confirm against emitInsn.
 */
void
CodeEmitterGM107::emitSAM()
{
   emitInsn(0xe3700000, 0);
}
 
/* Encode RAM: opcode word 0xe3800000, no operands.
 * NOTE(review): the second emitInsn argument (0) presumably suppresses
 * predicate encoding - confirm against emitInsn.
 */
void
CodeEmitterGM107::emitRAM()
{
   emitInsn(0xe3800000, 0);
}
 
/*******************************************************************************
* predicate/cc
******************************************************************************/
 
/*******************************************************************************
* movement / conversion
******************************************************************************/
 
/* Encode a move of src 0 into def 0.
 *
 * The 32-bit immediate form (opcode word 0x01000000) is used only for an
 * immediate source that is either of type F32 or needs a long immediate;
 * every other case uses the short form selected by the source's register
 * file. The lane mask sits at a different bit position in each form.
 */
void
CodeEmitterGM107::emitMOV()
{
   const bool useLongImmediate =
      insn->src(0).getFile() == FILE_IMMEDIATE &&
      (insn->sType == TYPE_F32 || longIMMD(insn->src(0)));

   if (useLongImmediate) {
      emitInsn (0x01000000);
      emitIMMD (0x14, 32, insn->src(0));
      emitField(0x0c, 4, insn->lanes);
   } else {
      switch (insn->src(0).getFile()) {
      case FILE_GPR:
         emitInsn(0x5c980000);
         emitGPR (0x14, insn->src(0));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4c980000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x38980000);
         emitIMMD(0x14, 19, insn->src(0));
         break;
      default:
         assert(!"bad src file");
         break;
      }
      emitField(0x27, 4, insn->lanes);
   }

   emitGPR(0x00, insn->def(0));
}
 
/* Encode S2R (opcode word 0xf0c80000): src 0 is encoded by emitSYS at
 * bit 0x14 and def 0 as a GPR at bit 0x00.
 */
void
CodeEmitterGM107::emitS2R()
{
   emitInsn(0xf0c80000);
   emitSYS (0x14, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitF2F()
{
RoundMode rnd = insn->rnd;
 
switch (insn->op) {
case OP_FLOOR: rnd = ROUND_MI; break;
case OP_CEIL : rnd = ROUND_PI; break;
case OP_TRUNC: rnd = ROUND_ZI; break;
default:
break;
}
 
switch (insn->src(0).getFile()) {
case FILE_GPR:
emitInsn(0x5ca80000);
emitGPR (0x14, insn->src(0));
break;
case FILE_MEMORY_CONST:
emitInsn(0x4ca80000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
break;
case FILE_IMMEDIATE:
emitInsn(0x38a80000);
emitIMMD(0x14, 19, insn->src(0));
break;
default:
assert(!"bad src0 file");
break;
}
 
emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
emitCC (0x2f);
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
emitFMZ (0x2c, 1);
emitRND (0x27, rnd, 0x2a);
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitF2I()
{
RoundMode rnd = insn->rnd;
 
switch (insn->op) {
case OP_FLOOR: rnd = ROUND_M; break;
case OP_CEIL : rnd = ROUND_P; break;
case OP_TRUNC: rnd = ROUND_Z; break;
default:
break;
}
 
switch (insn->src(0).getFile()) {
case FILE_GPR:
emitInsn(0x5cb00000);
emitGPR (0x14, insn->src(0));
break;
case FILE_MEMORY_CONST:
emitInsn(0x4cb00000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
break;
case FILE_IMMEDIATE:
emitInsn(0x38b00000);
emitIMMD(0x14, 19, insn->src(0));
break;
default:
assert(!"bad src0 file");
break;
}
 
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
emitCC (0x2f);
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
emitFMZ (0x2c, 1);
emitRND (0x27, rnd, 0x2a);
emitField(0x0c, 1, isSignedType(insn->dType));
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitI2F()
{
RoundMode rnd = insn->rnd;
 
switch (insn->op) {
case OP_FLOOR: rnd = ROUND_M; break;
case OP_CEIL : rnd = ROUND_P; break;
case OP_TRUNC: rnd = ROUND_Z; break;
default:
break;
}
 
switch (insn->src(0).getFile()) {
case FILE_GPR:
emitInsn(0x5cb80000);
emitGPR (0x14, insn->src(0));
break;
case FILE_MEMORY_CONST:
emitInsn(0x4cb80000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
break;
case FILE_IMMEDIATE:
emitInsn(0x38b80000);
emitIMMD(0x14, 19, insn->src(0));
break;
default:
assert(!"bad src0 file");
break;
}
 
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
emitCC (0x2f);
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
emitRND (0x27, rnd, -1);
emitField(0x0d, 1, isSignedType(insn->sType));
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
emitGPR (0x00, insn->def(0));
}
 
/* Encode an integer-to-integer conversion of src 0 into def 0.
 *
 * The opcode word is selected by the register file of src 0. OP_ABS and
 * OP_NEG are folded into the abs/neg bits alongside the source modifiers.
 * Bits 0x0d/0x0c record signedness of the source/destination type; sizes
 * are encoded as log2 of their byte size.
 */
void
CodeEmitterGM107::emitI2I()
{
   switch (insn->src(0).getFile()) {
   case FILE_GPR:
      emitInsn(0x5ce00000);
      emitGPR (0x14, insn->src(0));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4ce00000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38e00000);
      emitIMMD(0x14, 19, insn->src(0));
      break;
   default:
      assert(!"bad src0 file");
      break;
   }

   emitSAT (0x32);
   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
   emitCC (0x2f);
   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
   emitField(0x0d, 1, isSignedType(insn->sType));
   emitField(0x0c, 1, isSignedType(insn->dType));
   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
   emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitSHFL()
{
int type = 0;
 
emitInsn (0xef100000);
 
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitGPR(0x14, insn->src(1));
break;
case FILE_IMMEDIATE:
emitIMMD(0x14, 5, insn->src(1));
type |= 1;
break;
default:
assert(!"invalid src1 file");
break;
}
 
/*XXX: what is this arg? hardcode immediate for now */
emitField(0x22, 13, 0x1c03);
type |= 2;
 
emitPRED (0x30);
emitField(0x1e, 2, insn->subOp);
emitField(0x1c, 2, type);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
/*******************************************************************************
* double
******************************************************************************/
 
/* Encode DADD (also used for OP_SUB).
 *
 * The opcode word is selected by the register file of src 1, which is
 * encoded at bit 0x14; src 0 is a GPR at 0x08 and def 0 a GPR at 0x00.
 */
void
CodeEmitterGM107::emitDADD()
{
   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c700000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c700000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38700000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }
   emitABS(0x31, insn->src(1));
   emitNEG(0x30, insn->src(0));
   emitCC (0x2f);
   emitABS(0x2e, insn->src(0));
   emitNEG(0x2d, insn->src(1));

   /* subtraction: toggle the src 1 negate bit (0x2d is bit 13 of code[1]) */
   if (insn->op == OP_SUB)
      code[1] ^= 0x00002000;

   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}
 
/* Encode DMUL.
 *
 * The opcode word is selected by the register file of src 1, which is
 * encoded at bit 0x14. The negate modifiers of both sources are combined
 * into a single bit at 0x30.
 */
void
CodeEmitterGM107::emitDMUL()
{
   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c800000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c800000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38800000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   emitNEG2(0x30, insn->src(0), insn->src(1));
   emitCC (0x2f);
   emitRND (0x27);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
/* Encode DFMA.
 *
 * When src 2 is a GPR it is encoded at 0x27 and the opcode word is
 * selected by the register file of src 1 (encoded at 0x14). When src 2 is
 * in a constant buffer the operand slots swap: src 1 goes to the GPR slot
 * at 0x27 and src 2 is encoded as the constant-buffer operand.
 */
void
CodeEmitterGM107::emitDFMA()
{
   switch(insn->src(2).getFile()) {
   case FILE_GPR:
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5b700000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4b700000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x36700000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitGPR (0x27, insn->src(2));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x53700000);
      emitGPR (0x27, insn->src(1));
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
      break;
   default:
      assert(!"bad src2 file");
      break;
   }

   emitRND (0x32);
   emitNEG (0x31, insn->src(2));
   /* product sign: negations of src 0 and src 1 share one bit */
   emitNEG2(0x30, insn->src(0), insn->src(1));
   emitCC (0x2f);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
/* Encode DMNMX (double min/max).
 *
 * The opcode word is selected by the register file of src 1, which is
 * encoded at bit 0x14. Bit 0x2a is set for OP_MAX and clear otherwise.
 */
void
CodeEmitterGM107::emitDMNMX()
{
   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c500000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c500000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38500000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   emitABS (0x31, insn->src(1));
   emitNEG (0x30, insn->src(0));
   emitCC (0x2f);
   emitABS (0x2e, insn->src(0));
   emitNEG (0x2d, insn->src(1));
   emitField(0x2a, 1, insn->op == OP_MAX);
   emitPRED (0x27);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitDSET()
{
const CmpInstruction *insn = this->insn->asCmp();
 
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x59000000);
emitGPR (0x14, insn->src(1));
break;
case FILE_MEMORY_CONST:
emitInsn(0x49000000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
break;
case FILE_IMMEDIATE:
emitInsn(0x32000000);
emitIMMD(0x14, 19, insn->src(1));
break;
default:
assert(!"bad src1 file");
break;
}
 
if (insn->op != OP_SET) {
switch (insn->op) {
case OP_SET_AND: emitField(0x2d, 2, 0); break;
case OP_SET_OR : emitField(0x2d, 2, 1); break;
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
default:
assert(!"invalid set op");
break;
}
emitPRED(0x27, insn->src(2));
} else {
emitPRED(0x27);
}
 
emitABS (0x36, insn->src(0));
emitNEG (0x35, insn->src(1));
emitField(0x34, 1, insn->dType == TYPE_F32);
emitCond4(0x30, insn->setCond);
emitCC (0x2f);
emitABS (0x2c, insn->src(1));
emitNEG (0x2b, insn->src(0));
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitDSETP()
{
const CmpInstruction *insn = this->insn->asCmp();
 
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x5b800000);
emitGPR (0x14, insn->src(1));
break;
case FILE_MEMORY_CONST:
emitInsn(0x4b800000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
break;
case FILE_IMMEDIATE:
emitInsn(0x36800000);
emitIMMD(0x14, 19, insn->src(1));
break;
default:
assert(!"bad src1 file");
break;
}
 
if (insn->op != OP_SET) {
switch (insn->op) {
case OP_SET_AND: emitField(0x2d, 2, 0); break;
case OP_SET_OR : emitField(0x2d, 2, 1); break;
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
default:
assert(!"invalid set op");
break;
}
emitPRED(0x27, insn->src(2));
} else {
emitPRED(0x27);
}
 
emitCond4(0x30, insn->setCond);
emitABS (0x2c, insn->src(1));
emitNEG (0x2b, insn->src(0));
emitGPR (0x08, insn->src(0));
emitABS (0x07, insn->src(0));
emitNEG (0x06, insn->src(1));
emitPRED (0x03, insn->def(0));
if (insn->defExists(1))
emitPRED(0x00, insn->def(1));
else
emitPRED(0x00);
}
 
/*******************************************************************************
* float
******************************************************************************/
 
void
CodeEmitterGM107::emitFADD()
{
if (!longIMMD(insn->src(1))) {
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x5c580000);
emitGPR (0x14, insn->src(1));
break;
case FILE_MEMORY_CONST:
emitInsn(0x4c580000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
break;
case FILE_IMMEDIATE:
emitInsn(0x38580000);
emitIMMD(0x14, 19, insn->src(1));
break;
default:
assert(!"bad src1 file");
break;
}
emitSAT(0x32);
emitABS(0x31, insn->src(1));
emitNEG(0x30, insn->src(0));
emitCC (0x2f);
emitABS(0x2e, insn->src(0));
emitNEG(0x2d, insn->src(1));
emitFMZ(0x2c, 1);
} else {
emitInsn(0x08000000);
emitABS(0x39, insn->src(1));
emitNEG(0x38, insn->src(0));
emitFMZ(0x37, 1);
emitABS(0x36, insn->src(0));
emitNEG(0x35, insn->src(1));
emitCC (0x34);
emitIMMD(0x14, 32, insn->src(1));
}
 
if (insn->op == OP_SUB)
code[1] ^= 0x00002000;
 
emitGPR(0x08, insn->src(0));
emitGPR(0x00, insn->def(0));
}
 
/* Encode FMUL.
 *
 * Short form: the opcode word is selected by the register file of src 1,
 * which is encoded at bit 0x14; the negate modifiers of both sources are
 * combined into one bit at 0x30.
 * Long form (opcode word 0x1e000000): src 1 is a 32-bit immediate at
 * 0x14; there is no negate bit, so a negative product sign is folded into
 * the immediate by flipping its top bit.
 */
void
CodeEmitterGM107::emitFMUL()
{
   if (!longIMMD(insn->src(1))) {
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5c680000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4c680000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x38680000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitSAT (0x32);
      emitNEG2(0x30, insn->src(0), insn->src(1));
      emitCC (0x2f);
      emitFMZ (0x2c, 2);
      emitPDIV(0x29);
      emitRND (0x27);
   } else {
      emitInsn(0x1e000000);
      emitSAT (0x37);
      emitFMZ (0x35, 2);
      emitCC (0x34);
      emitIMMD(0x14, 32, insn->src(1));
      /* bit 19 of code[1] is bit 0x33, the top bit of the immediate */
      if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
         code[1] ^= 0x00080000; /* flip immd sign bit */
   }

   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}
 
/* Encode FFMA.
 *
 * When src 2 is a GPR it is encoded at 0x27 and the opcode word is
 * selected by the register file of src 1 (encoded at 0x14). When src 2 is
 * in a constant buffer the operand slots swap: src 1 goes to the GPR slot
 * at 0x27 and src 2 is encoded as the constant-buffer operand.
 */
void
CodeEmitterGM107::emitFFMA()
{
   /*XXX: ffma32i exists, but not using it as third src overlaps dst */
   switch(insn->src(2).getFile()) {
   case FILE_GPR:
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x59800000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x49800000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x32800000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitGPR (0x27, insn->src(2));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x51800000);
      emitGPR (0x27, insn->src(1));
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
      break;
   default:
      assert(!"bad src2 file");
      break;
   }
   emitRND (0x33);
   emitSAT (0x32);
   emitNEG (0x31, insn->src(2));
   /* product sign: negations of src 0 and src 1 share one bit */
   emitNEG2(0x30, insn->src(0), insn->src(1));
   emitCC (0x2f);

   emitFMZ(0x35, 2);
   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitMUFU()
{
int mufu = 0;
 
switch (insn->op) {
case OP_COS: mufu = 0; break;
case OP_SIN: mufu = 1; break;
case OP_EX2: mufu = 2; break;
case OP_LG2: mufu = 3; break;
case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
default:
assert(!"invalid mufu");
break;
}
 
emitInsn (0x50800000);
emitSAT (0x32);
emitNEG (0x30, insn->src(0));
emitABS (0x2e, insn->src(0));
emitField(0x14, 3, mufu);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
/* Encode FMNMX (float min/max).
 *
 * The opcode word is selected by the register file of src 1, which is
 * encoded at bit 0x14. Bit 0x2a is set for OP_MAX and clear otherwise.
 */
void
CodeEmitterGM107::emitFMNMX()
{
   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c600000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c600000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38600000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   emitField(0x2a, 1, insn->op == OP_MAX);
   emitPRED (0x27);

   emitABS(0x31, insn->src(1));
   emitNEG(0x30, insn->src(0));
   emitCC (0x2f);
   emitABS(0x2e, insn->src(0));
   emitNEG(0x2d, insn->src(1));
   emitFMZ(0x2c, 1);
   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}
 
/* Encode RRO.
 *
 * The opcode word is selected by the register file of src 0, which is
 * encoded at bit 0x14. Bit 0x27 is set when the operation is OP_PREEX2.
 */
void
CodeEmitterGM107::emitRRO()
{
   switch (insn->src(0).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c900000);
      emitGPR (0x14, insn->src(0));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c900000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38900000);
      emitIMMD(0x14, 19, insn->src(0));
      break;
   default:
      assert(!"bad src file");
      break;
   }

   emitABS (0x31, insn->src(0));
   emitNEG (0x2d, insn->src(0));
   emitField(0x27, 1, insn->op == OP_PREEX2);
   emitGPR (0x00, insn->def(0));
}
 
/* Encode FCMP.
 *
 * A negate modifier on src 2 is not encoded as a bit; instead the
 * condition code is replaced by reverseCondCode(). When src 2 is a GPR it
 * is encoded at 0x27 and the opcode word is selected by the register file
 * of src 1 (encoded at 0x14). When src 2 is in a constant buffer, src 1
 * goes to the GPR slot at 0x27 and src 2 is the constant-buffer operand.
 */
void
CodeEmitterGM107::emitFCMP()
{
   const CmpInstruction *insn = this->insn->asCmp();
   CondCode cc = insn->setCond;

   if (insn->src(2).mod.neg())
      cc = reverseCondCode(cc);

   switch(insn->src(2).getFile()) {
   case FILE_GPR:
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5ba00000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4ba00000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x36a00000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitGPR (0x27, insn->src(2));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x53a00000);
      emitGPR (0x27, insn->src(1));
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
      break;
   default:
      assert(!"bad src2 file");
      break;
   }

   emitCond4(0x30, cc);
   emitFMZ (0x2f, 1);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitFSET()
{
const CmpInstruction *insn = this->insn->asCmp();
 
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x58000000);
emitGPR (0x14, insn->src(1));
break;
case FILE_MEMORY_CONST:
emitInsn(0x48000000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
break;
case FILE_IMMEDIATE:
emitInsn(0x30000000);
emitIMMD(0x14, 19, insn->src(1));
break;
default:
assert(!"bad src1 file");
break;
}
 
if (insn->op != OP_SET) {
switch (insn->op) {
case OP_SET_AND: emitField(0x2d, 2, 0); break;
case OP_SET_OR : emitField(0x2d, 2, 1); break;
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
default:
assert(!"invalid set op");
break;
}
emitPRED(0x27, insn->src(2));
} else {
emitPRED(0x27);
}
 
emitFMZ (0x37, 1);
emitABS (0x36, insn->src(0));
emitNEG (0x35, insn->src(1));
emitField(0x34, 1, insn->dType == TYPE_F32);
emitCond4(0x30, insn->setCond);
emitCC (0x2f);
emitABS (0x2c, insn->src(1));
emitNEG (0x2b, insn->src(0));
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitFSETP()
{
const CmpInstruction *insn = this->insn->asCmp();
 
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x5bb00000);
emitGPR (0x14, insn->src(1));
break;
case FILE_MEMORY_CONST:
emitInsn(0x4bb00000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
break;
case FILE_IMMEDIATE:
emitInsn(0x36b00000);
emitIMMD(0x14, 19, insn->src(1));
break;
default:
assert(!"bad src1 file");
break;
}
 
if (insn->op != OP_SET) {
switch (insn->op) {
case OP_SET_AND: emitField(0x2d, 2, 0); break;
case OP_SET_OR : emitField(0x2d, 2, 1); break;
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
default:
assert(!"invalid set op");
break;
}
emitPRED(0x27, insn->src(2));
} else {
emitPRED(0x27);
}
 
emitCond4(0x30, insn->setCond);
emitFMZ (0x2f, 1);
emitABS (0x2c, insn->src(1));
emitNEG (0x2b, insn->src(0));
emitGPR (0x08, insn->src(0));
emitABS (0x07, insn->src(0));
emitNEG (0x06, insn->src(1));
emitPRED (0x03, insn->def(0));
if (insn->defExists(1))
emitPRED(0x00, insn->def(1));
else
emitPRED(0x00);
}
 
/* Encode FSWZADD (opcode word 0x50f80000).
 *
 * The instruction's subOp is emitted as an 8-bit field at 0x1c, and the
 * 'lanes' member is reused to carry the 1-bit .ndv flag at 0x26. Both
 * sources and the definition are GPRs.
 */
void
CodeEmitterGM107::emitFSWZADD()
{
   emitInsn (0x50f80000);
   emitCC (0x2f);
   emitFMZ (0x2c, 1);
   emitRND (0x27);
   emitField(0x26, 1, insn->lanes); /* abused for .ndv */
   emitField(0x1c, 8, insn->subOp);
   emitGPR (0x14, insn->src(1));
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
/*******************************************************************************
* integer
******************************************************************************/
 
void
CodeEmitterGM107::emitLOP()
{
int lop = 0;
 
switch (insn->op) {
case OP_AND: lop = 0; break;
case OP_OR : lop = 1; break;
case OP_XOR: lop = 2; break;
default:
assert(!"invalid lop");
break;
}
 
if (!longIMMD(insn->src(1))) {
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x5c400000);
emitGPR (0x14, insn->src(1));
break;
case FILE_MEMORY_CONST:
emitInsn(0x4c400000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
break;
case FILE_IMMEDIATE:
emitInsn(0x38400000);
emitIMMD(0x14, 19, insn->src(1));
break;
default:
assert(!"bad src1 file");
break;
}
emitPRED (0x30);
emitField(0x29, 2, lop);
emitINV (0x28, insn->src(1));
emitINV (0x27, insn->src(0));
} else {
emitInsn (0x04000000);
emitINV (0x38, insn->src(1));
emitINV (0x37, insn->src(0));
emitField(0x35, 2, lop);
emitIMMD (0x14, 32, insn->src(1));
}
 
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
/* special-case of emitLOP(): lop pass_b dst 0 ~src
 *
 * The single operand is src 0; it is placed in the second-operand slot
 * (bit 0x14) and the first-operand GPR slot at 0x08 is emitted without an
 * operand. The short-form opcode words are the LOP ones with extra low
 * bits (0x700) preset.
 *
 * The long-immediate branch is selected on src 0 and therefore must encode
 * src 0 as well; it previously referenced src 1, which this function never
 * otherwise touches.
 */
void
CodeEmitterGM107::emitNOT()
{
   if (!longIMMD(insn->src(0))) {
      switch (insn->src(0).getFile()) {
      case FILE_GPR:
         emitInsn(0x5c400700);
         emitGPR (0x14, insn->src(0));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4c400700);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x38400700);
         emitIMMD(0x14, 19, insn->src(0));
         break;
      default:
         assert(!"bad src0 file");
         break;
      }
      emitPRED (0x30);
   } else {
      emitInsn (0x05600000);
      emitIMMD (0x14, 32, insn->src(0));
   }

   emitGPR(0x08);
   emitGPR(0x00, insn->def(0));
}
 
/* Encode IADD (also used for OP_SUB).
 *
 * Short form: the opcode word is selected by the register file of src 1,
 * which is encoded at bit 0x14; negate bits are at 0x31 (src 0) and 0x30
 * (src 1).
 * Long form (opcode word 0x1c000000): src 1 is a 32-bit immediate at 0x14.
 */
void
CodeEmitterGM107::emitIADD()
{
   if (!longIMMD(insn->src(1))) {
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5c100000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4c100000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x38100000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitSAT(0x32);
      emitNEG(0x31, insn->src(0));
      emitNEG(0x30, insn->src(1));
      emitCC (0x2f);
   } else {
      emitInsn(0x1c000000);
      emitSAT (0x36);
      emitCC (0x34);
      emitIMMD(0x14, 32, insn->src(1));
   }

   /* subtraction: toggle bit 16 of code[1], i.e. bit 0x30, which is the
    * src 1 negate bit of the short form.
    * NOTE(review): in the long form bit 0x30 lies inside the 32-bit
    * immediate (0x14..0x33), so this toggle would alter the immediate -
    * confirm OP_SUB never reaches the long-immediate path.
    */
   if (insn->op == OP_SUB)
      code[1] ^= 0x00010000;

   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}
 
/* Encode IMUL.
 *
 * Short form: the opcode word is selected by the register file of src 1
 * (encoded at 0x14); bits 0x29/0x28 record signedness of the source and
 * destination types and bit 0x27 is set for NV50_IR_SUBOP_MUL_HIGH.
 * Long form (opcode word 0x1f000000): src 1 is a 32-bit immediate; the
 * same three flags live at 0x37/0x36/0x35.
 */
void
CodeEmitterGM107::emitIMUL()
{
   if (!longIMMD(insn->src(1))) {
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5c380000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4c380000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x38380000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitCC (0x2f);
      emitField(0x29, 1, isSignedType(insn->sType));
      emitField(0x28, 1, isSignedType(insn->dType));
      emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
   } else {
      emitInsn (0x1f000000);
      emitField(0x37, 1, isSignedType(insn->sType));
      emitField(0x36, 1, isSignedType(insn->dType));
      emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
      emitCC (0x34);
      emitIMMD (0x14, 32, insn->src(1));
   }

   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}
 
/* Encode IMAD.
 *
 * When src 2 is a GPR it is encoded at 0x27 and the opcode word is
 * selected by the register file of src 1 (encoded at 0x14). When src 2 is
 * in a constant buffer the operand slots swap: src 1 goes to the GPR slot
 * at 0x27 and src 2 is encoded as the constant-buffer operand.
 */
void
CodeEmitterGM107::emitIMAD()
{
   /*XXX: imad32i exists, but not using it as third src overlaps dst */
   switch(insn->src(2).getFile()) {
   case FILE_GPR:
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5a000000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4a000000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x34000000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitGPR (0x27, insn->src(2));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x52000000);
      emitGPR (0x27, insn->src(1));
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
      break;
   default:
      assert(!"bad src2 file");
      break;
   }

   emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
   emitField(0x35, 1, isSignedType(insn->sType));
   emitNEG (0x34, insn->src(2));
   /* product sign: negations of src 0 and src 1 share one bit */
   emitNEG2 (0x33, insn->src(0), insn->src(1));
   emitSAT (0x32);
   emitX (0x31);
   emitField(0x30, 1, isSignedType(insn->dType));
   emitCC (0x2f);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
/* Encode IMNMX (integer min/max).
 *
 * The opcode word is selected by the register file of src 1, which is
 * encoded at bit 0x14. Bit 0x30 records whether the destination type is
 * signed; bit 0x2a is set for OP_MAX and clear otherwise.
 */
void
CodeEmitterGM107::emitIMNMX()
{
   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c200000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c200000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38200000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   emitField(0x30, 1, isSignedType(insn->dType));
   emitCC (0x2f);
   emitField(0x2a, 1, insn->op == OP_MAX);
   emitPRED (0x27);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
/* Encode ICMP.
 *
 * A negate modifier on src 2 is not encoded as a bit; instead the
 * condition code is replaced by reverseCondCode(). When src 2 is a GPR it
 * is encoded at 0x27 and the opcode word is selected by the register file
 * of src 1 (encoded at 0x14). When src 2 is in a constant buffer, src 1
 * goes to the GPR slot at 0x27 and src 2 is the constant-buffer operand.
 * Bit 0x30 records whether the source type is signed.
 */
void
CodeEmitterGM107::emitICMP()
{
   const CmpInstruction *insn = this->insn->asCmp();
   CondCode cc = insn->setCond;

   if (insn->src(2).mod.neg())
      cc = reverseCondCode(cc);

   switch(insn->src(2).getFile()) {
   case FILE_GPR:
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5b400000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4b400000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x36400000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitGPR (0x27, insn->src(2));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x53400000);
      emitGPR (0x27, insn->src(1));
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
      break;
   default:
      assert(!"bad src2 file");
      break;
   }

   emitCond3(0x31, cc);
   emitField(0x30, 1, isSignedType(insn->sType));
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitISET()
{
const CmpInstruction *insn = this->insn->asCmp();
 
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x5b500000);
emitGPR (0x14, insn->src(1));
break;
case FILE_MEMORY_CONST:
emitInsn(0x4b500000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
break;
case FILE_IMMEDIATE:
emitInsn(0x36500000);
emitIMMD(0x14, 19, insn->src(1));
break;
default:
assert(!"bad src1 file");
break;
}
 
if (insn->op != OP_SET) {
switch (insn->op) {
case OP_SET_AND: emitField(0x2d, 2, 0); break;
case OP_SET_OR : emitField(0x2d, 2, 1); break;
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
default:
assert(!"invalid set op");
break;
}
emitPRED(0x27, insn->src(2));
} else {
emitPRED(0x27);
}
 
emitCond3(0x31, insn->setCond);
emitField(0x30, 1, isSignedType(insn->sType));
emitCC (0x2f);
emitX (0x2b);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitISETP()
{
const CmpInstruction *insn = this->insn->asCmp();
 
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x5b600000);
emitGPR (0x14, insn->src(1));
break;
case FILE_MEMORY_CONST:
emitInsn(0x4b600000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
break;
case FILE_IMMEDIATE:
emitInsn(0x36600000);
emitIMMD(0x14, 19, insn->src(1));
break;
default:
assert(!"bad src1 file");
break;
}
 
if (insn->op != OP_SET) {
switch (insn->op) {
case OP_SET_AND: emitField(0x2d, 2, 0); break;
case OP_SET_OR : emitField(0x2d, 2, 1); break;
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
default:
assert(!"invalid set op");
break;
}
emitPRED(0x27, insn->src(2));
} else {
emitPRED(0x27);
}
 
emitCond3(0x31, insn->setCond);
emitField(0x30, 1, isSignedType(insn->sType));
emitX (0x2b);
emitGPR (0x08, insn->src(0));
emitPRED (0x03, insn->def(0));
if (insn->defExists(1))
emitPRED(0x00, insn->def(1));
else
emitPRED(0x00);
}
 
/* Encode SHL.
 *
 * The opcode word is selected by the register file of src 1 (the shift
 * amount), which is encoded at bit 0x14. Bit 0x27 is set when the subOp
 * is NV50_IR_SUBOP_SHIFT_WRAP.
 */
void
CodeEmitterGM107::emitSHL()
{
   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c480000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c480000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38480000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   emitCC (0x2f);
   emitX (0x2b);
   emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
/* Encode SHR.
 *
 * The opcode word is selected by the register file of src 1 (the shift
 * amount), which is encoded at bit 0x14. Bit 0x30 records whether the
 * destination type is signed; bit 0x27 is set when the subOp is
 * NV50_IR_SUBOP_SHIFT_WRAP.
 */
void
CodeEmitterGM107::emitSHR()
{
   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c280000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c280000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38280000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   emitField(0x30, 1, isSignedType(insn->dType));
   emitCC (0x2f);
   emitX (0x2c);
   emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
/* Encode POPC.
 *
 * The opcode word is selected by the register file of src 0, which is
 * encoded at bit 0x14. Bit 0x28 carries the source's NOT modifier.
 * Note: the assert text below says "src1" although the operand inspected
 * is src 0.
 */
void
CodeEmitterGM107::emitPOPC()
{
   switch (insn->src(0).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c080000);
      emitGPR (0x14, insn->src(0));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c080000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38080000);
      emitIMMD(0x14, 19, insn->src(0));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   emitINV(0x28, insn->src(0));
   emitGPR(0x00, insn->def(0));
}
 
/* Encode BFI.
 *
 * When src 2 is a GPR it is encoded at 0x27 and the opcode word is
 * selected by the register file of src 1 (encoded at 0x14). When src 2 is
 * in a constant buffer the operand slots swap: src 1 goes to the GPR slot
 * at 0x27 and src 2 is encoded as the constant-buffer operand.
 */
void
CodeEmitterGM107::emitBFI()
{
   switch(insn->src(2).getFile()) {
   case FILE_GPR:
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5bf00000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x4bf00000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         emitInsn(0x36f00000);
         emitIMMD(0x14, 19, insn->src(1));
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      emitGPR (0x27, insn->src(2));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x53f00000);
      emitGPR (0x27, insn->src(1));
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
      break;
   default:
      assert(!"bad src2 file");
      break;
   }

   emitCC (0x2f);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
/* Encode BFE.
 *
 * The opcode word is selected by the register file of src 1, which is
 * encoded at bit 0x14. Bit 0x30 records whether the destination type is
 * signed; bit 0x28 is set when the subOp is NV50_IR_SUBOP_EXTBF_REV.
 */
void
CodeEmitterGM107::emitBFE()
{
   switch (insn->src(1).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c000000);
      emitGPR (0x14, insn->src(1));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c000000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38000000);
      emitIMMD(0x14, 19, insn->src(1));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   emitField(0x30, 1, isSignedType(insn->dType));
   emitCC (0x2f);
   emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
/* Encode FLO.
 *
 * The opcode word is selected by the register file of src 0, which is
 * encoded at bit 0x14. Bit 0x30 records whether the destination type is
 * signed, bit 0x29 is set when the subOp is NV50_IR_SUBOP_BFIND_SAMT, and
 * bit 0x28 carries the source's NOT modifier.
 * Note: the assert text below says "src1" although the operand inspected
 * is src 0.
 */
void
CodeEmitterGM107::emitFLO()
{
   switch (insn->src(0).getFile()) {
   case FILE_GPR:
      emitInsn(0x5c300000);
      emitGPR (0x14, insn->src(0));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x4c300000);
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
      break;
   case FILE_IMMEDIATE:
      emitInsn(0x38300000);
      emitIMMD(0x14, 19, insn->src(0));
      break;
   default:
      assert(!"bad src1 file");
      break;
   }

   emitField(0x30, 1, isSignedType(insn->dType));
   emitCC (0x2f);
   emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
   emitINV (0x28, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
 
/*******************************************************************************
* memory
******************************************************************************/
 
void
CodeEmitterGM107::emitLDSTs(int pos, DataType type)
{
int data = 0;
 
switch (typeSizeof(type)) {
case 1: data = isSignedType(type) ? 1 : 0; break;
case 2: data = isSignedType(type) ? 3 : 2; break;
case 4: data = 4; break;
case 8: data = 5; break;
case 16: data = 6; break;
default:
assert(!"bad type");
break;
}
 
emitField(pos, 3, data);
}
 
void
CodeEmitterGM107::emitLDSTc(int pos)
{
int mode = 0;
 
switch (insn->cache) {
case CACHE_CA: mode = 0; break;
case CACHE_CG: mode = 1; break;
case CACHE_CS: mode = 2; break;
case CACHE_CV: mode = 3; break;
default:
assert(!"invalid caching mode");
break;
}
 
emitField(pos, 2, mode);
}
 
void
CodeEmitterGM107::emitLDC()
{
emitInsn (0xef900000);
emitLDSTs(0x30, insn->dType);
emitField(0x2c, 2, insn->subOp);
emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitLDL()
{
emitInsn (0xef400000);
emitLDSTs(0x30, insn->dType);
emitLDSTc(0x2c);
emitADDR (0x08, 0x14, 24, 0, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitLDS()
{
emitInsn (0xef480000);
emitLDSTs(0x30, insn->dType);
emitADDR (0x08, 0x14, 24, 0, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitLD()
{
emitInsn (0x80000000);
emitPRED (0x3a);
emitLDSTc(0x38);
emitLDSTs(0x35, insn->dType);
emitADDR (0x08, 0x14, 32, 0, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitSTL()
{
emitInsn (0xef500000);
emitLDSTs(0x30, insn->dType);
emitLDSTc(0x2c);
emitADDR (0x08, 0x14, 24, 0, insn->src(0));
emitGPR (0x00, insn->src(1));
}
 
void
CodeEmitterGM107::emitSTS()
{
emitInsn (0xef580000);
emitLDSTs(0x30, insn->dType);
emitADDR (0x08, 0x14, 24, 0, insn->src(0));
emitGPR (0x00, insn->src(1));
}
 
void
CodeEmitterGM107::emitST()
{
emitInsn (0xa0000000);
emitPRED (0x3a);
emitLDSTc(0x38);
emitLDSTs(0x35, insn->dType);
emitADDR (0x08, 0x14, 32, 0, insn->src(0));
emitGPR (0x00, insn->src(1));
}
 
void
CodeEmitterGM107::emitALD()
{
emitInsn (0xefd80000);
emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
emitGPR (0x27, insn->src(0).getIndirect(1));
emitO (0x20);
emitP (0x1f);
emitADDR (0x08, 20, 10, 0, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitAST()
{
emitInsn (0xeff00000);
emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
emitGPR (0x27, insn->src(0).getIndirect(1));
emitP (0x1f);
emitADDR (0x08, 20, 10, 0, insn->src(0));
emitGPR (0x00, insn->src(1));
}
 
/* Encoder used for OP_PFETCH (see emitInstruction): opcode word plus one
 * GPR source and one GPR destination.
 */
void
CodeEmitterGM107::emitISBERD()
{
   emitInsn(0xefd00000);

   emitGPR(0x08, insn->src(0));   /* source */
   emitGPR(0x00, insn->def(0));   /* destination */
}
 
void
CodeEmitterGM107::emitIPA()
{
int ipam = 0, ipas = 0;
 
switch (insn->getInterpMode()) {
case NV50_IR_INTERP_LINEAR : ipam = 0; break;
case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
case NV50_IR_INTERP_FLAT : ipam = 2; break;
case NV50_IR_INTERP_SC : ipam = 3; break;
default:
assert(!"invalid ipa mode");
break;
}
 
switch (insn->getSampleMode()) {
case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
case NV50_IR_INTERP_CENTROID: ipas = 1; break;
case NV50_IR_INTERP_OFFSET : ipas = 2; break;
default:
assert(!"invalid ipa sample mode");
break;
}
 
emitInsn (0xe0000000);
emitField(0x36, 2, ipam);
emitField(0x34, 2, ipas);
emitSAT (0x33);
emitField(0x2f, 3, 7);
emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
if ((code[0] & 0x0000ff00) != 0x0000ff00)
code[1] |= 0x00000040; /* .idx */
emitGPR(0x00, insn->def(0));
 
if (insn->op == OP_PINTERP) {
emitGPR(0x14, insn->src(1));
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
emitGPR(0x27, insn->src(2));
} else {
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
emitGPR(0x27, insn->src(1));
emitGPR(0x14);
}
 
if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
emitGPR(0x27);
}
 
/*******************************************************************************
* surface
******************************************************************************/
 
/* Encoder used for OP_PIXLD (see emitInstruction). */
void
CodeEmitterGM107::emitPIXLD()
{
   emitInsn (0xefe80000);
   emitPRED (0x2d);
   emitField(0x1f, 3, insn->subOp);   /* sub-operation selector */

   emitGPR  (0x08, insn->src(0));     /* source */
   emitGPR  (0x00, insn->def(0));     /* destination */
}
 
/*******************************************************************************
* texture
******************************************************************************/
 
/* Emits the second texture source register at 'pos'.  The source index is
 * 1, or 2 when index 1 is the predicate source; if that source does not
 * exist the no-operand form of emitGPR is used.
 */
void
CodeEmitterGM107::emitTEXs(int pos)
{
   const int idx = (insn->predSrc == 1) ? 2 : 1;

   if (!insn->srcExists(idx)) {
      emitGPR(pos);
      return;
   }

   emitGPR(pos, insn->src(idx));
}
 
void
CodeEmitterGM107::emitTEX()
{
const TexInstruction *insn = this->insn->asTex();
int lodm = 0;
 
if (!insn->tex.levelZero) {
switch (insn->op) {
case OP_TEX: lodm = 0; break;
case OP_TXB: lodm = 2; break;
case OP_TXL: lodm = 3; break;
default:
assert(!"invalid tex op");
break;
}
} else {
lodm = 1;
}
 
if (insn->tex.rIndirectSrc >= 0) {
emitInsn (0xdeb80000);
emitField(0x35, 2, lodm);
emitField(0x24, 1, insn->tex.useOffsets == 1);
} else {
emitInsn (0xc0380000);
emitField(0x37, 2, lodm);
emitField(0x36, 1, insn->tex.useOffsets == 1);
emitField(0x24, 13, insn->tex.r);
}
 
emitField(0x32, 1, insn->tex.target.isShadow());
emitField(0x31, 1, insn->tex.liveOnly);
emitField(0x23, 1, insn->tex.derivAll);
emitField(0x1f, 4, insn->tex.mask);
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
insn->tex.target.getDim() - 1);
emitField(0x1c, 1, insn->tex.target.isArray());
emitTEXs (0x14);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitTLD()
{
const TexInstruction *insn = this->insn->asTex();
 
if (insn->tex.rIndirectSrc >= 0) {
emitInsn (0xdd380000);
} else {
emitInsn (0xdc380000);
emitField(0x24, 13, insn->tex.r);
}
 
emitField(0x37, 1, insn->tex.levelZero == 0);
emitField(0x32, 1, insn->tex.target.isMS());
emitField(0x31, 1, insn->tex.liveOnly);
emitField(0x23, 1, insn->tex.useOffsets == 1);
emitField(0x1f, 4, insn->tex.mask);
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
insn->tex.target.getDim() - 1);
emitField(0x1c, 1, insn->tex.target.isArray());
emitTEXs (0x14);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitTLD4()
{
const TexInstruction *insn = this->insn->asTex();
 
if (insn->tex.rIndirectSrc >= 0) {
emitInsn (0xdef80000);
emitField(0x26, 2, insn->tex.gatherComp);
emitField(0x25, 2, insn->tex.useOffsets == 4);
emitField(0x24, 2, insn->tex.useOffsets == 1);
} else {
emitInsn (0xc8380000);
emitField(0x38, 2, insn->tex.gatherComp);
emitField(0x37, 2, insn->tex.useOffsets == 4);
emitField(0x36, 2, insn->tex.useOffsets == 1);
emitField(0x24, 13, insn->tex.r);
}
 
emitField(0x32, 1, insn->tex.target.isShadow());
emitField(0x31, 1, insn->tex.liveOnly);
emitField(0x23, 1, insn->tex.derivAll);
emitField(0x1f, 4, insn->tex.mask);
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
insn->tex.target.getDim() - 1);
emitField(0x1c, 1, insn->tex.target.isArray());
emitTEXs (0x14);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitTXD()
{
const TexInstruction *insn = this->insn->asTex();
 
if (insn->tex.rIndirectSrc >= 0) {
emitInsn (0xde780000);
} else {
emitInsn (0xde380000);
emitField(0x24, 13, insn->tex.r);
}
 
emitField(0x31, 1, insn->tex.liveOnly);
emitField(0x23, 1, insn->tex.useOffsets == 1);
emitField(0x1f, 4, insn->tex.mask);
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
insn->tex.target.getDim() - 1);
emitField(0x1c, 1, insn->tex.target.isArray());
emitTEXs (0x14);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitTMML()
{
const TexInstruction *insn = this->insn->asTex();
 
if (insn->tex.rIndirectSrc >= 0) {
emitInsn (0xdf600000);
} else {
emitInsn (0xdf580000);
emitField(0x24, 13, insn->tex.r);
}
 
emitField(0x31, 1, insn->tex.liveOnly);
emitField(0x23, 1, insn->tex.derivAll);
emitField(0x1f, 4, insn->tex.mask);
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
insn->tex.target.getDim() - 1);
emitField(0x1c, 1, insn->tex.target.isArray());
emitTEXs (0x14);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
void
CodeEmitterGM107::emitTXQ()
{
const TexInstruction *insn = this->insn->asTex();
int type = 0;
 
switch (insn->tex.query) {
case TXQ_DIMS : type = 0x01; break;
case TXQ_TYPE : type = 0x02; break;
case TXQ_SAMPLE_POSITION: type = 0x05; break;
case TXQ_FILTER : type = 0x10; break;
case TXQ_LOD : type = 0x12; break;
case TXQ_WRAP : type = 0x14; break;
case TXQ_BORDER_COLOUR : type = 0x16; break;
default:
assert(!"invalid txq query");
break;
}
 
emitInsn (0xdf4a0000);
emitField(0x24, 13, insn->tex.r);
emitField(0x1f, 4, insn->tex.mask);
emitField(0x16, 6, type);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
/* Encoder used for OP_TEXBAR (see emitInstruction).  insn->subOp is written
 * into two separate 6-wide fields.
 */
void
CodeEmitterGM107::emitDEPBAR()
{
   emitInsn (0xf0f00000);

   emitField(0x1d, 1, 1); /* le */
   emitField(0x1a, 3, 5);

   /* the same sub-operation value goes to both positions */
   emitField(0x14, 6, insn->subOp);
   emitField(0x00, 6, insn->subOp);
}
 
/*******************************************************************************
* misc
******************************************************************************/
 
/* Encoder used for OP_NOP, and as the fallback emitInstruction uses for
 * unsupported loads, stores and opcodes.  Only the opcode word is emitted.
 */
void
CodeEmitterGM107::emitNOP()
{
   emitInsn(0x50b00000);
}
 
/* Encoder used for OP_DISCARD (see emitInstruction): opcode word followed by
 * a condition field that is always CC_TR.
 */
void
CodeEmitterGM107::emitKIL()
{
   emitInsn (0xe3300000);
   emitCond5(0x00, CC_TR);
}
 
void
CodeEmitterGM107::emitOUT()
{
const int cut = insn->op == OP_RESTART || insn->subOp;
const int emit = insn->op == OP_EMIT;
 
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0xfbe00000);
emitGPR (0x14, insn->src(1));
break;
case FILE_IMMEDIATE:
emitInsn(0xf6e00000);
emitIMMD(0x14, 19, insn->src(1));
break;
case FILE_MEMORY_CONST:
emitInsn(0xebe00000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
break;
default:
assert(!"bad src1 file");
break;
}
 
emitField(0x27, 2, (cut << 1) | emit);
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
 
/*******************************************************************************
* assembler front-end
******************************************************************************/
 
/* Encodes one instruction into the output buffer.
 *
 * Only instructions with encSize == 8 are accepted.  When writeIssueDelays
 * is set, an extra 8-byte word is reserved whenever codeSize is a multiple
 * of 32, and each instruction's 'sched' value is written into a 21-wide
 * field of that word.
 *
 * Returns false if the instruction is skipped (wrong encSize), if the
 * output buffer is too small, or if the opcode is not handled (a NOP is
 * emitted in its place in that last case); true otherwise.
 */
bool
CodeEmitterGM107::emitInstruction(Instruction *i)
{
   /* 16 bytes are needed when a new scheduling word has to be inserted */
   const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
   bool ret = true;

   insn = i;

   if (insn->encSize != 8) {
      ERROR("skipping undecodable instruction: "); insn->print();
      return false;
   } else
   if (codeSize + size > codeSizeLimit) {
      ERROR("code emitter output buffer too small\n");
      return false;
   }

   if (writeIssueDelays) {
      /* slot index inside the current 32-byte group; -1 means the group
       * starts here and its scheduling word must be created first
       */
      int n = ((codeSize & 0x1f) / 8) - 1;
      if (n < 0) {
         data = code;
         data[0] = 0x00000000;
         data[1] = 0x00000000;
         code += 2;
         codeSize += 8;
         n++;
      }

      emitField(data, n * 21, 21, insn->sched);
   }

   switch (insn->op) {
   case OP_EXIT:
      emitEXIT();
      break;
   case OP_BRA:
      emitBRA();
      break;
   case OP_CALL:
      emitCAL();
      break;
   case OP_PRECONT:
      emitPCNT();
      break;
   case OP_CONT:
      emitCONT();
      break;
   case OP_PREBREAK:
      emitPBK();
      break;
   case OP_BREAK:
      emitBRK();
      break;
   case OP_PRERET:
      emitPRET();
      break;
   case OP_RET:
      emitRET();
      break;
   case OP_JOINAT:
      emitSSY();
      break;
   case OP_JOIN:
      emitSYNC();
      break;
   case OP_QUADON:
      emitSAM();
      break;
   case OP_QUADPOP:
      emitRAM();
      break;
   case OP_MOV:
      if (insn->def(0).getFile() == FILE_GPR &&
          insn->src(0).getFile() != FILE_PREDICATE)
         emitMOV();
      else
         assert(!"R2P/P2R");
      break;
   case OP_RDSV:
      emitS2R();
      break;
   case OP_ABS:
   case OP_NEG:
   case OP_SAT:
   case OP_FLOOR:
   case OP_CEIL:
   case OP_TRUNC:
   case OP_CVT:
      /* conversion encoder is chosen by float-ness of dest and source */
      if (isFloatType(insn->dType)) {
         if (isFloatType(insn->sType))
            emitF2F();
         else
            emitI2F();
      } else {
         if (isFloatType(insn->sType))
            emitF2I();
         else
            emitI2I();
      }
      break;
   case OP_SHFL:
      emitSHFL();
      break;
   case OP_ADD:
   case OP_SUB:
      if (isFloatType(insn->dType)) {
         if (insn->dType == TYPE_F64)
            emitDADD();
         else
            emitFADD();
      } else {
         emitIADD();
      }
      break;
   case OP_MUL:
      if (isFloatType(insn->dType)) {
         if (insn->dType == TYPE_F64)
            emitDMUL();
         else
            emitFMUL();
      } else {
         emitIMUL();
      }
      break;
   case OP_MAD:
   case OP_FMA:
      if (isFloatType(insn->dType)) {
         if (insn->dType == TYPE_F64)
            emitDFMA();
         else
            emitFFMA();
      } else {
         emitIMAD();
      }
      break;
   case OP_MIN:
   case OP_MAX:
      if (isFloatType(insn->dType)) {
         if (insn->dType == TYPE_F64)
            emitDMNMX();
         else
            emitFMNMX();
      } else {
         emitIMNMX();
      }
      break;
   case OP_SHL:
      emitSHL();
      break;
   case OP_SHR:
      emitSHR();
      break;
   case OP_POPCNT:
      emitPOPC();
      break;
   case OP_INSBF:
      emitBFI();
      break;
   case OP_EXTBF:
      emitBFE();
      break;
   case OP_BFIND:
      emitFLO();
      break;
   case OP_SLCT:
      if (isFloatType(insn->dType))
         emitFCMP();
      else
         emitICMP();
      break;
   case OP_SET:
   case OP_SET_AND:
   case OP_SET_OR:
   case OP_SET_XOR:
      /* non-predicate destination: SET forms; predicate destination: SETP */
      if (insn->def(0).getFile() != FILE_PREDICATE) {
         if (isFloatType(insn->sType)) {
            if (insn->sType == TYPE_F64)
               emitDSET();
            else
               emitFSET();
         } else {
            emitISET();
         }
      } else {
         if (isFloatType(insn->sType)) {
            if (insn->sType == TYPE_F64)
               emitDSETP();
            else
               emitFSETP();
         } else {
            emitISETP();
         }
      }
      break;
   case OP_PRESIN:
   case OP_PREEX2:
      emitRRO();
      break;
   case OP_COS:
   case OP_SIN:
   case OP_EX2:
   case OP_LG2:
   case OP_RCP:
   case OP_RSQ:
      emitMUFU();
      break;
   case OP_AND:
   case OP_OR:
   case OP_XOR:
      emitLOP();
      break;
   case OP_NOT:
      emitNOT();
      break;
   case OP_LOAD:
      switch (insn->src(0).getFile()) {
      case FILE_MEMORY_CONST : emitLDC(); break;
      case FILE_MEMORY_LOCAL : emitLDL(); break;
      case FILE_MEMORY_SHARED: emitLDS(); break;
      case FILE_MEMORY_GLOBAL: emitLD(); break;
      default:
         assert(!"invalid load");
         emitNOP();
         break;
      }
      break;
   case OP_STORE:
      switch (insn->src(0).getFile()) {
      case FILE_MEMORY_LOCAL : emitSTL(); break;
      case FILE_MEMORY_SHARED: emitSTS(); break;
      case FILE_MEMORY_GLOBAL: emitST(); break;
      default:
         /* this is the store path, so name it correctly in the diagnostic */
         assert(!"invalid store");
         emitNOP();
         break;
      }
      break;
   case OP_VFETCH:
      emitALD();
      break;
   case OP_EXPORT:
      emitAST();
      break;
   case OP_PFETCH:
      emitISBERD();
      break;
   case OP_LINTERP:
   case OP_PINTERP:
      emitIPA();
      break;
   case OP_PIXLD:
      emitPIXLD();
      break;
   case OP_TEX:
   case OP_TXB:
   case OP_TXL:
      emitTEX();
      break;
   case OP_TXF:
      emitTLD();
      break;
   case OP_TXG:
      emitTLD4();
      break;
   case OP_TXD:
      emitTXD();
      break;
   case OP_TXQ:
      emitTXQ();
      break;
   case OP_TXLQ:
      emitTMML();
      break;
   case OP_TEXBAR:
      emitDEPBAR();
      break;
   case OP_QUADOP:
      emitFSWZADD();
      break;
   case OP_NOP:
      emitNOP();
      break;
   case OP_DISCARD:
      emitKIL();
      break;
   case OP_EMIT:
   case OP_RESTART:
      emitOUT();
      break;
   default:
      /* unknown opcode: keep the stream well-formed but report failure */
      assert(!"invalid opcode");
      emitNOP();
      ret = false;
      break;
   }

   if (insn->join) {
      /*XXX*/
   }

   /* advance past the 8-byte instruction just written */
   code += 2;
   codeSize += 8;
   return ret;
}
 
/* Every instruction is reported as 8 bytes, independent of the argument. */
uint32_t
CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
{
   const uint32_t encodingBytes = 8;
   return encodingBytes;
}
 
/*******************************************************************************
* sched data calculator
******************************************************************************/
 
// Pass that assigns the 'sched' value of every instruction in a basic block
// (see visit() below).  It is run from
// CodeEmitterGM107::prepareEmission(Function *).
class SchedDataCalculatorGM107 : public Pass
{
public:
// Stores the target pointer; nothing else is done at construction.
SchedDataCalculatorGM107(const Target *targ) : targ(targ) {}
private:
// Target this pass was created for (not read by visit() as written).
const Target *targ;
// Per-basic-block callback; sets insn->sched for each instruction.
bool visit(BasicBlock *bb);
};
 
/* Walks the block's instruction list from getEntry() along 'next' and gives
 * every instruction the same fixed sched value.  Always returns true.
 */
bool
SchedDataCalculatorGM107::visit(BasicBlock *bb)
{
   Instruction *cur = bb->getEntry();

   while (cur) {
      /*XXX*/
      cur->sched = 0x7e0;
      cur = cur->next;
   }

   return true;
}
 
/*******************************************************************************
* main
******************************************************************************/
 
/* Per-function preparation: runs the base-class preparation and then the
 * GM107 scheduling-data pass over the function.
 */
void
CodeEmitterGM107::prepareEmission(Function *func)
{
   SchedDataCalculatorGM107 schedPass(targ);

   CodeEmitter::prepareEmission(func);

   /* fills in insn->sched for every instruction of the function */
   schedPass.run(func, true, true);
}
 
/* Number of 24-byte units needed to hold 'size' bytes, rounded up. */
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t bundleBytes = 24;
   const uint32_t roundedUp = size + (bundleBytes - 1);

   return roundedUp / bundleBytes;
}
 
/* Per-program preparation: lays out every function at the current end of
 * the program binary, prepares it, and, when the target uses software
 * scheduling, grows block sizes and positions to make room for the extra
 * scheduling words.
 */
void
CodeEmitterGM107::prepareEmission(Program *prog)
{
   for (ArrayList::Iterator it = prog->allFuncs.iterator();
        !it.end(); it.next()) {
      Function *fn = reinterpret_cast<Function *>(it.get());

      fn->binPos = prog->binSize;
      prepareEmission(fn);

      // adjust sizes & positions for scheduling info:
      if (prog->getTarget()->hasSWSched) {
         uint32_t pos = fn->binPos;
         BasicBlock *block = NULL;

         for (int b = 0; b < fn->bbCount; ++b) {
            block = fn->bbArray[b];

            // part of the block that does not fit into the remainder of
            // the 32-byte group the block starts in (clamped at zero)
            int32_t newSize = block->binSize;
            if (pos % 32) {
               newSize -= 32 - pos % 32;
               if (newSize < 0)
                  newSize = 0;
            }

            // original size plus 8 bytes per additional bundle
            newSize = block->binSize + sizeToBundlesGM107(newSize) * 8;

            block->binPos = pos;
            block->binSize = newSize;
            pos += newSize;
         }

         // only update the function size if at least one block was visited
         if (block)
            fn->binSize = pos - fn->binPos;
      }

      prog->binSize += fn->binSize;
   }
}
 
/* Starts with no output buffer and no relocation info; issue-delay writing
 * follows the target's hasSWSched flag.
 */
CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
   : CodeEmitter(target),
     targGM107(target),
     writeIssueDelays(target->hasSWSched)
{
   code = NULL;
   codeSizeLimit = 0;
   codeSize = 0;
   relocInfo = NULL;
}
 
/* Allocates a GM107 code emitter for this target and tags it with the
 * program type.  The emitter is created with 'new' and returned as a raw
 * pointer.
 */
CodeEmitter *
TargetGM107::createCodeEmitterGM107(Program::Type type)
{
   CodeEmitterGM107 *const emitter = new CodeEmitterGM107(this);

   emitter->setProgramType(type);

   return emitter;
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
0,0 → 1,2058
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_target_nv50.h"
 
namespace nv50_ir {
 
// Encoding-form selectors passed as the 'enc' argument of
// CodeEmitterNV50::setSrcFileBits() by the emitForm_* helpers:
// LONG by emitForm_MAD, SHORT by emitForm_MUL, IMM by emitForm_IMM and
// LONG_ALT by emitForm_ADD.
#define NV50_OP_ENC_LONG 0
#define NV50_OP_ENC_SHORT 1
#define NV50_OP_ENC_IMM 2
#define NV50_OP_ENC_LONG_ALT 3
 
// Binary code emitter for the NV50 target.  Instructions are encoded by
// OR-ing fields into the 32-bit words code[0] / code[1].
class CodeEmitterNV50 : public CodeEmitter
{
public:
CodeEmitterNV50(const TargetNV50 *);

virtual bool emitInstruction(Instruction *);

virtual uint32_t getMinEncodingSize(const Instruction *) const;

// Records the program type; setSrcFileBits() and emitLOAD() consult it.
inline void setProgramType(Program::Type pType) { progType = pType; }

virtual void prepareEmission(Function *);

private:
Program::Type progType;

const TargetNV50 *targNV50;

private:
// --- operand id / address helpers: OR a value into code[] at bit 'pos' ---
inline void defId(const ValueDef&, const int pos);
inline void srcId(const ValueRef&, const int pos);
inline void srcId(const ValueRef *, const int pos);

inline void srcAddr16(const ValueRef&, bool adj, const int pos);
inline void srcAddr8(const ValueRef&, const int pos);

// --- flags (condition register) read / write encoding ---
void emitFlagsRd(const Instruction *);
void emitFlagsWr(const Instruction *);

void emitCondCode(CondCode cc, DataType ty, int pos);

// --- address register selection ---
inline void setARegBits(unsigned int);

void setAReg16(const Instruction *, int s);
void setImmediate(const Instruction *, int s);

// --- destination / source operand encoding ---
void setDst(const Value *);
void setDst(const Instruction *, int d);
void setSrcFileBits(const Instruction *, int enc);
void setSrc(const Instruction *, unsigned int s, int slot);

// --- common instruction forms (long, long-alt, short, immediate) ---
void emitForm_MAD(const Instruction *);
void emitForm_ADD(const Instruction *);
void emitForm_MUL(const Instruction *);
void emitForm_IMM(const Instruction *);

// --- load/store size selectors ---
void emitLoadStoreSizeLG(DataType ty, int pos);
void emitLoadStoreSizeCS(DataType ty);

void roundMode_MAD(const Instruction *);
void roundMode_CVT(RoundMode);

// ORs the neg modifiers of src(0) and src(1) into code[1].
void emitMNeg12(const Instruction *);

// --- per-operation encoders ---
void emitLOAD(const Instruction *);
void emitSTORE(const Instruction *);
void emitMOV(const Instruction *);
void emitRDSV(const Instruction *);
void emitNOP();
void emitINTERP(const Instruction *);
void emitPFETCH(const Instruction *);
void emitOUT(const Instruction *);

void emitUADD(const Instruction *);
void emitAADD(const Instruction *);
void emitFADD(const Instruction *);
void emitIMUL(const Instruction *);
void emitFMUL(const Instruction *);
void emitFMAD(const Instruction *);
void emitIMAD(const Instruction *);
void emitISAD(const Instruction *);

void emitMINMAX(const Instruction *);

void emitPreOp(const Instruction *);
void emitSFnOp(const Instruction *, uint8_t subOp);

void emitShift(const Instruction *);
void emitARL(const Instruction *, unsigned int shl);
void emitLogicOp(const Instruction *);
void emitNOT(const Instruction *);

void emitCVT(const Instruction *);
void emitSET(const Instruction *);

void emitTEX(const TexInstruction *);
void emitTXQ(const TexInstruction *);
void emitTEXPREP(const TexInstruction *);

void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);

void emitFlow(const Instruction *, uint8_t flowOp);
void emitPRERETEmu(const FlowInstruction *);
void emitBAR(const Instruction *);

void emitATOM(const Instruction *);
};
 
// Shorthand for the register data of an operand's representative value.
// SDATA is used on sources and DDATA on definitions; both expand to the
// same expression.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
 
void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
{
assert(src.get());
code[pos / 32] |= SDATA(src).id << (pos % 32);
}
 
void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
{
assert(src->get());
code[pos / 32] |= SDATA(*src).id << (pos % 32);
}
 
void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
{
assert(src.get());
 
int32_t offset = SDATA(src).offset;
 
assert(!adj || src.get()->reg.size <= 4);
if (adj)
offset /= src.get()->reg.size;
 
assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
 
if (offset < 0)
offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
 
code[pos / 32] |= offset << (pos % 32);
}
 
void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
{
assert(src.get());
 
uint32_t offset = SDATA(src).offset;
 
assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
 
code[pos / 32] |= (offset >> 2) << (pos % 32);
}
 
void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
{
assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
 
code[pos / 32] |= DDATA(def).id << (pos % 32);
}
 
// Encodes the instruction's rounding mode at bit 22 of code[1]:
// ROUND_M -> 1, ROUND_P -> 2, ROUND_Z -> 3.  Any other mode is expected to
// be ROUND_N and leaves the field untouched.
void
CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
{
   uint32_t rnd = 0;

   switch (insn->rnd) {
   case ROUND_M:
      rnd = 1;
      break;
   case ROUND_P:
      rnd = 2;
      break;
   case ROUND_Z:
      rnd = 3;
      break;
   default:
      assert(insn->rnd == ROUND_N);
      break;
   }

   code[1] |= rnd << 22;
}
 
// ORs the neg modifier of src(0) into bit 26 and that of src(1) into bit 27
// of code[1].
void
CodeEmitterNV50::emitMNeg12(const Instruction *i)
{
   code[1] |= (i->src(0).mod.neg() << 26) |
              (i->src(1).mod.neg() << 27);
}
 
// Translates a condition code to its numeric encoding and ORs it into the
// code words at bit position 'pos'.  For a type that is neither TYPE_NONE
// nor a float type, bit 3 of the encoding is cleared first.
void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
{
   uint8_t bits;

   assert(pos >= 32 || pos <= 27);

   switch (cc) {
   // comparison codes and their *U counterparts (counterpart = code | 0x8)
   case CC_FL:  bits = 0x0; break;
   case CC_LT:  bits = 0x1; break;
   case CC_EQ:  bits = 0x2; break;
   case CC_LE:  bits = 0x3; break;
   case CC_GT:  bits = 0x4; break;
   case CC_NE:  bits = 0x5; break;
   case CC_GE:  bits = 0x6; break;
   case CC_LTU: bits = 0x9; break;
   case CC_EQU: bits = 0xa; break;
   case CC_LEU: bits = 0xb; break;
   case CC_GTU: bits = 0xc; break;
   case CC_NEU: bits = 0xd; break;
   case CC_GEU: bits = 0xe; break;
   case CC_TR:  bits = 0xf; break;

   // flag tests
   case CC_O:   bits = 0x10; break;
   case CC_C:   bits = 0x11; break;
   case CC_A:   bits = 0x12; break;
   case CC_S:   bits = 0x13; break;
   case CC_NS:  bits = 0x1c; break;
   case CC_NA:  bits = 0x1d; break;
   case CC_NC:  bits = 0x1e; break;
   case CC_NO:  bits = 0x1f; break;

   default:
      bits = 0;
      assert(!"invalid condition code");
      break;
   }

   const bool keepUnorderedBit = (ty == TYPE_NONE) || isFloatType(ty);
   if (!keepUnorderedBit)
      bits &= ~0x8; // unordered only exists for float types

   code[pos / 32] |= bits << (pos % 32);
}
 
void
CodeEmitterNV50::emitFlagsRd(const Instruction *i)
{
int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
 
assert(!(code[1] & 0x00003f80));
 
if (s >= 0) {
assert(i->getSrc(s)->reg.file == FILE_FLAGS);
emitCondCode(i->cc, TYPE_NONE, 32 + 7);
srcId(i->src(s), 32 + 12);
} else {
code[1] |= 0x0780;
}
}
 
void
CodeEmitterNV50::emitFlagsWr(const Instruction *i)
{
assert(!(code[1] & 0x70));
 
int flagsDef = i->flagsDef;
 
// find flags definition and check that it is the last def
if (flagsDef < 0) {
for (int d = 0; i->defExists(d); ++d)
if (i->def(d).getFile() == FILE_FLAGS)
flagsDef = d;
if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
WARN("Instruction::flagsDef was not set properly\n");
}
if (flagsDef == 0 && i->defExists(1))
WARN("flags def should not be the primary definition\n");
 
if (flagsDef >= 0)
code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
 
}
 
// Splits a 3-bit value across the two code words: the low two bits go to
// bits 26-27 of code[0], bit 2 stays at bit 2 of code[1].
void
CodeEmitterNV50::setARegBits(unsigned int u)
{
   const unsigned int lowTwo = u & 3;
   const unsigned int bitTwo = u & 4;

   code[0] |= lowTwo << 26;
   code[1] |= bitTwo;
}
 
// If source 's' exists and has an indirect source in slot 0, encodes that
// source's register id plus one through setARegBits().  Otherwise nothing
// is written.
void
CodeEmitterNV50::setAReg16(const Instruction *i, int s)
{
   if (!i->srcExists(s))
      return;

   const int addrSrc = i->src(s).indirect[0];
   if (addrSrc < 0)
      return;

   setARegBits(i->src(addrSrc).rep()->reg.data.id + 1);
}
 
void
CodeEmitterNV50::setImmediate(const Instruction *i, int s)
{
const ImmediateValue *imm = i->src(s).get()->asImm();
assert(imm);
 
uint32_t u = imm->reg.data.u32;
 
if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
u = ~u;
 
code[1] |= 3;
code[0] |= (u & 0x3f) << 16;
code[1] |= (u >> 6) << 2;
}
 
void
CodeEmitterNV50::setDst(const Value *dst)
{
const Storage *reg = &dst->join->reg;
 
assert(reg->file != FILE_ADDRESS);
 
if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
code[0] |= (127 << 2) | 1;
code[1] |= 8;
} else {
int id;
if (reg->file == FILE_SHADER_OUTPUT) {
code[1] |= 8;
id = reg->data.offset / 4;
} else {
id = reg->data.id;
}
code[0] |= id << 2;
}
}
 
// Encodes definition 'd' of the instruction.  A missing definition 0 is
// encoded as the bit bucket; a missing definition with any other index
// writes nothing.
void
CodeEmitterNV50::setDst(const Instruction *i, int d)
{
   if (i->defExists(d)) {
      setDst(i->getDef(d));
      return;
   }

   if (d == 0) {
      code[0] |= 0x01fc; // bit bucket
      code[1] |= 0x0008;
   }
}
 
// Encodes which register file each source operand lives in.
//
// A 'mode' value is built with 2 bits per source (source s occupies bits
// s*2 .. s*2+1), for up to three sources:
// 0: r   (FILE_GPR)
// 1: a/s (FILE_SHADER_INPUT / FILE_MEMORY_SHARED)
// 2: c   (FILE_MEMORY_CONST)
// 3: i   (FILE_IMMEDIATE)
// The combined mode then selects the bits OR-ed into code[0] / code[1];
// 'enc' is one of the NV50_OP_ENC_* values and picks between encodings for
// some modes.  Unsupported combinations log an error and assert.
void
CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
{
uint8_t mode = 0;

// collect the 2-bit file class of every source the operation takes
for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
switch (i->src(s).getFile()) {
case FILE_GPR:
break;
case FILE_MEMORY_SHARED:
case FILE_SHADER_INPUT:
mode |= 1 << (s * 2);
break;
case FILE_MEMORY_CONST:
mode |= 2 << (s * 2);
break;
case FILE_IMMEDIATE:
mode |= 3 << (s * 2);
break;
default:
ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
assert(0);
break;
}
}
// the case labels below read source 0 first, e.g. "rcr" = src0 r, src1 c
switch (mode) {
case 0x00: // rrr
break;
case 0x01: // arr/grr
if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
code[0] |= 0x01800000;
if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
code[1] |= 0x00200000;
} else {
if (enc == NV50_OP_ENC_SHORT)
code[0] |= 0x01000000;
else
code[1] |= 0x00200000;
}
break;
case 0x03: // irr
assert(i->op == OP_MOV);
// early exit: the compute-only type bits at the end are skipped too
return;
case 0x0c: // rir
break;
case 0x0d: // gir
assert(progType == Program::TYPE_GEOMETRY ||
progType == Program::TYPE_COMPUTE);
code[0] |= 0x01000000;
if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
assert(reg < 3);
code[0] |= (reg + 1) << 26;
}
break;
case 0x08: // rcr
code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
// fileIndex of the const-file source goes to bit 22 of code[1]
code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
break;
case 0x09: // acr/gcr
if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
code[0] |= 0x01800000;
} else {
code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
code[1] |= 0x00200000;
}
code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
break;
case 0x20: // rrc
code[0] |= 0x01000000;
code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
break;
case 0x21: // arc
code[0] |= 0x01000000;
code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
assert(progType != Program::TYPE_GEOMETRY);
break;
default:
ERROR("not encodable: %x\n", mode);
assert(0);
break;
}
// everything below applies to compute programs only
if (progType != Program::TYPE_COMPUTE)
return;

// source 0 is of class a/s: also encode its data type in code[0]; the
// field sits one bit lower when source 1 is an immediate
if ((mode & 3) == 1) {
const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;

switch (i->getSrc(0)->reg.type) {
case TYPE_U8:
break;
case TYPE_U16:
code[0] |= 1 << pos;
break;
case TYPE_S16:
code[0] |= 2 << pos;
break;
default:
code[0] |= 3 << pos;
assert(i->getSrc(0)->reg.size == 4);
break;
}
}
}
 
void
CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
{
if (Target::operationSrcNr[i->op] <= s)
return;
const Storage *reg = &i->src(s).rep()->reg;
 
unsigned int id = (reg->file == FILE_GPR) ?
reg->data.id :
reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
 
switch (slot) {
case 0: code[0] |= id << 9; break;
case 1: code[0] |= id << 16; break;
case 2: code[1] |= id << 14; break;
default:
assert(0);
break;
}
}
 
// the default form:
// - long instruction
// - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
// - address & flags
void
CodeEmitterNV50::emitForm_MAD(const Instruction *i)
{
assert(i->encSize == 8);
code[0] |= 1;
 
emitFlagsRd(i);
emitFlagsWr(i);
 
setDst(i, 0);
 
setSrcFileBits(i, NV50_OP_ENC_LONG);
setSrc(i, 0, 0);
setSrc(i, 1, 1);
setSrc(i, 2, 2);
 
if (i->getIndirect(0, 0)) {
assert(!i->getIndirect(1, 0));
setAReg16(i, 0);
} else {
setAReg16(i, 1);
}
}
 
// like default form, but 2nd source in slot 2, and no 3rd source
void
CodeEmitterNV50::emitForm_ADD(const Instruction *i)
{
assert(i->encSize == 8);
code[0] |= 1;
 
emitFlagsRd(i);
emitFlagsWr(i);
 
setDst(i, 0);
 
setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
setSrc(i, 0, 0);
setSrc(i, 1, 2);
 
if (i->getIndirect(0, 0)) {
assert(!i->getIndirect(1, 0));
setAReg16(i, 0);
} else {
setAReg16(i, 1);
}
}
 
// default short form (rr, ar, rc, gr)
void
CodeEmitterNV50::emitForm_MUL(const Instruction *i)
{
assert(i->encSize == 4 && !(code[0] & 1));
assert(i->defExists(0));
assert(!i->getPredicate());
 
setDst(i, 0);
 
setSrcFileBits(i, NV50_OP_ENC_SHORT);
setSrc(i, 0, 0);
setSrc(i, 1, 1);
}
 
// usual immediate form
// - 1 to 3 sources where last is immediate (rir, gir)
// - no address or predicate possible
void
CodeEmitterNV50::emitForm_IMM(const Instruction *i)
{
assert(i->encSize == 8);
code[0] |= 1;
 
assert(i->defExists(0) && i->srcExists(0));
 
setDst(i, 0);
 
setSrcFileBits(i, NV50_OP_ENC_IMM);
if (Target::operationSrcNr[i->op] > 1) {
setSrc(i, 0, 0);
setImmediate(i, 1);
setSrc(i, 2, 1);
} else {
setImmediate(i, 0);
}
}
 
// ORs the size selector for a load/store of type 'ty' into the code words
// at bit position 'pos'; emitLOAD uses it for local and global memory.
// Unknown types assert and encode 0.
void
CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
{
   uint8_t sel;

   switch (ty) {
   case TYPE_U8:
      sel = 0x0;
      break;
   case TYPE_S8:
      sel = 0x1;
      break;
   case TYPE_U16:
      sel = 0x2;
      break;
   case TYPE_S16:
      sel = 0x3;
      break;
   case TYPE_F64:
   case TYPE_S64:
   case TYPE_U64:
      sel = 0x4;
      break;
   case TYPE_B128:
      sel = 0x5;
      break;
   case TYPE_F32:
   case TYPE_S32:
   case TYPE_U32:
      sel = 0x6;
      break;
   default:
      sel = 0;
      assert(!"invalid load/store type");
      break;
   }

   code[pos / 32] |= sel << (pos % 32);
}
 
// ORs the size selector for a load of type 'ty' into bits 14-15 of code[1];
// emitLOAD uses it for const and shared memory.  TYPE_U8 encodes as 0;
// unsupported types assert and leave code[1] unchanged.
void
CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
{
   uint32_t sel = 0;

   switch (ty) {
   case TYPE_U8:
      break;
   case TYPE_U16:
      sel = 0x4000;
      break;
   case TYPE_S16:
      sel = 0x8000;
      break;
   case TYPE_F32:
   case TYPE_S32:
   case TYPE_U32:
      sel = 0xc000;
      break;
   default:
      assert(0);
      break;
   }

   code[1] |= sel;
}
 
// Encodes a load.  The register file of source 0 selects the opcode words,
// which are assigned (not OR-ed) to code[0] / code[1] first; size bits,
// destination, flags and address are OR-ed in afterwards, so the statement
// order matters.
void
CodeEmitterNV50::emitLOAD(const Instruction *i)
{
DataFile sf = i->src(0).getFile();
// only read by the asserts in the FILE_MEMORY_SHARED case
int32_t offset = i->getSrc(0)->reg.data.offset;

switch (sf) {
case FILE_SHADER_INPUT:
if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
code[0] = 0x11800001;
else
// use 'mov' where we can
code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
code[1] = 0x00200000 | (i->lanes << 14);
if (typeSizeof(i->dType) == 4)
code[1] |= 0x04000000;
break;
case FILE_MEMORY_SHARED:
// two encodings, chosen by chipset; they differ in the offset bound
// asserted and in the code[1] base word
if (targ->getChipset() >= 0x84) {
assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
code[0] = 0x10000001;
code[1] = 0x40000000;

if (typeSizeof(i->dType) == 4)
code[1] |= 0x04000000;

emitLoadStoreSizeCS(i->sType);
} else {
assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
code[0] = 0x10000001;
code[1] = 0x00200000 | (i->lanes << 14);
emitLoadStoreSizeCS(i->sType);
}
break;
case FILE_MEMORY_CONST:
code[0] = 0x10000001;
// fileIndex of the const source goes to bit 22 of code[1]
code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
if (typeSizeof(i->dType) == 4)
code[1] |= 0x04000000;
emitLoadStoreSizeCS(i->sType);
break;
case FILE_MEMORY_LOCAL:
code[0] = 0xd0000001;
code[1] = 0x40000000;
break;
case FILE_MEMORY_GLOBAL:
// fileIndex of the global source goes to bit 16 of code[0]
code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
code[1] = 0x80000000;
break;
default:
// NOTE(review): code[0] / code[1] are not assigned on this path, yet
// the OR-ing below still runs when asserts are compiled out
assert(!"invalid load source file");
break;
}
// local and global loads take their size selector at bit 21 of code[1]
if (sf == FILE_MEMORY_LOCAL ||
sf == FILE_MEMORY_GLOBAL)
emitLoadStoreSizeLG(i->sType, 21 + 32);

setDst(i, 0);

emitFlagsRd(i);
emitFlagsWr(i);

// global loads encode the id of the indirect source at bit 9; all other
// files encode an address register plus a 16-bit offset there
if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
srcId(*i->src(0).getIndirect(0), 9);
} else {
setAReg16(i, 0);
// the offset is size-adjusted for every file except local memory
srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
}
}
 
// Encodes a store. The opcode words are selected by the file of source 0
// (the destination location); source 1 is the value being written.
void
CodeEmitterNV50::emitSTORE(const Instruction *i)
{
   DataFile f = i->getSrc(0)->reg.file;
   int32_t offset = i->getSrc(0)->reg.data.offset;

   switch (f) {
   case FILE_SHADER_OUTPUT:
      // offset / 4 is placed in code[0] starting at bit 9
      code[0] = 0x00000001 | ((offset >> 2) << 9);
      code[1] = 0x80c00000;
      srcId(i->src(1), 32 + 14);
      break;
   case FILE_MEMORY_GLOBAL:
      code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
      code[1] = 0xa0000000;
      emitLoadStoreSizeLG(i->dType, 21 + 32);
      srcId(i->src(1), 2);
      break;
   case FILE_MEMORY_LOCAL:
      code[0] = 0xd0000001;
      code[1] = 0x60000000;
      emitLoadStoreSizeLG(i->dType, 21 + 32);
      srcId(i->src(1), 2);
      break;
   case FILE_MEMORY_SHARED:
      code[0] = 0x00000001;
      code[1] = 0xe0000000;
      // the offset is stored in units of the access size (1, 2 or 4 bytes)
      switch (typeSizeof(i->dType)) {
      case 1:
         code[0] |= offset << 9;
         code[1] |= 0x00400000;
         break;
      case 2:
         code[0] |= (offset >> 1) << 9;
         break;
      case 4:
         code[0] |= (offset >> 2) << 9;
         code[1] |= 0x04200000;
         break;
      default:
         assert(0);
         break;
      }
      srcId(i->src(1), 32 + 14);
      break;
   default:
      assert(!"invalid store destination file");
      break;
   }

   // global stores encode the indirect value of source 0 as a register id;
   // every other file goes through setAReg16
   if (f == FILE_MEMORY_GLOBAL)
      srcId(*i->src(0).getIndirect(0), 9);
   else
      setAReg16(i, 0);

   if (f == FILE_MEMORY_LOCAL)
      srcAddr16(i->src(0), false, 9);

   emitFlagsRd(i);
}
 
void
CodeEmitterNV50::emitMOV(const Instruction *i)
{
DataFile sf = i->getSrc(0)->reg.file;
DataFile df = i->getDef(0)->reg.file;
 
assert(sf == FILE_GPR || df == FILE_GPR);
 
if (sf == FILE_FLAGS) {
code[0] = 0x00000001;
code[1] = 0x20000000;
defId(i->def(0), 2);
srcId(i->src(0), 12);
emitFlagsRd(i);
} else
if (sf == FILE_ADDRESS) {
code[0] = 0x00000001;
code[1] = 0x40000000;
defId(i->def(0), 2);
setARegBits(SDATA(i->src(0)).id + 1);
emitFlagsRd(i);
} else
if (df == FILE_FLAGS) {
code[0] = 0x00000001;
code[1] = 0xa0000000;
defId(i->def(0), 4);
srcId(i->src(0), 9);
emitFlagsRd(i);
} else
if (sf == FILE_IMMEDIATE) {
code[0] = 0x10008001;
code[1] = 0x00000003;
emitForm_IMM(i);
} else {
if (i->encSize == 4) {
code[0] = 0x10008000;
} else {
code[0] = 0x10000001;
code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
code[1] |= (i->lanes << 14);
emitFlagsRd(i);
}
defId(i->def(0), 2);
srcId(i->src(0), 9);
}
if (df == FILE_SHADER_OUTPUT) {
assert(i->encSize == 8);
code[1] |= 0x8;
}
}
 
// Maps the system value referenced by @ref to its special-register number.
// System values without a mapping trigger an assertion and yield 0.
static inline uint8_t getSRegEncoding(const ValueRef &ref)
{
   uint8_t sreg = 0;

   switch (SDATA(ref).sv.sv) {
   case SV_PHYSID:
      sreg = 0;
      break;
   case SV_CLOCK:
      sreg = 1;
      break;
   case SV_VERTEX_STRIDE:
      sreg = 3;
      break;
   // case SV_PM_COUNTER: return 4 + SDATA(ref).sv.index;
   case SV_SAMPLE_INDEX:
      sreg = 8;
      break;
   default:
      assert(!"no sreg for system value");
      break;
   }
   return sreg;
}
 
void
CodeEmitterNV50::emitRDSV(const Instruction *i)
{
code[0] = 0x00000001;
code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
defId(i->def(0), 2);
emitFlagsRd(i);
}
 
// Writes a fixed 8-byte encoding (bit 0 of code[0] set). emitInstruction
// uses it for OP_NOP, OP_EXIT and OP_JOIN and ORs the exit/join bit into
// code[1] afterwards.
void
CodeEmitterNV50::emitNOP()
{
   code[0] = 0xf0000001;
   code[1] = 0xe0000000;
}
 
void
CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
{
code[0] = 0xc0000000 | (lane << 16);
code[1] = 0x80000000;
 
code[0] |= (quOp & 0x03) << 20;
code[1] |= (quOp & 0xfc) << 20;
 
emitForm_ADD(i);
 
if (!i->srcExists(1))
srcId(i->src(0), 32 + 14);
}
 
/* NOTE: This returns the base address of a vertex inside the primitive.
* src0 is an immediate, the index (not offset) of the vertex
* inside the primitive. XXX: signed or unsigned ?
* src1 (may be NULL) should use whatever units the hardware requires
* (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
*/
void
CodeEmitterNV50::emitPFETCH(const Instruction *i)
{
const uint32_t prim = i->src(0).get()->reg.data.u32;
assert(prim <= 127);
 
if (i->def(0).getFile() == FILE_ADDRESS) {
// shl $aX a[] 0
code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
code[1] = 0xc0200000;
code[0] |= prim << 9;
assert(!i->srcExists(1));
} else
if (i->srcExists(1)) {
// ld b32 $rX a[$aX+base]
code[0] = 0x00000001;
code[1] = 0x04200000 | (0xf << 14);
defId(i->def(0), 2);
code[0] |= prim << 9;
setARegBits(SDATA(i->src(1)).id + 1);
} else {
// mov b32 $rX a[]
code[0] = 0x10000001;
code[1] = 0x04200000 | (0xf << 14);
defId(i->def(0), 2);
code[0] |= prim << 9;
}
emitFlagsRd(i);
}
 
// Encodes an interpolation instruction (emitInstruction routes OP_LINTERP
// and OP_PINTERP here). The mode bits are first composed in the short
// layout inside code[0]; for an 8-byte encoding they are then moved into
// code[1].
void
CodeEmitterNV50::emitINTERP(const Instruction *i)
{
   code[0] = 0x80000000;

   defId(i->def(0), 2);
   srcAddr8(i->src(0), 16);

   if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
      code[0] |= 1 << 8;
   } else {
      if (i->op == OP_PINTERP) {
         code[0] |= 1 << 25;
         srcId(i->src(1), 9);
      }
      if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
         code[0] |= 1 << 24;
   }

   if (i->encSize == 8) {
      // relocate the mode bits: code[0] bits 24..25 -> code[1] bits 16..17,
      // code[0] bit 8 (flat) -> code[1] bit 18; then clear them in code[0]
      code[1] =
         (code[0] & (3 << 24)) >> (24 - 16) |
         (code[0] & (1 << 8)) << (18 - 8);
      code[0] &= ~0x03000100;
      code[0] |= 1; // long-encoding marker
      emitFlagsRd(i);
   }
}
 
void
CodeEmitterNV50::emitMINMAX(const Instruction *i)
{
if (i->dType == TYPE_F64) {
code[0] = 0xe0000000;
code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
} else {
code[0] = 0x30000000;
code[1] = 0x80000000;
if (i->op == OP_MIN)
code[1] |= 0x20000000;
 
switch (i->dType) {
case TYPE_F32: code[0] |= 0x80000000; break;
case TYPE_S32: code[1] |= 0x8c000000; break;
case TYPE_U32: code[1] |= 0x84000000; break;
case TYPE_S16: code[1] |= 0x80000000; break;
case TYPE_U16: break;
default:
assert(0);
break;
}
code[1] |= i->src(0).mod.abs() << 20;
code[1] |= i->src(0).mod.neg() << 26;
code[1] |= i->src(1).mod.abs() << 19;
code[1] |= i->src(1).mod.neg() << 27;
}
emitForm_MAD(i);
}
 
void
CodeEmitterNV50::emitFMAD(const Instruction *i)
{
const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
const int neg_add = i->src(2).mod.neg();
 
code[0] = 0xe0000000;
 
if (i->src(1).getFile() == FILE_IMMEDIATE) {
code[1] = 0;
emitForm_IMM(i);
code[0] |= neg_mul << 15;
code[0] |= neg_add << 22;
if (i->saturate)
code[0] |= 1 << 8;
} else
if (i->encSize == 4) {
emitForm_MUL(i);
code[0] |= neg_mul << 15;
code[0] |= neg_add << 22;
if (i->saturate)
code[0] |= 1 << 8;
} else {
code[1] = neg_mul << 26;
code[1] |= neg_add << 27;
if (i->saturate)
code[1] |= 1 << 29;
emitForm_MAD(i);
}
}
 
void
CodeEmitterNV50::emitFADD(const Instruction *i)
{
const int neg0 = i->src(0).mod.neg();
const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
 
code[0] = 0xb0000000;
 
assert(!(i->src(0).mod | i->src(1).mod).abs());
 
if (i->src(1).getFile() == FILE_IMMEDIATE) {
code[1] = 0;
emitForm_IMM(i);
code[0] |= neg0 << 15;
code[0] |= neg1 << 22;
if (i->saturate)
code[0] |= 1 << 8;
} else
if (i->encSize == 8) {
code[1] = 0;
emitForm_ADD(i);
code[1] |= neg0 << 26;
code[1] |= neg1 << 27;
if (i->saturate)
code[1] |= 1 << 29;
} else {
emitForm_MUL(i);
code[0] |= neg0 << 15;
code[0] |= neg1 << 22;
if (i->saturate)
code[0] |= 1 << 8;
}
}
 
void
CodeEmitterNV50::emitUADD(const Instruction *i)
{
const int neg0 = i->src(0).mod.neg();
const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
 
code[0] = 0x20008000;
 
if (i->src(1).getFile() == FILE_IMMEDIATE) {
code[1] = 0;
emitForm_IMM(i);
} else
if (i->encSize == 8) {
code[0] = 0x20000000;
code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
emitForm_ADD(i);
} else {
emitForm_MUL(i);
}
assert(!(neg0 && neg1));
code[0] |= neg0 << 28;
code[0] |= neg1 << 22;
 
if (i->flagsSrc >= 0) {
// addc == sub | subr
assert(!(code[0] & 0x10400000) && !i->getPredicate());
code[0] |= 0x10400000;
srcId(i->src(i->flagsSrc), 32 + 12);
}
}
 
void
CodeEmitterNV50::emitAADD(const Instruction *i)
{
const int s = (i->op == OP_MOV) ? 0 : 1;
 
code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
code[1] = 0x20000000;
 
code[0] |= (DDATA(i->def(0)).id + 1) << 2;
 
emitFlagsRd(i);
 
if (s && i->srcExists(0))
setARegBits(SDATA(i->src(0)).id + 1);
}
 
void
CodeEmitterNV50::emitIMUL(const Instruction *i)
{
code[0] = 0x40000000;
 
if (i->encSize == 8) {
code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
emitForm_MAD(i);
} else {
if (i->sType == TYPE_S16)
code[0] |= 0x8100;
emitForm_MUL(i);
}
}
 
void
CodeEmitterNV50::emitFMUL(const Instruction *i)
{
const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
 
code[0] = 0xc0000000;
 
if (i->src(1).getFile() == FILE_IMMEDIATE) {
code[1] = 0;
emitForm_IMM(i);
if (neg)
code[0] |= 0x8000;
if (i->saturate)
code[0] |= 1 << 8;
} else
if (i->encSize == 8) {
code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
if (neg)
code[1] |= 0x08000000;
if (i->saturate)
code[1] |= 1 << 20;
emitForm_MAD(i);
} else {
emitForm_MUL(i);
if (neg)
code[0] |= 0x8000;
if (i->saturate)
code[0] |= 1 << 8;
}
}
 
void
CodeEmitterNV50::emitIMAD(const Instruction *i)
{
code[0] = 0x60000000;
if (isSignedType(i->sType))
code[1] = i->saturate ? 0x40000000 : 0x20000000;
else
code[1] = 0x00000000;
 
int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
int neg2 = i->src(2).mod.neg();
 
assert(!(neg1 & neg2));
code[1] |= neg1 << 27;
code[1] |= neg2 << 26;
 
emitForm_MAD(i);
 
if (i->flagsSrc >= 0) {
// add with carry from $cX
assert(!(code[1] & 0x0c000000) && !i->getPredicate());
code[1] |= 0xc << 24;
srcId(i->src(i->flagsSrc), 32 + 12);
}
}
 
// Encodes OP_SAD in short or long form; the source type (U16/S16/U32/S32)
// selects the type bits. Any other source type triggers an assertion.
void
CodeEmitterNV50::emitISAD(const Instruction *i)
{
   if (i->encSize != 8) {
      // short form: type bits live in code[0]
      switch (i->sType) {
      case TYPE_U16: code[0] = 0x50000000; break;
      case TYPE_S16: code[0] = 0x50000100; break;
      case TYPE_U32: code[0] = 0x50008000; break;
      case TYPE_S32: code[0] = 0x50008100; break;
      default:
         assert(0);
         break;
      }
      emitForm_MUL(i);
      return;
   }

   // long form: type bits live in code[1]
   code[0] = 0x50000000;
   switch (i->sType) {
   case TYPE_U16: code[1] = 0x00000000; break;
   case TYPE_S16: code[1] = 0x08000000; break;
   case TYPE_U32: code[1] = 0x04000000; break;
   case TYPE_S32: code[1] = 0x0c000000; break;
   default:
      assert(0);
      break;
   }
   emitForm_MAD(i);
}
 
// Encodes a comparison (OP_SET). The condition code is emitted by
// emitCondCode at bit 14 of code[1]; the source type selects the type bits.
void
CodeEmitterNV50::emitSET(const Instruction *i)
{
   code[0] = 0x30000000;
   code[1] = 0x60000000;

   emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);

   switch (i->sType) {
   case TYPE_F32: code[0] |= 0x80000000; break;
   case TYPE_S32: code[1] |= 0x0c000000; break;
   case TYPE_U32: code[1] |= 0x04000000; break;
   case TYPE_S16: code[1] |= 0x08000000; break;
   case TYPE_U16: break;
   default:
      assert(0);
      break;
   }
   // NOTE(review): the neg masks below are the same bits the integer cases
   // above use (0x04000000 / 0x08000000) — presumably modifiers only occur
   // on float compares; confirm
   if (i->src(0).mod.neg()) code[1] |= 0x04000000;
   if (i->src(1).mod.neg()) code[1] |= 0x08000000;
   if (i->src(0).mod.abs()) code[1] |= 0x00100000;
   if (i->src(1).mod.abs()) code[1] |= 0x00080000;

   emitForm_MAD(i);
}
 
// ORs the rounding-mode bits for a conversion into code[1].
// ROUND_N contributes no bits; any mode not listed below is asserted to be
// ROUND_N.
void
CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
{
   uint32_t rndBits = 0;

   switch (rnd) {
   case ROUND_M:  rndBits = 0x00020000; break;
   case ROUND_P:  rndBits = 0x00040000; break;
   case ROUND_Z:  rndBits = 0x00060000; break;
   // the *I variants add 0x08000000 to the corresponding mode
   case ROUND_NI: rndBits = 0x08000000; break;
   case ROUND_MI: rndBits = 0x08020000; break;
   case ROUND_PI: rndBits = 0x08040000; break;
   case ROUND_ZI: rndBits = 0x08060000; break;
   default:
      assert(rnd == ROUND_N);
      break;
   }

   code[1] |= rndBits;
}
 
// Encodes a conversion. emitInstruction also routes OP_CEIL, OP_FLOOR,
// OP_TRUNC, OP_ABS, OP_NEG and OP_SAT here: the first three become a
// rounding mode, the others become modifier bits in code[1].
// code[1] is picked from a (destination type, source type) table; pairs
// that are not listed trigger an assertion.
void
CodeEmitterNV50::emitCVT(const Instruction *i)
{
   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
   RoundMode rnd;
   DataType dType;

   // float-to-float rounding ops use the *I rounding variants
   switch (i->op) {
   case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
   case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
   case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
   default:
      rnd = i->rnd;
      break;
   }

   // a negate with an unsigned destination is encoded as signed
   if (i->op == OP_NEG && i->dType == TYPE_U32)
      dType = TYPE_S32;
   else
      dType = i->dType;

   code[0] = 0xa0000000;

   switch (dType) {
   case TYPE_F64:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0xc4404000; break;
      case TYPE_S64: code[1] = 0x44414000; break;
      case TYPE_U64: code[1] = 0x44404000; break;
      case TYPE_F32: code[1] = 0xc4400000; break;
      case TYPE_S32: code[1] = 0x44410000; break;
      case TYPE_U32: code[1] = 0x44400000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_S64:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0x8c404000; break;
      case TYPE_F32: code[1] = 0x8c400000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_U64:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0x84404000; break;
      case TYPE_F32: code[1] = 0x84400000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_F32:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0xc0404000; break;
      case TYPE_S64: code[1] = 0x40414000; break;
      case TYPE_U64: code[1] = 0x40404000; break;
      case TYPE_F32: code[1] = 0xc4004000; break;
      case TYPE_S32: code[1] = 0x44014000; break;
      case TYPE_U32: code[1] = 0x44004000; break;
      case TYPE_F16: code[1] = 0xc4000000; break;
      case TYPE_U16: code[1] = 0x44000000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_S32:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0x88404000; break;
      case TYPE_F32: code[1] = 0x8c004000; break;
      case TYPE_S32: code[1] = 0x0c014000; break;
      case TYPE_U32: code[1] = 0x0c004000; break;
      case TYPE_F16: code[1] = 0x8c000000; break;
      case TYPE_S16: code[1] = 0x0c010000; break;
      case TYPE_U16: code[1] = 0x0c000000; break;
      case TYPE_S8:  code[1] = 0x0c018000; break;
      case TYPE_U8:  code[1] = 0x0c008000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_U32:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0x80404000; break;
      case TYPE_F32: code[1] = 0x84004000; break;
      case TYPE_S32: code[1] = 0x04014000; break;
      case TYPE_U32: code[1] = 0x04004000; break;
      case TYPE_F16: code[1] = 0x84000000; break;
      case TYPE_S16: code[1] = 0x04010000; break;
      case TYPE_U16: code[1] = 0x04000000; break;
      case TYPE_S8:  code[1] = 0x04018000; break;
      case TYPE_U8:  code[1] = 0x04008000; break;
      default:
         assert(0);
         break;
      }
      break;
   // 8- and 16-bit destinations have no encoding here
   case TYPE_S16:
   case TYPE_U16:
   case TYPE_S8:
   case TYPE_U8:
   default:
      assert(0);
      break;
   }
   // byte-sized source type read from a 4-byte register value
   if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
      code[1] |= 0x00004000;

   roundMode_CVT(rnd);

   switch (i->op) {
   case OP_ABS: code[1] |= 1 << 20; break;
   case OP_SAT: code[1] |= 1 << 19; break;
   case OP_NEG: code[1] |= 1 << 29; break;
   default:
      break;
   }
   // XOR: a neg source modifier on top of OP_NEG cancels the negate bit
   code[1] ^= i->src(0).mod.neg() << 29;
   code[1] |= i->src(0).mod.abs() << 20;
   if (i->saturate)
      code[1] |= 1 << 19;

   assert(i->op != OP_ABS || !i->src(0).mod.neg());

   emitForm_MAD(i);
}
 
void
CodeEmitterNV50::emitPreOp(const Instruction *i)
{
code[0] = 0xb0000000;
code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
 
code[1] |= i->src(0).mod.abs() << 20;
code[1] |= i->src(0).mod.neg() << 26;
 
emitForm_MAD(i);
}
 
void
CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
{
code[0] = 0x90000000;
 
if (i->encSize == 4) {
assert(i->op == OP_RCP);
code[0] |= i->src(0).mod.abs() << 15;
code[0] |= i->src(0).mod.neg() << 22;
emitForm_MUL(i);
} else {
code[1] = subOp << 29;
code[1] |= i->src(0).mod.abs() << 20;
code[1] |= i->src(0).mod.neg() << 26;
emitForm_MAD(i);
}
}
 
// Encodes OP_NOT. It shares the 0xd0000000 opcode with emitLogicOp; 32-bit
// source types additionally set 0x04000000 in code[1].
void
CodeEmitterNV50::emitNOT(const Instruction *i)
{
   code[0] = 0xd0000000;
   code[1] = 0x0002c000;

   switch (i->sType) {
   case TYPE_U32:
   case TYPE_S32:
      code[1] |= 0x04000000;
      break;
   default:
      break;
   }
   emitForm_MAD(i);
   // source 0 is encoded a second time after the generic form; presumably
   // into the second source slot — confirm against setSrc
   setSrc(i, 0, 1);
}
 
void
CodeEmitterNV50::emitLogicOp(const Instruction *i)
{
code[0] = 0xd0000000;
code[1] = 0;
 
if (i->src(1).getFile() == FILE_IMMEDIATE) {
switch (i->op) {
case OP_OR: code[0] |= 0x0100; break;
case OP_XOR: code[0] |= 0x8000; break;
default:
assert(i->op == OP_AND);
break;
}
if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
code[0] |= 1 << 22;
 
emitForm_IMM(i);
} else {
switch (i->op) {
case OP_AND: code[1] = 0x04000000; break;
case OP_OR: code[1] = 0x04004000; break;
case OP_XOR: code[1] = 0x04008000; break;
default:
assert(0);
break;
}
if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 16;
if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 17;
 
emitForm_MAD(i);
}
}
 
// Encodes a write to an address register: the destination is stored as
// id + 1 starting at bit 2 of code[0] and the shift amount @shl starting at
// bit 16. Callers in this file: emitShift (address destination) and the
// OP_CVT case of emitInstruction (with shl == 0).
void
CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
{
   code[0] = 0x00000001 | (shl << 16);
   code[1] = 0xc0000000;

   code[0] |= (DDATA(i->def(0)).id + 1) << 2;

   setSrcFileBits(i, NV50_OP_ENC_IMM);
   setSrc(i, 0, 0);
   emitFlagsRd(i);
}
 
void
CodeEmitterNV50::emitShift(const Instruction *i)
{
if (i->def(0).getFile() == FILE_ADDRESS) {
assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
} else {
code[0] = 0x30000001;
code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
if (i->op == OP_SHR && isSignedType(i->sType))
code[1] |= 1 << 27;
 
if (i->src(1).getFile() == FILE_IMMEDIATE) {
code[1] |= 1 << 20;
code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
defId(i->def(0), 2);
srcId(i->src(0), 9);
emitFlagsRd(i);
} else {
emitForm_MAD(i);
}
}
}
 
void
CodeEmitterNV50::emitOUT(const Instruction *i)
{
code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
code[1] = 0xc0000000;
 
emitFlagsRd(i);
}
 
// Encodes a texture instruction (OP_TEX, OP_TXB, OP_TXL, OP_TXF, OP_TXG,
// OP_TXLQ as routed by emitInstruction). Only the destination register is
// encoded explicitly here; no source operand is written by this function.
void
CodeEmitterNV50::emitTEX(const TexInstruction *i)
{
   code[0] = 0xf0000001;
   code[1] = 0x00000000;

   // per-operation opcode bits
   switch (i->op) {
   case OP_TXB:
      code[1] = 0x20000000;
      break;
   case OP_TXL:
      code[1] = 0x40000000;
      break;
   case OP_TXF:
      code[0] |= 0x01000000;
      break;
   case OP_TXG:
      code[0] |= 0x01000000;
      code[1] = 0x80000000;
      break;
   case OP_TXLQ:
      code[1] = 0x60020000;
      break;
   default:
      assert(i->op == OP_TEX);
      break;
   }

   // tex.r and tex.s fields (presumably resource and sampler indices)
   code[0] |= i->tex.r << 9;
   code[0] |= i->tex.s << 17;

   int argc = i->tex.target.getArgCount();

   // TXB/TXL/TXF take one extra argument, shadow targets another one
   if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
      argc += 1;
   if (i->tex.target.isShadow())
      argc += 1;
   assert(argc <= 4);

   code[0] |= (argc - 1) << 22;

   // offsets are only encoded for non-cube targets, 4 bits per component
   if (i->tex.target.isCube()) {
      code[0] |= 0x08000000;
   } else
   if (i->tex.useOffsets) {
      code[1] |= (i->tex.offset[0] & 0xf) << 24;
      code[1] |= (i->tex.offset[1] & 0xf) << 20;
      code[1] |= (i->tex.offset[2] & 0xf) << 16;
   }

   // 4-bit mask split: low 2 bits in code[0], high 2 bits in code[1]
   code[0] |= (i->tex.mask & 0x3) << 25;
   code[1] |= (i->tex.mask & 0xc) << 12;

   if (i->tex.liveOnly)
      code[1] |= 4;

   defId(i->def(0), 2);

   emitFlagsRd(i);
}
 
void
CodeEmitterNV50::emitTXQ(const TexInstruction *i)
{
assert(i->tex.query == TXQ_DIMS);
 
code[0] = 0xf0000001;
code[1] = 0x60000000;
 
code[0] |= i->tex.r << 9;
code[0] |= i->tex.s << 17;
 
code[0] |= (i->tex.mask & 0x3) << 25;
code[1] |= (i->tex.mask & 0xc) << 12;
 
defId(i->def(0), 2);
 
emitFlagsRd(i);
}
 
void
CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
{
code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
code[1] = 0x60010000;
 
code[0] |= (i->tex.mask & 0x3) << 25;
code[1] |= (i->tex.mask & 0xc) << 12;
defId(i->def(0), 2);
 
emitFlagsRd(i);
}
 
// Emits one instruction of the emulated PRERET sequence, selected by
// i->subOp relative to NV50_IR_SUBOP_EMU_PRERET:
//   +0: bra to the call, +1: bra that skips the call, +2: the call itself.
// The target position is registered as a pair of code relocations rather
// than being written into the instruction words here.
void
CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
{
   uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op

   code[0] = 0x10000003; // bra
   code[1] = 0x00000780; // always

   switch (i->subOp) {
   case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
      break;
   case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
      pos += 8;
      break;
   default:
      assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
      code[0] = 0x20000003; // call
      code[1] = 0x00000000; // no predicate
      break;
   }
   // same masks and shifts as the relocations added by emitFlow
   addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
   addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
}
 
// Encodes a control-flow instruction. @flowOp is stored in the top 4 bits of
// code[0]. Depending on the operation the instruction may read flags
// (hasPred) and/or carry a branch target (hasTarg).
void
CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
{
   const FlowInstruction *f = i->asFlow();
   bool hasPred = false;
   bool hasTarg = false;

   code[0] = 0x00000003 | (flowOp << 28);
   code[1] = 0x00000000;

   switch (i->op) {
   case OP_BRA:
      hasPred = true;
      hasTarg = true;
      break;
   case OP_BREAK:
   case OP_BRKPT:
   case OP_DISCARD:
   case OP_RET:
      hasPred = true;
      break;
   case OP_CALL:
   case OP_PREBREAK:
   case OP_JOINAT:
      hasTarg = true;
      break;
   case OP_PRERET:
      hasTarg = true;
      // the emulated variants are emitted by a dedicated routine
      if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
         emitPRERETEmu(f);
         return;
      }
      break;
   default:
      break;
   }

   if (hasPred)
      emitFlagsRd(i);

   if (hasTarg && f) {
      uint32_t pos;

      // target position: builtin offset, function start or basic block start
      if (f->op == OP_CALL) {
         if (f->builtin) {
            pos = targNV50->getBuiltinOffset(f->target.builtin);
         } else {
            pos = f->target.fn->binPos;
         }
      } else {
         pos = f->target.bb->binPos;
      }

      // pos / 4 is split: low 16 bits at bit 11 of code[0], next 6 bits at
      // bit 14 of code[1]
      code[0] |= ((pos >> 2) & 0xffff) << 11;
      code[1] |= ((pos >> 18) & 0x003f) << 14;

      RelocEntry::Type relocTy;

      relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;

      // one relocation entry per instruction word covering the same fields
      addReloc(relocTy, 0, pos, 0x07fff800, 9);
      addReloc(relocTy, 1, pos, 0x000fc000, -4);
   }
}
 
void
CodeEmitterNV50::emitBAR(const Instruction *i)
{
ImmediateValue *barId = i->getSrc(0)->asImm();
assert(barId);
 
code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
code[1] = 0x00004000;
 
if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
code[0] |= 1 << 26;
}
 
// Encodes an atomic operation. i->subOp selects the operation, which is
// stored in code[1] starting at bit 2. An unknown sub-op triggers an
// assertion and the function returns without writing anything.
void
CodeEmitterNV50::emitATOM(const Instruction *i)
{
   uint8_t subOp;
   switch (i->subOp) {
   case NV50_IR_SUBOP_ATOM_ADD:  subOp = 0x0; break;
   case NV50_IR_SUBOP_ATOM_MIN:  subOp = 0x7; break;
   case NV50_IR_SUBOP_ATOM_MAX:  subOp = 0x6; break;
   case NV50_IR_SUBOP_ATOM_INC:  subOp = 0x4; break;
   case NV50_IR_SUBOP_ATOM_DEC:  subOp = 0x5; break;
   case NV50_IR_SUBOP_ATOM_AND:  subOp = 0xa; break;
   case NV50_IR_SUBOP_ATOM_OR:   subOp = 0xb; break;
   case NV50_IR_SUBOP_ATOM_XOR:  subOp = 0xc; break;
   case NV50_IR_SUBOP_ATOM_CAS:  subOp = 0x2; break;
   case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
   default:
      assert(!"invalid subop");
      return;
   }
   code[0] = 0xd0000001;
   code[1] = 0xe0c00000 | (subOp << 2);
   // signed data types set bit 21 of code[1]
   if (isSignedType(i->dType))
      code[1] |= 1 << 21;

   // args
   emitFlagsRd(i);
   setDst(i, 0);
   setSrc(i, 1, 1);
   // compare-and-swap takes one more source
   if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
      setSrc(i, 2, 2);

   // g[] pointer
   code[0] |= i->getSrc(0)->reg.fileIndex << 23;
   srcId(i->getIndirect(0, 0), 9);
}
 
// Emits the binary encoding of @insn at the current output position and
// advances the output pointer and size by insn->encSize.
//
// Returns false (without advancing) when the instruction has no encoding
// size, when the output buffer is too small, or when the operation is one
// that should have been eliminated or lowered earlier, or is unknown.
bool
CodeEmitterNV50::emitInstruction(Instruction *insn)
{
   if (!insn->encSize) {
      ERROR("skipping unencodable instruction: "); insn->print();
      return false;
   } else
   if (codeSize + insn->encSize > codeSizeLimit) {
      ERROR("code emitter output buffer too small\n");
      return false;
   }

   if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
      INFO("EMIT: "); insn->print();
   }

   // dispatch on the operation to the matching encoder
   switch (insn->op) {
   case OP_MOV:
      emitMOV(insn);
      break;
   case OP_EXIT:
   case OP_NOP:
   case OP_JOIN:
      // exit/join are a nop carrying the modifier bit added after the switch
      emitNOP();
      break;
   case OP_VFETCH:
   case OP_LOAD:
      emitLOAD(insn);
      break;
   case OP_EXPORT:
   case OP_STORE:
      emitSTORE(insn);
      break;
   case OP_PFETCH:
      emitPFETCH(insn);
      break;
   case OP_RDSV:
      emitRDSV(insn);
      break;
   case OP_LINTERP:
   case OP_PINTERP:
      emitINTERP(insn);
      break;
   case OP_ADD:
   case OP_SUB:
      // float add, address-register add or integer add
      if (isFloatType(insn->dType))
         emitFADD(insn);
      else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
         emitAADD(insn);
      else
         emitUADD(insn);
      break;
   case OP_MUL:
      if (isFloatType(insn->dType))
         emitFMUL(insn);
      else
         emitIMUL(insn);
      break;
   case OP_MAD:
   case OP_FMA:
      if (isFloatType(insn->dType))
         emitFMAD(insn);
      else
         emitIMAD(insn);
      break;
   case OP_SAD:
      emitISAD(insn);
      break;
   case OP_NOT:
      emitNOT(insn);
      break;
   case OP_AND:
   case OP_OR:
   case OP_XOR:
      emitLogicOp(insn);
      break;
   case OP_SHL:
   case OP_SHR:
      emitShift(insn);
      break;
   case OP_SET:
      emitSET(insn);
      break;
   case OP_MIN:
   case OP_MAX:
      emitMINMAX(insn);
      break;
   case OP_CEIL:
   case OP_FLOOR:
   case OP_TRUNC:
   case OP_ABS:
   case OP_NEG:
   case OP_SAT:
      emitCVT(insn);
      break;
   case OP_CVT:
      // conversions touching address or flags registers use other encoders
      if (insn->def(0).getFile() == FILE_ADDRESS)
         emitARL(insn, 0);
      else
      if (insn->def(0).getFile() == FILE_FLAGS ||
          insn->src(0).getFile() == FILE_FLAGS ||
          insn->src(0).getFile() == FILE_ADDRESS)
         emitMOV(insn);
      else
         emitCVT(insn);
      break;
   // special-function ops: the second argument is the sub-op number
   case OP_RCP:
      emitSFnOp(insn, 0);
      break;
   case OP_RSQ:
      emitSFnOp(insn, 2);
      break;
   case OP_LG2:
      emitSFnOp(insn, 3);
      break;
   case OP_SIN:
      emitSFnOp(insn, 4);
      break;
   case OP_COS:
      emitSFnOp(insn, 5);
      break;
   case OP_EX2:
      emitSFnOp(insn, 6);
      break;
   case OP_PRESIN:
   case OP_PREEX2:
      emitPreOp(insn);
      break;
   case OP_TEX:
   case OP_TXB:
   case OP_TXL:
   case OP_TXF:
   case OP_TXG:
   case OP_TXLQ:
      emitTEX(insn->asTex());
      break;
   case OP_TXQ:
      emitTXQ(insn->asTex());
      break;
   case OP_TEXPREP:
      emitTEXPREP(insn->asTex());
      break;
   case OP_EMIT:
   case OP_RESTART:
      emitOUT(insn);
      break;
   // control flow: the second argument is the flow opcode number
   case OP_DISCARD:
      emitFlow(insn, 0x0);
      break;
   case OP_BRA:
      emitFlow(insn, 0x1);
      break;
   case OP_CALL:
      emitFlow(insn, 0x2);
      break;
   case OP_RET:
      emitFlow(insn, 0x3);
      break;
   case OP_PREBREAK:
      emitFlow(insn, 0x4);
      break;
   case OP_BREAK:
      emitFlow(insn, 0x5);
      break;
   case OP_QUADON:
      emitFlow(insn, 0x6);
      break;
   case OP_QUADPOP:
      emitFlow(insn, 0x7);
      break;
   case OP_JOINAT:
      emitFlow(insn, 0xa);
      break;
   case OP_PRERET:
      emitFlow(insn, 0xd);
      break;
   case OP_QUADOP:
      emitQUADOP(insn, insn->lanes, insn->subOp);
      break;
   // derivatives are quad ops with a fixed lane and operation pattern;
   // a negated source selects the mirrored pattern
   case OP_DFDX:
      emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
      break;
   case OP_DFDY:
      emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
      break;
   case OP_ATOM:
      emitATOM(insn);
      break;
   case OP_BAR:
      emitBAR(insn);
      break;
   case OP_PHI:
   case OP_UNION:
   case OP_CONSTRAINT:
      ERROR("operation should have been eliminated\n");
      return false;
   case OP_EXP:
   case OP_LOG:
   case OP_SQRT:
   case OP_POW:
   case OP_SELP:
   case OP_SLCT:
   case OP_TXD:
   case OP_PRECONT:
   case OP_CONT:
   case OP_POPCNT:
   case OP_INSBF:
   case OP_EXTBF:
      ERROR("operation should have been lowered\n");
      return false;
   default:
      ERROR("unknown op: %u\n", insn->op);
      return false;
   }
   // join and exit modifiers occupy the two low bits of code[1];
   // join takes precedence when both are requested
   if (insn->join || insn->op == OP_JOIN)
      code[1] |= 0x2;
   else
   if (insn->exit || insn->op == OP_EXIT)
      code[1] |= 0x1;

   // bit 0 of the first word must be set exactly for 8-byte encodings
   assert((insn->encSize == 8) == (code[0] & 1));

   code += insn->encSize / 4;
   codeSize += insn->encSize;
   return true;
}
 
// Returns the smallest encoding size, in bytes, usable for @i: 8 whenever a
// property of the instruction rules out the short form, otherwise the
// minimum size reported by the target's op info.
uint32_t
CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
{
   const Target::OpInfo &info = targ->getOpInfo(i);

   if (info.minEncSize > 4)
      return 8;

   // destinations: only GPRs with an id of at most 63
   for (int d = 0; i->defExists(d); ++d) {
      const bool idTooLarge = i->def(d).rep()->reg.data.id > 63;
      if (idTooLarge || i->def(d).rep()->reg.file != FILE_GPR)
         return 8;
   }

   // sources: GPRs, or shader inputs in fragment programs, with id <= 63
   for (int s = 0; i->srcExists(s); ++s) {
      const DataFile file = i->src(s).getFile();
      const bool fileOk =
         file == FILE_GPR ||
         (file == FILE_SHADER_INPUT && progType == Program::TYPE_FRAGMENT);

      if (!fileOk || i->src(s).rep()->reg.data.id > 63)
         return 8;
   }

   // modifiers & rounding
   if (i->join || i->lanes != 0xf || i->exit)
      return 8;
   if (i->op == OP_MUL && i->rnd != ROUND_N)
      return 8;

   if (i->asTex())
      return 8; // TODO: short tex encoding

   // short MAD: float type only, and def 0 must share its id with source 2
   if (info.srcNr >= 2 && i->srcExists(2)) {
      const bool shortMadOk =
         i->defExists(0) && isFloatType(i->dType) &&
         i->def(0).rep()->reg.data.id == i->src(2).rep()->reg.data.id;

      if (!shortMadOk)
         return 8;
   }

   return info.minEncSize;
}
 
// Change the encoding size of an instruction after BBs have been scheduled.
//
// Promotes @insn from a 4-byte to an 8-byte encoding (no-op if it already
// is 8 bytes). A neighbouring 4-byte instruction is promoted too: the next
// instruction when an odd number of 4-byte instructions follows @insn,
// otherwise the previous one if it is 4 bytes. (Presumably this keeps short
// instructions paired — confirm against the layout code.)
//
// The total growth, 4 or 8 bytes, is added to the sizes of the function and
// of the instruction's basic block, and to the start position of every
// basic block that follows it in fn->bbArray.
static void
makeInstructionLong(Instruction *insn)
{
   if (insn->encSize == 8)
      return;
   Function *fn = insn->bb->getFunction();
   int n = 0;
   int adj = 4;

   // count the run of 4-byte instructions directly following insn
   for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);

   if (n & 1) {
      adj = 8;
      insn->next->encSize = 8;
   } else
   if (insn->prev && insn->prev->encSize == 4) {
      adj = 8;
      insn->prev->encSize = 8;
   }
   insn->encSize = 8;

   // Shift the following blocks by the full growth. A fixed 4 here would
   // leave their positions 4 bytes short whenever a neighbour was promoted
   // as well (adj == 8).
   for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
      fn->bbArray[i]->binPos += adj;
   }
   fn->binSize += adj;
   insn->bb->binSize += adj;
}
 
// Tries to turn @insn into the instruction that ends the program by setting
// its exit modifier. Returns false, leaving @insn untouched, when it is a
// discard or quad on/pop, has an immediate source, is a call, or is a
// predicated flow instruction. On success other flow instructions are
// replaced by OP_EXIT and the instruction is forced to the 8-byte encoding.
static bool
trySetExitModifier(Instruction *insn)
{
   switch (insn->op) {
   case OP_DISCARD:
   case OP_QUADON:
   case OP_QUADPOP:
      return false;
   default:
      break;
   }

   for (int s = 0; insn->srcExists(s); ++s) {
      if (insn->src(s).getFile() == FILE_IMMEDIATE)
         return false;
   }

   if (insn->asFlow()) {
      // calls have side effects !
      // cannot do conditional exit (or can we ?)
      if (insn->op == OP_CALL || insn->getPredicate())
         return false;
      insn->op = OP_EXIT;
   }

   insn->exit = 1;
   makeInstructionLong(insn);
   return true;
}
 
// Replaces the explicit OP_EXIT that ends the function's exit block by an
// exit modifier on the preceding instruction(s), then deletes the OP_EXIT
// and shrinks the block and function sizes by 8 bytes. Does nothing when
// the exit block does not end in OP_EXIT, and stops without deleting it as
// soon as one candidate instruction cannot take the modifier.
static void
replaceExitWithModifier(Function *func)
{
   BasicBlock *epilogue = BasicBlock::get(func->cfgExit);

   Instruction *last = epilogue->getExit();
   if (!last || last->op != OP_EXIT) // only main will use OP_EXIT
      return;

   if (epilogue->getEntry()->op == OP_EXIT) {
      // the exit block starts with the OP_EXIT: tag the last instruction of
      // every block on an edge incident to the exit node
      for (Graph::EdgeIterator ei = func->cfgExit->incident();
           !ei.end(); ei.next()) {
         BasicBlock *pred = BasicBlock::get(ei.getNode());
         Instruction *tail = pred->getExit();

         if (!tail || !trySetExitModifier(tail))
            return;
      }
   } else {
      // tag the instruction right before the OP_EXIT
      Instruction *before = last->prev;
      if (!before || !trySetExitModifier(before))
         return;
      before->exit = 1;
   }

   epilogue->binSize -= 8;
   func->binSize -= 8;
   delete_Instruction(func->getProgram(), epilogue->getExit());
}
 
// Runs the generic CodeEmitter preparation for @func, then tries to replace
// the function's trailing OP_EXIT by an exit modifier.
void
CodeEmitterNV50::prepareEmission(Function *func)
{
   CodeEmitter::prepareEmission(func);

   replaceExitWithModifier(func);
}
 
// Creates an emitter bound to @target. The output buffer (code, codeSize,
// codeSizeLimit) and the relocation info start out empty.
CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
   CodeEmitter(target), targNV50(target)
{
   targ = target; // specialized
   code = NULL;
   codeSize = codeSizeLimit = 0;
   relocInfo = NULL;
}
 
// Allocates a new NV50 code emitter for this target, configured for program
// type @type. The emitter is created with new and returned as a raw pointer.
CodeEmitter *
TargetNV50::getCodeEmitter(Program::Type type)
{
   CodeEmitterNV50 *const emitter = new CodeEmitterNV50(this);

   emitter->setProgramType(type);
   return emitter;
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
0,0 → 1,3097
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir_target_nvc0.h"
 
namespace nv50_ir {
 
// Argh, all these assertions ...
 
// Code emitter for the NVC0 target. It overrides the three CodeEmitter entry
// points (emitInstruction, getMinEncodingSize, prepareEmission); everything
// else is a private helper.
class CodeEmitterNVC0 : public CodeEmitter
{
public:
   CodeEmitterNVC0(const TargetNVC0 *);

   virtual bool emitInstruction(Instruction *);
   virtual uint32_t getMinEncodingSize(const Instruction *) const;
   virtual void prepareEmission(Function *);

   // records the program type the emitter is used for
   inline void setProgramType(Program::Type pType) { progType = pType; }

private:
   const TargetNVC0 *targNVC0;

   Program::Type progType;

   const bool writeIssueDelays;

private:
   // generic operand-form helpers
   void emitForm_A(const Instruction *, uint64_t);
   void emitForm_B(const Instruction *, uint64_t);
   void emitForm_S(const Instruction *, uint32_t, bool pred);

   void emitPredicate(const Instruction *);

   // address / immediate field helpers
   void setAddress16(const ValueRef&);
   void setAddress24(const ValueRef&);
   void setAddressByFile(const ValueRef&);
   void setImmediate(const Instruction *, const int s); // needs op already set
   void setImmediateS8(const ValueRef&);
   void setSUConst16(const Instruction *, const int s);
   void setSUPred(const Instruction *, const int s);

   // modifier / mode field helpers
   void emitCondCode(CondCode cc, int pos);
   void emitInterpMode(const Instruction *);
   void emitLoadStoreType(DataType ty);
   void emitSUGType(DataType);
   void emitCachingMode(CacheMode c);

   void emitShortSrc2(const ValueRef&);

   inline uint8_t getSRegEncoding(const ValueRef&);

   void roundMode_A(const Instruction *);
   void roundMode_C(const Instruction *);
   void roundMode_CS(const Instruction *);

   void emitNegAbs12(const Instruction *);

   // per-operation encoders
   void emitNOP(const Instruction *);

   void emitLOAD(const Instruction *);
   void emitSTORE(const Instruction *);
   void emitMOV(const Instruction *);
   void emitATOM(const Instruction *);
   void emitMEMBAR(const Instruction *);
   void emitCCTL(const Instruction *);

   void emitINTERP(const Instruction *);
   void emitPFETCH(const Instruction *);
   void emitVFETCH(const Instruction *);
   void emitEXPORT(const Instruction *);
   void emitOUT(const Instruction *);

   void emitUADD(const Instruction *);
   void emitFADD(const Instruction *);
   void emitDADD(const Instruction *);
   void emitUMUL(const Instruction *);
   void emitFMUL(const Instruction *);
   void emitDMUL(const Instruction *);
   void emitIMAD(const Instruction *);
   void emitISAD(const Instruction *);
   void emitFMAD(const Instruction *);
   void emitDMAD(const Instruction *);
   void emitMADSP(const Instruction *);

   void emitNOT(Instruction *);
   void emitLogicOp(const Instruction *, uint8_t subOp);
   void emitPOPC(const Instruction *);
   void emitINSBF(const Instruction *);
   void emitEXTBF(const Instruction *);
   void emitBFIND(const Instruction *);
   void emitPERMT(const Instruction *);
   void emitShift(const Instruction *);

   void emitSFnOp(const Instruction *, uint8_t subOp);

   void emitCVT(Instruction *);
   void emitMINMAX(const Instruction *);
   void emitPreOp(const Instruction *);

   void emitSET(const CmpInstruction *);
   void emitSLCT(const CmpInstruction *);
   void emitSELP(const Instruction *);

   void emitTEXBAR(const Instruction *);
   void emitTEX(const TexInstruction *);
   void emitTEXCSAA(const TexInstruction *);
   void emitTXQ(const TexInstruction *);

   void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);

   void emitFlow(const Instruction *);
   void emitBAR(const Instruction *);

   void emitSUCLAMPMode(uint16_t);
   void emitSUCalc(Instruction *);
   void emitSULDGB(const TexInstruction *);
   void emitSUSTGx(const TexInstruction *);

   void emitVSHL(const Instruction *);
   void emitVectorSubOp(const Instruction *);

   void emitPIXLD(const Instruction *);

   // register id field helpers; pos is the bit position of the field
   // (presumably counted across the instruction words — confirm)
   inline void defId(const ValueDef&, const int pos);
   inline void defId(const Instruction *, int d, const int pos);
   inline void srcId(const ValueRef&, const int pos);
   inline void srcId(const ValueRef *, const int pos);
   inline void srcId(const Instruction *, int s, const int pos);
   inline void srcAddr32(const ValueRef&, int pos, int shr);

   inline bool isLIMM(const ValueRef&, DataType ty);
};
 
// for better visibility
// HEX64(h, l) pastes two groups of hex digits into a single unsigned
// 64-bit literal: 0x<h><l>ULL.
#define HEX64(h, l) 0x##h##l##ULL

// Both macros expand to the same expression, the reg.data of the value's
// rep(); the two names only distinguish source and destination use.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
 
// OR the register id of @src into the instruction at bit @pos
// (pos / 32 selects the word). An empty reference is encoded as 63.
void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
{
   const int id = src.get() ? SDATA(src).id : 63;

   code[pos / 32] |= id << (pos % 32);
}
 
// Pointer flavour of srcId: a NULL @src is encoded as register id 63,
// otherwise the id of the referenced value is OR-ed in at bit @pos.
void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
{
   const int id = src ? SDATA(*src).id : 63;

   code[pos / 32] |= id << (pos % 32);
}
 
// OR the register id of source @s of @insn into the instruction at bit
// @pos; a source that does not exist is encoded as 63.
void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
{
   int id = 63;

   if (insn->srcExists(s))
      id = SDATA(insn->src(s)).id;

   code[pos / 32] |= id << (pos % 32);
}
 
// OR the offset of @src, shifted right by @shr, into the instruction
// starting at bit @pos. When the field starts inside the first word
// (0 < pos < 32) the bits that do not fit are continued in code[1].
void
CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
{
   const uint32_t addr = SDATA(src).offset >> shr;
   const bool spillsIntoWord1 = (pos != 0) && (pos < 32);

   code[pos / 32] |= addr << (pos % 32);

   if (spillsIntoWord1)
      code[1] |= addr >> (32 - pos);
}
 
// OR the register id of destination @def into the instruction at bit
// @pos (pos / 32 selects the word). An empty definition is encoded as 63.
void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
{
   const int id = def.get() ? DDATA(def).id : 63;

   code[pos / 32] |= id << (pos % 32);
}
 
// OR the register id of destination @d of @insn into the instruction at
// bit @pos; a destination that does not exist is encoded as 63.
void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos)
{
   int id = 63;

   if (insn->defExists(d))
      id = DDATA(insn->def(d)).id;

   code[pos / 32] |= id << (pos % 32);
}
 
bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
{
const ImmediateValue *imm = ref.get()->asImm();
 
return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
}
 
// Encode the rounding mode of @insn into code[1] starting at bit 23:
// ROUND_M -> 1, ROUND_P -> 2, ROUND_Z -> 3. ROUND_N writes nothing and
// any other mode trips the assertion.
void
CodeEmitterNVC0::roundMode_A(const Instruction *insn)
{
   if (insn->rnd == ROUND_M) {
      code[1] |= 1 << 23;
   } else
   if (insn->rnd == ROUND_P) {
      code[1] |= 2 << 23;
   } else
   if (insn->rnd == ROUND_Z) {
      code[1] |= 3 << 23;
   } else {
      assert(insn->rnd == ROUND_N);
   }
}
 
// Collect the abs/neg modifiers of sources 0 and 1 and OR them into
// code[0]: bit 6 = |src1|, bit 7 = |src0|, bit 8 = -src1, bit 9 = -src0.
void
CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
{
   uint32_t mods = 0;

   if (i->src(1).mod.abs())
      mods |= 0x040;
   if (i->src(0).mod.abs())
      mods |= 0x080;
   if (i->src(1).mod.neg())
      mods |= 0x100;
   if (i->src(0).mod.neg())
      mods |= 0x200;

   code[0] |= mods;
}
 
// Translate @cc to its numeric encoding and OR it into the instruction at
// bit @pos. Unknown codes assert and are encoded as 0.
void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
{
   uint8_t enc = 0;

   switch (cc) {
   // comparison results, listed by encoding
   case CC_FL:  enc = 0x0; break;
   case CC_LT:  enc = 0x1; break;
   case CC_EQ:  enc = 0x2; break;
   case CC_LE:  enc = 0x3; break;
   case CC_GT:  enc = 0x4; break;
   case CC_NE:  enc = 0x5; break;
   case CC_GE:  enc = 0x6; break;
   case CC_LTU: enc = 0x9; break;
   case CC_EQU: enc = 0xa; break;
   case CC_LEU: enc = 0xb; break;
   case CC_GTU: enc = 0xc; break;
   case CC_NEU: enc = 0xd; break;
   case CC_GEU: enc = 0xe; break;
   case CC_TR:  enc = 0xf; break;

   // remaining codes, listed by encoding
   case CC_NO:  enc = 0x10; break;
   case CC_NC:  enc = 0x11; break;
   case CC_NS:  enc = 0x12; break;
   case CC_NA:  enc = 0x13; break;
   case CC_A:   enc = 0x14; break;
   case CC_S:   enc = 0x15; break;
   case CC_C:   enc = 0x16; break;
   case CC_O:   enc = 0x17; break;

   default:
      assert(!"invalid condition code");
      break;
   }

   code[pos / 32] |= enc << (pos % 32);
}
 
// Fill the predicate field of code[0]. Without a predicate source the
// field at bits 10..12 is set to 7; otherwise the predicate register id is
// written at bit 10 and bit 13 is set when the condition is CC_NOT_P.
void
CodeEmitterNVC0::emitPredicate(const Instruction *i)
{
   if (i->predSrc < 0) {
      code[0] |= 7 << 10;
      return;
   }

   assert(i->getPredicate()->reg.file == FILE_PREDICATE);
   srcId(i->src(i->predSrc), 10);

   if (i->cc == CC_NOT_P)
      code[0] |= 1 << 13; // negate
}
 
// Pick the address encoder that matches the memory file of @src:
// global -> 32-bit address at bit 26, local/shared -> 24-bit address,
// anything else must be constant memory and gets a 16-bit address.
void
CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
{
   if (src.getFile() == FILE_MEMORY_GLOBAL) {
      srcAddr32(src, 26, 0);
      return;
   }

   if (src.getFile() == FILE_MEMORY_LOCAL ||
       src.getFile() == FILE_MEMORY_SHARED) {
      setAddress24(src);
      return;
   }

   assert(src.getFile() == FILE_MEMORY_CONST);
   setAddress16(src);
}
 
// Encode the low 16 bits of the symbol offset of @src: bits 0..5 go to
// code[0] bits 26..31, bits 6..15 go to code[1] bits 0..9.
void
CodeEmitterNVC0::setAddress16(const ValueRef& src)
{
   Symbol *sym = src.get()->asSym();

   assert(sym);

   const uint32_t low6 = sym->reg.data.offset & 0x003f;
   const uint32_t high10 = (sym->reg.data.offset & 0xffc0) >> 6;

   code[0] |= low6 << 26;
   code[1] |= high10;
}
 
// Encode the low 24 bits of the symbol offset of @src: bits 0..5 go to
// code[0] bits 26..31, bits 6..23 go to code[1] bits 0..17.
void
CodeEmitterNVC0::setAddress24(const ValueRef& src)
{
   Symbol *sym = src.get()->asSym();

   assert(sym);

   const uint32_t low6 = sym->reg.data.offset & 0x00003f;
   const uint32_t high18 = (sym->reg.data.offset & 0xffffc0) >> 6;

   code[0] |= low6 << 26;
   code[1] |= high18;
}
 
// Encode immediate source @s of @i. The low nibble of code[0], which the
// caller must already have set, selects the layout:
//   2      - long immediate: all 32 bits, split 6 / 26 across the words
//   3 or 4 - integer immediate: 20 bits, must be a sign/zero extension
//   other  - float immediate: the top 20 bits, low 12 bits must be zero
// The two short layouts also set code[1] bits 14..15 (0xc000).
void
CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
{
   const ImmediateValue *imm = i->src(s).get()->asImm();

   assert(imm);
   uint32_t u32 = imm->reg.data.u32;

   switch (code[0] & 0xf) {
   case 0x2:
      // LIMM
      code[0] |= (u32 & 0x3f) << 26;
      code[1] |= u32 >> 6;
      break;
   case 0x3:
   case 4:
      // integer immediate
      assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
      assert(!(code[1] & 0xc000));
      u32 &= 0xfffff;
      code[0] |= (u32 & 0x3f) << 26;
      code[1] |= 0xc000 | (u32 >> 6);
      break;
   default:
      // float immediate
      assert(!(u32 & 0x00000fff));
      assert(!(code[1] & 0xc000));
      code[0] |= ((u32 >> 12) & 0x3f) << 26;
      code[1] |= 0xc000 | (u32 >> 18);
      break;
   }
}
 
// Encode @ref as a signed 8-bit immediate in the short instruction form:
// bits 0..5 of the value go to code[0] bits 26..31 and bits 6..7 go to
// code[0] bits 8..9. The value must fit in 8 signed bits.
void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
{
   const ImmediateValue *imm = ref.get()->asImm();

   // same precondition setImmediate() checks before dereferencing
   assert(imm);

   int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);

   assert(s8 == imm->reg.data.s32);

   code[0] |= static_cast<uint32_t>(s8 & 0x3f) << 26;
   // Keep only the two remaining bits: for a negative value the arithmetic
   // shift yields -1 or -2, and shifting that left unmasked would OR ones
   // into every field above bit 8.
   code[0] |= static_cast<uint32_t>((s8 >> 6) & 0x3) << 8;
}
 
// Emit the common long (two word) operand layout: opcode template @opc,
// predicate, destination 0 and up to three sources.
void
CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
{
// low and high halves of the opcode template
code[0] = opc;
code[1] = opc >> 32;

emitPredicate(i);

defId(i->def(0), 14);

// Bit position used for a GPR in source slot 1: normally 26, but 49 when
// source 2 lives in constant memory.
int s1 = 26;
if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
s1 = 49;

for (int s = 0; s < 3 && i->srcExists(s); ++s) {
switch (i->getSrc(s)->reg.file) {
case FILE_MEMORY_CONST:
// code[1] bits 14..15 must still be clear; 0x8000 marks a constant in
// slot 2, 0x4000 a constant in any other slot
assert(!(code[1] & 0xc000));
code[1] |= (s == 2) ? 0x8000 : 0x4000;
code[1] |= i->getSrc(s)->reg.fileIndex << 10;
setAddress16(i->src(s));
break;
case FILE_IMMEDIATE:
// immediates are only accepted in slot 1, except for these three ops
assert(s == 1 ||
i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
assert(!(code[1] & 0xc000));
setImmediate(i, s);
break;
case FILE_GPR:
if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
break;
// slot 0 -> bit 20, slot 1 -> s1, slot 2 -> bit 49
srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
break;
default:
// ignore here, can be predicate or flags, but must not be address
break;
}
}
}
 
void
CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
{
code[0] = opc;
code[1] = opc >> 32;
 
emitPredicate(i);
 
defId(i->def(0), 14);
 
switch (i->src(0).getFile()) {
case FILE_MEMORY_CONST:
assert(!(code[1] & 0xc000));
code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
setAddress16(i->src(0));
break;
case FILE_IMMEDIATE:
assert(!(code[1] & 0xc000));
setImmediate(i, 0);
break;
case FILE_GPR:
srcId(i->src(0), 26);
break;
default:
// ignore here, can be predicate or flags, but must not be address
break;
}
}
 
// Emit the short (single word) operand layout: opcode @opc, destination 0
// at bit 14, source 0 at bit 20 and up to two further sources. @pred says
// whether this opcode carries a predicate field; if it does not, the
// instruction must not have a predicate source.
void
CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
{
code[0] = opc;

// For opcodes 0x0d and 0x0e the constant-space selector bits sit two
// positions lower than for the other short opcodes.
int ss2a = 0;
if (opc == 0x0d || opc == 0x0e)
ss2a = 2;

defId(i->def(0), 14);
srcId(i->src(0), 20);

assert(pred || (i->predSrc < 0));
if (pred)
emitPredicate(i);

for (int s = 1; s < 3 && i->srcExists(s); ++s) {
if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
// only one source may select a constant space
assert(!(code[0] & (0x300 >> ss2a)));
// only constant spaces 0, 1 and 16 have a short-form selector
switch (i->src(s).get()->reg.fileIndex) {
case 0: code[0] |= 0x100 >> ss2a; break;
case 1: code[0] |= 0x200 >> ss2a; break;
case 16: code[0] |= 0x300 >> ss2a; break;
default:
ERROR("invalid c[] space for short form\n");
break;
}
// the offset replaces the register field of the slot it occupies
if (s == 1)
code[0] |= i->getSrc(s)->reg.data.offset << 24;
else
code[0] |= i->getSrc(s)->reg.data.offset << 6;
} else
if (i->src(s).getFile() == FILE_IMMEDIATE) {
assert(s == 1);
setImmediateS8(i->src(s));
} else
if (i->src(s).getFile() == FILE_GPR) {
// source 1 -> bit 26, source 2 -> bit 8
srcId(i->src(s), (s == 1) ? 26 : 8);
}
}
}
 
// Encode @src into a short instruction at bit 20. A GPR is written as a
// register id; a constant-memory operand sets the space selector in bits
// 8..9 (spaces 0, 1 and 16 only) and writes its offset divided by 4.
void
CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
{
   if (src.getFile() != FILE_MEMORY_CONST) {
      srcId(src, 20);
      assert(src.getFile() == FILE_GPR);
      return;
   }

   const int space = src.get()->reg.fileIndex;

   if (space == 0) {
      code[0] |= 0x100;
   } else
   if (space == 1) {
      code[0] |= 0x200;
   } else
   if (space == 16) {
      code[0] |= 0x300;
   } else {
      assert(!"unsupported file index for short op");
   }

   srcAddr32(src, 20, 2);
}
 
// Emit a NOP: two fixed instruction words plus the predicate field of @i.
void
CodeEmitterNVC0::emitNOP(const Instruction *i)
{
   const uint32_t lo = 0x000001e4;
   const uint32_t hi = 0x40000000;

   code[0] = lo;
   code[1] = hi;

   emitPredicate(i);
}
 
void
CodeEmitterNVC0::emitFMAD(const Instruction *i)
{
bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
 
if (i->encSize == 8) {
if (isLIMM(i->src(1), TYPE_F32)) {
emitForm_A(i, HEX64(20000000, 00000002));
} else {
emitForm_A(i, HEX64(30000000, 00000000));
 
if (i->src(2).mod.neg())
code[0] |= 1 << 8;
}
roundMode_A(i);
 
if (neg1)
code[0] |= 1 << 9;
 
if (i->saturate)
code[0] |= 1 << 5;
if (i->ftz)
code[0] |= 1 << 6;
} else {
assert(!i->saturate && !i->src(2).mod.neg());
emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
false);
if (neg1)
code[0] |= 1 << 4;
}
}
 
void
CodeEmitterNVC0::emitDMAD(const Instruction *i)
{
bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
 
emitForm_A(i, HEX64(20000000, 00000001));
 
if (i->src(2).mod.neg())
code[0] |= 1 << 8;
 
roundMode_A(i);
 
if (neg1)
code[0] |= 1 << 9;
 
assert(!i->saturate);
assert(!i->ftz);
}
 
void
CodeEmitterNVC0::emitFMUL(const Instruction *i)
{
bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
 
assert(i->postFactor >= -3 && i->postFactor <= 3);
 
if (i->encSize == 8) {
if (isLIMM(i->src(1), TYPE_F32)) {
assert(i->postFactor == 0); // constant folded, hopefully
emitForm_A(i, HEX64(30000000, 00000002));
} else {
emitForm_A(i, HEX64(58000000, 00000000));
roundMode_A(i);
code[1] |= ((i->postFactor > 0) ?
(7 - i->postFactor) : (0 - i->postFactor)) << 17;
}
if (neg)
code[1] ^= 1 << 25; // aliases with LIMM sign bit
 
if (i->saturate)
code[0] |= 1 << 5;
 
if (i->dnz)
code[0] |= 1 << 7;
else
if (i->ftz)
code[0] |= 1 << 6;
} else {
assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
emitForm_S(i, 0xa8, true);
}
}
 
void
CodeEmitterNVC0::emitDMUL(const Instruction *i)
{
bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
 
emitForm_A(i, HEX64(50000000, 00000001));
roundMode_A(i);
 
if (neg)
code[0] |= 1 << 9;
 
assert(!i->saturate);
assert(!i->ftz);
assert(!i->dnz);
assert(!i->postFactor);
}
 
// Emit an integer multiply. The long form encodes the "high" sub-op
// (bit 6) and the signedness of source (bit 5) and destination (bit 7);
// the short form only encodes a signed source type (bit 6).
void
CodeEmitterNVC0::emitUMUL(const Instruction *i)
{
   if (i->encSize != 8) {
      emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);

      if (i->sType == TYPE_S32)
         code[0] |= 0x40;
      return;
   }

   if (i->src(1).getFile() == FILE_IMMEDIATE)
      emitForm_A(i, HEX64(10000000, 00000002));
   else
      emitForm_A(i, HEX64(50000000, 00000003));

   uint32_t flags = 0;
   if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
      flags |= 0x40;
   if (i->sType == TYPE_S32)
      flags |= 0x20;
   if (i->dType == TYPE_S32)
      flags |= 0x80;
   code[0] |= flags;
}
 
void
CodeEmitterNVC0::emitFADD(const Instruction *i)
{
if (i->encSize == 8) {
if (isLIMM(i->src(1), TYPE_F32)) {
assert(!i->saturate);
emitForm_A(i, HEX64(28000000, 00000002));
 
code[0] |= i->src(0).mod.abs() << 7;
code[0] |= i->src(0).mod.neg() << 9;
 
if (i->src(1).mod.abs())
code[1] &= 0xfdffffff;
if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
code[1] ^= 0x02000000;
} else {
emitForm_A(i, HEX64(50000000, 00000000));
 
roundMode_A(i);
if (i->saturate)
code[1] |= 1 << 17;
 
emitNegAbs12(i);
if (i->op == OP_SUB) code[0] ^= 1 << 8;
}
if (i->ftz)
code[0] |= 1 << 5;
} else {
assert(!i->saturate && i->op != OP_SUB &&
!i->src(0).mod.abs() &&
!i->src(1).mod.neg() && !i->src(1).mod.abs());
 
emitForm_S(i, 0x49, true);
 
if (i->src(0).mod.neg())
code[0] |= 1 << 7;
}
}
 
// Emit a double-precision add or subtract (long encoding only). OP_SUB is
// expressed by flipping the negate bit of source 1 (bit 8).
void
CodeEmitterNVC0::emitDADD(const Instruction *i)
{
   const bool isSub = (i->op == OP_SUB);

   assert(i->encSize == 8);
   emitForm_A(i, HEX64(48000000, 00000001));
   roundMode_A(i);
   assert(!i->saturate);
   assert(!i->ftz);
   emitNegAbs12(i);

   if (isSub)
      code[0] ^= 0x100;
}
 
void
CodeEmitterNVC0::emitUADD(const Instruction *i)
{
uint32_t addOp = 0;
 
assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
 
if (i->src(0).mod.neg())
addOp |= 0x200;
if (i->src(1).mod.neg())
addOp |= 0x100;
if (i->op == OP_SUB) {
addOp ^= 0x100;
assert(addOp != 0x300); // would be add-plus-one
}
 
if (i->encSize == 8) {
if (isLIMM(i->src(1), TYPE_U32)) {
emitForm_A(i, HEX64(08000000, 00000002));
if (i->defExists(1))
code[1] |= 1 << 26; // write carry
} else {
emitForm_A(i, HEX64(48000000, 00000003));
if (i->defExists(1))
code[1] |= 1 << 16; // write carry
}
code[0] |= addOp;
 
if (i->saturate)
code[0] |= 1 << 5;
if (i->flagsSrc >= 0) // add carry
code[0] |= 1 << 6;
} else {
assert(!(addOp & 0x100));
emitForm_S(i, (addOp >> 3) |
((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
}
}
 
// TODO: shl-add
void
CodeEmitterNVC0::emitIMAD(const Instruction *i)
{
assert(i->encSize == 8);
emitForm_A(i, HEX64(20000000, 00000003));
 
if (isSignedType(i->dType))
code[0] |= 1 << 7;
if (isSignedType(i->sType))
code[0] |= 1 << 5;
 
code[1] |= i->saturate << 24;
 
if (i->flagsDef >= 0) code[1] |= 1 << 16;
if (i->flagsSrc >= 0) code[1] |= 1 << 23;
 
if (i->src(2).mod.neg()) code[0] |= 0x10;
if (i->src(1).mod.neg() ^
i->src(0).mod.neg()) code[0] |= 0x20;
 
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
code[0] |= 1 << 6;
}
 
void
CodeEmitterNVC0::emitMADSP(const Instruction *i)
{
assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
 
emitForm_A(i, HEX64(00000000, 00000003));
 
if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
code[1] |= 0x01800000;
} else {
code[0] |= (i->subOp & 0x00f) << 7;
code[0] |= (i->subOp & 0x0f0) << 1;
code[0] |= (i->subOp & 0x100) >> 3;
code[0] |= (i->subOp & 0x200) >> 2;
code[1] |= (i->subOp & 0xc00) << 13;
}
 
if (i->flagsDef >= 0)
code[1] |= 1 << 16;
}
 
void
CodeEmitterNVC0::emitISAD(const Instruction *i)
{
assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
assert(i->encSize == 8);
 
emitForm_A(i, HEX64(38000000, 00000003));
 
if (i->dType == TYPE_S32)
code[0] |= 1 << 5;
}
 
// Emit a bitwise NOT (long encoding only). Source 0 is first duplicated
// into source slot 1 so that the two-source layout can be used.
void
CodeEmitterNVC0::emitNOT(Instruction *i)
{
   const uint64_t opc = HEX64(68000000, 000001c3);

   assert(i->encSize == 8);

   i->setSrc(1, i->src(0));
   emitForm_A(i, opc);
}
 
void
CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
{
if (i->def(0).getFile() == FILE_PREDICATE) {
code[0] = 0x00000004 | (subOp << 30);
code[1] = 0x0c000000;
 
emitPredicate(i);
 
defId(i->def(0), 17);
srcId(i->src(0), 20);
if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
srcId(i->src(1), 26);
if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
 
if (i->defExists(1)) {
defId(i->def(1), 14);
} else {
code[0] |= 7 << 14;
}
// (a OP b) OP c
if (i->predSrc != 2 && i->srcExists(2)) {
code[1] |= subOp << 21;
srcId(i->src(2), 17);
if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20;
} else {
code[1] |= 0x000e0000;
}
} else
if (i->encSize == 8) {
if (isLIMM(i->src(1), TYPE_U32)) {
emitForm_A(i, HEX64(38000000, 00000002));
 
if (i->flagsDef >= 0)
code[1] |= 1 << 26;
} else {
emitForm_A(i, HEX64(68000000, 00000003));
 
if (i->flagsDef >= 0)
code[1] |= 1 << 16;
}
code[0] |= subOp << 6;
 
if (i->flagsSrc >= 0) // carry
code[0] |= 1 << 5;
 
if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
} else {
emitForm_S(i, (subOp << 5) |
((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
}
}
 
// Emit a population count. Bits 9 and 8 invert sources 0 and 1
// respectively when they carry a NOT modifier.
void
CodeEmitterNVC0::emitPOPC(const Instruction *i)
{
   emitForm_A(i, HEX64(54000000, 00000004));

   if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
      code[0] |= 0x200;
   if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
      code[0] |= 0x100;
}
 
// Emit a bitfield insert; the operand layout is entirely handled by
// emitForm_A.
void
CodeEmitterNVC0::emitINSBF(const Instruction *i)
{
   const uint64_t opc = HEX64(28000000, 00000003);

   emitForm_A(i, opc);
}
 
void
CodeEmitterNVC0::emitEXTBF(const Instruction *i)
{
emitForm_A(i, HEX64(70000000, 00000003));
 
if (i->dType == TYPE_S32)
code[0] |= 1 << 5;
if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
code[0] |= 1 << 8;
}
 
void
CodeEmitterNVC0::emitBFIND(const Instruction *i)
{
emitForm_B(i, HEX64(78000000, 00000003));
 
if (i->dType == TYPE_S32)
code[0] |= 1 << 5;
if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
code[0] |= 1 << 8;
if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
code[0] |= 1 << 6;
}
 
void
CodeEmitterNVC0::emitPERMT(const Instruction *i)
{
emitForm_A(i, HEX64(24000000, 00000004));
 
code[0] |= i->subOp << 5;
}
 
void
CodeEmitterNVC0::emitShift(const Instruction *i)
{
if (i->op == OP_SHR) {
emitForm_A(i, HEX64(58000000, 00000003)
| (isSignedType(i->dType) ? 0x20 : 0x00));
} else {
emitForm_A(i, HEX64(60000000, 00000003));
}
 
if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
code[0] |= 1 << 9;
}
 
// Emit a pre-operation (OP_PREEX2, or the other pre-op otherwise). The
// long form encodes the abs/neg modifiers of source 0; the short form
// only distinguishes the two opcodes.
void
CodeEmitterNVC0::emitPreOp(const Instruction *i)
{
   const bool isPreEx2 = (i->op == OP_PREEX2);

   if (i->encSize != 8) {
      emitForm_S(i, isPreEx2 ? 0x74000008 : 0x70000008, true);
      return;
   }

   emitForm_B(i, HEX64(60000000, 00000000));

   if (isPreEx2)
      code[0] |= 0x20;

   if (i->src(0).mod.abs())
      code[0] |= 0x040;
   if (i->src(0).mod.neg())
      code[0] |= 0x100;
}
 
void
CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
{
if (i->encSize == 8) {
code[0] = 0x00000000 | (subOp << 26);
code[1] = 0xc8000000;
 
emitPredicate(i);
 
defId(i->def(0), 14);
srcId(i->src(0), 20);
 
assert(i->src(0).getFile() == FILE_GPR);
 
if (i->saturate) code[0] |= 1 << 5;
 
if (i->src(0).mod.abs()) code[0] |= 1 << 7;
if (i->src(0).mod.neg()) code[0] |= 1 << 9;
} else {
emitForm_S(i, 0x80000008 | (subOp << 26), true);
 
assert(!i->src(0).mod.neg());
if (i->src(0).mod.abs()) code[0] |= 1 << 30;
}
}
 
void
CodeEmitterNVC0::emitMINMAX(const Instruction *i)
{
uint64_t op;
 
assert(i->encSize == 8);
 
op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
 
if (i->ftz)
op |= 1 << 5;
else
if (!isFloatType(i->dType))
op |= isSignedType(i->dType) ? 0x23 : 0x03;
if (i->dType == TYPE_F64)
op |= 0x01;
 
emitForm_A(i, op);
emitNegAbs12(i);
}
 
void
CodeEmitterNVC0::roundMode_C(const Instruction *i)
{
switch (i->rnd) {
case ROUND_M: code[1] |= 1 << 17; break;
case ROUND_P: code[1] |= 2 << 17; break;
case ROUND_Z: code[1] |= 3 << 17; break;
case ROUND_NI: code[0] |= 1 << 7; break;
case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
case ROUND_N: break;
default:
assert(!"invalid round mode");
break;
}
}
 
void
CodeEmitterNVC0::roundMode_CS(const Instruction *i)
{
switch (i->rnd) {
case ROUND_M:
case ROUND_MI: code[0] |= 1 << 16; break;
case ROUND_P:
case ROUND_PI: code[0] |= 2 << 16; break;
case ROUND_Z:
case ROUND_ZI: code[0] |= 3 << 16; break;
default:
break;
}
}
 
// Emit a conversion. Besides OP_CVT-style conversions this also handles
// CEIL/FLOOR/TRUNC (by rewriting the rounding mode of @i) and SAT/ABS/NEG
// (by turning them into the matching modifier bits).
void
CodeEmitterNVC0::emitCVT(Instruction *i)
{
const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
DataType dType;

// Rounding ops overwrite i->rnd: float-to-float uses the *I round modes,
// everything else the plain ones.
switch (i->op) {
case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;
case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
default:
break;
}

// the operation itself counts like the corresponding modifier
const bool sat = (i->op == OP_SAT) || i->saturate;
const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();

// a NEG with an unsigned 32-bit destination is encoded as signed
if (i->op == OP_NEG && i->dType == TYPE_U32)
dType = TYPE_S32;
else
dType = i->dType;

if (i->encSize == 8) {
emitForm_B(i, HEX64(10000000, 00000004));

roundMode_C(i);

// cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
// log2 of the destination size at bit 20, of the source size at bit 23
code[0] |= util_logbase2(typeSizeof(dType)) << 20;
code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;

if (sat)
code[0] |= 0x20;
if (abs)
code[0] |= 1 << 6;
// the negate bit is not written for OP_ABS
if (neg && i->op != OP_ABS)
code[0] |= 1 << 8;

if (i->ftz)
code[1] |= 1 << 23;

// signedness of destination (bit 7) and source (bit 9)
if (isSignedIntType(dType))
code[0] |= 0x080;
if (isSignedIntType(i->sType))
code[0] |= 0x200;

// code[1] bits 26..27 encode the float/integer combination:
// float<-float leaves them clear, float<-int sets 0x08000000,
// int<-float sets 0x04000000 and int<-int sets both
if (isFloatType(dType)) {
if (!isFloatType(i->sType))
code[1] |= 0x08000000;
} else {
if (isFloatType(i->sType))
code[1] |= 0x04000000;
else
code[1] |= 0x0c000000;
}
} else {
// Short form: only code[0] is written and the opcode is chosen from the
// operation and the float/integer combination.
if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
code[0] = 0x298;
} else
if (isFloatType(dType)) {
if (isFloatType(i->sType))
code[0] = 0x098;
else
code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
} else {
// integer destination: the source must be a float
assert(isFloatType(i->sType));

code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
}

// NOTE(review): the neg bit (16) is also the low bit of the rounding
// field written by roundMode_CS below; confirm this overlap is intended.
if (neg) code[0] |= 1 << 16;
if (sat) code[0] |= 1 << 18;
if (abs) code[0] |= 1 << 19;

roundMode_CS(i);
}
}
 
// Emit a comparison (OP_SET) or a comparison combined with a third
// predicate source (OP_SET_AND / OP_SET_OR / OP_SET_XOR). The destination
// can be a GPR or a predicate.
void
CodeEmitterNVC0::emitSET(const CmpInstruction *i)
{
uint32_t hi;
uint32_t lo = 0;

// low opcode bits: 0 for 32-bit float, 1 for TYPE_F64, 3 for integer
// source types
if (i->sType == TYPE_F64)
lo = 0x1;
else
if (!isFloatType(i->sType))
lo = 0x3;

// bit 5 is set for a float destination type or a signed integer source
if (isFloatType(i->dType) || isSignedIntType(i->sType))
lo |= 0x20;

// The high word selects how the result is combined with source 2; the
// default value also fills the field at code[1] bits 17..19 with 7.
switch (i->op) {
case OP_SET_AND: hi = 0x10000000; break;
case OP_SET_OR: hi = 0x10200000; break;
case OP_SET_XOR: hi = 0x10400000; break;
default:
hi = 0x100e0000;
break;
}
emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);

// for the combining variants, source 2 goes to bit 17 of the second word
if (i->op != OP_SET)
srcId(i->src(2), 32 + 17);

if (i->def(0).getFile() == FILE_PREDICATE) {
// predicate destination: adjust the opcode in code[1] by addition
if (i->sType == TYPE_F32)
code[1] += 0x10000000;
else
code[1] += 0x08000000;

// Replace the GPR destination written by emitForm_A: clear bits 14..19,
// put def(0) at bit 17 and def(1) (or 7 if absent) at bit 14.
code[0] &= ~0xfc000;
defId(i->def(0), 17);
if (i->defExists(1))
defId(i->def(1), 14);
else
code[0] |= 0x1c000;
}

if (i->ftz)
code[1] |= 1 << 27;

// comparison condition at bit 23 of the second word
emitCondCode(i->setCond, 32 + 23);
emitNegAbs12(i);
}
 
void
CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
{
uint64_t op;
 
switch (i->dType) {
case TYPE_S32:
op = HEX64(30000000, 00000023);
break;
case TYPE_U32:
op = HEX64(30000000, 00000003);
break;
case TYPE_F32:
op = HEX64(38000000, 00000000);
break;
default:
assert(!"invalid type for SLCT");
op = 0;
break;
}
emitForm_A(i, op);
 
CondCode cc = i->setCond;
 
if (i->src(2).mod.neg())
cc = reverseCondCode(cc);
 
emitCondCode(cc, 32 + 23);
 
if (i->ftz)
code[0] |= 1 << 5;
}
 
void CodeEmitterNVC0::emitSELP(const Instruction *i)
{
emitForm_A(i, HEX64(20000000, 00000004));
 
if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 20;
}
 
// Emit a texture barrier. The sub-operation is stored at bit 26 and a
// condition code at bit 5: the instruction's own code when it has a flags
// source, CC_ALWAYS otherwise.
void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
{
   code[0] = 0x00000006 | (i->subOp << 26);
   code[1] = 0xf0000000;

   emitPredicate(i);

   if (i->flagsSrc >= 0)
      emitCondCode(i->cc, 5);
   else
      emitCondCode(CC_ALWAYS, 5);
}
 
void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
{
code[0] = 0x00000086;
code[1] = 0xd0000000;
 
code[1] |= i->tex.r;
code[1] |= i->tex.s << 8;
 
if (i->tex.liveOnly)
code[0] |= 1 << 9;
 
defId(i->def(0), 14);
srcId(i->src(0), 20);
}
 
// Returns true when the instruction following @i is a texture operation
// whose first two sources do not interfere with the first destination of
// @i (a missing second source counts as non-interfering).
static inline bool
isNextIndependentTex(const TexInstruction *i)
{
   if (i->next && isTextureOp(i->next->op)) {
      if (!i->getDef(0)->interfers(i->next->getSrc(0))) {
         if (!i->next->srcExists(1))
            return true;
         return !i->getDef(0)->interfers(i->next->getSrc(1));
      }
   }
   return false;
}
 
// Emit a texture instruction (TEX, TXB, TXL, TXF, TXG, TXLQ or TXD).
void
CodeEmitterNVC0::emitTEX(const TexInstruction *i)
{
code[0] = 0x00000006;

// mode depends on whether the following instruction is an independent
// texture op (see isNextIndependentTex)
if (isNextIndependentTex(i))
code[0] |= 0x080; // t mode
else
code[0] |= 0x100; // p mode

if (i->tex.liveOnly)
code[0] |= 1 << 9;

// opcode word
switch (i->op) {
case OP_TEX: code[1] = 0x80000000; break;
case OP_TXB: code[1] = 0x84000000; break;
case OP_TXL: code[1] = 0x86000000; break;
case OP_TXF: code[1] = 0x90000000; break;
case OP_TXG: code[1] = 0xa0000000; break;
case OP_TXLQ: code[1] = 0xb0000000; break;
case OP_TXD: code[1] = 0xe0000000; break;
default:
assert(!"invalid texture op");
break;
}
// code[1] bit 25: for TXF it is set when levelZero is NOT requested, for
// every other op it is set when levelZero IS requested
if (i->op == OP_TXF) {
if (!i->tex.levelZero)
code[1] |= 0x02000000;
} else
if (i->tex.levelZero) {
code[1] |= 0x02000000;
}

if (i->op != OP_TXD && i->tex.derivAll)
code[1] |= 1 << 13;

defId(i->def(0), 14);
srcId(i->src(0), 20);

emitPredicate(i);

if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;

code[1] |= i->tex.mask << 14;

code[1] |= i->tex.r;
code[1] |= i->tex.s << 8;
if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
code[1] |= 1 << 18; // in 1st source (with array index)

// texture target:
// (dimension - 1) at bit 20; cube targets add 2 to that field
code[1] |= (i->tex.target.getDim() - 1) << 20;
if (i->tex.target.isCube())
code[1] += 2 << 20;
if (i->tex.target.isArray())
code[1] |= 1 << 19;
if (i->tex.target.isShadow())
code[1] |= 1 << 24;

const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)

// an immediate second source clears one opcode bit again for TXL / TXF
if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
// lzero
if (i->op == OP_TXL)
code[1] &= ~(1 << 26);
else
if (i->op == OP_TXF)
code[1] &= ~(1 << 25);
}
if (i->tex.target == TEX_TARGET_2D_MS ||
i->tex.target == TEX_TARGET_2D_MS_ARRAY)
code[1] |= 1 << 23;

// NOTE(review): useOffsets == 4 sets the same bit (23) as the
// multisample targets above; confirm this is intended.
if (i->tex.useOffsets == 1)
code[1] |= 1 << 22;
if (i->tex.useOffsets == 4)
code[1] |= 1 << 23;

// second source register (63 if it does not exist)
srcId(i, src1, 26);
}
 
void
CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
{
code[0] = 0x00000086;
code[1] = 0xc0000000;
 
switch (i->tex.query) {
case TXQ_DIMS: code[1] |= 0 << 22; break;
case TXQ_TYPE: code[1] |= 1 << 22; break;
case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
case TXQ_FILTER: code[1] |= 3 << 22; break;
case TXQ_LOD: code[1] |= 4 << 22; break;
case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;
default:
assert(!"invalid texture query");
break;
}
 
code[1] |= i->tex.mask << 14;
 
code[1] |= i->tex.r;
code[1] |= i->tex.s << 8;
if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
code[1] |= 1 << 18;
 
const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
 
defId(i->def(0), 14);
srcId(i->src(0), 20);
srcId(i, src1, 26);
 
emitPredicate(i);
}
 
void
CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
{
code[0] = 0x00000000 | (laneMask << 6);
code[1] = 0x48000000 | qOp;
 
defId(i->def(0), 14);
srcId(i->src(0), 20);
srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
 
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
code[0] |= 1 << 9; // dall
 
emitPredicate(i);
}
 
// Emit a control-flow instruction (branches, calls, exits, the PRE*
// setup ops and the quad/breakpoint ops).
void
CodeEmitterNVC0::emitFlow(const Instruction *i)
{
const FlowInstruction *f = i->asFlow();

unsigned mask; // bit 0: predicate, bit 1: target

code[0] = 0x00000007;

// Pick the opcode word and record which fields this op carries.
// NOTE(review): OP_BRA and OP_CALL dereference f before the `if (!f)`
// check further down; presumably they are always FlowInstructions -
// confirm.
switch (i->op) {
case OP_BRA:
code[1] = f->absolute ? 0x00000000 : 0x40000000;
if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
code[0] |= 0x4000;
mask = 3;
break;
case OP_CALL:
code[1] = f->absolute ? 0x10000000 : 0x50000000;
if (f->indirect)
code[0] |= 0x4000; // indirect calls always use c[] source
mask = 2;
break;

case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
case OP_RET: code[1] = 0x90000000; mask = 1; break;
case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
case OP_CONT: code[1] = 0xb0000000; mask = 1; break;

case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
case OP_PRERET: code[1] = 0x78000000; mask = 2; break;

case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
default:
assert(!"invalid flow operation");
return;
}

// predicated ops: without a flags source, bits 5..8 are all set
if (mask & 1) {
emitPredicate(i);
if (i->flagsSrc < 0)
code[0] |= 0x1e0;
}

// everything below needs the FlowInstruction fields
if (!f)
return;

if (f->allWarp)
code[0] |= 1 << 15;
if (f->limit)
code[0] |= 1 << 16;

if (f->indirect) {
// bit 14 (set above) means the target is read from constant memory
if (code[0] & 0x4000) {
assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
setAddress16(i->src(0));
code[1] |= i->getSrc(0)->reg.fileIndex << 10;
if (f->op == OP_BRA)
srcId(f->src(0).getIndirect(0), 20);
} else {
srcId(f, 0, 20);
}
}

if (f->op == OP_CALL) {
if (f->indirect) {
// nothing
} else
if (f->builtin) {
// builtin calls are absolute; the address is filled in through two
// relocations, one per instruction word
assert(f->absolute);
uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
} else {
// regular calls are relative to the end of this 8-byte instruction
assert(!f->absolute);
int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
code[0] |= (pcRel & 0x3f) << 26;
code[1] |= (pcRel >> 6) & 0x3ffff;
}
} else
if (mask & 2) {
// branch-like ops: relative offset to the target basic block
int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
// currently we don't want absolute branches
assert(!f->absolute);
code[0] |= (pcRel & 0x3f) << 26;
code[1] |= (pcRel >> 6) & 0x3ffff;
}
}
 
void
CodeEmitterNVC0::emitBAR(const Instruction *i)
{
Value *rDef = NULL, *pDef = NULL;
 
switch (i->subOp) {
case NV50_IR_SUBOP_BAR_ARRIVE: code[0] = 0x84; break;
case NV50_IR_SUBOP_BAR_RED_AND: code[0] = 0x24; break;
case NV50_IR_SUBOP_BAR_RED_OR: code[0] = 0x44; break;
case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
default:
code[0] = 0x04;
assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
break;
}
code[1] = 0x50000000;
 
code[0] |= 63 << 14;
code[1] |= 7 << 21;
 
emitPredicate(i);
 
// barrier id
if (i->src(0).getFile() == FILE_GPR) {
srcId(i->src(0), 20);
} else {
ImmediateValue *imm = i->getSrc(0)->asImm();
assert(imm);
code[0] |= imm->reg.data.u32 << 20;
}
 
// thread count
if (i->src(1).getFile() == FILE_GPR) {
srcId(i->src(1), 26);
} else {
ImmediateValue *imm = i->getSrc(1)->asImm();
assert(imm);
code[0] |= imm->reg.data.u32 << 26;
code[1] |= imm->reg.data.u32 >> 6;
}
 
if (i->srcExists(2) && (i->predSrc != 2)) {
srcId(i->src(2), 32 + 17);
if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
code[1] |= 1 << 20;
} else {
code[1] |= 7 << 17;
}
 
if (i->defExists(0)) {
if (i->def(0).getFile() == FILE_GPR)
rDef = i->getDef(0);
else
pDef = i->getDef(0);
 
if (i->defExists(1)) {
if (i->def(1).getFile() == FILE_GPR)
rDef = i->getDef(1);
else
pDef = i->getDef(1);
}
}
if (rDef) {
code[0] &= ~(63 << 14);
defId(rDef, 14);
}
if (pDef) {
code[1] &= ~(7 << 21);
defId(pDef, 32 + 21);
}
}
 
void
CodeEmitterNVC0::emitPFETCH(const Instruction *i)
{
uint32_t prim = i->src(0).get()->reg.data.u32;
 
code[0] = 0x00000006 | ((prim & 0x3f) << 26);
code[1] = 0x00000000 | (prim >> 6);
 
emitPredicate(i);
 
const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
 
defId(i->def(0), 14);
srcId(i, src1, 20);
}
 
void
CodeEmitterNVC0::emitVFETCH(const Instruction *i)
{
code[0] = 0x00000006;
code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
 
if (i->perPatch)
code[0] |= 0x100;
if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
 
emitPredicate(i);
 
code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
 
defId(i->def(0), 14);
srcId(i->src(0).getIndirect(0), 20);
srcId(i->src(0).getIndirect(1), 26); // vertex address
}
 
void
CodeEmitterNVC0::emitEXPORT(const Instruction *i)
{
unsigned int size = typeSizeof(i->dType);
 
code[0] = 0x00000006 | ((size / 4 - 1) << 5);
code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
 
assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
 
if (i->perPatch)
code[0] |= 0x100;
 
emitPredicate(i);
 
assert(i->src(1).getFile() == FILE_GPR);
 
srcId(i->src(0).getIndirect(0), 20);
srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
srcId(i->src(1), 26);
}
 
void
CodeEmitterNVC0::emitOUT(const Instruction *i)
{
code[0] = 0x00000006;
code[1] = 0x1c000000;
 
emitPredicate(i);
 
defId(i->def(0), 14); // new secret address
srcId(i->src(0), 20); // old secret address, should be 0 initially
 
assert(i->src(0).getFile() == FILE_GPR);
 
if (i->op == OP_EMIT)
code[0] |= 1 << 5;
if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
code[0] |= 1 << 6;
 
// vertex stream
if (i->src(1).getFile() == FILE_IMMEDIATE) {
unsigned int stream = SDATA(i->src(1)).u32;
assert(stream < 4);
if (stream) {
code[1] |= 0xc000;
code[0] |= stream << 26;
} else {
srcId(NULL, 26);
}
} else {
srcId(i->src(1), 26);
}
}
 
// OR the interpolation mode bits into code[0]; the long (8-byte) and short
// encodings use different fields.
void
CodeEmitterNVC0::emitInterpMode(const Instruction *i)
{
   if (i->encSize == 8) {
      code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
      return;
   }

   // short encoding: only the SC interpolation mode has a bit of its own
   if (i->getInterpMode() == NV50_IR_INTERP_SC)
      code[0] |= 0x80;
   assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
}
 
void
CodeEmitterNVC0::emitINTERP(const Instruction *i)
{
const uint32_t base = i->getSrc(0)->reg.data.offset;
 
if (i->encSize == 8) {
code[0] = 0x00000000;
code[1] = 0xc0000000 | (base & 0xffff);
 
if (i->saturate)
code[0] |= 1 << 5;
 
if (i->op == OP_PINTERP)
srcId(i->src(1), 26);
else
code[0] |= 0x3f << 26;
 
srcId(i->src(0).getIndirect(0), 20);
} else {
assert(i->op == OP_PINTERP);
code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
srcId(i->src(1), 20);
}
emitInterpMode(i);
 
emitPredicate(i);
defId(i->def(0), 14);
 
if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);
else
code[1] |= 0x3f << 17;
}
 
// OR the access-type field for a load/store into code[0]. Unknown types
// assert and fall back to the 32-bit encoding.
void
CodeEmitterNVC0::emitLoadStoreType(DataType ty)
{
   uint8_t typeBits = 0x80; // 32-bit access, also the fallback

   switch (ty) {
   case TYPE_U8:
      typeBits = 0x00;
      break;
   case TYPE_S8:
      typeBits = 0x20;
      break;
   case TYPE_F16:
   case TYPE_U16:
      typeBits = 0x40;
      break;
   case TYPE_S16:
      typeBits = 0x60;
      break;
   case TYPE_F32:
   case TYPE_U32:
   case TYPE_S32:
      break;
   case TYPE_F64:
   case TYPE_U64:
   case TYPE_S64:
      typeBits = 0xa0;
      break;
   case TYPE_B128:
      typeBits = 0xc0;
      break;
   default:
      assert(!"invalid type");
      break;
   }
   code[0] |= typeBits;
}
 
// OR the cache-mode field into code[0]. Unknown modes assert and encode as
// 0, the same value as CACHE_CA.
void
CodeEmitterNVC0::emitCachingMode(CacheMode c)
{
   uint32_t modeBits = 0x000;

   switch (c) {
   case CACHE_CA:
   // case CACHE_WB:
      break;
   case CACHE_CG:
      modeBits = 0x100;
      break;
   case CACHE_CS:
      modeBits = 0x200;
      break;
   case CACHE_CV:
   // case CACHE_WT:
      modeBits = 0x300;
      break;
   default:
      assert(!"invalid caching mode");
      break;
   }
   code[0] |= modeBits;
}
 
// True when source 0 addresses global memory through an indirect register
// that is 8 bytes wide.
static inline bool
uses64bitAddress(const Instruction *ldst)
{
   if (ldst->src(0).getFile() != FILE_MEMORY_GLOBAL)
      return false;
   if (!ldst->src(0).isIndirect(0))
      return false;
   return ldst->getIndirect(0, 0)->reg.size == 8;
}
 
void
CodeEmitterNVC0::emitSTORE(const Instruction *i)
{
uint32_t opc;
 
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
default:
assert(!"invalid memory file");
opc = 0;
break;
}
code[0] = 0x00000005;
code[1] = opc;
 
setAddressByFile(i->src(0));
srcId(i->src(1), 14);
srcId(i->src(0).getIndirect(0), 20);
if (uses64bitAddress(i))
code[1] |= 1 << 26;
 
emitPredicate(i);
 
emitLoadStoreType(i->dType);
emitCachingMode(i->cache);
}
 
void
CodeEmitterNVC0::emitLOAD(const Instruction *i)
{
uint32_t opc;
 
code[0] = 0x00000005;
 
switch (i->src(0).getFile()) {
case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
case FILE_MEMORY_CONST:
if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
emitMOV(i); // not sure if this is any better
return;
}
opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
code[0] = 0x00000006 | (i->subOp << 8);
break;
default:
assert(!"invalid memory file");
opc = 0;
break;
}
code[1] = opc;
 
defId(i->def(0), 14);
 
setAddressByFile(i->src(0));
srcId(i->src(0).getIndirect(0), 20);
if (uses64bitAddress(i))
code[1] |= 1 << 26;
 
emitPredicate(i);
 
emitLoadStoreType(i->dType);
emitCachingMode(i->cache);
}
 
// Map a system-value reference to its special-register number. Values with
// several components add the component index to a base number. Unknown
// system values assert and yield 0.
uint8_t
CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
{
   const unsigned int component = SDATA(ref).sv.index;

   switch (SDATA(ref).sv.sv) {
   case SV_LANEID:
      return 0x00;
   case SV_PHYSID:
      return 0x03;
   case SV_VERTEX_COUNT:
      return 0x10;
   case SV_INVOCATION_ID:
      return 0x11;
   case SV_YDIR:
      return 0x12;
   case SV_TID:
      return 0x21 + component;
   case SV_CTAID:
      return 0x25 + component;
   case SV_NTID:
      return 0x29 + component;
   case SV_GRIDID:
      return 0x2c;
   case SV_NCTAID:
      return 0x2d + component;
   case SV_LBASE:
      return 0x34;
   case SV_SBASE:
      return 0x30;
   case SV_CLOCK:
      return 0x50 + component;
   default:
      assert(!"no sreg for system value");
      return 0;
   }
}
 
// Encode a move. Four cases are distinguished, in this order:
//  1. the destination is a predicate register,
//  2. the source is a system value (special register read),
//  3. any other move using the long (8-byte) encoding,
//  4. any other move using the short (4-byte) encoding.
void
CodeEmitterNVC0::emitMOV(const Instruction *i)
{
// case 1: predicate destination
if (i->def(0).getFile() == FILE_PREDICATE) {
if (i->src(0).getFile() == FILE_GPR) {
// GPR source: fixed opcode words plus the source register id at bit 20
code[0] = 0xfc01c003;
code[1] = 0x1a8e0000;
srcId(i->src(0), 20);
} else {
code[0] = 0x0001c004;
code[1] = 0x0c0e0000;
if (i->src(0).getFile() == FILE_IMMEDIATE) {
// immediate source: field at bit 20 is set to 7, and bit 23 is
// additionally set when the immediate is zero
code[0] |= 7 << 20;
if (!i->getSrc(0)->reg.data.u32)
code[0] |= 1 << 23;
} else {
srcId(i->src(0), 20);
}
}
defId(i->def(0), 17);
emitPredicate(i);
} else
// case 2: system value source, register number from getSRegEncoding()
if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
uint8_t sr = getSRegEncoding(i->src(0));

if (i->encSize == 8) {
code[0] = 0x00000004 | (sr << 26);
code[1] = 0x2c000000;
} else {
code[0] = 0x40000008 | (sr << 20);
}
defId(i->def(0), 14);

emitPredicate(i);
} else
// case 3: long encoding; the base opcode depends on the source file
if (i->encSize == 8) {
uint64_t opc;

if (i->src(0).getFile() == FILE_IMMEDIATE)
opc = HEX64(18000000, 000001e2);
else
if (i->src(0).getFile() == FILE_PREDICATE)
opc = HEX64(080e0000, 1c000004);
else
opc = HEX64(28000000, 00000004);

// lane mask
opc |= i->lanes << 5;

emitForm_B(i, opc);
} else {
// case 4: short encoding
uint32_t imm;

if (i->src(0).getFile() == FILE_IMMEDIATE) {
imm = SDATA(i->src(0)).u32;
if (imm & 0xfff00000) {
// only the upper 12 bits may be set; they are stored in place
assert(!(imm & 0x000fffff));
code[0] = 0x00000318 | imm;
} else {
// small immediate, shifted up into bits 20 and above
assert(imm < 0x800 || ((int32_t)imm >= -0x800));
code[0] = 0x00000118 | (imm << 20);
}
} else {
code[0] = 0x0028;
emitShortSrc2(i->src(0));
}
defId(i->def(0), 14);

emitPredicate(i);
}
}
 
// Encode an atomic memory operation. The opcode words are chosen from the
// data type (U64, U32, S32, F32) and the atomic sub-operation; a different
// code[1] value is used when the instruction has no destination.
// Sources: 0 = memory address (optionally indirect), 1 = operand,
// 2 = second operand, encoded only for CAS.
// NOTE(review): for a dType other than the four handled below, code[0] and
// code[1] are not assigned before being OR-ed into — confirm callers never
// pass other types.
void
CodeEmitterNVC0::emitATOM(const Instruction *i)
{
const bool hasDst = i->defExists(0);
const bool casOrExch =
i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
i->subOp == NV50_IR_SUBOP_ATOM_CAS;

if (i->dType == TYPE_U64) {
// 64-bit: only ADD, EXCH and CAS are accepted
switch (i->subOp) {
case NV50_IR_SUBOP_ATOM_ADD:
code[0] = 0x205;
if (hasDst)
code[1] = 0x507e0000;
else
code[1] = 0x10000000;
break;
case NV50_IR_SUBOP_ATOM_EXCH:
code[0] = 0x305;
code[1] = 0x507e0000;
break;
case NV50_IR_SUBOP_ATOM_CAS:
code[0] = 0x325;
code[1] = 0x50000000;
break;
default:
assert(!"invalid u64 red op");
break;
}
} else
if (i->dType == TYPE_U32) {
switch (i->subOp) {
case NV50_IR_SUBOP_ATOM_EXCH:
code[0] = 0x105;
code[1] = 0x507e0000;
break;
case NV50_IR_SUBOP_ATOM_CAS:
code[0] = 0x125;
code[1] = 0x50000000;
break;
default:
// all other sub-operations: subOp is placed at bit 5 of code[0]
code[0] = 0x5 | (i->subOp << 5);
if (hasDst)
code[1] = 0x507e0000;
else
code[1] = 0x10000000;
break;
}
} else
if (i->dType == TYPE_S32) {
assert(i->subOp <= 2);
code[0] = 0x205 | (i->subOp << 5);
if (hasDst)
code[1] = 0x587e0000;
else
code[1] = 0x18000000;
} else
if (i->dType == TYPE_F32) {
// float: only ADD is accepted
assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
code[0] = 0x205;
if (hasDst)
code[1] = 0x687e0000;
else
code[1] = 0x28000000;
}

emitPredicate(i);

srcId(i->src(1), 14);

// destination register, or 63 in that field for CAS/EXCH without one
if (hasDst)
defId(i->def(0), 32 + 11);
else
if (casOrExch)
code[1] |= 63 << 11;

if (hasDst || casOrExch) {
// 20-bit signed offset scattered over three fields:
// offset bits 0..5   -> code[0] bits 26..31
// offset bits 6..16  -> code[1] bits 0..10
// offset bits 17..19 -> code[1] bits 23..25
const int32_t offset = SDATA(i->src(0)).offset;
assert(offset < 0x80000 && offset >= -0x80000);
code[0] |= offset << 26;
code[1] |= (offset & 0x1ffc0) >> 6;
code[1] |= (offset & 0xe0000) << 6;
} else {
srcAddr32(i->src(0), 26, 0);
}
// address register, flagging an 8-byte register in code[1] bit 26;
// without an indirect register the field is filled with 63
if (i->getIndirect(0, 0)) {
srcId(i->getIndirect(0, 0), 20);
if (i->getIndirect(0, 0)->reg.size == 8)
code[1] |= 1 << 26;
} else {
code[0] |= 63 << 20;
}

if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
srcId(i->src(2), 32 + 17);
}
 
void
CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
{
switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
case NV50_IR_SUBOP_MEMBAR_GL: code[0] = 0x25; break;
default:
code[0] = 0x45;
assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
break;
}
code[1] = 0xe0000000;
 
emitPredicate(i);
}
 
void
CodeEmitterNVC0::emitCCTL(const Instruction *i)
{
code[0] = 0x00000005 | (i->subOp << 5);
 
if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
code[1] = 0x98000000;
srcAddr32(i->src(0), 28, 2);
} else {
code[1] = 0xd0000000;
setAddress24(i->src(0));
}
if (uses64bitAddress(i))
code[1] |= 1 << 26;
srcId(i->src(0).getIndirect(0), 20);
 
emitPredicate(i);
 
defId(i, 0, 14);
}
 
// OR the SUCLAMP mode number (0..14) into code[0] bits 5 and up, and set
// code[1] bit 16 when the 2D flag is present. A subOp that matches none of
// the known SD/PL/BL modes leaves both words untouched.
void
CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
{
   const int clampKind = subOp & ~NV50_IR_SUBOP_SUCLAMP_2D;
   uint8_t modeIndex;

   switch (clampKind) {
   case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): modeIndex = 0; break;
   case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): modeIndex = 1; break;
   case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): modeIndex = 2; break;
   case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): modeIndex = 3; break;
   case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): modeIndex = 4; break;
   case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): modeIndex = 5; break;
   case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): modeIndex = 6; break;
   case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): modeIndex = 7; break;
   case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): modeIndex = 8; break;
   case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): modeIndex = 9; break;
   case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): modeIndex = 10; break;
   case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): modeIndex = 11; break;
   case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): modeIndex = 12; break;
   case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): modeIndex = 13; break;
   case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): modeIndex = 14; break;
   default:
      return;
   }
   code[0] |= modeIndex << 5;

   const bool is2D = (subOp & NV50_IR_SUBOP_SUCLAMP_2D) != 0;
   if (is2D)
      code[1] |= 1 << 16;
}
 
// Encode SUCLAMP / SUBFM / SUEAU. An immediate third source is detached
// around the emitForm_A() call and encoded by hand afterwards as a 6-bit
// field; the instruction gets its source back before returning.
void
CodeEmitterNVC0::emitSUCalc(Instruction *i)
{
   ImmediateValue *detachedImm = NULL;
   uint64_t opc;

   if (i->srcExists(2)) {
      detachedImm = i->getSrc(2)->asImm();
      if (detachedImm)
         i->setSrc(2, NULL); // special case, make emitForm_A not assert
   }

   if (i->op == OP_SUCLAMP) {
      opc = HEX64(58000000, 00000004);
   } else if (i->op == OP_SUBFM) {
      opc = HEX64(5c000000, 00000004);
   } else if (i->op == OP_SUEAU) {
      opc = HEX64(60000000, 00000004);
   } else {
      assert(0);
      return;
   }
   emitForm_A(i, opc);

   if (i->op == OP_SUCLAMP) {
      if (i->dType == TYPE_S32)
         code[0] |= 1 << 9;
      emitSUCLAMPMode(i->subOp);
   } else if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D) {
      code[1] |= 1 << 16;
   }

   // SUCLAMP and SUBFM carry a predicate destination field at code[1] bit 23
   if (i->op != OP_SUEAU) {
      const bool predOnly = (i->def(0).getFile() == FILE_PREDICATE);

      if (predOnly) { // p, #
         code[0] |= 63 << 14;
         code[1] |= i->getDef(0)->reg.data.id << 23;
      } else if (i->defExists(1)) { // r, p
         assert(i->def(1).getFile() == FILE_PREDICATE);
         code[1] |= i->getDef(1)->reg.data.id << 23;
      } else { // r, #
         code[1] |= 7 << 23;
      }
   }

   if (detachedImm) {
      assert(i->op == OP_SUCLAMP);
      i->setSrc(2, detachedImm);
      code[1] |= (detachedImm->reg.data.u32 & 0x3f) << 17; // sint6
   }
}
 
// OR the surface access type into code[1] bits 13 and up. TYPE_U32 encodes
// as 0; any type other than S32/U8/S8 is asserted to be TYPE_U32.
void
CodeEmitterNVC0::emitSUGType(DataType ty)
{
   uint32_t typeField = 0;

   switch (ty) {
   case TYPE_S32:
      typeField = 1;
      break;
   case TYPE_U8:
      typeField = 2;
      break;
   case TYPE_S8:
      typeField = 3;
      break;
   default:
      assert(ty == TYPE_U32);
      break;
   }
   code[1] |= typeField << 13;
}
 
// Encode source s as a constant-buffer operand of a surface instruction.
// The offset must be 4-byte aligned and fit in 16 bits; it is split between
// the top byte of code[0] and the low bits of code[1].
void
CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
{
   const uint32_t cbOffset = i->getSrc(s)->reg.data.offset;

   assert(i->src(s).getFile() == FILE_MEMORY_CONST);
   assert(cbOffset == (cbOffset & 0xfffc));

   code[0] |= cbOffset << 24;

   code[1] |= 1 << 21;
   code[1] |= cbOffset >> 8;
   code[1] |= i->getSrc(s)->reg.fileIndex << 8;
}
 
// Encode the predicate operand in source slot s of a surface instruction.
// If the slot is absent, or is the instruction's own execution predicate,
// the field at code[1] bit 17 is filled with 7 instead.
void
CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
{
   const bool hasOperand = i->srcExists(s) && (i->predSrc != s);

   if (hasOperand) {
      if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
         code[1] |= 1 << 20;
      srcId(i->src(s), 32 + 17);
   } else {
      code[1] |= 0x7 << 17;
   }
}
 
void
CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
{
code[0] = 0x5;
code[1] = 0xd4000000 | (i->subOp << 15);
 
emitLoadStoreType(i->dType);
emitSUGType(i->sType);
emitCachingMode(i->cache);
 
emitPredicate(i);
defId(i->def(0), 14); // destination
srcId(i->src(0), 20); // address
// format
if (i->src(1).getFile() == FILE_GPR)
srcId(i->src(1), 26);
else
setSUConst16(i, 1);
setSUPred(i, 2);
}
 
void
CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
{
code[0] = 0x5;
code[1] = 0xdc000000 | (i->subOp << 15);
 
if (i->op == OP_SUSTP)
code[1] |= i->tex.mask << 22;
else
emitLoadStoreType(i->dType);
emitSUGType(i->sType);
emitCachingMode(i->cache);
 
emitPredicate(i);
srcId(i->src(0), 20); // address
// format
if (i->src(1).getFile() == FILE_GPR)
srcId(i->src(1), 26);
else
setSUConst16(i, 1);
srcId(i->src(3), 14); // values
setSUPred(i, 2);
}
 
// Scatter the fields of a vector-instruction subOp into code[1]. The layout
// depends on the vector mode returned by NV50_IR_SUBOP_Vn(); modes 1 and 2
// also encode bits of the instruction's mask.
void
CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
{
   const uint32_t sub = i->subOp;
   uint32_t fields = 0;

   switch (NV50_IR_SUBOP_Vn(i->subOp)) {
   case 0:
      fields |= (sub & 0x000f) << 12; // vsrc1
      fields |= (sub & 0x00e0) >> 5;  // vsrc2
      fields |= (sub & 0x0100) << 7;  // vsrc2
      fields |= (sub & 0x3c00) << 13; // vdst
      break;
   case 1:
      fields |= (sub & 0x000f) << 8;  // v2src1
      fields |= (sub & 0x0010) << 11; // v2src1
      fields |= (sub & 0x01e0) >> 1;  // v2src2
      fields |= (sub & 0x0200) << 6;  // v2src2
      fields |= (sub & 0x3c00) << 2;  // v4dst
      fields |= (i->mask & 0x3) << 2;
      break;
   case 2:
      fields |= (sub & 0x000f) << 8;  // v4src1
      fields |= (sub & 0x01e0) >> 1;  // v4src2
      fields |= (sub & 0x3c00) << 2;  // v4dst
      fields |= (i->mask & 0x3) << 2;
      fields |= (i->mask & 0xc) << 21;
      break;
   default:
      assert(0);
      break;
   }
   code[1] |= fields;
}
 
void
CodeEmitterNVC0::emitVSHL(const Instruction *i)
{
uint64_t opc = 0x4;
 
switch (NV50_IR_SUBOP_Vn(i->subOp)) {
case 0: opc |= 0xe8ULL << 56; break;
case 1: opc |= 0xb4ULL << 56; break;
case 2: opc |= 0x94ULL << 56; break;
default:
assert(0);
break;
}
if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
} else {
if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
if (isSignedType(i->sType)) opc |= 1 << 6;
}
emitForm_A(i, opc);
emitVectorSubOp(i);
 
if (i->saturate)
code[0] |= 1 << 9;
if (i->flagsDef >= 0)
code[1] |= 1 << 16;
}
 
// Encode PIXLD (long encoding only); subOp is placed at code[0] bit 5.
void
CodeEmitterNVC0::emitPIXLD(const Instruction *i)
{
   assert(i->encSize == 8);

   emitForm_A(i, HEX64(10000000, 00000006));

   code[1] |= 0x00e00000;
   code[0] |= i->subOp << 5;
}
 
// Encode one instruction at the current output position and advance the
// output pointer by its encoding size.
//
// When issue delays are written, every 64-byte group of code starts with an
// 8-byte scheduling header, and the instruction's sched byte is packed into
// that header at a position derived from the instruction's slot in the group.
//
// Returns false (after logging an error) if the instruction has no encoding
// size, the output buffer is too small, or the operation cannot be emitted.
bool
CodeEmitterNVC0::emitInstruction(Instruction *insn)
{
   unsigned int size = insn->encSize;

   // account for the scheduling header that opens a new 64-byte group
   if (writeIssueDelays && !(codeSize & 0x3f))
      size += 8;

   if (!insn->encSize) {
      ERROR("skipping unencodable instruction: "); insn->print();
      return false;
   } else
   if (codeSize + size > codeSizeLimit) {
      ERROR("code emitter output buffer too small\n");
      return false;
   }

   if (writeIssueDelays) {
      if (!(codeSize & 0x3f)) {
         code[0] = 0x00000007; // cf issue delay "instruction"
         code[1] = 0x20000000;
         code += 2;
         codeSize += 8;
      }
      // id: slot of this instruction within the group (0 = right after the
      // header); data: start of the group's header
      const unsigned int id = (codeSize & 0x3f) / 8 - 1;
      uint32_t *data = code - (id * 2 + 2);
      if (id <= 2) {
         data[0] |= insn->sched << (id * 8 + 4);
      } else
      if (id == 3) {
         // the fourth sched byte straddles both header words
         data[0] |= insn->sched << 28;
         data[1] |= insn->sched >> 4;
      } else {
         data[1] |= insn->sched << ((id - 4) * 8 + 4);
      }
   }

   // assert that instructions with multiple defs don't corrupt registers
   for (int d = 0; insn->defExists(d); ++d)
      assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);

   switch (insn->op) {
   case OP_MOV:
   case OP_RDSV:
      emitMOV(insn);
      break;
   case OP_NOP:
      break;
   case OP_LOAD:
      emitLOAD(insn);
      break;
   case OP_STORE:
      emitSTORE(insn);
      break;
   case OP_LINTERP:
   case OP_PINTERP:
      emitINTERP(insn);
      break;
   case OP_VFETCH:
      emitVFETCH(insn);
      break;
   case OP_EXPORT:
      emitEXPORT(insn);
      break;
   case OP_PFETCH:
      emitPFETCH(insn);
      break;
   case OP_EMIT:
   case OP_RESTART:
      emitOUT(insn);
      break;
   case OP_ADD:
   case OP_SUB:
      if (insn->dType == TYPE_F64)
         emitDADD(insn);
      else if (isFloatType(insn->dType))
         emitFADD(insn);
      else
         emitUADD(insn);
      break;
   case OP_MUL:
      if (insn->dType == TYPE_F64)
         emitDMUL(insn);
      else if (isFloatType(insn->dType))
         emitFMUL(insn);
      else
         emitUMUL(insn);
      break;
   case OP_MAD:
   case OP_FMA:
      if (insn->dType == TYPE_F64)
         emitDMAD(insn);
      else if (isFloatType(insn->dType))
         emitFMAD(insn);
      else
         emitIMAD(insn);
      break;
   case OP_SAD:
      emitISAD(insn);
      break;
   case OP_NOT:
      emitNOT(insn);
      break;
   case OP_AND:
      emitLogicOp(insn, 0);
      break;
   case OP_OR:
      emitLogicOp(insn, 1);
      break;
   case OP_XOR:
      emitLogicOp(insn, 2);
      break;
   case OP_SHL:
   case OP_SHR:
      emitShift(insn);
      break;
   case OP_SET:
   case OP_SET_AND:
   case OP_SET_OR:
   case OP_SET_XOR:
      emitSET(insn->asCmp());
      break;
   case OP_SELP:
      emitSELP(insn);
      break;
   case OP_SLCT:
      emitSLCT(insn->asCmp());
      break;
   case OP_MIN:
   case OP_MAX:
      emitMINMAX(insn);
      break;
   case OP_ABS:
   case OP_NEG:
   case OP_CEIL:
   case OP_FLOOR:
   case OP_TRUNC:
   case OP_CVT:
   case OP_SAT:
      emitCVT(insn);
      break;
   case OP_RSQ:
      emitSFnOp(insn, 5 + 2 * insn->subOp);
      break;
   case OP_RCP:
      emitSFnOp(insn, 4 + 2 * insn->subOp);
      break;
   case OP_LG2:
      emitSFnOp(insn, 3);
      break;
   case OP_EX2:
      emitSFnOp(insn, 2);
      break;
   case OP_SIN:
      emitSFnOp(insn, 1);
      break;
   case OP_COS:
      emitSFnOp(insn, 0);
      break;
   case OP_PRESIN:
   case OP_PREEX2:
      emitPreOp(insn);
      break;
   case OP_TEX:
   case OP_TXB:
   case OP_TXL:
   case OP_TXD:
   case OP_TXF:
   case OP_TXG:
   case OP_TXLQ:
      emitTEX(insn->asTex());
      break;
   case OP_TXQ:
      emitTXQ(insn->asTex());
      break;
   case OP_TEXBAR:
      emitTEXBAR(insn);
      break;
   case OP_SUBFM:
   case OP_SUCLAMP:
   case OP_SUEAU:
      emitSUCalc(insn);
      break;
   case OP_MADSP:
      emitMADSP(insn);
      break;
   case OP_SULDB:
      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
         emitSULDGB(insn->asTex());
      else
         ERROR("SULDB not yet supported on < nve4\n");
      break;
   case OP_SUSTB:
   case OP_SUSTP:
      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
         emitSUSTGx(insn->asTex());
      else
         ERROR("SUSTx not yet supported on < nve4\n");
      break;
   case OP_ATOM:
      emitATOM(insn);
      break;
   case OP_BRA:
   case OP_CALL:
   case OP_PRERET:
   case OP_RET:
   case OP_DISCARD:
   case OP_EXIT:
   case OP_PRECONT:
   case OP_CONT:
   case OP_PREBREAK:
   case OP_BREAK:
   case OP_JOINAT:
   case OP_BRKPT:
   case OP_QUADON:
   case OP_QUADPOP:
      emitFlow(insn);
      break;
   case OP_QUADOP:
      emitQUADOP(insn, insn->subOp, insn->lanes);
      break;
   case OP_DFDX:
      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
      break;
   case OP_DFDY:
      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
      break;
   case OP_POPCNT:
      emitPOPC(insn);
      break;
   case OP_INSBF:
      emitINSBF(insn);
      break;
   case OP_EXTBF:
      emitEXTBF(insn);
      break;
   case OP_BFIND:
      emitBFIND(insn);
      break;
   case OP_PERMT:
      emitPERMT(insn);
      break;
   case OP_JOIN:
      emitNOP(insn);
      insn->join = 1;
      break;
   case OP_BAR:
      emitBAR(insn);
      break;
   case OP_MEMBAR:
      emitMEMBAR(insn);
      break;
   case OP_CCTL:
      emitCCTL(insn);
      break;
   case OP_VSHL:
      emitVSHL(insn);
      break;
   case OP_PIXLD:
      emitPIXLD(insn);
      break;
   case OP_PHI:
   case OP_UNION:
   case OP_CONSTRAINT:
      ERROR("operation should have been eliminated\n");
      return false;
   case OP_EXP:
   case OP_LOG:
   case OP_SQRT:
   case OP_POW:
      ERROR("operation should have been lowered\n");
      return false;
   default:
      ERROR("unknown op\n");
      return false;
   }

   // the join flag is a bit of the long encoding's first word
   if (insn->join) {
      code[0] |= 0x10;
      assert(insn->encSize == 8);
   }

   code += insn->encSize / 4;
   codeSize += insn->encSize;
   return true;
}
 
uint32_t
CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
{
const Target::OpInfo &info = targ->getOpInfo(i);
 
if (writeIssueDelays || info.minEncSize == 8 || 1)
return 8;
 
if (i->ftz || i->saturate || i->join)
return 8;
if (i->rnd != ROUND_N)
return 8;
if (i->predSrc >= 0 && i->op == OP_MAD)
return 8;
 
if (i->op == OP_PINTERP) {
if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
return 8;
} else
if (i->op == OP_MOV && i->lanes != 0xf) {
return 8;
}
 
for (int s = 0; i->srcExists(s); ++s) {
if (i->src(s).isIndirect(0))
return 8;
 
if (i->src(s).getFile() == FILE_MEMORY_CONST) {
if (SDATA(i->src(s)).offset >= 0x100)
return 8;
if (i->getSrc(s)->reg.fileIndex > 1 &&
i->getSrc(s)->reg.fileIndex != 16)
return 8;
} else
if (i->src(s).getFile() == FILE_IMMEDIATE) {
if (i->dType == TYPE_F32) {
if (SDATA(i->src(s)).u32 >= 0x100)
return 8;
} else {
if (SDATA(i->src(s)).u32 > 0xff)
return 8;
}
}
 
if (i->op == OP_CVT)
continue;
if (i->src(s).mod != Modifier(0)) {
if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
if (i->op != OP_RSQ)
return 8;
if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
if (i->op != OP_ADD || s != 0)
return 8;
}
}
 
return 4;
}
 
// Simplified, erring on safe side.
class SchedDataCalculator : public Pass
{
public:
SchedDataCalculator(const Target *targ) : targ(targ) { }
 
private:
struct RegScores
{
struct Resource {
int st[DATA_FILE_COUNT]; // LD to LD delay 3
int ld[DATA_FILE_COUNT]; // ST to ST delay 3
int tex; // TEX to non-TEX delay 17 (0x11)
int sfu; // SFU to SFU delay 3 (except PRE-ops)
int imul; // integer MUL to MUL delay 3
} res;
struct ScoreData {
int r[64];
int p[8];
int c;
} rd, wr;
int base;
 
void rebase(const int base)
{
const int delta = this->base - base;
if (!delta)
return;
this->base = 0;
 
for (int i = 0; i < 64; ++i) {
rd.r[i] += delta;
wr.r[i] += delta;
}
for (int i = 0; i < 8; ++i) {
rd.p[i] += delta;
wr.p[i] += delta;
}
rd.c += delta;
wr.c += delta;
 
for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
res.ld[f] += delta;
res.st[f] += delta;
}
res.sfu += delta;
res.imul += delta;
res.tex += delta;
}
void wipe()
{
memset(&rd, 0, sizeof(rd));
memset(&wr, 0, sizeof(wr));
memset(&res, 0, sizeof(res));
}
int getLatest(const ScoreData& d) const
{
int max = 0;
for (int i = 0; i < 64; ++i)
if (d.r[i] > max)
max = d.r[i];
for (int i = 0; i < 8; ++i)
if (d.p[i] > max)
max = d.p[i];
if (d.c > max)
max = d.c;
return max;
}
inline int getLatestRd() const
{
return getLatest(rd);
}
inline int getLatestWr() const
{
return getLatest(wr);
}
inline int getLatest() const
{
const int a = getLatestRd();
const int b = getLatestWr();
 
int max = MAX2(a, b);
for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
max = MAX2(res.ld[f], max);
max = MAX2(res.st[f], max);
}
max = MAX2(res.sfu, max);
max = MAX2(res.imul, max);
max = MAX2(res.tex, max);
return max;
}
void setMax(const RegScores *that)
{
for (int i = 0; i < 64; ++i) {
rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
}
for (int i = 0; i < 8; ++i) {
rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
}
rd.c = MAX2(rd.c, that->rd.c);
wr.c = MAX2(wr.c, that->wr.c);
 
for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
res.st[f] = MAX2(res.st[f], that->res.st[f]);
}
res.sfu = MAX2(res.sfu, that->res.sfu);
res.imul = MAX2(res.imul, that->res.imul);
res.tex = MAX2(res.tex, that->res.tex);
}
void print(int cycle)
{
for (int i = 0; i < 64; ++i) {
if (rd.r[i] > cycle)
INFO("rd $r%i @ %i\n", i, rd.r[i]);
if (wr.r[i] > cycle)
INFO("wr $r%i @ %i\n", i, wr.r[i]);
}
for (int i = 0; i < 8; ++i) {
if (rd.p[i] > cycle)
INFO("rd $p%i @ %i\n", i, rd.p[i]);
if (wr.p[i] > cycle)
INFO("wr $p%i @ %i\n", i, wr.p[i]);
}
if (rd.c > cycle)
INFO("rd $c @ %i\n", rd.c);
if (wr.c > cycle)
INFO("wr $c @ %i\n", wr.c);
if (res.sfu > cycle)
INFO("sfu @ %i\n", res.sfu);
if (res.imul > cycle)
INFO("imul @ %i\n", res.imul);
if (res.tex > cycle)
INFO("tex @ %i\n", res.tex);
}
};
 
RegScores *score; // for current BB
std::vector<RegScores> scoreBoards;
int prevData;
operation prevOp;
 
const Target *targ;
 
bool visit(Function *);
bool visit(BasicBlock *);
 
void commitInsn(const Instruction *, int cycle);
int calcDelay(const Instruction *, int cycle) const;
void setDelay(Instruction *, int delay, Instruction *next);
 
void recordRd(const Value *, const int ready);
void recordWr(const Value *, const int ready);
void checkRd(const Value *, int cycle, int& delay) const;
void checkWr(const Value *, int cycle, int& delay) const;
 
int getCycles(const Instruction *, int origDelay) const;
};
 
// Compute insn->sched from the delay towards the following instruction, and
// update prevOp / prevData for the next call.
//   0xc2           after TEXBAR
//   0x00           for JOIN or joining instructions
//   0x04           dual-issue with next
//   delay | 0x20   normal issue (0x40 instead when prevOp is OP_EXPORT)
void
SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
{
   if (insn->op == OP_EXIT || insn->op == OP_RET)
      delay = MAX2(delay, 14);

   if (insn->op == OP_TEXBAR) {
      // TODO: except if results not used before EXIT
      insn->sched = 0xc2;
   } else if (insn->op == OP_JOIN || insn->join) {
      insn->sched = 0x00;
   } else {
      // dual-issue needs a negative delay, a previous instruction that was
      // not itself dual-issued, and a successor the target allows pairing
      // with
      const bool dualIssue = delay < 0 && prevData != 0x04 &&
                             next && targ->canDualIssue(insn, next);
      if (dualIssue) {
         insn->sched = 0x04; // dual-issue
      } else {
         insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
         insn->sched |= (prevOp == OP_EXPORT) ? 0x40 : 0x20;
      }
   }

   if ((prevData != 0x04 || prevOp != OP_EXPORT) &&
       (insn->sched != 0x04 || insn->op == OP_EXPORT))
      prevOp = insn->op;

   prevData = insn->sched;
}
 
// Number of cycles implied by an instruction's sched byte. For TEXBAR a
// positive origDelay is added on top.
int
SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
{
   const int sched = insn->sched;

   if (sched & 0x80) {
      int cycles = (sched & 0x0f) * 2 + 1;
      if (insn->op == OP_TEXBAR && origDelay > 0)
         cycles += origDelay;
      return cycles;
   }
   if (sched & 0x60)
      return (sched & 0x1f) + 1;
   if (sched == 0x04)
      return 0; // dual-issue
   return 32;
}
 
// Size the scoreboard array to the function's CFG and clear every entry.
bool
SchedDataCalculator::visit(Function *func)
{
   scoreBoards.resize(func->cfg.getSize());

   for (std::vector<RegScores>::iterator board = scoreBoards.begin();
        board != scoreBoards.end(); ++board)
      board->wipe();

   return true;
}
 
// Compute scheduling data for every instruction of one basic block.
//
// 1. Seed this block's scoreboard with the element-wise maximum of the
//    scoreboards of all predecessors reached through non-back edges, and
//    inherit prevData / prevOp from their exit instructions.
// 2. Walk the instructions; each one's delay is computed against the
//    instruction that follows it.
// 3. The last instruction's delay is computed against the successor blocks.
// 4. Rebase the scoreboard to the final cycle so successors start from a
//    common base.
bool
SchedDataCalculator::visit(BasicBlock *bb)
{
Instruction *insn;
Instruction *next = NULL;

int cycle = 0;

prevData = 0x00;
prevOp = OP_NOP;
score = &scoreBoards.at(bb->getId());

for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
// back branches will wait until all target dependencies are satisfied
if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
continue;
BasicBlock *in = BasicBlock::get(ei.getNode());
if (in->getExit()) {
// a dual-issue marker (0x04) taken from one predecessor is not
// overwritten by later ones
if (prevData != 0x04)
prevData = in->getExit()->sched;
prevOp = in->getExit()->op;
}
score->setMax(&scoreBoards.at(in->getId()));
}
// with several incoming edges prevOp is reset to OP_NOP
if (bb->cfg.incidentCount() > 1)
prevOp = OP_NOP;

#ifdef NVC0_DEBUG_SCHED_DATA
INFO("=== BB:%i initial scores\n", bb->getId());
score->print(cycle);
#endif

// all instructions except the last: delay is relative to the next one
for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
next = insn->next;

commitInsn(insn, cycle);
int delay = calcDelay(next, cycle);
setDelay(insn, delay, next);
cycle += getCycles(insn, delay);

#ifdef NVC0_DEBUG_SCHED_DATA
INFO("cycle %i, sched %02x\n", cycle, insn->sched);
insn->print();
next->print();
#endif
}
// empty block: nothing to schedule
if (!insn)
return true;
commitInsn(insn, cycle);

// delay of the last instruction, taken as the maximum over all successors
int bbDelay = -1;

for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
BasicBlock *out = BasicBlock::get(ei.getNode());

if (ei.getType() != Graph::Edge::BACK) {
// only test the first instruction of the outgoing block
next = out->getEntry();
if (next)
bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
} else {
// wait until all dependencies are satisfied
const int regsFree = score->getLatest();
next = out->getFirst();
for (int c = cycle; next && c < regsFree; next = next->next) {
bbDelay = MAX2(bbDelay, calcDelay(next, c));
c += getCycles(next, bbDelay);
}
next = NULL;
}
}
// a successor instruction is handed to setDelay() only when there is
// exactly one outgoing edge
if (bb->cfg.outgoingCount() != 1)
next = NULL;
setDelay(insn, bbDelay, next);
cycle += getCycles(insn, bbDelay);

score->rebase(cycle); // common base for initializing out blocks' scores
return true;
}
 
#define NVE4_MAX_ISSUE_DELAY 0x1f
int
SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
{
int delay = 0, ready = cycle;
 
for (int s = 0; insn->srcExists(s); ++s)
checkRd(insn->getSrc(s), cycle, delay);
// WAR & WAW don't seem to matter
// for (int s = 0; insn->srcExists(s); ++s)
// recordRd(insn->getSrc(s), cycle);
 
switch (Target::getOpClass(insn->op)) {
case OPCLASS_SFU:
ready = score->res.sfu;
break;
case OPCLASS_ARITH:
if (insn->op == OP_MUL && !isFloatType(insn->dType))
ready = score->res.imul;
break;
case OPCLASS_TEXTURE:
ready = score->res.tex;
break;
case OPCLASS_LOAD:
ready = score->res.ld[insn->src(0).getFile()];
break;
case OPCLASS_STORE:
ready = score->res.st[insn->src(0).getFile()];
break;
default:
break;
}
if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
ready = MAX2(ready, score->res.tex);
 
delay = MAX2(delay, ready - cycle);
 
// if can issue next cycle, delay is 0, not 1
return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
}
 
void
SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
{
const int ready = cycle + targ->getLatency(insn);
 
for (int d = 0; insn->defExists(d); ++d)
recordWr(insn->getDef(d), ready);
// WAR & WAW don't seem to matter
// for (int s = 0; insn->srcExists(s); ++s)
// recordRd(insn->getSrc(s), cycle);
 
switch (Target::getOpClass(insn->op)) {
case OPCLASS_SFU:
score->res.sfu = cycle + 4;
break;
case OPCLASS_ARITH:
if (insn->op == OP_MUL && !isFloatType(insn->dType))
score->res.imul = cycle + 4;
break;
case OPCLASS_TEXTURE:
score->res.tex = cycle + 18;
break;
case OPCLASS_LOAD:
if (insn->src(0).getFile() == FILE_MEMORY_CONST)
break;
score->res.ld[insn->src(0).getFile()] = cycle + 4;
score->res.st[insn->src(0).getFile()] = ready;
break;
case OPCLASS_STORE:
score->res.st[insn->src(0).getFile()] = cycle + 4;
score->res.ld[insn->src(0).getFile()] = ready;
break;
case OPCLASS_OTHER:
if (insn->op == OP_TEXBAR)
score->res.tex = cycle;
break;
default:
break;
}
 
#ifdef NVC0_DEBUG_SCHED_DATA
score->print(cycle);
#endif
}
 
void
SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
{
int ready = cycle;
int a, b;
 
switch (v->reg.file) {
case FILE_GPR:
a = v->reg.data.id;
b = a + v->reg.size / 4;
for (int r = a; r < b; ++r)
ready = MAX2(ready, score->rd.r[r]);
break;
case FILE_PREDICATE:
ready = MAX2(ready, score->rd.p[v->reg.data.id]);
break;
case FILE_FLAGS:
ready = MAX2(ready, score->rd.c);
break;
case FILE_SHADER_INPUT:
case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
case FILE_MEMORY_LOCAL:
case FILE_MEMORY_CONST:
case FILE_MEMORY_SHARED:
case FILE_MEMORY_GLOBAL:
case FILE_SYSTEM_VALUE:
// TODO: any restrictions here ?
break;
case FILE_IMMEDIATE:
break;
default:
assert(0);
break;
}
if (cycle < ready)
delay = MAX2(delay, ready - cycle);
}
 
void
SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
{
int ready = cycle;
int a, b;
 
switch (v->reg.file) {
case FILE_GPR:
a = v->reg.data.id;
b = a + v->reg.size / 4;
for (int r = a; r < b; ++r)
ready = MAX2(ready, score->wr.r[r]);
break;
case FILE_PREDICATE:
ready = MAX2(ready, score->wr.p[v->reg.data.id]);
break;
default:
assert(v->reg.file == FILE_FLAGS);
ready = MAX2(ready, score->wr.c);
break;
}
if (cycle < ready)
delay = MAX2(delay, ready - cycle);
}
 
void
SchedDataCalculator::recordWr(const Value *v, const int ready)
{
int a = v->reg.data.id;
 
if (v->reg.file == FILE_GPR) {
int b = a + v->reg.size / 4;
for (int r = a; r < b; ++r)
score->rd.r[r] = ready;
} else
// $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
if (v->reg.file == FILE_PREDICATE) {
score->rd.p[a] = ready + 4;
} else {
assert(v->reg.file == FILE_FLAGS);
score->rd.c = ready + 4;
}
}
 
void
SchedDataCalculator::recordRd(const Value *v, const int ready)
{
int a = v->reg.data.id;
 
if (v->reg.file == FILE_GPR) {
int b = a + v->reg.size / 4;
for (int r = a; r < b; ++r)
score->wr.r[r] = ready;
} else
if (v->reg.file == FILE_PREDICATE) {
score->wr.p[a] = ready;
} else
if (v->reg.file == FILE_FLAGS) {
score->wr.c = ready;
}
}
 
bool
calculateSchedDataNVC0(const Target *targ, Function *func)
{
SchedDataCalculator sched(targ);
return sched.run(func, true, true);
}
 
// Run the generic emission preparation, then compute scheduling data on
// targets that use software scheduling.
void
CodeEmitterNVC0::prepareEmission(Function *func)
{
   CodeEmitter::prepareEmission(func);

   if (!targ->hasSWSched)
      return;
   calculateSchedDataNVC0(targ, func);
}
 
// Create an emitter with no output buffer attached; issue delays are
// written when the target uses software scheduling.
CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
   : CodeEmitter(target),
     targNVC0(target),
     writeIssueDelays(target->hasSWSched)
{
   code = NULL;
   codeSizeLimit = 0;
   codeSize = 0;
   relocInfo = NULL;
}
 
// Allocate an NVC0 code emitter for the given program type; the returned
// object comes from new.
CodeEmitter *
TargetNVC0::createCodeEmitterNVC0(Program::Type type)
{
   CodeEmitterNVC0 *const emitter = new CodeEmitterNVC0(this);

   emitter->setProgramType(type);
   return emitter;
}
 
// Pick the emitter for this chipset: the GK110 emitter from GK20A upwards,
// the NVC0 emitter for everything older.
CodeEmitter *
TargetNVC0::getCodeEmitter(Program::Type type)
{
   if (chipset < NVISA_GK20A_CHIPSET)
      return createCodeEmitterNVC0(type);
   return createCodeEmitterGK110(type);
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
0,0 → 1,3334
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_util.h"
 
#include <set>
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_util.h"
#include "codegen/nv50_ir_build_util.h"
 
namespace tgsi {
 
// Defined further down in this file; needed here by Instruction::getTexture.
class Source;

// File-local translation helpers from TGSI enumerants to nv50_ir ones.
// They are declared up front because class Instruction uses them inline.
static nv50_ir::operation translateOpcode(uint opcode);
static nv50_ir::DataFile translateFile(uint file);
static nv50_ir::TexTarget translateTexture(uint texTarg);
static nv50_ir::SVSemantic translateSysVal(uint sysval);
 
class Instruction
{
public:
Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }
 
class SrcRegister
{
public:
SrcRegister(const struct tgsi_full_src_register *src)
: reg(src->Register),
fsr(src)
{ }
 
SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }
 
SrcRegister(const struct tgsi_ind_register& ind)
: reg(tgsi_util_get_src_from_ind(&ind)),
fsr(NULL)
{ }
 
struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
{
struct tgsi_src_register reg;
memset(&reg, 0, sizeof(reg));
reg.Index = off.Index;
reg.File = off.File;
reg.SwizzleX = off.SwizzleX;
reg.SwizzleY = off.SwizzleY;
reg.SwizzleZ = off.SwizzleZ;
return reg;
}
 
SrcRegister(const struct tgsi_texture_offset& off) :
reg(offsetToSrc(off)),
fsr(NULL)
{ }
 
uint getFile() const { return reg.File; }
 
bool is2D() const { return reg.Dimension; }
 
bool isIndirect(int dim) const
{
return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
}
 
int getIndex(int dim) const
{
return (dim && fsr) ? fsr->Dimension.Index : reg.Index;
}
 
int getSwizzle(int chan) const
{
return tgsi_util_get_src_register_swizzle(&reg, chan);
}
 
nv50_ir::Modifier getMod(int chan) const;
 
SrcRegister getIndirect(int dim) const
{
assert(fsr && isIndirect(dim));
if (dim)
return SrcRegister(fsr->DimIndirect);
return SrcRegister(fsr->Indirect);
}
 
uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const
{
assert(reg.File == TGSI_FILE_IMMEDIATE);
assert(!reg.Absolute);
assert(!reg.Negate);
return info->immd.data[reg.Index * 4 + getSwizzle(c)];
}
 
private:
const struct tgsi_src_register reg;
const struct tgsi_full_src_register *fsr;
};
 
class DstRegister
{
public:
DstRegister(const struct tgsi_full_dst_register *dst)
: reg(dst->Register),
fdr(dst)
{ }
 
DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }
 
uint getFile() const { return reg.File; }
 
bool is2D() const { return reg.Dimension; }
 
bool isIndirect(int dim) const
{
return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;
}
 
int getIndex(int dim) const
{
return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index;
}
 
unsigned int getMask() const { return reg.WriteMask; }
 
bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }
 
SrcRegister getIndirect(int dim) const
{
assert(fdr && isIndirect(dim));
if (dim)
return SrcRegister(fdr->DimIndirect);
return SrcRegister(fdr->Indirect);
}
 
private:
const struct tgsi_dst_register reg;
const struct tgsi_full_dst_register *fdr;
};
 
inline uint getOpcode() const { return insn->Instruction.Opcode; }
 
unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }
unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }
 
// mask of used components of source s
unsigned int srcMask(unsigned int s) const;
 
SrcRegister getSrc(unsigned int s) const
{
assert(s < srcCount());
return SrcRegister(&insn->Src[s]);
}
 
DstRegister getDst(unsigned int d) const
{
assert(d < dstCount());
return DstRegister(&insn->Dst[d]);
}
 
SrcRegister getTexOffset(unsigned int i) const
{
assert(i < TGSI_FULL_MAX_TEX_OFFSETS);
return SrcRegister(insn->TexOffsets[i]);
}
 
unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }
 
bool checkDstSrcAliasing() const;
 
inline nv50_ir::operation getOP() const {
return translateOpcode(getOpcode()); }
 
nv50_ir::DataType inferSrcType() const;
nv50_ir::DataType inferDstType() const;
 
nv50_ir::CondCode getSetCond() const;
 
nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
 
inline uint getLabel() { return insn->Label.Label; }
 
unsigned getSaturate() const { return insn->Instruction.Saturate; }
 
void print() const
{
tgsi_dump_instruction(insn, 1);
}
 
private:
const struct tgsi_full_instruction *insn;
};
 
unsigned int Instruction::srcMask(unsigned int s) const
{
unsigned int mask = insn->Dst[0].Register.WriteMask;
 
switch (insn->Instruction.Opcode) {
case TGSI_OPCODE_COS:
case TGSI_OPCODE_SIN:
return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
case TGSI_OPCODE_DP2:
return 0x3;
case TGSI_OPCODE_DP3:
return 0x7;
case TGSI_OPCODE_DP4:
case TGSI_OPCODE_DPH:
case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */
return 0xf;
case TGSI_OPCODE_DST:
return mask & (s ? 0xa : 0x6);
case TGSI_OPCODE_EX2:
case TGSI_OPCODE_EXP:
case TGSI_OPCODE_LG2:
case TGSI_OPCODE_LOG:
case TGSI_OPCODE_POW:
case TGSI_OPCODE_RCP:
case TGSI_OPCODE_RSQ:
case TGSI_OPCODE_SCS:
return 0x1;
case TGSI_OPCODE_IF:
case TGSI_OPCODE_UIF:
return 0x1;
case TGSI_OPCODE_LIT:
return 0xb;
case TGSI_OPCODE_TEX2:
case TGSI_OPCODE_TXB2:
case TGSI_OPCODE_TXL2:
return (s == 0) ? 0xf : 0x3;
case TGSI_OPCODE_TEX:
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXD:
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
case TGSI_OPCODE_LODQ:
{
const struct tgsi_instruction_texture *tex = &insn->Texture;
 
assert(insn->Instruction.Texture);
 
mask = 0x7;
if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
insn->Instruction.Opcode != TGSI_OPCODE_TXD)
mask |= 0x8; /* bias, lod or proj */
 
switch (tex->Texture) {
case TGSI_TEXTURE_1D:
mask &= 0x9;
break;
case TGSI_TEXTURE_SHADOW1D:
mask &= 0xd;
break;
case TGSI_TEXTURE_1D_ARRAY:
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
mask &= 0xb;
break;
case TGSI_TEXTURE_CUBE_ARRAY:
case TGSI_TEXTURE_SHADOW2D_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE:
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
mask |= 0x8;
break;
default:
break;
}
}
return mask;
case TGSI_OPCODE_XPD:
{
unsigned int x = 0;
if (mask & 1) x |= 0x6;
if (mask & 2) x |= 0x5;
if (mask & 4) x |= 0x3;
return x;
}
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
case TGSI_OPCODE_D2F:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSEQ:
case TGSI_OPCODE_DSNE:
switch (util_bitcount(mask)) {
case 1: return 0x3;
case 2: return 0xf;
default:
assert(!"unexpected mask");
return 0xf;
}
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
case TGSI_OPCODE_F2D: {
unsigned int x = 0;
if ((mask & 0x3) == 0x3)
x |= 1;
if ((mask & 0xc) == 0xc)
x |= 2;
return x;
}
default:
break;
}
 
return mask;
}
 
// Collects the register's Absolute / Negate flags into an nv50_ir modifier.
// The chan argument is not used: the result is the same for all components.
nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
{
   nv50_ir::Modifier result(0);

   if (reg.Absolute) {
      result = result | nv50_ir::Modifier(NV50_IR_MOD_ABS);
   }
   if (reg.Negate) {
      result = result | nv50_ir::Modifier(NV50_IR_MOD_NEG);
   }

   return result;
}
 
// Maps a TGSI register file to the nv50_ir data file.
// SAMPLER, NULL and any file not listed here yield FILE_NULL.
static nv50_ir::DataFile translateFile(uint file)
{
   switch (file) {
   case TGSI_FILE_CONSTANT:
      return nv50_ir::FILE_MEMORY_CONST;
   case TGSI_FILE_INPUT:
      return nv50_ir::FILE_SHADER_INPUT;
   case TGSI_FILE_OUTPUT:
      return nv50_ir::FILE_SHADER_OUTPUT;
   case TGSI_FILE_TEMPORARY:
      return nv50_ir::FILE_GPR;
   case TGSI_FILE_ADDRESS:
      return nv50_ir::FILE_ADDRESS;
   case TGSI_FILE_PREDICATE:
      return nv50_ir::FILE_PREDICATE;
   case TGSI_FILE_IMMEDIATE:
      return nv50_ir::FILE_IMMEDIATE;
   case TGSI_FILE_SYSTEM_VALUE:
      return nv50_ir::FILE_SYSTEM_VALUE;
   case TGSI_FILE_RESOURCE:
      return nv50_ir::FILE_MEMORY_GLOBAL;
   case TGSI_FILE_SAMPLER:
   case TGSI_FILE_NULL:
   default:
      break;
   }
   return nv50_ir::FILE_NULL;
}
 
// Maps a TGSI system value semantic to the nv50_ir SVSemantic.
// Unknown semantics trip an assertion and fall back to SV_CLOCK.
static nv50_ir::SVSemantic translateSysVal(uint sysval)
{
   switch (sysval) {
   case TGSI_SEMANTIC_FACE:
      return nv50_ir::SV_FACE;
   case TGSI_SEMANTIC_PSIZE:
      return nv50_ir::SV_POINT_SIZE;
   case TGSI_SEMANTIC_PRIMID:
      return nv50_ir::SV_PRIMITIVE_ID;
   case TGSI_SEMANTIC_INSTANCEID:
      return nv50_ir::SV_INSTANCE_ID;
   case TGSI_SEMANTIC_VERTEXID:
      return nv50_ir::SV_VERTEX_ID;
   case TGSI_SEMANTIC_GRID_SIZE:
      return nv50_ir::SV_NCTAID;
   case TGSI_SEMANTIC_BLOCK_ID:
      return nv50_ir::SV_CTAID;
   case TGSI_SEMANTIC_BLOCK_SIZE:
      return nv50_ir::SV_NTID;
   case TGSI_SEMANTIC_THREAD_ID:
      return nv50_ir::SV_TID;
   case TGSI_SEMANTIC_SAMPLEID:
      return nv50_ir::SV_SAMPLE_INDEX;
   case TGSI_SEMANTIC_SAMPLEPOS:
      return nv50_ir::SV_SAMPLE_POS;
   case TGSI_SEMANTIC_SAMPLEMASK:
      return nv50_ir::SV_SAMPLE_MASK;
   case TGSI_SEMANTIC_INVOCATIONID:
      return nv50_ir::SV_INVOCATION_ID;
   default:
      break;
   }

   assert(0);
   return nv50_ir::SV_CLOCK;
}
 
#define NV50_IR_TEX_TARG_CASE(a, b) \
case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;
 
static nv50_ir::TexTarget translateTexture(uint tex)
{
switch (tex) {
NV50_IR_TEX_TARG_CASE(1D, 1D);
NV50_IR_TEX_TARG_CASE(2D, 2D);
NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS);
NV50_IR_TEX_TARG_CASE(3D, 3D);
NV50_IR_TEX_TARG_CASE(CUBE, CUBE);
NV50_IR_TEX_TARG_CASE(RECT, RECT);
NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);
NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);
NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY);
NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY);
NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);
NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);
NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);
NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);
NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW);
NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);
 
case TGSI_TEXTURE_UNKNOWN:
default:
assert(!"invalid texture target");
return nv50_ir::TEX_TARGET_2D;
}
}
 
// Derives the data type of the instruction's sources from its opcode.
// Opcodes not listed are treated as 32-bit float.
nv50_ir::DataType Instruction::inferSrcType() const
{
   nv50_ir::DataType ty = nv50_ir::TYPE_F32;

   switch (getOpcode()) {
   // unsigned 32-bit integer sources
   case TGSI_OPCODE_UIF:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_NOT:
   case TGSI_OPCODE_SHL:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_U2D:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMUL_HI:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
   case TGSI_OPCODE_UCMP:
   case TGSI_OPCODE_ATOMUADD:
   case TGSI_OPCODE_ATOMXCHG:
   case TGSI_OPCODE_ATOMCAS:
   case TGSI_OPCODE_ATOMAND:
   case TGSI_OPCODE_ATOMOR:
   case TGSI_OPCODE_ATOMXOR:
   case TGSI_OPCODE_ATOMUMIN:
   case TGSI_OPCODE_ATOMUMAX:
   case TGSI_OPCODE_UBFE:
   case TGSI_OPCODE_UMSB:
      ty = nv50_ir::TYPE_U32;
      break;

   // signed 32-bit integer sources
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_I2D:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMUL_HI:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_IABS:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISSG:
   case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version
   case TGSI_OPCODE_MOD:
   case TGSI_OPCODE_UARL:
   case TGSI_OPCODE_ATOMIMIN:
   case TGSI_OPCODE_ATOMIMAX:
   case TGSI_OPCODE_IBFE:
   case TGSI_OPCODE_IMSB:
      ty = nv50_ir::TYPE_S32;
      break;

   // 64-bit float sources
   case TGSI_OPCODE_D2F:
   case TGSI_OPCODE_D2I:
   case TGSI_OPCODE_D2U:
   case TGSI_OPCODE_DABS:
   case TGSI_OPCODE_DNEG:
   case TGSI_OPCODE_DADD:
   case TGSI_OPCODE_DMUL:
   case TGSI_OPCODE_DMAX:
   case TGSI_OPCODE_DMIN:
   case TGSI_OPCODE_DSLT:
   case TGSI_OPCODE_DSGE:
   case TGSI_OPCODE_DSEQ:
   case TGSI_OPCODE_DSNE:
   case TGSI_OPCODE_DRCP:
   case TGSI_OPCODE_DSQRT:
   case TGSI_OPCODE_DMAD:
   case TGSI_OPCODE_DFRAC:
   case TGSI_OPCODE_DRSQ:
   case TGSI_OPCODE_DTRUNC:
   case TGSI_OPCODE_DCEIL:
   case TGSI_OPCODE_DFLR:
   case TGSI_OPCODE_DROUND:
      ty = nv50_ir::TYPE_F64;
      break;

   default:
      break;
   }

   return ty;
}
 
// Derives the data type of the instruction's result from its opcode.
// Only conversions and float/double comparisons differ from the source
// type; every other opcode defers to inferSrcType().
nv50_ir::DataType Instruction::inferDstType() const
{
   switch (getOpcode()) {
   // conversions to unsigned, and comparisons of floats / doubles
   case TGSI_OPCODE_D2U:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_FSEQ:
   case TGSI_OPCODE_FSGE:
   case TGSI_OPCODE_FSLT:
   case TGSI_OPCODE_FSNE:
   case TGSI_OPCODE_DSEQ:
   case TGSI_OPCODE_DSGE:
   case TGSI_OPCODE_DSLT:
   case TGSI_OPCODE_DSNE:
      return nv50_ir::TYPE_U32;

   case TGSI_OPCODE_D2I:
   case TGSI_OPCODE_F2I:
      return nv50_ir::TYPE_S32;

   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_D2F:
      return nv50_ir::TYPE_F32;

   case TGSI_OPCODE_I2D:
   case TGSI_OPCODE_U2D:
   case TGSI_OPCODE_F2D:
      return nv50_ir::TYPE_F64;

   default:
      break;
   }

   return inferSrcType();
}
 
// Returns the condition code tested by a TGSI comparison opcode.
// Opcodes that are not comparisons yield CC_ALWAYS.
nv50_ir::CondCode Instruction::getSetCond() const
{
   nv50_ir::CondCode cc = nv50_ir::CC_ALWAYS;

   switch (getOpcode()) {
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_FSLT:
   case TGSI_OPCODE_DSLT:
      cc = nv50_ir::CC_LT;
      break;
   case TGSI_OPCODE_SLE:
      cc = nv50_ir::CC_LE;
      break;
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_FSGE:
   case TGSI_OPCODE_DSGE:
      cc = nv50_ir::CC_GE;
      break;
   case TGSI_OPCODE_SGT:
      cc = nv50_ir::CC_GT;
      break;
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_FSEQ:
   case TGSI_OPCODE_DSEQ:
      cc = nv50_ir::CC_EQ;
      break;
   // the float / double "not equal" variants use CC_NEU
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_FSNE:
   case TGSI_OPCODE_DSNE:
      cc = nv50_ir::CC_NEU;
      break;
   case TGSI_OPCODE_USNE:
      cc = nv50_ir::CC_NE;
      break;
   default:
      break;
   }

   return cc;
}
 
#define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b
 
static nv50_ir::operation translateOpcode(uint opcode)
{
switch (opcode) {
NV50_IR_OPCODE_CASE(ARL, SHL);
NV50_IR_OPCODE_CASE(MOV, MOV);
 
NV50_IR_OPCODE_CASE(RCP, RCP);
NV50_IR_OPCODE_CASE(RSQ, RSQ);
 
NV50_IR_OPCODE_CASE(MUL, MUL);
NV50_IR_OPCODE_CASE(ADD, ADD);
 
NV50_IR_OPCODE_CASE(MIN, MIN);
NV50_IR_OPCODE_CASE(MAX, MAX);
NV50_IR_OPCODE_CASE(SLT, SET);
NV50_IR_OPCODE_CASE(SGE, SET);
NV50_IR_OPCODE_CASE(MAD, MAD);
NV50_IR_OPCODE_CASE(SUB, SUB);
 
NV50_IR_OPCODE_CASE(FLR, FLOOR);
NV50_IR_OPCODE_CASE(ROUND, CVT);
NV50_IR_OPCODE_CASE(EX2, EX2);
NV50_IR_OPCODE_CASE(LG2, LG2);
NV50_IR_OPCODE_CASE(POW, POW);
 
NV50_IR_OPCODE_CASE(ABS, ABS);
 
NV50_IR_OPCODE_CASE(COS, COS);
NV50_IR_OPCODE_CASE(DDX, DFDX);
NV50_IR_OPCODE_CASE(DDX_FINE, DFDX);
NV50_IR_OPCODE_CASE(DDY, DFDY);
NV50_IR_OPCODE_CASE(DDY_FINE, DFDY);
NV50_IR_OPCODE_CASE(KILL, DISCARD);
 
NV50_IR_OPCODE_CASE(SEQ, SET);
NV50_IR_OPCODE_CASE(SGT, SET);
NV50_IR_OPCODE_CASE(SIN, SIN);
NV50_IR_OPCODE_CASE(SLE, SET);
NV50_IR_OPCODE_CASE(SNE, SET);
NV50_IR_OPCODE_CASE(TEX, TEX);
NV50_IR_OPCODE_CASE(TXD, TXD);
NV50_IR_OPCODE_CASE(TXP, TEX);
 
NV50_IR_OPCODE_CASE(CAL, CALL);
NV50_IR_OPCODE_CASE(RET, RET);
NV50_IR_OPCODE_CASE(CMP, SLCT);
 
NV50_IR_OPCODE_CASE(TXB, TXB);
 
NV50_IR_OPCODE_CASE(DIV, DIV);
 
NV50_IR_OPCODE_CASE(TXL, TXL);
 
NV50_IR_OPCODE_CASE(CEIL, CEIL);
NV50_IR_OPCODE_CASE(I2F, CVT);
NV50_IR_OPCODE_CASE(NOT, NOT);
NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
NV50_IR_OPCODE_CASE(SHL, SHL);
 
NV50_IR_OPCODE_CASE(AND, AND);
NV50_IR_OPCODE_CASE(OR, OR);
NV50_IR_OPCODE_CASE(MOD, MOD);
NV50_IR_OPCODE_CASE(XOR, XOR);
NV50_IR_OPCODE_CASE(SAD, SAD);
NV50_IR_OPCODE_CASE(TXF, TXF);
NV50_IR_OPCODE_CASE(TXQ, TXQ);
NV50_IR_OPCODE_CASE(TG4, TXG);
NV50_IR_OPCODE_CASE(LODQ, TXLQ);
 
NV50_IR_OPCODE_CASE(EMIT, EMIT);
NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);
 
NV50_IR_OPCODE_CASE(KILL_IF, DISCARD);
 
NV50_IR_OPCODE_CASE(F2I, CVT);
NV50_IR_OPCODE_CASE(FSEQ, SET);
NV50_IR_OPCODE_CASE(FSGE, SET);
NV50_IR_OPCODE_CASE(FSLT, SET);
NV50_IR_OPCODE_CASE(FSNE, SET);
NV50_IR_OPCODE_CASE(IDIV, DIV);
NV50_IR_OPCODE_CASE(IMAX, MAX);
NV50_IR_OPCODE_CASE(IMIN, MIN);
NV50_IR_OPCODE_CASE(IABS, ABS);
NV50_IR_OPCODE_CASE(INEG, NEG);
NV50_IR_OPCODE_CASE(ISGE, SET);
NV50_IR_OPCODE_CASE(ISHR, SHR);
NV50_IR_OPCODE_CASE(ISLT, SET);
NV50_IR_OPCODE_CASE(F2U, CVT);
NV50_IR_OPCODE_CASE(U2F, CVT);
NV50_IR_OPCODE_CASE(UADD, ADD);
NV50_IR_OPCODE_CASE(UDIV, DIV);
NV50_IR_OPCODE_CASE(UMAD, MAD);
NV50_IR_OPCODE_CASE(UMAX, MAX);
NV50_IR_OPCODE_CASE(UMIN, MIN);
NV50_IR_OPCODE_CASE(UMOD, MOD);
NV50_IR_OPCODE_CASE(UMUL, MUL);
NV50_IR_OPCODE_CASE(USEQ, SET);
NV50_IR_OPCODE_CASE(USGE, SET);
NV50_IR_OPCODE_CASE(USHR, SHR);
NV50_IR_OPCODE_CASE(USLT, SET);
NV50_IR_OPCODE_CASE(USNE, SET);
 
NV50_IR_OPCODE_CASE(DABS, ABS);
NV50_IR_OPCODE_CASE(DNEG, NEG);
NV50_IR_OPCODE_CASE(DADD, ADD);
NV50_IR_OPCODE_CASE(DMUL, MUL);
NV50_IR_OPCODE_CASE(DMAX, MAX);
NV50_IR_OPCODE_CASE(DMIN, MIN);
NV50_IR_OPCODE_CASE(DSLT, SET);
NV50_IR_OPCODE_CASE(DSGE, SET);
NV50_IR_OPCODE_CASE(DSEQ, SET);
NV50_IR_OPCODE_CASE(DSNE, SET);
NV50_IR_OPCODE_CASE(DRCP, RCP);
NV50_IR_OPCODE_CASE(DSQRT, SQRT);
NV50_IR_OPCODE_CASE(DMAD, MAD);
NV50_IR_OPCODE_CASE(D2I, CVT);
NV50_IR_OPCODE_CASE(D2U, CVT);
NV50_IR_OPCODE_CASE(I2D, CVT);
NV50_IR_OPCODE_CASE(U2D, CVT);
NV50_IR_OPCODE_CASE(DRSQ, RSQ);
NV50_IR_OPCODE_CASE(DTRUNC, TRUNC);
NV50_IR_OPCODE_CASE(DCEIL, CEIL);
NV50_IR_OPCODE_CASE(DFLR, FLOOR);
NV50_IR_OPCODE_CASE(DROUND, CVT);
 
NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
 
NV50_IR_OPCODE_CASE(SAMPLE, TEX);
NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
NV50_IR_OPCODE_CASE(SAMPLE_I, TXF);
NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF);
NV50_IR_OPCODE_CASE(GATHER4, TXG);
NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);
 
NV50_IR_OPCODE_CASE(ATOMUADD, ATOM);
NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM);
NV50_IR_OPCODE_CASE(ATOMCAS, ATOM);
NV50_IR_OPCODE_CASE(ATOMAND, ATOM);
NV50_IR_OPCODE_CASE(ATOMOR, ATOM);
NV50_IR_OPCODE_CASE(ATOMXOR, ATOM);
NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM);
NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM);
NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM);
NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM);
 
NV50_IR_OPCODE_CASE(TEX2, TEX);
NV50_IR_OPCODE_CASE(TXB2, TXB);
NV50_IR_OPCODE_CASE(TXL2, TXL);
 
NV50_IR_OPCODE_CASE(IBFE, EXTBF);
NV50_IR_OPCODE_CASE(UBFE, EXTBF);
NV50_IR_OPCODE_CASE(BFI, INSBF);
NV50_IR_OPCODE_CASE(BREV, EXTBF);
NV50_IR_OPCODE_CASE(POPC, POPCNT);
NV50_IR_OPCODE_CASE(LSB, BFIND);
NV50_IR_OPCODE_CASE(IMSB, BFIND);
NV50_IR_OPCODE_CASE(UMSB, BFIND);
 
NV50_IR_OPCODE_CASE(END, EXIT);
 
default:
return nv50_ir::OP_NOP;
}
}
 
// Returns the nv50_ir sub-operation that refines the operation chosen by
// translateOpcode() for fences, atomics and high multiplies; 0 otherwise.
static uint16_t opcodeToSubOp(uint opcode)
{
   switch (opcode) {
   // memory fences
   case TGSI_OPCODE_LFENCE:
      return NV50_IR_SUBOP_MEMBAR(L, GL);
   case TGSI_OPCODE_SFENCE:
      return NV50_IR_SUBOP_MEMBAR(S, GL);
   case TGSI_OPCODE_MFENCE:
      return NV50_IR_SUBOP_MEMBAR(M, GL);

   // atomics; signed and unsigned min/max share a sub-op
   case TGSI_OPCODE_ATOMUADD:
      return NV50_IR_SUBOP_ATOM_ADD;
   case TGSI_OPCODE_ATOMXCHG:
      return NV50_IR_SUBOP_ATOM_EXCH;
   case TGSI_OPCODE_ATOMCAS:
      return NV50_IR_SUBOP_ATOM_CAS;
   case TGSI_OPCODE_ATOMAND:
      return NV50_IR_SUBOP_ATOM_AND;
   case TGSI_OPCODE_ATOMOR:
      return NV50_IR_SUBOP_ATOM_OR;
   case TGSI_OPCODE_ATOMXOR:
      return NV50_IR_SUBOP_ATOM_XOR;
   case TGSI_OPCODE_ATOMUMIN:
   case TGSI_OPCODE_ATOMIMIN:
      return NV50_IR_SUBOP_ATOM_MIN;
   case TGSI_OPCODE_ATOMUMAX:
   case TGSI_OPCODE_ATOMIMAX:
      return NV50_IR_SUBOP_ATOM_MAX;

   case TGSI_OPCODE_IMUL_HI:
   case TGSI_OPCODE_UMUL_HI:
      return NV50_IR_SUBOP_MUL_HIGH;

   default:
      break;
   }
   return 0;
}
 
bool Instruction::checkDstSrcAliasing() const
{
if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory
return false;
 
for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {
if (insn->Src[s].Register.File == TGSI_FILE_NULL)
break;
if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&
insn->Src[s].Register.Index == insn->Dst[0].Register.Index)
return true;
}
return false;
}
 
// Holds a TGSI token stream together with the information gathered from
// scanning it. The results are stored both in this object and in the
// nv50_ir_prog_info passed to the constructor.
class Source
{
public:
Source(struct nv50_ir_prog_info *);
~Source();

public:
// Scans the token stream; returns false on failure.
bool scanSource();
// Number of registers in a TGSI file, i.e. highest used index + 1.
unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }

public:
struct tgsi_shader_info scan; // filled by tgsi_scan_shader() in scanSource()
struct tgsi_full_instruction *insns; // copies of all instructions; freed in the destructor
const struct tgsi_token *tokens; // taken from info->bin.source
struct nv50_ir_prog_info *info;

nv50_ir::DynArray tempArrays;
nv50_ir::DynArray immdArrays;

typedef nv50_ir::BuildUtil::Location Location;
// these registers are per-subroutine, cannot be used for parameter passing
std::set<Location> locals;

// set when a temporary is accessed with indirect addressing
bool mainTempsInLMem;

// output index of the clip vertex / position, or -1 if none was declared
int clipVertexOutput;

struct TextureView {
uint8_t target; // TGSI_TEXTURE_*
};
std::vector<TextureView> textureViews; // indexed by sampler view register

struct Resource {
uint8_t target; // TGSI_TEXTURE_*
bool raw;
uint8_t slot; // $surface index
};
std::vector<Resource> resources; // indexed by resource register

private:
int inferSysValDirection(unsigned sn) const;
bool scanDeclaration(const struct tgsi_full_declaration *);
bool scanInstruction(const struct tgsi_full_instruction *);
void scanProperty(const struct tgsi_full_property *);
void scanImmediate(const struct tgsi_full_immediate *);

inline bool isEdgeFlagPassthrough(const Instruction&) const;
};
 
// Binds the scanner to prog and picks up the TGSI tokens from
// prog->bin.source. With NV50_IR_DEBUG_BASIC set the tokens are dumped.
//
// insns is initialised to NULL here because the destructor tests and frees
// it; previously it stayed indeterminate until scanSource() had run.
// clipVertexOutput likewise starts at the "none" value scanSource() uses.
Source::Source(struct nv50_ir_prog_info *prog) : insns(NULL), info(prog)
{
   tokens = (const struct tgsi_token *)info->bin.source;

   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
      tgsi_dump(tokens, 0);

   mainTempsInLMem = FALSE;
   clipVertexOutput = -1;
}
 
// Releases the instruction copies and the immediate buffers that
// scanSource() allocated (the latter are stored in info->immd).
Source::~Source()
{
   if (insns) {
      FREE(insns);
   }

   if (info->immd.data) {
      FREE(info->immd.data);
   }
   if (info->immd.type) {
      FREE(info->immd.type);
   }
}
 
bool Source::scanSource()
{
unsigned insnCount = 0;
struct tgsi_parse_context parse;
 
tgsi_scan_shader(tokens, &scan);
 
insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *
sizeof(insns[0]));
if (!insns)
return false;
 
clipVertexOutput = -1;
 
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
 
info->immd.bufSize = 0;
 
info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;
info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
 
if (info->type == PIPE_SHADER_FRAGMENT) {
info->prop.fp.writesDepth = scan.writes_z;
info->prop.fp.usesDiscard = scan.uses_kill;
} else
if (info->type == PIPE_SHADER_GEOMETRY) {
info->prop.gp.instanceCount = 1; // default value
}
 
info->io.viewportId = -1;
 
info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
 
tgsi_parse_init(&parse, tokens);
while (!tgsi_parse_end_of_tokens(&parse)) {
tgsi_parse_token(&parse);
 
switch (parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
scanImmediate(&parse.FullToken.FullImmediate);
break;
case TGSI_TOKEN_TYPE_DECLARATION:
scanDeclaration(&parse.FullToken.FullDeclaration);
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
insns[insnCount++] = parse.FullToken.FullInstruction;
scanInstruction(&parse.FullToken.FullInstruction);
break;
case TGSI_TOKEN_TYPE_PROPERTY:
scanProperty(&parse.FullToken.FullProperty);
break;
default:
INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);
break;
}
}
tgsi_parse_free(&parse);
 
if (mainTempsInLMem)
info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
 
if (info->io.genUserClip > 0) {
info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1;
 
const unsigned int nOut = (info->io.genUserClip + 3) / 4;
 
for (unsigned int n = 0; n < nOut; ++n) {
unsigned int i = info->numOutputs++;
info->out[i].id = i;
info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
info->out[i].si = n;
info->out[i].mask = info->io.clipDistanceMask >> (n * 4);
}
}
 
return info->assignSlots(info) == 0;
}
 
// Records a TGSI property in info. Geometry shader properties copy the
// first data word; unknown properties are only reported through INFO().
void Source::scanProperty(const struct tgsi_full_property *prop)
{
   const unsigned name = prop->Property.PropertyName;

   switch (name) {
   // geometry shader settings
   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
      info->prop.gp.outputPrim = prop->u[0].Data;
      return;
   case TGSI_PROPERTY_GS_INPUT_PRIM:
      info->prop.gp.inputPrim = prop->u[0].Data;
      return;
   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
      info->prop.gp.maxVertices = prop->u[0].Data;
      return;
   case TGSI_PROPERTY_GS_INVOCATIONS:
      info->prop.gp.instanceCount = prop->u[0].Data;
      return;

   case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
      info->prop.fp.separateFragData = TRUE;
      return;

   case TGSI_PROPERTY_FS_COORD_ORIGIN:
   case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
      // we don't care
      return;

   case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
      info->io.genUserClip = -1;
      return;

   default:
      INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
      return;
   }
}
 
void Source::scanImmediate(const struct tgsi_full_immediate *imm)
{
const unsigned n = info->immd.count++;
 
assert(n < scan.immediate_count);
 
for (int c = 0; c < 4; ++c)
info->immd.data[n * 4 + c] = imm->u[c].Uint;
 
info->immd.type[n] = imm->Immediate.DataType;
}
 
// Returns the value stored in info->sv[].input for system value semantic
// sn: 1 for INSTANCEID / VERTEXID, 1 for LAYER / PRIMID in fragment
// shaders only, and 0 for everything else.
int Source::inferSysValDirection(unsigned sn) const
{
   int direction = 0;

   switch (sn) {
   case TGSI_SEMANTIC_INSTANCEID:
   case TGSI_SEMANTIC_VERTEXID:
      direction = 1;
      break;
   // with the block below disabled, LAYER shares the PRIMID handling
   case TGSI_SEMANTIC_LAYER:
#if 0
   case TGSI_SEMANTIC_VIEWPORTINDEX:
      direction = 0;
      break;
#endif
   case TGSI_SEMANTIC_PRIMID:
      if (info->type == PIPE_SHADER_FRAGMENT)
         direction = 1;
      break;
   default:
      break;
   }

   return direction;
}
 
bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
{
unsigned i, c;
unsigned sn = TGSI_SEMANTIC_GENERIC;
unsigned si = 0;
const unsigned first = decl->Range.First, last = decl->Range.Last;
 
if (decl->Declaration.Semantic) {
sn = decl->Semantic.Name;
si = decl->Semantic.Index;
}
 
if (decl->Declaration.Local) {
for (i = first; i <= last; ++i) {
for (c = 0; c < 4; ++c) {
locals.insert(
Location(decl->Declaration.File, decl->Dim.Index2D, i, c));
}
}
}
 
switch (decl->Declaration.File) {
case TGSI_FILE_INPUT:
if (info->type == PIPE_SHADER_VERTEX) {
// all vertex attributes are equal
for (i = first; i <= last; ++i) {
info->in[i].sn = TGSI_SEMANTIC_GENERIC;
info->in[i].si = i;
}
} else {
for (i = first; i <= last; ++i, ++si) {
info->in[i].id = i;
info->in[i].sn = sn;
info->in[i].si = si;
if (info->type == PIPE_SHADER_FRAGMENT) {
// translate interpolation mode
switch (decl->Interp.Interpolate) {
case TGSI_INTERPOLATE_CONSTANT:
info->in[i].flat = 1;
break;
case TGSI_INTERPOLATE_COLOR:
info->in[i].sc = 1;
break;
case TGSI_INTERPOLATE_LINEAR:
info->in[i].linear = 1;
break;
default:
break;
}
if (decl->Interp.Location || info->io.sampleInterp)
info->in[i].centroid = 1;
}
}
}
break;
case TGSI_FILE_OUTPUT:
for (i = first; i <= last; ++i, ++si) {
switch (sn) {
case TGSI_SEMANTIC_POSITION:
if (info->type == PIPE_SHADER_FRAGMENT)
info->io.fragDepth = i;
else
if (clipVertexOutput < 0)
clipVertexOutput = i;
break;
case TGSI_SEMANTIC_COLOR:
if (info->type == PIPE_SHADER_FRAGMENT)
info->prop.fp.numColourResults++;
break;
case TGSI_SEMANTIC_EDGEFLAG:
info->io.edgeFlagOut = i;
break;
case TGSI_SEMANTIC_CLIPVERTEX:
clipVertexOutput = i;
break;
case TGSI_SEMANTIC_CLIPDIST:
info->io.clipDistanceMask |=
decl->Declaration.UsageMask << (si * 4);
info->io.genUserClip = -1;
break;
case TGSI_SEMANTIC_SAMPLEMASK:
info->io.sampleMask = i;
break;
case TGSI_SEMANTIC_VIEWPORT_INDEX:
info->io.viewportId = i;
break;
default:
break;
}
info->out[i].id = i;
info->out[i].sn = sn;
info->out[i].si = si;
}
break;
case TGSI_FILE_SYSTEM_VALUE:
switch (sn) {
case TGSI_SEMANTIC_INSTANCEID:
info->io.instanceId = first;
break;
case TGSI_SEMANTIC_VERTEXID:
info->io.vertexId = first;
break;
default:
break;
}
for (i = first; i <= last; ++i, ++si) {
info->sv[i].sn = sn;
info->sv[i].si = si;
info->sv[i].input = inferSysValDirection(sn);
}
break;
case TGSI_FILE_RESOURCE:
for (i = first; i <= last; ++i) {
resources[i].target = decl->Resource.Resource;
resources[i].raw = decl->Resource.Raw;
resources[i].slot = i;
}
break;
case TGSI_FILE_SAMPLER_VIEW:
for (i = first; i <= last; ++i)
textureViews[i].target = decl->SamplerView.Resource;
break;
case TGSI_FILE_NULL:
case TGSI_FILE_TEMPORARY:
case TGSI_FILE_ADDRESS:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
case TGSI_FILE_PREDICATE:
case TGSI_FILE_SAMPLER:
break;
default:
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
return false;
}
return true;
}
 
// True if insn is a MOV from a shader input into the edge flag output
// recorded in info->io.edgeFlagOut.
inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
{
   if (insn.getOpcode() != TGSI_OPCODE_MOV)
      return false;
   if (insn.getDst(0).getIndex(0) != info->io.edgeFlagOut)
      return false;

   return insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
}
 
bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
{
Instruction insn(inst);
 
if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
info->numBarriers = 1;
 
if (insn.dstCount()) {
if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) {
Instruction::DstRegister dst = insn.getDst(0);
 
if (dst.isIndirect(0))
for (unsigned i = 0; i < info->numOutputs; ++i)
info->out[i].mask = 0xf;
else
info->out[dst.getIndex(0)].mask |= dst.getMask();
 
if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER ||
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX ||
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
info->out[dst.getIndex(0)].mask &= 1;
 
if (isEdgeFlagPassthrough(insn))
info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
} else
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
if (insn.getDst(0).isIndirect(0))
mainTempsInLMem = TRUE;
}
}
 
for (unsigned s = 0; s < insn.srcCount(); ++s) {
Instruction::SrcRegister src = insn.getSrc(s);
if (src.getFile() == TGSI_FILE_TEMPORARY) {
if (src.isIndirect(0))
mainTempsInLMem = TRUE;
} else
if (src.getFile() == TGSI_FILE_RESOURCE) {
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
}
if (src.getFile() != TGSI_FILE_INPUT)
continue;
unsigned mask = insn.srcMask(s);
 
if (src.isIndirect(0)) {
for (unsigned i = 0; i < info->numInputs; ++i)
info->in[i].mask = 0xf;
} else {
const int i = src.getIndex(0);
for (unsigned c = 0; c < 4; ++c) {
if (!(mask & (1 << c)))
continue;
int k = src.getSwizzle(c);
if (k <= TGSI_SWIZZLE_W)
info->in[i].mask |= 1 << k;
}
switch (info->in[i].sn) {
case TGSI_SEMANTIC_PSIZE:
case TGSI_SEMANTIC_PRIMID:
case TGSI_SEMANTIC_FOG:
info->in[i].mask &= 0x1;
break;
case TGSI_SEMANTIC_PCOORD:
info->in[i].mask &= 0x3;
break;
default:
break;
}
}
}
return true;
}
 
// Determines the texture target for source s. If the source is a resource
// or sampler view register, the target recorded for it in code is used;
// otherwise the target encoded in the instruction itself.
nv50_ir::TexInstruction::Target
Instruction::getTexture(const tgsi::Source *code, int s) const
{
   // XXX: indirect access
   const SrcRegister src = getSrc(s);
   const uint file = src.getFile();

   if (file == TGSI_FILE_RESOURCE) {
      const unsigned int r = src.getIndex(0);
      return translateTexture(code->resources.at(r).target);
   }
   if (file == TGSI_FILE_SAMPLER_VIEW) {
      const unsigned int r = src.getIndex(0);
      return translateTexture(code->textureViews.at(r).target);
   }

   return translateTexture(insn->Texture.Texture);
}
 
} // namespace tgsi
 
namespace {
 
using namespace nv50_ir;
 
// Builds on BuildUtil; constructed from a Program and a scanned
// tgsi::Source. Holds the per-conversion state (current instruction,
// register arrays, control flow stacks) used by the member functions
// declared below; their definitions follow later in the file.
class Converter : public BuildUtil
{
public:
Converter(Program *, const tgsi::Source *);
~Converter();

bool run();

private:
// A Function together with the ValueMap belonging to it.
struct Subroutine
{
Subroutine(Function *f) : f(f) { }
Function *f;
ValueMap values;
};

Value *shiftAddress(Value *);
Value *getVertexBase(int s);
DataArray *getArrayForFile(unsigned file, int idx);
// s / d select a source / destination of the current instruction,
// c the component.
Value *fetchSrc(int s, int c);
Value *acquireDst(int d, int c);
void storeDst(int d, int c, Value *);

Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
void storeDst(const tgsi::Instruction::DstRegister dst, int c,
Value *val, Value *ptr);

Value *applySrcMod(Value *, int s, int c);

Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);

bool handleInstruction(const struct tgsi_full_instruction *);
void exportOutputs();
inline Subroutine *getSubroutine(unsigned ip);
inline Subroutine *getSubroutine(Function *);
inline bool isEndOfSubroutine(uint ip);

void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);

// R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)
void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
void handleTXF(Value *dst0[4], int R, int L_M);
void handleTXQ(Value *dst0[4], enum TexQuery);
void handleLIT(Value *dst0[4]);
void handleUserClipPlanes();

Symbol *getResourceBase(int r);
void getResourceCoords(std::vector<Value *>&, int r, int s);

void handleLOAD(Value *dst0[4]);
void handleSTORE();
void handleATOM(Value *dst0[4], DataType, uint16_t subOp);

void handleINTERP(Value *dst0[4]);

Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);

void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);

Value *buildDot(int dim);

// Pass operating on functions only: the BasicBlock visitor returns false
// without doing any work. Keeps a reference to the owning Converter.
class BindArgumentsPass : public Pass {
public:
BindArgumentsPass(Converter &conv) : conv(conv) { }

private:
Converter &conv;
Subroutine *sub;

inline const Location *getValueLocation(Subroutine *, Value *);

template<typename T> inline void
updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),
T (Function::*proto));

template<typename T> inline void
updatePrototype(BitSet *set, void (Function::*updateSet)(),
T (Function::*proto));

protected:
bool visit(Function *);
bool visit(BasicBlock *bb) { return false; }
};

private:
const tgsi::Source *code;
const struct nv50_ir_prog_info *info;

// Subroutines keyed by an unsigned value (presumably the instruction
// pointer of their first instruction -- confirm against getSubroutine),
// plus the one currently in use.
struct {
std::map<unsigned, Subroutine> map;
Subroutine *cur;
} sub;

uint ip; // instruction pointer

tgsi::Instruction tgsi;

DataType dstTy;
DataType srcTy;

DataArray tData; // TGSI_FILE_TEMPORARY
DataArray aData; // TGSI_FILE_ADDRESS
DataArray pData; // TGSI_FILE_PREDICATE
DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)

Value *zero;
Value *fragCoord[4];
Value *clipVtx[4];

Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
uint8_t vtxBaseValid;

Stack condBBs; // fork BB, then else clause BB
Stack joinBBs; // fork BB, for inserting join ops on ENDIF
Stack loopBBs; // loop headers
Stack breakBBs; // end of / after loop

Value *viewport;
};
 
// Build the Symbol addressing channel @c of the TGSI source register @src.
// The channel is first routed through the register's swizzle; the fallback
// byte address is 16 bytes per register index plus 4 bytes per component.
Symbol *
Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
{
   const int comp = src.getSwizzle(c);
   const int dimIdx = src.is2D() ? src.getIndex(1) : 0;
   const int regIdx = src.getIndex(0);

   /* TODO: Use Array ID when it's available for the index */
   return makeSym(src.getFile(), dimIdx, regIdx, comp,
                  regIdx * 16 + comp * 4);
}
 
// Build the Symbol addressing channel @c of the TGSI destination register
// @dst. The fallback byte address is 16 bytes per register index plus
// 4 bytes per channel.
Symbol *
Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
{
   const int dimIdx = dst.is2D() ? dst.getIndex(1) : 0;
   const int regIdx = dst.getIndex(0);

   /* TODO: Use Array ID when it's available for the index */
   return makeSym(dst.getFile(), dimIdx, regIdx, c,
                  regIdx * 16 + c * 4);
}
 
// Create a Symbol in the nv50 IR file corresponding to @tgsiFile.
//
// @fileIdx becomes the symbol's fileIndex. When @idx is non-negative, shader
// inputs and outputs take their offset from the slot assigned in the program
// info, and system values are bound by semantic name; every other case uses
// the caller supplied byte @address.
Symbol *
Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
{
   Symbol *const sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));
   sym->reg.fileIndex = fileIdx;

   // without a register index only the raw address is usable
   if (idx < 0) {
      sym->setOffset(address);
      return sym;
   }

   switch (sym->reg.file) {
   case FILE_SHADER_INPUT:
      sym->setOffset(info->in[idx].slot[c] * 4);
      break;
   case FILE_SHADER_OUTPUT:
      sym->setOffset(info->out[idx].slot[c] * 4);
      break;
   case FILE_SYSTEM_VALUE:
      sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
      break;
   default:
      sym->setOffset(address);
      break;
   }
   return sym;
}
 
static inline uint8_t
translateInterpMode(const struct nv50_ir_varying *var, operation& op)
{
uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
 
if (var->flat)
mode = NV50_IR_INTERP_FLAT;
else
if (var->linear)
mode = NV50_IR_INTERP_LINEAR;
else
if (var->sc)
mode = NV50_IR_INTERP_SC;
 
op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
? OP_PINTERP : OP_LINTERP;
 
if (var->centroid)
mode |= NV50_IR_INTERP_CENTROID;
 
return mode;
}
 
// Emit an interpolation instruction for channel @c of input @src and return
// its result value. @ptr, if non-NULL, is attached as indirect address.
Value *
Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
   // XXX: no way to know interpolation mode if we don't know what's accessed
   const int varying = ptr ? 0 : src.getIndex(0);

   operation interpOp;
   const uint8_t interpMode = translateInterpMode(&info->in[varying],
                                                  interpOp);

   Instruction *const interp = new_Instruction(func, interpOp, TYPE_F32);

   interp->setDef(0, getScratch());
   interp->setSrc(0, srcToSym(src, c));
   // perspective interpolation takes fragCoord[3] as its second source
   if (interpOp == OP_PINTERP)
      interp->setSrc(1, fragCoord[3]);
   if (ptr)
      interp->setIndirect(0, 0, ptr);

   interp->setInterpolate(interpMode);

   bb->insertTail(interp);
   return interp->getDef(0);
}
 
// Apply the abs / neg modifiers of channel @c of TGSI source @s to @val.
// Each requested modifier becomes an explicit op writing a fresh scratch
// value; abs is applied before neg. Returns the (possibly new) value.
Value *
Converter::applySrcMod(Value *val, int s, int c)
{
   Modifier mod = tgsi.getSrc(s).getMod(c);
   DataType type = tgsi.inferSrcType();
   Value *res = val;

   if (mod & Modifier(NV50_IR_MOD_ABS))
      res = mkOp1v(OP_ABS, type, getScratch(), res);

   if (mod & Modifier(NV50_IR_MOD_NEG))
      res = mkOp1v(OP_NEG, type, getScratch(), res);

   return res;
}
 
// Return the vertex base address (OP_PFETCH result) for TGSI source @s,
// creating it on first use and caching it in vtxBase[] afterwards.
// vtxBaseValid holds one "already computed" bit per source.
Value *
Converter::getVertexBase(int s)
{
   assert(s < 5);

   const int validBit = 1 << s;
   if (vtxBaseValid & validBit)
      return vtxBase[s];

   const int index = tgsi.getSrc(s).getIndex(1);
   Value *rel = NULL;
   if (tgsi.getSrc(s).isIndirect(1))
      rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);

   vtxBaseValid |= validBit;
   vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
                       mkImm(index), rel);
   return vtxBase[s];
}
 
// Fetch channel @c of source operand @s of the current TGSI instruction,
// resolving indirect addressing in both dimensions and applying the
// operand's source modifiers.
Value *
Converter::fetchSrc(int s, int c)
{
   tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);

   // first dimension: relative address register, if any
   Value *addr = NULL;
   if (src.isIndirect(0))
      addr = fetchSrc(src.getIndirect(0), 0, NULL);

   // second dimension: only inputs and constants carry one
   Value *dimRel = NULL;
   if (src.is2D()) {
      const unsigned file = src.getFile();

      if (file == TGSI_FILE_INPUT) {
         dimRel = getVertexBase(s);
      } else if (file == TGSI_FILE_CONSTANT) {
         // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
         if (src.isIndirect(1))
            dimRel = fetchSrc(src.getIndirect(1), 0, NULL);
      }
   }

   Value *const fetched = fetchSrc(src, c, addr);

   if (dimRel)
      fetched->getInsn()->setIndirect(0, 1, dimRel);

   return applySrcMod(fetched, s, c);
}
 
// Map a TGSI register file to the DataArray backing it. Outputs are only
// kept in an array for fragment programs. Any other file trips an assertion
// and yields NULL. @idx is currently unused.
Converter::DataArray *
Converter::getArrayForFile(unsigned file, int idx)
{
   if (file == TGSI_FILE_TEMPORARY)
      return &tData;
   if (file == TGSI_FILE_PREDICATE)
      return &pData;
   if (file == TGSI_FILE_ADDRESS)
      return &aData;
   if (file == TGSI_FILE_OUTPUT) {
      assert(prog->getType() == Program::TYPE_FRAGMENT);
      return &oData;
   }

   assert(!"invalid/unhandled TGSI source file");
   return NULL;
}
 
// Scale an index by 16 (shift left by 4) into a new address-file value.
// A NULL index stays NULL.
Value *
Converter::shiftAddress(Value *index)
{
   if (index)
      return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
   return NULL;
}
 
// Load channel @c of @src (after swizzling) and return the resulting value.
// @ptr is the optional, still unscaled, indirect index for dimension 0.
// Source modifiers are NOT applied here.
Value *
Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
   const int idx2d = src.is2D() ? src.getIndex(1) : 0;
   const int idx = src.getIndex(0);
   const int swz = src.getSwizzle(c);
   const unsigned file = src.getFile();

   if (file == TGSI_FILE_IMMEDIATE) {
      assert(!ptr);
      return loadImm(NULL, info->immd.data[idx * 4 + swz]);
   }

   if (file == TGSI_FILE_CONSTANT)
      return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));

   if (file == TGSI_FILE_OUTPUT) {
      assert(!"load from output file");
      return NULL;
   }

   if (file == TGSI_FILE_SYSTEM_VALUE) {
      assert(!ptr);
      return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
   }

   if (file != TGSI_FILE_INPUT) {
      // everything else lives in one of the register arrays
      return getArrayForFile(file, idx2d)->load(
         sub.cur->values, idx, swz, shiftAddress(ptr));
   }

   // --- TGSI_FILE_INPUT ---
   if (prog->getType() == Program::TYPE_FRAGMENT) {
      if (!ptr) {
         // don't load masked inputs, won't be assigned a slot
         if (!(info->in[idx].mask & (1 << swz)))
            return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
         if (info->in[idx].sn == TGSI_SEMANTIC_FACE)
            return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
      }
      return interpolate(src, c, shiftAddress(ptr));
   }

   if (prog->getType() == Program::TYPE_GEOMETRY) {
      if (!ptr) {
         if (info->in[idx].sn == TGSI_SEMANTIC_PRIMID)
            return mkOp1v(OP_RDSV, TYPE_U32, getSSA(),
                          mkSysVal(SV_PRIMITIVE_ID, 0));
      } else {
         // XXX: This is going to be a problem with scalar arrays, i.e. when
         // we cannot assume that the address is given in units of vec4.
         //
         // nv50 and nvc0 need different things here, so let the lowering
         // passes decide what to do with the address
         return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
      }
   }

   return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
}
 
// Obtain the value that channel @c of destination @d should be written to.
// Returns NULL for masked channels and for resource destinations. Indirect
// destinations, system values and non-fragment outputs get a scratch value
// (the real store happens later); everything else comes from the register
// array of the destination's file.
Value *
Converter::acquireDst(int d, int c)
{
   const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
   const unsigned file = dst.getFile();
   const int regIdx = dst.getIndex(0);
   const int dimIdx = dst.is2D() ? dst.getIndex(1) : 0;

   if (dst.isMasked(c))
      return NULL;
   if (file == TGSI_FILE_RESOURCE)
      return NULL;

   const bool viaScratch =
      dst.isIndirect(0) ||
      file == TGSI_FILE_SYSTEM_VALUE ||
      (file == TGSI_FILE_OUTPUT &&
       prog->getType() != Program::TYPE_FRAGMENT);
   if (viaScratch)
      return getScratch();

   return getArrayForFile(file, dimIdx)->acquire(sub.cur->values, regIdx, c);
}
 
void
Converter::storeDst(int d, int c, Value *val)
{
const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
 
switch (tgsi.getSaturate()) {
case TGSI_SAT_NONE:
break;
case TGSI_SAT_ZERO_ONE:
mkOp1(OP_SAT, dstTy, val, val);
break;
case TGSI_SAT_MINUS_PLUS_ONE:
mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f));
mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f));
break;
default:
assert(!"invalid saturation mode");
break;
}
 
Value *ptr = NULL;
if (dst.isIndirect(0))
ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL));
 
if (info->io.genUserClip > 0 &&
dst.getFile() == TGSI_FILE_OUTPUT &&
!dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {
mkMov(clipVtx[c], val);
val = clipVtx[c];
}
 
storeDst(dst, c, val, ptr);
}
 
// Store @val into channel @c of @dst, with @ptr as optional indirect address.
//  - system values are written with OP_WRSV,
//  - outputs of non-fragment programs are exported (or, for the viewport
//    index when a viewport scratch register exists, saved into it),
//  - temporaries, predicates, address registers and fragment outputs go
//    through their register array.
void
Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
                    Value *val, Value *ptr)
{
   const unsigned file = dst.getFile();
   const int regIdx = dst.getIndex(0);
   const int dimIdx = dst.is2D() ? dst.getIndex(1) : 0;

   if (file == TGSI_FILE_SYSTEM_VALUE) {
      assert(!ptr);
      mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
      return;
   }

   if (file == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {
      // direct writes to components without a slot are dropped
      if (!ptr && !(info->out[regIdx].mask & (1 << c)))
         return;

      /* Save the viewport index into a scratch register so that it can be
         exported at EMIT time */
      if (info->out[regIdx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX &&
          viewport != NULL)
         mkOp1(OP_MOV, TYPE_U32, viewport, val);
      else
         mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
      return;
   }

   switch (file) {
   case TGSI_FILE_TEMPORARY:
   case TGSI_FILE_PREDICATE:
   case TGSI_FILE_ADDRESS:
   case TGSI_FILE_OUTPUT:
      getArrayForFile(file, dimIdx)->store(sub.cur->values, regIdx, c, ptr, val);
      break;
   default:
      assert(!"invalid dst file");
      break;
   }
}
 
// Loop 'chan' over 0..3 and execute the statement following the macro only
// for channels that are not masked in destination 'd' of 'inst'.
// 'chan' must be a variable declared by the caller.
// The expansion is an unbraced for + if, so an 'else' written directly after
// the controlled statement would bind to the macro's hidden 'if'.
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst) \
for (chan = 0; chan < 4; ++chan) \
if (!inst.getDst(d).isMasked(chan))
 
// Compute the @dim component dot product of sources 0 and 1 of the current
// instruction: one MUL for component 0 followed by a MAD per further
// component, all accumulating into the same scratch value, which is returned.
Value *
Converter::buildDot(int dim)
{
   assert(dim > 0);

   Value *lhs = fetchSrc(0, 0);
   Value *rhs = fetchSrc(1, 0);
   Value *acc = getScratch();

   mkOp2(OP_MUL, TYPE_F32, acc, lhs, rhs);

   int c = 1;
   while (c < dim) {
      lhs = fetchSrc(0, c);
      rhs = fetchSrc(1, c);
      mkOp3(OP_MAD, TYPE_F32, acc, lhs, rhs, acc);
      ++c;
   }
   return acc;
}
 
// Mark @conv as the convergence point of the control flow forked in @fork:
// a fixed OP_JOIN is placed at the head of @conv, and an OP_JOINAT targeting
// @conv is inserted before the exit instruction of @fork (and remembered in
// fork->joinAt).
void
Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
{
   FlowInstruction *const joinOp = new_FlowInstruction(func, OP_JOIN, NULL);
   joinOp->fixed = 1;
   conv->insertHead(joinOp);

   FlowInstruction *const joinAtOp =
      new_FlowInstruction(func, OP_JOINAT, conv);
   fork->joinAt = joinAtOp;
   fork->insertBefore(fork->getExit(), joinAtOp);
}
 
// Bind texture resource and sampler to @tex.
//
// @R / @S are the TGSI source operand indices of the resource and of the
// sampler (a negative value selects index 0). If either operand is
// indirectly addressed, its address value is appended as source @s of @tex,
// the slot is recorded in tex.rIndirectSrc / tex.sIndirectSrc, and @s is
// advanced.
void
Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
{
   const unsigned resIdx = (R >= 0) ? tgsi.getSrc(R).getIndex(0) : 0;
   const unsigned smpIdx = (S >= 0) ? tgsi.getSrc(S).getIndex(0) : 0;

   tex->setTexture(tgsi.getTexture(code, R), resIdx, smpIdx);

   tgsi::Instruction::SrcRegister resReg = tgsi.getSrc(R);
   if (resReg.isIndirect(0)) {
      tex->tex.rIndirectSrc = s;
      tex->setSrc(s, fetchSrc(resReg.getIndirect(0), 0, NULL));
      ++s;
   }

   if (S < 0)
      return;

   tgsi::Instruction::SrcRegister smpReg = tgsi.getSrc(S);
   if (smpReg.isIndirect(0)) {
      tex->tex.sIndirectSrc = s;
      tex->setSrc(s, fetchSrc(smpReg.getIndirect(0), 0, NULL));
      ++s;
   }
}
 
// Emit an OP_TXQ texture query for the current TGSI instruction.
//
// @dst0:  per-channel destination values; NULL entries are not written and
//         are left out of the instruction's def list and channel mask.
// @query: kind of query to perform.
//
// Source 0 of the instruction is the mip level (src0.x of the TGSI
// instruction); the texture is TGSI source operand 1.
void
Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
{
   TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
   tex->tex.query = query;
   unsigned int c, d;

   for (d = 0, c = 0; c < 4; ++c) {
      if (!dst0[c])
         continue;
      tex->tex.mask |= 1 << c;
      tex->setDef(d++, dst0[c]);
   }
   tex->setSrc(0, fetchSrc(0, 0)); // mip level

   // setTexRS() appends an indirect texture handle at source slot 's'.
   // Slot 0 already holds the mip level, so start at 1; starting at 0 would
   // let an indirectly addressed texture overwrite the mip level source.
   unsigned int s = 1;
   setTexRS(tex, s, 1, -1);

   bb->insertTail(tex);
}
 
// Produce projected texture coordinates: for every component selected in
// 'mask', dst[c] receives src[c] divided by the w component of TGSI source 0.
// Components not selected in 'mask' are left untouched in dst.
//
// Coordinates that come straight from an OP_PINTERP are re-interpolated with
// the reciprocal of the projection value as their second source instead of
// being multiplied afterwards.
void
Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
{
Value *proj = fetchSrc(0, 3);
Instruction *insn = proj->getUniqueInsn();
int c;

// If the projection value itself was perspective-interpolated, clone its
// instruction as a linear interpolation (keeping the sample mode, dropping
// source 1) and use that result instead.
if (insn->op == OP_PINTERP) {
bb->insertTail(insn = cloneForward(func, insn));
insn->op = OP_LINTERP;
insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
insn->setSrc(1, NULL);
proj = insn->getDef(0);
}
proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);

// First pass: coordinates produced by OP_PINTERP are cloned with 'proj' as
// source 1 and removed from 'mask'.
for (c = 0; c < 4; ++c) {
if (!(mask & (1 << c)))
continue;
if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
continue;
mask &= ~(1 << c);

bb->insertTail(insn = cloneForward(func, insn));
insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
insn->setSrc(1, proj);
dst[c] = insn->getDef(0);
}
if (!mask)
return;

// Second pass: whatever is left is multiplied by the reciprocal of a
// freshly fetched src0.w.
proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));

for (c = 0; c < 4; ++c)
if (mask & (1 << c))
dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
}
 
// order of nv50 ir sources: x y z layer lod/bias shadow
// order of TGSI TEX sources: x y z layer shadow lod/bias
// lowering will finally set the hw specific order (like array first on nvc0)
//
// Emit the texture instruction for the current TGSI texturing opcode.
//
// dst:    per-channel destinations; NULL entries are not written.
// R, S:   TGSI source operand indices of the texture resource and sampler.
// L, C:   location of the lod/bias and of the shadow compare value, packed
//         as (source operand << 4) | component. C == 0x0f asks for the
//         compare component to be guessed from the target's argument count.
// Dx, Dy: location of the first derivative component for OP_TXD, packed the
//         same way.
void
Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
{
Value *val;
Value *arg[4], *src[8];
Value *lod = NULL, *shd = NULL;
unsigned int s, c, d;
TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());

TexInstruction::Target tgt = tgsi.getTexture(code, R);

// coordinates: the first getArgCount() components of TGSI source 0
for (s = 0; s < tgt.getArgCount(); ++s)
arg[s] = src[s] = fetchSrc(0, s);

if (texi->op == OP_TXL || texi->op == OP_TXB)
lod = fetchSrc(L >> 4, L & 3);

if (C == 0x0f)
C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src

// TG4 on shadow cube arrays takes the compare value from src1.x
if (tgsi.getOpcode() == TGSI_OPCODE_TG4 &&
tgt == TEX_TARGET_CUBE_ARRAY_SHADOW)
shd = fetchSrc(1, 0);
else if (tgt.isShadow())
shd = fetchSrc(C >> 4, C & 3);

if (texi->op == OP_TXD) {
for (c = 0; c < tgt.getDim(); ++c) {
texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
}
}

// cube textures don't care about projection value, it's divided out
if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
unsigned int n = tgt.getDim();
// the compare value is projected too: append it to the argument list
if (shd) {
arg[n] = shd;
++n;
assert(tgt.getDim() == tgt.getArgCount());
}
loadProjTexCoords(src, arg, (1 << n) - 1);
if (shd)
shd = src[n - 1];
}

// cube maps: scale the coordinates by 1 / max(|x|, |y|, |z|)
if (tgt.isCube()) {
for (c = 0; c < 3; ++c)
src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
val = getScratch();
mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
mkOp1(OP_RCP, TYPE_F32, val, val);
for (c = 0; c < 3; ++c)
src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
}

// defs are packed: only written channels get a def, tex.mask records which
for (c = 0, d = 0; c < 4; ++c) {
if (dst[c]) {
texi->setDef(d++, dst[c]);
texi->tex.mask |= 1 << c;
} else {
// NOTE: maybe hook up def too, for CSE
}
}
// sources: coordinates, then lod/bias, then the shadow compare value;
// setTexRS() may append indirect resource/sampler handles after them
for (s = 0; s < tgt.getArgCount(); ++s)
texi->setSrc(s, src[s]);
if (lod)
texi->setSrc(s++, lod);
if (shd)
texi->setSrc(s++, shd);

setTexRS(texi, s, R, S);

if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
texi->tex.levelZero = true;
// non-shadow TG4: the component to gather is read from src1.x
if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow())
texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info);

// texel offsets: three components per offset, 's' is reused as offset index
texi->tex.useOffsets = tgsi.getNumTexOffsets();
for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
for (c = 0; c < 3; ++c) {
texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
texi->offset[s][c].setInsn(texi);
}
}

bb->insertTail(texi);
}
 
// Emit a texel fetch.
//
// 1st source: xyz = coordinates, w = lod/sample
// 2nd source: offset
//
// @R is the TGSI source operand holding the texture, @L_M the packed
// (source operand << 4) | component location of the lod or sample index.
void
Converter::handleTXF(Value *dst[4], int R, int L_M)
{
   TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
   texi->tex.target = tgsi.getTexture(code, R);

   const int ms = texi->tex.target.isMS() ? 1 : 0;
   texi->tex.levelZero = ms; /* MS textures don't have mip-maps */

   // pack the written channels into consecutive defs
   unsigned int nDefs = 0;
   for (unsigned int chan = 0; chan < 4; ++chan) {
      if (!dst[chan])
         continue;
      texi->setDef(nDefs++, dst[chan]);
      texi->tex.mask |= 1 << chan;
   }

   // coordinates (without the sample index for MS targets) ...
   unsigned int s = 0;
   while (s < (texi->tex.target.getArgCount() - ms)) {
      texi->setSrc(s, fetchSrc(0, s));
      ++s;
   }
   // ... followed by lod or ms
   texi->setSrc(s, fetchSrc(L_M >> 4, L_M & 3));
   ++s;

   setTexRS(texi, s, R, -1);

   texi->tex.useOffsets = tgsi.getNumTexOffsets();
   for (unsigned int o = 0; o < tgsi.getNumTexOffsets(); ++o) {
      for (unsigned int k = 0; k < 3; ++k) {
         texi->offset[o][k].set(fetchSrc(tgsi.getTexOffset(o), k, NULL));
         texi->offset[o][k].setInsn(texi);
      }
   }

   bb->insertTail(texi);
}
 
// Expand TGSI LIT:
//   dst.x = 1
//   dst.y = max(src.x, 0)
//   dst.z = (max(src.x, 0) > 0) selects between
//           pow(max(src.y, 0), clamp(src.w, -127.999999, 127.999999)) and 0
//           (via OP_SLCT with CC_GT)
//   dst.w = 1
// Only channels enabled in the destination write mask are produced.
void
Converter::handleLIT(Value *dst0[4])
{
   const unsigned int mask = tgsi.getDst(0).getMask();
   const bool writeX = (mask & (1 << 0)) != 0;
   const bool writeY = (mask & (1 << 1)) != 0;
   const bool writeZ = (mask & (1 << 2)) != 0;
   const bool writeW = (mask & (1 << 3)) != 0;

   if (writeX)
      loadImm(dst0[0], 1.0f);

   if (writeW)
      loadImm(dst0[3], 1.0f);

   // max(src.x, 0) is needed by both y and z
   Value *clampedX = NULL;
   if (writeY || writeZ) {
      clampedX = getScratch();
      mkOp2(OP_MAX, TYPE_F32, clampedX, fetchSrc(0, 0), zero);
      if (writeY)
         mkMov(dst0[1], clampedX);
   }

   if (!writeZ)
      return;

   Value *srcY = fetchSrc(0, 1);
   Value *srcW = fetchSrc(0, 3);
   Value *base = getScratch();
   Value *expo = getScratch();

   Value *pos128 = loadImm(NULL, +127.999999f);
   Value *neg128 = loadImm(NULL, -127.999999f);

   mkOp2(OP_MAX, TYPE_F32, base, srcY, zero);
   mkOp2(OP_MAX, TYPE_F32, expo, srcW, neg128);
   mkOp2(OP_MIN, TYPE_F32, expo, expo, pos128);
   mkOp2(OP_POW, TYPE_F32, expo, base, expo);

   mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, expo, zero, clampedX);
}
 
// True if @r is one of the reserved resource indices (global, local,
// private or input memory) rather than a regular resource.
static inline bool
isResourceSpecial(const int r)
{
   switch (r) {
   case TGSI_RESOURCE_GLOBAL:
   case TGSI_RESOURCE_LOCAL:
   case TGSI_RESOURCE_PRIVATE:
   case TGSI_RESOURCE_INPUT:
      return true;
   default:
      return false;
   }
}
 
// True if resource @r is accessed raw: either it is one of the special
// memory resources, or its declaration in @code is flagged raw.
static inline bool
isResourceRaw(const tgsi::Source *code, const int r)
{
   if (isResourceSpecial(r))
      return true;
   return code->resources[r].raw;
}
 
// Texture target used to address resource @r: the special memory resources
// are treated as buffers, regular ones use their declared target.
static inline nv50_ir::TexTarget
getResourceTarget(const tgsi::Source *code, int r)
{
   if (!isResourceSpecial(r))
      return tgsi::translateTexture(code->resources.at(r).target);
   return nv50_ir::TEX_TARGET_BUFFER;
}
 
// Create the base Symbol for accesses to resource @r.
//  - GLOBAL:  global memory, file index 15
//  - LOCAL:   shared memory at the compute program's shared offset
//  - PRIVATE: local memory at info->bin.tlsSpace
//  - INPUT:   shader input space at the compute program's input offset
//  - others:  global memory, file index taken from the resource's slot
// LOCAL and INPUT are asserted to occur in compute programs only.
Symbol *
Converter::getResourceBase(const int r)
{
   if (r == TGSI_RESOURCE_GLOBAL)
      return new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);

   if (r == TGSI_RESOURCE_LOCAL) {
      assert(prog->getType() == Program::TYPE_COMPUTE);
      return mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
                      info->prop.cp.sharedOffset);
   }

   if (r == TGSI_RESOURCE_PRIVATE)
      return mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
                      info->bin.tlsSpace);

   if (r == TGSI_RESOURCE_INPUT) {
      assert(prog->getType() == Program::TYPE_COMPUTE);
      return mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
                      info->prop.cp.inputOffset);
   }

   return new_Symbol(prog,
                     nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
}
 
// Append the coordinates used to address resource @r to @coords. They are
// read from TGSI source operand @s, one component per argument of the
// resource's target. For local, private and input memory the first
// coordinate is additionally moved into an address-file scratch value.
void
Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)
{
   const int nCoords =
      TexInstruction::Target(getResourceTarget(code, r)).getArgCount();

   int c = 0;
   while (c < nCoords) {
      coords.push_back(fetchSrc(s, c));
      ++c;
   }

   // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
   switch (r) {
   case TGSI_RESOURCE_LOCAL:
   case TGSI_RESOURCE_PRIVATE:
   case TGSI_RESOURCE_INPUT:
      coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
                         coords[0]);
      break;
   default:
      break;
   }
}
 
// Split a 4-bit component mask into at most two runs of consecutive
// components, so that a raw load/store can be issued as one or two memory
// operations.
//
// @comp: out, first component of each run; must be zero-initialised
// @size: out, number of components in each run; must be zero-initialised
// @mask: component mask (bit c set = component c is accessed). Only masks
//        with at most two runs of set bits are supported, which is always
//        the case for 4 components.
//
// Returns the number of runs (1 or 2). A run of exactly three components is
// split in two; judging by the code, so that neither part is a 3-component
// access and the 2-component part starts on an even component.
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
   int n = 0;

   while (mask) {
      if (mask & 1) {
         size[n]++;
      } else {
         if (size[n]) {
            // Gap after the first run: the second run starts no earlier
            // than one past the gap. The start of the first run (comp[0])
            // must be included, otherwise a mask like 0b1010 (y and w)
            // would place the second run at component 2 instead of 3.
            comp[n = 1] = comp[0] + size[0] + 1;
         } else {
            comp[n]++;
         }
      }
      mask >>= 1;
   }
   if (size[0] == 3) {
      n = 1;
      size[0] = (comp[0] == 1) ? 1 : 2;
      size[1] = 3 - size[0];
      comp[1] = comp[0] + size[0];
   }
   return n + 1;
}
 
// For raw loads, granularity is 4 byte.
// Usage of the texture read mask on OP_SULDP is not allowed.
//
// Emit the load(s) for a TGSI LOAD from the resource named by source 0,
// addressed by the coordinates in source 1. dst0 holds the per-channel
// destinations (NULL = channel not written).
void
Converter::handleLOAD(Value *dst0[4])
{
const int r = tgsi.getSrc(0).getIndex(0);
int c;
// off: coordinates, src: coordinates with adjusted x, ldv: defs of one load,
// def: value loaded for each memory component (indexed by component)
std::vector<Value *> off, src, ldv, def;

getResourceCoords(off, r, 1);

if (isResourceRaw(code, r)) {
uint8_t mask = 0;
uint8_t comp[2] = { 0, 0 };
uint8_t size[2] = { 0, 0 };

Symbol *base = getResourceBase(r);

// determine the base and size of the at most 2 load ops
// (mask collects the memory components selected by the swizzle of source 0
// for every enabled destination channel)
for (c = 0; c < 4; ++c)
if (!tgsi.getDst(0).isMasked(c))
mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);

int n = partitionLoadStore(comp, size, mask);

src = off;

def.resize(4); // index by component, the ones we need will be non-NULL
for (c = 0; c < 4; ++c) {
// load straight into the destination when the swizzle is identity for
// this channel, otherwise into a scratch that is moved afterwards
if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
def[c] = dst0[c];
else
if (mask & (1 << c))
def[c] = getScratch();
}

// plain loads for the special memory resources and for buffer targets
// with nv50 style surfaces; everything else uses OP_SULDB
const bool useLd = isResourceSpecial(r) ||
(info->io.nv50styleSurfaces &&
code->resources[r].target == TGSI_TEXTURE_BUFFER);

for (int i = 0; i < n; ++i) {
ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);

if (comp[i]) // adjust x component of source address if necessary
src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
off[0], mkImm(comp[i] * 4));
else
src[0] = off[0];

if (useLd) {
Instruction *ld =
mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
for (size_t c = 1; c < ldv.size(); ++c)
ld->setDef(c, ldv[c]);
} else {
mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
0, ldv, src)->dType = typeOfSize(size[i] * 4);
}
}
} else {
// formatted load: always produce all four components
def.resize(4);
for (c = 0; c < 4; ++c) {
if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
def[c] = getScratch();
else
def[c] = dst0[c];
}

mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
def, off);
}
// channels that were not loaded in place get their swizzled component moved
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
if (dst0[c] != def[c])
mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
}
 
// For formatted stores, the write mask on OP_SUSTP can be used.
// Raw stores have to be split.
//
// Emit the store(s) for a TGSI STORE to the resource named by destination 0.
// The coordinates come from source 0, the values to store from source 1.
void
Converter::handleSTORE()
{
const int r = tgsi.getDst(0).getIndex(0);
int c;
// off: coordinates, src: operand list of the surface op, dummy: empty defs
std::vector<Value *> off, src, dummy;

getResourceCoords(off, r, 0);
src = off;
const int s = src.size(); // number of coordinates; values are appended after

if (isResourceRaw(code, r)) {
uint8_t comp[2] = { 0, 0 };
uint8_t size[2] = { 0, 0 };

// split the write mask into at most two runs of consecutive components
int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());

Symbol *base = getResourceBase(r);

// plain stores for the special memory resources and for buffer targets
// with nv50 style surfaces; everything else uses OP_SUSTB
const bool useSt = isResourceSpecial(r) ||
(info->io.nv50styleSurfaces &&
code->resources[r].target == TGSI_TEXTURE_BUFFER);

for (int i = 0; i < n; ++i) {
if (comp[i]) // adjust x component of source address if necessary
src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
off[0], mkImm(comp[i] * 4));
else
src[0] = off[0];

const DataType stTy = typeOfSize(size[i] * 4);

if (useSt) {
// first value goes in through mkStore, the rest as extra sources,
// the address is attached as indirect of source 0
Instruction *st =
mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
for (c = 1; c < size[i]; ++c)
st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
st->setIndirect(0, 0, src[0]);
} else {
// attach values to be stored
src.resize(s + size[i]);
for (c = 0; c < size[i]; ++c)
src[s + c] = fetchSrc(1, comp[i] + c);
mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
0, dummy, src)->setType(stTy);
}
}
} else {
// formatted store: append every enabled channel and pass the write mask
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
src.push_back(fetchSrc(1, c));

mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
dummy, src)->tex.mask = tgsi.getDst(0).getMask();
}
}
 
// Emit an atomic operation of kind @subOp and type @ty on the resource named
// by TGSI source 0, addressed by source 1, with operand source 2 (and source
// 3 as second operand for compare-and-swap).
//
// XXX: These only work on resources with the single-component u32/s32 formats.
// Therefore the result is replicated. This might not be intended by TGSI, but
// operating on more than 1 component would produce undefined results because
// they do not exist.
void
Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
{
   const int r = tgsi.getSrc(0).getIndex(0);
   std::vector<Value *> srcv;
   std::vector<Value *> defv;
   LValue *result = getScratch();

   getResourceCoords(srcv, r, 1);

   const bool isCas = (subOp == NV50_IR_SUBOP_ATOM_CAS);

   if (!isResourceSpecial(r)) {
      // regular resource: surface reduction, raw or formatted
      operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP;
      TexTarget targ = getResourceTarget(code, r);
      int slot = code->resources[r].slot;

      defv.push_back(result);
      srcv.push_back(fetchSrc(2, 0));
      if (isCas)
         srcv.push_back(fetchSrc(3, 0));

      TexInstruction *tex = mkTex(op, targ, slot, 0, defv, srcv);
      tex->subOp = subOp;
      tex->tex.mask = 1;
      tex->setType(ty);
   } else {
      // memory resource: plain OP_ATOM with the address as indirect
      assert(r != TGSI_RESOURCE_INPUT);
      Instruction *atom =
         mkOp2(OP_ATOM, ty, result, getResourceBase(r), fetchSrc(2, 0));
      atom->subOp = subOp;
      if (isCas)
         atom->setSrc(2, fetchSrc(3, 0));
      atom->setIndirect(0, 0, srcv.at(0));
   }

   // replicate the single result into every written channel
   for (int c = 0; c < 4; ++c) {
      if (!dst0[c])
         continue;
      dst0[c] = result; // not equal to rDst so handleInstruction will do mkMov
   }
}
 
// Emit the interpolation instructions for the TGSI INTERP_CENTROID,
// INTERP_SAMPLE and INTERP_OFFSET opcodes. Source 0 must be a shader input;
// dst holds the per-channel destinations.
void
Converter::handleINTERP(Value *dst[4])
{
// Check whether the input is linear. All other attributes ignored.
Instruction *insn;
Value *offset = NULL, *ptr = NULL, *w = NULL;
bool linear;
operation op;
int c, mode;

tgsi::Instruction::SrcRegister src = tgsi.getSrc(0);
assert(src.getFile() == TGSI_FILE_INPUT);

// NOTE(review): unlike the input path in fetchSrc(), the indirect index is
// used as-is here and not passed through shiftAddress() -- confirm intended.
if (src.isIndirect(0))
ptr = fetchSrc(src.getIndirect(0), 0, NULL);

// XXX: no way to know interp mode if we don't know the index
linear = info->in[ptr ? 0 : src.getIndex(0)].linear;
if (linear) {
op = OP_LINTERP;
mode = NV50_IR_INTERP_LINEAR;
} else {
op = OP_PINTERP;
mode = NV50_IR_INTERP_PERSPECTIVE;
}

// the opcode selects where inside the pixel the input is evaluated
switch (tgsi.getOpcode()) {
case TGSI_OPCODE_INTERP_CENTROID:
mode |= NV50_IR_INTERP_CENTROID;
break;
case TGSI_OPCODE_INTERP_SAMPLE:
// obtain the offset from src1.x through an OP_PIXLD with the OFFSET subop
insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0));
insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
mode |= NV50_IR_INTERP_OFFSET;
break;
case TGSI_OPCODE_INTERP_OFFSET: {
// The input in src1.xy is float, but we need a single 32-bit value
// where the upper and lower 16 bits are encoded in S0.12 format. We need
// to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096,
// and then convert to s32.
Value *offs[2];
for (c = 0; c < 2; c++) {
offs[c] = fetchSrc(1, c);
mkOp2(OP_MIN, TYPE_F32, offs[c], offs[c], loadImm(NULL, 0.4375f));
mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
}
// combine both halves with a bitfield insert (offs[1] inserted into offs[0])
offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(),
offs[1], mkImm(0x1010), offs[0]);
mode |= NV50_IR_INTERP_OFFSET;
break;
}
}

// perspective interpolation needs a second source: with an offset it is the
// reciprocal of SV_POSITION.w read at that offset, otherwise fragCoord[3]
if (op == OP_PINTERP) {
if (offset) {
w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset);
mkOp1(OP_RCP, TYPE_F32, w, w);
} else {
w = fragCoord[3];
}
}


// one interpolation instruction per enabled channel; the offset becomes the
// source following the existing ones (2 for PINTERP, 1 for LINTERP)
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
insn = mkOp1(op, TYPE_F32, dst[c], srcToSym(src, c));
if (op == OP_PINTERP)
insn->setSrc(1, w);
if (ptr)
insn->setIndirect(0, 0, ptr);
if (offset)
insn->setSrc(op == OP_PINTERP ? 2 : 1, offset);

insn->setInterpolate(mode);
}
}
 
// Look up the subroutine starting at TGSI instruction @ip. If none is
// registered yet, a new Function named "SUB" with label @ip is created and
// registered first.
Converter::Subroutine *
Converter::getSubroutine(unsigned ip)
{
   typedef std::map<unsigned, Subroutine>::iterator SubIter;

   SubIter it = sub.map.find(ip);
   if (it != sub.map.end())
      return &it->second;

   it = sub.map.insert(std::make_pair(
      ip, Subroutine(new Function(prog, "SUB", ip)))).first;
   return &it->second;
}
 
// Look up the subroutine wrapping function @f, keyed by the function's
// label. A new entry referring to @f is registered if there is none yet.
Converter::Subroutine *
Converter::getSubroutine(Function *f)
{
   typedef std::map<unsigned, Subroutine>::iterator SubIter;

   const unsigned label = f->getLabel();

   SubIter it = sub.map.find(label);
   if (it != sub.map.end())
      return &it->second;

   it = sub.map.insert(std::make_pair(label, Subroutine(f))).first;
   return &it->second;
}
 
// True if the TGSI instruction at index @ip terminates the current
// subroutine, i.e. it is END, ENDSUB or the BGNSUB of the next one.
bool
Converter::isEndOfSubroutine(uint ip)
{
   assert(ip < code->scan.num_instructions);
   tgsi::Instruction insn(&code->insns[ip]);

   switch (insn.getOpcode()) {
   case TGSI_OPCODE_END:
   case TGSI_OPCODE_ENDSUB:
   // does END occur at end of main or the very end ?
   case TGSI_OPCODE_BGNSUB:
      return true;
   default:
      return false;
   }
}
 
bool
Converter::handleInstruction(const struct tgsi_full_instruction *insn)
{
Instruction *geni;
 
Value *dst0[4], *rDst0[4];
Value *src0, *src1, *src2, *src3;
Value *val0, *val1;
int c;
 
tgsi = tgsi::Instruction(insn);
 
bool useScratchDst = tgsi.checkDstSrcAliasing();
 
operation op = tgsi.getOP();
dstTy = tgsi.inferDstType();
srcTy = tgsi.inferSrcType();
 
unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
 
if (tgsi.dstCount()) {
for (c = 0; c < 4; ++c) {
rDst0[c] = acquireDst(0, c);
dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
}
}
 
switch (tgsi.getOpcode()) {
case TGSI_OPCODE_ADD:
case TGSI_OPCODE_UADD:
case TGSI_OPCODE_AND:
case TGSI_OPCODE_DIV:
case TGSI_OPCODE_IDIV:
case TGSI_OPCODE_UDIV:
case TGSI_OPCODE_MAX:
case TGSI_OPCODE_MIN:
case TGSI_OPCODE_IMAX:
case TGSI_OPCODE_IMIN:
case TGSI_OPCODE_UMAX:
case TGSI_OPCODE_UMIN:
case TGSI_OPCODE_MOD:
case TGSI_OPCODE_UMOD:
case TGSI_OPCODE_MUL:
case TGSI_OPCODE_UMUL:
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_UMUL_HI:
case TGSI_OPCODE_OR:
case TGSI_OPCODE_SHL:
case TGSI_OPCODE_ISHR:
case TGSI_OPCODE_USHR:
case TGSI_OPCODE_SUB:
case TGSI_OPCODE_XOR:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
geni = mkOp2(op, dstTy, dst0[c], src0, src1);
geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
}
break;
case TGSI_OPCODE_MAD:
case TGSI_OPCODE_UMAD:
case TGSI_OPCODE_SAD:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
src2 = fetchSrc(2, c);
mkOp3(op, dstTy, dst0[c], src0, src1, src2);
}
break;
case TGSI_OPCODE_MOV:
case TGSI_OPCODE_ABS:
case TGSI_OPCODE_CEIL:
case TGSI_OPCODE_FLR:
case TGSI_OPCODE_TRUNC:
case TGSI_OPCODE_RCP:
case TGSI_OPCODE_IABS:
case TGSI_OPCODE_INEG:
case TGSI_OPCODE_NOT:
case TGSI_OPCODE_DDX:
case TGSI_OPCODE_DDY:
case TGSI_OPCODE_DDX_FINE:
case TGSI_OPCODE_DDY_FINE:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
break;
case TGSI_OPCODE_RSQ:
src0 = fetchSrc(0, 0);
val0 = getScratch();
mkOp1(OP_ABS, TYPE_F32, val0, src0);
mkOp1(OP_RSQ, TYPE_F32, val0, val0);
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkMov(dst0[c], val0);
break;
case TGSI_OPCODE_ARL:
case TGSI_OPCODE_ARR:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
const RoundMode rnd =
tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M;
src0 = fetchSrc(0, c);
mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd;
}
break;
case TGSI_OPCODE_UARL:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c));
break;
case TGSI_OPCODE_POW:
val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0));
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
break;
case TGSI_OPCODE_EX2:
case TGSI_OPCODE_LG2:
val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
break;
case TGSI_OPCODE_COS:
case TGSI_OPCODE_SIN:
val0 = getScratch();
if (mask & 7) {
mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));
mkOp1(op, TYPE_F32, val0, val0);
for (c = 0; c < 3; ++c)
if (dst0[c])
mkMov(dst0[c], val0);
}
if (dst0[3]) {
mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
mkOp1(op, TYPE_F32, dst0[3], val0);
}
break;
case TGSI_OPCODE_SCS:
if (mask & 3) {
val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
if (dst0[0])
mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
if (dst0[1])
mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
}
if (dst0[2])
loadImm(dst0[2], 0.0f);
if (dst0[3])
loadImm(dst0[3], 1.0f);
break;
case TGSI_OPCODE_EXP:
src0 = fetchSrc(0, 0);
val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
if (dst0[1])
mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
if (dst0[0])
mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
if (dst0[2])
mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
if (dst0[3])
loadImm(dst0[3], 1.0f);
break;
case TGSI_OPCODE_LOG:
src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));
val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);
if (dst0[0] || dst0[1])
val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);
if (dst0[1]) {
mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0);
}
if (dst0[3])
loadImm(dst0[3], 1.0f);
break;
case TGSI_OPCODE_DP2:
val0 = buildDot(2);
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkMov(dst0[c], val0);
break;
case TGSI_OPCODE_DP3:
val0 = buildDot(3);
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkMov(dst0[c], val0);
break;
case TGSI_OPCODE_DP4:
val0 = buildDot(4);
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkMov(dst0[c], val0);
break;
case TGSI_OPCODE_DPH:
val0 = buildDot(3);
src1 = fetchSrc(1, 3);
mkOp2(OP_ADD, TYPE_F32, val0, val0, src1);
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkMov(dst0[c], val0);
break;
case TGSI_OPCODE_DST:
if (dst0[0])
loadImm(dst0[0], 1.0f);
if (dst0[1]) {
src0 = fetchSrc(0, 1);
src1 = fetchSrc(1, 1);
mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1);
}
if (dst0[2])
mkMov(dst0[2], fetchSrc(0, 2));
if (dst0[3])
mkMov(dst0[3], fetchSrc(1, 3));
break;
case TGSI_OPCODE_LRP:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
src2 = fetchSrc(2, c);
mkOp3(OP_MAD, TYPE_F32, dst0[c],
mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2);
}
break;
case TGSI_OPCODE_LIT:
handleLIT(dst0);
break;
case TGSI_OPCODE_XPD:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
if (c < 3) {
val0 = getSSA();
src0 = fetchSrc(1, (c + 1) % 3);
src1 = fetchSrc(0, (c + 2) % 3);
mkOp2(OP_MUL, TYPE_F32, val0, src0, src1);
mkOp1(OP_NEG, TYPE_F32, val0, val0);
 
src0 = fetchSrc(0, (c + 1) % 3);
src1 = fetchSrc(1, (c + 2) % 3);
mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0);
} else {
loadImm(dst0[c], 1.0f);
}
}
break;
case TGSI_OPCODE_ISSG:
case TGSI_OPCODE_SSG:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
val0 = getScratch();
val1 = getScratch();
mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero);
mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero);
if (srcTy == TYPE_F32)
mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
else
mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
}
break;
case TGSI_OPCODE_UCMP:
case TGSI_OPCODE_CMP:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
src2 = fetchSrc(2, c);
if (src1 == src2)
mkMov(dst0[c], src1);
else
mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
srcTy, dst0[c], srcTy, src1, src2, src0);
}
break;
case TGSI_OPCODE_FRC:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
val0 = getScratch();
mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
}
break;
case TGSI_OPCODE_ROUND:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
->rnd = ROUND_NI;
break;
case TGSI_OPCODE_CLAMP:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
src2 = fetchSrc(2, c);
val0 = getScratch();
mkOp2(OP_MIN, TYPE_F32, val0, src0, src1);
mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2);
}
break;
case TGSI_OPCODE_SLT:
case TGSI_OPCODE_SGE:
case TGSI_OPCODE_SEQ:
case TGSI_OPCODE_SGT:
case TGSI_OPCODE_SLE:
case TGSI_OPCODE_SNE:
case TGSI_OPCODE_FSEQ:
case TGSI_OPCODE_FSGE:
case TGSI_OPCODE_FSLT:
case TGSI_OPCODE_FSNE:
case TGSI_OPCODE_ISGE:
case TGSI_OPCODE_ISLT:
case TGSI_OPCODE_USEQ:
case TGSI_OPCODE_USGE:
case TGSI_OPCODE_USLT:
case TGSI_OPCODE_USNE:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
}
break;
case TGSI_OPCODE_KILL_IF:
val0 = new_LValue(func, FILE_PREDICATE);
mask = 0;
for (c = 0; c < 4; ++c) {
const int s = tgsi.getSrc(0).getSwizzle(c);
if (mask & (1 << s))
continue;
mask |= 1 << s;
mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero);
mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
}
break;
case TGSI_OPCODE_KILL:
mkOp(OP_DISCARD, TYPE_NONE, NULL);
break;
case TGSI_OPCODE_TEX:
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
case TGSI_OPCODE_LODQ:
// R S L C Dx Dy
handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
break;
case TGSI_OPCODE_TXD:
handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
break;
case TGSI_OPCODE_TG4:
handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00);
break;
case TGSI_OPCODE_TEX2:
handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00);
break;
case TGSI_OPCODE_TXB2:
case TGSI_OPCODE_TXL2:
handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00);
break;
case TGSI_OPCODE_SAMPLE:
case TGSI_OPCODE_SAMPLE_B:
case TGSI_OPCODE_SAMPLE_D:
case TGSI_OPCODE_SAMPLE_L:
case TGSI_OPCODE_SAMPLE_C:
case TGSI_OPCODE_SAMPLE_C_LZ:
handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
break;
case TGSI_OPCODE_TXF:
handleTXF(dst0, 1, 0x03);
break;
case TGSI_OPCODE_SAMPLE_I:
handleTXF(dst0, 1, 0x03);
break;
case TGSI_OPCODE_SAMPLE_I_MS:
handleTXF(dst0, 1, 0x20);
break;
case TGSI_OPCODE_TXQ:
case TGSI_OPCODE_SVIEWINFO:
handleTXQ(dst0, TXQ_DIMS);
break;
case TGSI_OPCODE_F2I:
case TGSI_OPCODE_F2U:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;
break;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_U2F:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
break;
case TGSI_OPCODE_EMIT:
/* export the saved viewport index */
if (viewport != NULL) {
Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32,
info->out[info->io.viewportId].slot[0] * 4);
mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport);
}
/* fallthrough */
case TGSI_OPCODE_ENDPRIM:
{
// get vertex stream (must be immediate)
unsigned int stream = tgsi.getSrc(0).getValueU32(0, info);
if (stream && op == OP_RESTART)
break;
src0 = mkImm(stream);
mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
break;
}
case TGSI_OPCODE_IF:
case TGSI_OPCODE_UIF:
{
BasicBlock *ifBB = new BasicBlock(func);
 
bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
condBBs.push(bb);
joinBBs.push(bb);
 
mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy);
 
setPosition(ifBB, true);
}
break;
case TGSI_OPCODE_ELSE:
{
BasicBlock *elseBB = new BasicBlock(func);
BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
 
forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
condBBs.push(bb);
 
forkBB->getExit()->asFlow()->target.bb = elseBB;
if (!bb->isTerminated())
mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
 
setPosition(elseBB, true);
}
break;
case TGSI_OPCODE_ENDIF:
{
BasicBlock *convBB = new BasicBlock(func);
BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
 
if (!bb->isTerminated()) {
// we only want join if none of the clauses ended with CONT/BREAK/RET
if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
insertConvergenceOps(convBB, forkBB);
mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);
bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
}
 
if (prevBB->getExit()->op == OP_BRA) {
prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
prevBB->getExit()->asFlow()->target.bb = convBB;
}
setPosition(convBB, true);
}
break;
case TGSI_OPCODE_BGNLOOP:
{
BasicBlock *lbgnBB = new BasicBlock(func);
BasicBlock *lbrkBB = new BasicBlock(func);
 
loopBBs.push(lbgnBB);
breakBBs.push(lbrkBB);
if (loopBBs.getSize() > func->loopNestingBound)
func->loopNestingBound++;
 
mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);
 
bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);
setPosition(lbgnBB, true);
mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);
}
break;
case TGSI_OPCODE_ENDLOOP:
{
BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
 
if (!bb->isTerminated()) {
mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
}
setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
}
break;
case TGSI_OPCODE_BRK:
{
if (bb->isTerminated())
break;
BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);
bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);
}
break;
case TGSI_OPCODE_CONT:
{
if (bb->isTerminated())
break;
BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
contBB->explicitCont = true;
bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
}
break;
case TGSI_OPCODE_BGNSUB:
{
Subroutine *s = getSubroutine(ip);
BasicBlock *entry = new BasicBlock(s->f);
BasicBlock *leave = new BasicBlock(s->f);
 
// multiple entrypoints possible, keep the graph connected
if (prog->getType() == Program::TYPE_COMPUTE)
prog->main->call.attach(&s->f->call, Graph::Edge::TREE);
 
sub.cur = s;
s->f->setEntry(entry);
s->f->setExit(leave);
setPosition(entry, true);
return true;
}
case TGSI_OPCODE_ENDSUB:
{
sub.cur = getSubroutine(prog->main);
setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true);
return true;
}
case TGSI_OPCODE_CAL:
{
Subroutine *s = getSubroutine(tgsi.getLabel());
mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL);
func->call.attach(&s->f->call, Graph::Edge::TREE);
return true;
}
case TGSI_OPCODE_RET:
{
if (bb->isTerminated())
return true;
BasicBlock *leave = BasicBlock::get(func->cfgExit);
 
if (!isEndOfSubroutine(ip + 1)) {
// insert a PRERET at the entry if this is an early return
// (only needed for sharing code in the epilogue)
BasicBlock *pos = getBB();
setPosition(BasicBlock::get(func->cfg.getRoot()), false);
mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
setPosition(pos, true);
}
mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
}
break;
case TGSI_OPCODE_END:
{
// attach and generate epilogue code
BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
setPosition(epilogue, true);
if (prog->getType() == Program::TYPE_FRAGMENT)
exportOutputs();
if (info->io.genUserClip > 0)
handleUserClipPlanes();
mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
}
break;
case TGSI_OPCODE_SWITCH:
case TGSI_OPCODE_CASE:
ERROR("switch/case opcode encountered, should have been lowered\n");
abort();
break;
case TGSI_OPCODE_LOAD:
handleLOAD(dst0);
break;
case TGSI_OPCODE_STORE:
handleSTORE();
break;
case TGSI_OPCODE_BARRIER:
geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
geni->fixed = 1;
geni->subOp = NV50_IR_SUBOP_BAR_SYNC;
break;
case TGSI_OPCODE_MFENCE:
case TGSI_OPCODE_LFENCE:
case TGSI_OPCODE_SFENCE:
geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
geni->fixed = 1;
geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
break;
case TGSI_OPCODE_ATOMUADD:
case TGSI_OPCODE_ATOMXCHG:
case TGSI_OPCODE_ATOMCAS:
case TGSI_OPCODE_ATOMAND:
case TGSI_OPCODE_ATOMOR:
case TGSI_OPCODE_ATOMXOR:
case TGSI_OPCODE_ATOMUMIN:
case TGSI_OPCODE_ATOMIMIN:
case TGSI_OPCODE_ATOMUMAX:
case TGSI_OPCODE_ATOMIMAX:
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
break;
case TGSI_OPCODE_IBFE:
case TGSI_OPCODE_UBFE:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
src2 = fetchSrc(2, c);
mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1);
mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1);
}
break;
case TGSI_OPCODE_BFI:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
src1 = fetchSrc(1, c);
src2 = fetchSrc(2, c);
src3 = fetchSrc(3, c);
mkOp3(OP_INSBF, TYPE_U32, src2, src3, mkImm(0x808), src2);
mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, src2, src0);
}
break;
case TGSI_OPCODE_LSB:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
geni = mkOp2(OP_EXTBF, TYPE_U32, src0, src0, mkImm(0x2000));
geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], src0);
geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
}
break;
case TGSI_OPCODE_IMSB:
case TGSI_OPCODE_UMSB:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
mkOp1(OP_BFIND, srcTy, dst0[c], src0);
}
break;
case TGSI_OPCODE_BREV:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
}
break;
case TGSI_OPCODE_POPC:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0);
}
break;
case TGSI_OPCODE_INTERP_CENTROID:
case TGSI_OPCODE_INTERP_SAMPLE:
case TGSI_OPCODE_INTERP_OFFSET:
handleINTERP(dst0);
break;
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
case TGSI_OPCODE_D2F: {
int pos = 0;
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
Value *dreg = getSSA(8);
src0 = fetchSrc(0, pos);
src1 = fetchSrc(0, pos + 1);
mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1);
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
pos += 2;
}
break;
}
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
case TGSI_OPCODE_F2D:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
Value *dreg = getSSA(8);
mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
mkSplit(&dst0[c], 4, dreg);
c++;
}
break;
case TGSI_OPCODE_DABS:
case TGSI_OPCODE_DNEG:
case TGSI_OPCODE_DRCP:
case TGSI_OPCODE_DSQRT:
case TGSI_OPCODE_DRSQ:
case TGSI_OPCODE_DTRUNC:
case TGSI_OPCODE_DCEIL:
case TGSI_OPCODE_DFLR:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = getSSA(8);
Value *dst = getSSA(8), *tmp[2];
tmp[0] = fetchSrc(0, c);
tmp[1] = fetchSrc(0, c + 1);
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
mkOp1(op, dstTy, dst, src0);
mkSplit(&dst0[c], 4, dst);
c++;
}
break;
case TGSI_OPCODE_DFRAC:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = getSSA(8);
Value *dst = getSSA(8), *tmp[2];
tmp[0] = fetchSrc(0, c);
tmp[1] = fetchSrc(0, c + 1);
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
mkOp1(OP_FLOOR, TYPE_F64, dst, src0);
mkOp2(OP_SUB, TYPE_F64, dst, src0, dst);
mkSplit(&dst0[c], 4, dst);
c++;
}
break;
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSEQ:
case TGSI_OPCODE_DSNE: {
int pos = 0;
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
Value *tmp[2];
 
src0 = getSSA(8);
src1 = getSSA(8);
tmp[0] = fetchSrc(0, pos);
tmp[1] = fetchSrc(0, pos + 1);
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
tmp[0] = fetchSrc(1, pos);
tmp[1] = fetchSrc(1, pos + 1);
mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
pos += 2;
}
break;
}
case TGSI_OPCODE_DADD:
case TGSI_OPCODE_DMUL:
case TGSI_OPCODE_DMAX:
case TGSI_OPCODE_DMIN:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = getSSA(8);
src1 = getSSA(8);
Value *dst = getSSA(8), *tmp[2];
tmp[0] = fetchSrc(0, c);
tmp[1] = fetchSrc(0, c + 1);
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
tmp[0] = fetchSrc(1, c);
tmp[1] = fetchSrc(1, c + 1);
mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
mkOp2(op, dstTy, dst, src0, src1);
mkSplit(&dst0[c], 4, dst);
c++;
}
break;
case TGSI_OPCODE_DMAD:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = getSSA(8);
src1 = getSSA(8);
src2 = getSSA(8);
Value *dst = getSSA(8), *tmp[2];
tmp[0] = fetchSrc(0, c);
tmp[1] = fetchSrc(0, c + 1);
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
tmp[0] = fetchSrc(1, c);
tmp[1] = fetchSrc(1, c + 1);
mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
tmp[0] = fetchSrc(2, c);
tmp[1] = fetchSrc(2, c + 1);
mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]);
mkOp3(op, dstTy, dst, src0, src1, src2);
mkSplit(&dst0[c], 4, dst);
c++;
}
break;
case TGSI_OPCODE_DROUND:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = getSSA(8);
Value *dst = getSSA(8), *tmp[2];
tmp[0] = fetchSrc(0, c);
tmp[1] = fetchSrc(0, c + 1);
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0)
->rnd = ROUND_NI;
mkSplit(&dst0[c], 4, dst);
c++;
}
break;
case TGSI_OPCODE_DSSG:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = getSSA(8);
Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2];
tmp[0] = fetchSrc(0, c);
tmp[1] = fetchSrc(0, c + 1);
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
 
val0 = getScratch();
val1 = getScratch();
// The zero is wrong here since it's only 32-bit, but it works out in
// the end since it gets replaced with $r63.
mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero);
mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero);
mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1);
mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32);
mkSplit(&dst0[c], 4, dst);
c++;
}
break;
default:
ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
assert(0);
break;
}
 
if (tgsi.dstCount()) {
for (c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
if (dst0[c] != rDst0[c])
mkMov(rDst0[c], dst0[c]);
storeDst(0, c, rDst0[c]);
}
}
vtxBaseValid = 0;
 
return true;
}
 
void
Converter::handleUserClipPlanes()
{
Value *res[8];
int n, i, c;
 
for (c = 0; c < 4; ++c) {
for (i = 0; i < info->io.genUserClip; ++i) {
Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot,
TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
if (c == 0)
res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
else
mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
}
}
 
const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
 
for (i = 0; i < info->io.genUserClip; ++i) {
n = i / 4 + first;
c = i % 4;
Symbol *sym =
mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
}
}
 
void
Converter::exportOutputs()
{
for (unsigned int i = 0; i < info->numOutputs; ++i) {
for (unsigned int c = 0; c < 4; ++c) {
if (!oData.exists(sub.cur->values, i, c))
continue;
Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
info->out[i].slot[c] * 4);
Value *val = oData.load(sub.cur->values, i, c, NULL);
if (val)
mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
}
}
}
 
// Set up the converter for one TGSI source: size the register arrays for
// temporaries, predicates, address registers and outputs from the scanned
// file sizes and create the shared zero immediate.
Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
     code(code),
     tgsi(NULL),
     tData(this), aData(this), pData(this), oData(this)
{
   info = code->info;

   // Temporaries are placed in local memory when the source asks for it.
   DataFile tempFile = FILE_GPR;
   if (code->mainTempsInLMem)
      tempFile = FILE_MEMORY_LOCAL;

   const unsigned numTemps = code->fileSize(TGSI_FILE_TEMPORARY);
   const unsigned numPreds = code->fileSize(TGSI_FILE_PREDICATE);
   const unsigned numAddrs = code->fileSize(TGSI_FILE_ADDRESS);
   const unsigned numOuts = code->fileSize(TGSI_FILE_OUTPUT);

   tData.setup(TGSI_FILE_TEMPORARY, 0, 0, numTemps, 4, 4, tempFile, 0);
   pData.setup(TGSI_FILE_PREDICATE, 0, 0, numPreds, 4, 4, FILE_PREDICATE, 0);
   aData.setup(TGSI_FILE_ADDRESS, 0, 0, numAddrs, 4, 4, FILE_GPR, 0);
   oData.setup(TGSI_FILE_OUTPUT, 0, 0, numOuts, 4, 4, FILE_GPR, 0);

   vtxBaseValid = 0;

   zero = mkImm((uint32_t)0);
}
 
// Nothing to release explicitly here.
Converter::~Converter() {}
 
// Look up the Location recorded for value @v in subroutine @s.
// Returns NULL when the value has no entry in the subroutine's value map.
inline const Converter::Location *
Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v)
{
   ValueMap::l_iterator found = s->values.l.find(v);
   if (found != s->values.l.end())
      return &found->second;
   return NULL;
}
 
// Bind the arguments of call instruction @i: for each entry of the callee's
// prototype list @proto, find the location of that value in the callee and
// pass the caller's value for the same location through @setArg.
template<typename T> inline void
Converter::BindArgumentsPass::updateCallArgs(
   Instruction *i, void (Instruction::*setArg)(int, Value *),
   T (Function::*proto))
{
   Function *callee = i->asFlow()->target.fn;
   Subroutine *calleeSub = conv.getSubroutine(callee);

   unsigned a = 0;
   while (a < (callee->*proto).size()) {
      Value *calleeVal = (callee->*proto)[a].get();
      const Converter::Location *loc = getValueLocation(calleeSub, calleeVal);
      Converter::DataArray *array =
         conv.getArrayForFile(loc->array, loc->arrayIdx);

      (i->*setArg)(a, array->acquire(sub->values, loc->i, loc->c));
      ++a;
   }
}
 
// Rebuild one prototype list (@proto) of the current function: refresh the
// bit set through @updateSet, then append every value whose bit is set, that
// has a known location, and whose location is not listed in the source's
// locals.
template<typename T> inline void
Converter::BindArgumentsPass::updatePrototype(
   BitSet *set, void (Function::*updateSet)(), T (Function::*proto))
{
   (func->*updateSet)();

   unsigned idx = 0;
   for (; idx < set->getSize(); ++idx) {
      Value *val = func->getLValue(idx);
      const Converter::Location *loc = getValueLocation(sub, val);

      // skip values without a matching TGSI register
      if (!set->test(idx) || !loc || conv.code->locals.count(*loc))
         continue;
      (func->*proto).push_back(val);
   }
}
 
// Per-function step of the pass: bind the arguments of every non-builtin
// call in @f, then rebuild the function's input/output prototype (skipped
// for the main function of non-compute programs).
bool
Converter::BindArgumentsPass::visit(Function *f)
{
   sub = conv.getSubroutine(f);

   ArrayList::Iterator blockIt = f->allBBlocks.iterator();
   while (!blockIt.end()) {
      Instruction *insn = BasicBlock::get(blockIt)->getFirst();
      while (insn) {
         if (insn->op == OP_CALL && !insn->asFlow()->builtin) {
            updateCallArgs(insn, &Instruction::setSrc, &Function::ins);
            updateCallArgs(insn, &Instruction::setDef, &Function::outs);
         }
         insn = insn->next;
      }
      blockIt.next();
   }

   const bool mainOfNonCompute =
      func == prog->main && prog->getType() != Program::TYPE_COMPUTE;
   if (!mainOfNonCompute) {
      updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,
                      &Function::buildLiveSets, &Function::ins);
      updatePrototype(&BasicBlock::get(f->cfgExit)->defSet,
                      &Function::buildDefSets, &Function::outs);
   }

   return true;
}
 
bool
Converter::run()
{
BasicBlock *entry = new BasicBlock(prog->main);
BasicBlock *leave = new BasicBlock(prog->main);
 
prog->main->setEntry(entry);
prog->main->setExit(leave);
 
setPosition(entry, true);
sub.cur = getSubroutine(prog->main);
 
if (info->io.genUserClip > 0) {
for (int c = 0; c < 4; ++c)
clipVtx[c] = getScratch();
}
 
if (prog->getType() == Program::TYPE_FRAGMENT) {
Symbol *sv = mkSysVal(SV_POSITION, 3);
fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
}
 
if (info->io.viewportId >= 0)
viewport = getScratch();
else
viewport = NULL;
 
for (ip = 0; ip < code->scan.num_instructions; ++ip) {
if (!handleInstruction(&code->insns[ip]))
return false;
}
 
if (!BindArgumentsPass(*this).run(prog))
return false;
 
return true;
}
 
} // unnamed namespace
 
namespace nv50_ir {
 
bool
Program::makeFromTGSI(struct nv50_ir_prog_info *info)
{
tgsi::Source src(info);
if (!src.scanSource())
return false;
tlsSize = info->bin.tlsSpace;
 
Converter builder(this, &src);
return builder.run();
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
0,0 → 1,446
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir_graph.h"
#include <limits>
#include <list>
#include <stack>
#include "codegen/nv50_ir.h"
 
namespace nv50_ir {
 
// Create an empty graph: no root, no nodes, sequence counter at zero.
Graph::Graph() : root(NULL), size(0), sequence(0)
{
}
 
// Cut every node found by a DFS from the root out of the graph; the nodes
// themselves are not deleted here.
Graph::~Graph()
{
   IteratorRef it = safeIteratorDFS();
   while (!it->end()) {
      Node *node = reinterpret_cast<Node *>(it->get());
      node->cut();
      it->next();
   }
}
 
// Register @node with this graph; the first node inserted becomes the root.
void Graph::insert(Node *node)
{
   node->graph = this;
   ++size;

   if (root == NULL)
      root = node;
}
 
void Graph::Edge::unlink()
{
if (origin) {
prev[0]->next[0] = next[0];
next[0]->prev[0] = prev[0];
if (origin->out == this)
origin->out = (next[0] == this) ? NULL : next[0];
 
--origin->outCount;
}
if (target) {
prev[1]->next[1] = next[1];
next[1]->prev[1] = prev[1];
if (target->in == this)
target->in = (next[1] == this) ? NULL : next[1];
 
--target->inCount;
}
}
 
const char *Graph::Edge::typeStr() const
{
switch (type) {
case TREE: return "tree";
case FORWARD: return "forward";
case BACK: return "back";
case CROSS: return "cross";
case DUMMY: return "dummy";
case UNKNOWN:
default:
return "unk";
}
}
 
// Create an unattached node carrying the opaque payload @priv.
// All bookkeeping starts out cleared, including the scratch field tag, so
// that reading it before a pass has assigned it is well defined.
Graph::Node::Node(void *priv) : data(priv),
                                in(0), out(0), graph(0),
                                visited(0),
                                inCount(0), outCount(0),
                                tag(0)
{
   // nothing to do
}
 
// Add an edge of type @kind from this node to @node. The new edge becomes
// the head of this node's outgoing list and of @node's incident list. A node
// that does not belong to a graph yet is inserted into the other node's
// graph. Passing Edge::UNKNOWN triggers a reclassification of all edges.
void Graph::Node::attach(Node *node, Edge::Type kind)
{
   Edge *const e = new Edge(this, node, kind);

   // link in front of the current head of the outgoing list
   Edge *const outHead = this->out;
   if (outHead) {
      Edge *const outTail = outHead->prev[0];
      e->next[0] = outHead;
      e->prev[0] = outTail;
      outTail->next[0] = e;
      outHead->prev[0] = e;
   }
   this->out = e;

   // same for the incident list of the target
   Edge *const inHead = node->in;
   if (inHead) {
      Edge *const inTail = inHead->prev[1];
      e->next[1] = inHead;
      e->prev[1] = inTail;
      inTail->next[1] = e;
      inHead->prev[1] = e;
   }
   node->in = e;

   this->outCount += 1;
   node->inCount += 1;

   assert(graph || node->graph);
   if (!node->graph)
      graph->insert(node);
   if (!graph)
      node->graph->insert(this);

   if (kind == Edge::UNKNOWN)
      graph->classifyEdges();
}
 
// Delete the first outgoing edge that leads to @node.
// Returns false (after logging an error) if no such edge exists.
bool Graph::Node::detach(Graph::Node *node)
{
   for (EdgeIterator ei = this->outgoing(); !ei.end(); ei.next()) {
      if (ei.getNode() != node)
         continue;
      delete ei.getEdge();
      return true;
   }

   ERROR("no such node attached\n");
   return false;
}
 
// Cut a node from the graph, deleting all attached edges.
void Graph::Node::cut()
{
while (out)
delete out;
while (in)
delete in;
 
if (graph) {
if (graph->root == this)
graph->root = NULL;
graph = NULL;
}
}
 
// Create an edge of type @kind from @org to @tgt. The edge starts out as a
// single-element circular list in both directions; linking it into the
// nodes' lists is left to the caller.
Graph::Edge::Edge(Node *org, Node *tgt, Type kind)
   : origin(org), target(tgt), type(kind)
{
   for (int dir = 0; dir < 2; ++dir) {
      next[dir] = this;
      prev[dir] = this;
   }
}
 
bool
Graph::Node::reachableBy(const Node *node, const Node *term) const
{
std::stack<const Node *> stack;
const Node *pos = NULL;
const int seq = graph->nextSequence();
 
stack.push(node);
 
while (!stack.empty()) {
pos = stack.top();
stack.pop();
 
if (pos == this)
return true;
if (pos == term)
continue;
 
for (EdgeIterator ei = pos->outgoing(); !ei.end(); ei.next()) {
if (ei.getType() == Edge::BACK || ei.getType() == Edge::DUMMY)
continue;
if (ei.getNode()->visit(seq))
stack.push(ei.getNode());
}
}
return pos == this;
}
 
class DFSIterator : public Iterator
{
public:
DFSIterator(Graph *graph, const bool preorder)
{
unsigned int seq = graph->nextSequence();
 
nodes = new Graph::Node * [graph->getSize() + 1];
count = 0;
pos = 0;
nodes[graph->getSize()] = 0;
 
if (graph->getRoot()) {
graph->getRoot()->visit(seq);
search(graph->getRoot(), preorder, seq);
}
}
 
~DFSIterator()
{
if (nodes)
delete[] nodes;
}
 
void search(Graph::Node *node, const bool preorder, const int sequence)
{
if (preorder)
nodes[count++] = node;
 
for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
if (ei.getNode()->visit(sequence))
search(ei.getNode(), preorder, sequence);
 
if (!preorder)
nodes[count++] = node;
}
 
virtual bool end() const { return pos >= count; }
virtual void next() { if (pos < count) ++pos; }
virtual void *get() const { return nodes[pos]; }
virtual void reset() { pos = 0; }
 
protected:
Graph::Node **nodes;
int count;
int pos;
};
 
// Create a depth-first iterator over this graph (pre- or post-order).
IteratorRef Graph::iteratorDFS(bool preorder)
{
   DFSIterator *dfs = new DFSIterator(this, preorder);
   return IteratorRef(dfs);
}
 
// Currently identical to iteratorDFS(): simply forwards to it.
IteratorRef Graph::safeIteratorDFS(bool preorder)
{
   return iteratorDFS(preorder);
}
 
// Iterator that lists the nodes of a graph in an order computed once at
// construction by search(): a node reached through TREE/FORWARD/DUMMY edges
// is queued only after all of its non-back incident edges have been counted,
// targets of CROSS edges are deferred until the main work stack runs empty,
// and BACK edges are ignored.
class CFGIterator : public Iterator
{
public:
   CFGIterator(Graph *graph)
   {
      // room for every node plus one zeroed slot at the end
      nodes = new Graph::Node * [graph->getSize() + 1];
      count = 0;
      pos = 0;
      nodes[graph->getSize()] = 0;

      // search() uses Node::tag as a per-node counter of edges seen so far,
      // so clear it on every node reachable from the root first.
      // TODO: argh, use graph->sequence instead of tag and just raise it by > 1
      for (IteratorRef it = graph->iteratorDFS(); !it->end(); it->next())
         reinterpret_cast<Graph::Node *>(it->get())->tag = 0;

      if (graph->getRoot())
         search(graph->getRoot(), graph->nextSequence());
   }

   ~CFGIterator()
   {
      if (nodes)
         delete[] nodes;
   }

   // Standard iterator interface over the precomputed node array.
   virtual void *get() const { return nodes[pos]; }
   virtual bool end() const { return pos >= count; }
   virtual void next() { if (pos < count) ++pos; }
   virtual void reset() { pos = 0; }

private:
   // Fill nodes[] starting from @node; @sequence marks nodes as visited for
   // this traversal.
   void search(Graph::Node *node, const int sequence)
   {
      // bb: nodes ready to be emitted; cross: targets of CROSS edges, only
      // moved into bb once bb has run empty
      Stack bb, cross;

      bb.push(node);

      while (bb.getSize()) {
         node = reinterpret_cast<Graph::Node *>(bb.pop().u.p);
         assert(node);
         // skip nodes already emitted in this traversal
         if (!node->visit(sequence))
            continue;
         node->tag = 0;

         for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) {
            switch (ei.getType()) {
            case Graph::Edge::TREE:
            case Graph::Edge::FORWARD:
            case Graph::Edge::DUMMY:
               // queue the successor once its last non-back incident edge
               // has been counted
               if (++(ei.getNode()->tag) == ei.getNode()->incidentCountFwd())
                  bb.push(ei.getNode());
               break;
            case Graph::Edge::BACK:
               // back edges do not contribute; move on to the next edge
               continue;
            case Graph::Edge::CROSS:
               // defer the target, but only the first time it is seen
               if (++(ei.getNode()->tag) == 1)
                  cross.push(ei.getNode());
               break;
            default:
               assert(!"unknown edge kind in CFG");
               break;
            }
         }
         nodes[count++] = node;

         // nothing left on the main stack: continue with deferred targets
         if (bb.getSize() == 0)
            cross.moveTo(bb);
      }
   }

private:
   Graph::Node **nodes; // emission order, owned by the iterator
   int count;           // number of valid entries in nodes
   int pos;             // current iteration position
};
 
// Create a CFGIterator over this graph.
IteratorRef Graph::iteratorCFG()
{
   CFGIterator *cfg = new CFGIterator(this);
   return IteratorRef(cfg);
}
 
// Currently identical to iteratorCFG(): simply forwards to it.
IteratorRef Graph::safeIteratorCFG()
{
   return iteratorCFG();
}
 
/**
* Edge classification:
*
* We have a graph and want to classify the edges into one of four types:
* - TREE: edges that belong to a spanning tree of the graph
* - FORWARD: edges from a node to a descendent in the spanning tree
* - BACK: edges from a node to a parent (or itself) in the spanning tree
* - CROSS: all other edges (because they cross between branches in the
* spanning tree)
*/
void Graph::classifyEdges()
{
int seq;
 
for (IteratorRef it = iteratorDFS(true); !it->end(); it->next()) {
Node *node = reinterpret_cast<Node *>(it->get());
node->visit(0);
node->tag = 0;
}
 
classifyDFS(root, (seq = 0));
 
sequence = seq;
}
 
// Recursive worker of classifyEdges(): number @curr with the next sequence
// value and classify each of its non-DUMMY edges, outgoing ones first and
// incident ones second, descending into nodes that have not been numbered
// yet. Node::tag is 1 while a node is on the current recursion path.
//
// The edge lists are circular (an edge's next pointer is never NULL), so
// they are walked with EdgeIterator, which stops after one full round;
// following next[] until NULL would never terminate.
void Graph::classifyDFS(Node *curr, int& seq)
{
   Graph::Edge *edge;
   Graph::Node *node;

   curr->visit(++seq);
   curr->tag = 1;

   for (EdgeIterator ei = curr->outgoing(); !ei.end(); ei.next()) {
      edge = ei.getEdge();
      node = edge->target;
      if (edge->type == Edge::DUMMY)
         continue;

      if (node->getSequence() == 0) {
         // not numbered yet: this edge discovers the node
         edge->type = Edge::TREE;
         classifyDFS(node, seq);
      } else
      if (node->getSequence() > curr->getSequence()) {
         edge->type = Edge::FORWARD;
      } else {
         // numbered earlier: BACK if still on the recursion path
         edge->type = node->tag ? Edge::BACK : Edge::CROSS;
      }
   }

   for (EdgeIterator ei = curr->incident(); !ei.end(); ei.next()) {
      edge = ei.getEdge();
      node = edge->origin;
      if (edge->type == Edge::DUMMY)
         continue;

      if (node->getSequence() == 0) {
         edge->type = Edge::TREE;
         classifyDFS(node, seq);
      } else
      if (node->getSequence() > curr->getSequence()) {
         edge->type = Edge::FORWARD;
      } else {
         edge->type = node->tag ? Edge::BACK : Edge::CROSS;
      }
   }

   curr->tag = 0;
}
 
// Find the weight of the lightest path from @a to @b, where the weight of a
// path is the sum of the weights of the nodes left along the way (so @b's
// own weight is not included).
// @weight is indexed by Node::tag, returns -1 if no path found
int
Graph::findLightestPathWeight(Node *a, Node *b, const std::vector<int> &weight)
{
   const int unreached = std::numeric_limits<int>::max();
   std::vector<int> path(weight.size(), unreached);
   std::list<Node *> nodeList; // reached, but not yet expanded nodes
   const int seq = nextSequence();

   path[a->tag] = 0;
   for (Node *c = a; c && c != b;) {
      // relax all edges leaving c
      const int p = path[c->tag] + weight[c->tag];
      for (EdgeIterator ei = c->outgoing(); !ei.end(); ei.next()) {
         Node *t = ei.getNode();
         if (t->getSequence() < seq) {
            if (path[t->tag] == unreached)
               nodeList.push_front(t);
            if (p < path[t->tag])
               path[t->tag] = p;
         }
      }
      c->visit(seq);

      // c is finished: take it off the candidate list before choosing the
      // next node. (Erasing inside the selection loop and stepping the
      // iterator back is undefined when c is the first list element.)
      nodeList.remove(c);

      // continue with the lightest remaining candidate
      Node *next = NULL;
      for (std::list<Node *>::iterator n = nodeList.begin();
           n != nodeList.end(); ++n) {
         if (!next || path[(*n)->tag] < path[next->tag])
            next = *n;
      }
      c = next;
   }
   if (path[b->tag] == unreached)
      return -1;
   return path[b->tag];
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h
0,0 → 1,228
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#ifndef __NV50_IR_GRAPH_H__
#define __NV50_IR_GRAPH_H__
 
#include "codegen/nv50_ir_util.h"
#include <vector>
 
namespace nv50_ir {
 
#define ITER_NODE(x) reinterpret_cast<Graph::Node *>((x).get())
#define ITER_EDGE(x) reinterpret_cast<Graph::Edge *>((x).get())
 
// A connected graph.
class Graph
{
public:
class Node;
 
class Edge
{
public:
enum Type
{
UNKNOWN,
TREE,
FORWARD,
BACK,
CROSS, // e.g. loop break
DUMMY
};
 
Edge(Node *dst, Node *src, Type kind);
~Edge() { unlink(); }
 
inline Node *getOrigin() const { return origin; }
inline Node *getTarget() const { return target; }
 
inline Type getType() const { return type; }
const char *typeStr() const;
 
private:
Node *origin;
Node *target;
 
Type type;
Edge *next[2]; // next edge outgoing/incident from/to origin/target
Edge *prev[2];
 
void unlink();
 
friend class Graph;
};
 
class EdgeIterator : public Iterator
{
public:
EdgeIterator() : e(0), t(0), d(0), rev(false) { }
EdgeIterator(Graph::Edge *first, int dir, bool reverse)
: d(dir), rev(reverse)
{
t = e = ((rev && first) ? first->prev[d] : first);
}
 
virtual void next()
{
Graph::Edge *n = (rev ? e->prev[d] : e->next[d]);
e = (n == t ? NULL : n);
}
virtual bool end() const { return !e; }
virtual void *get() const { return e; }
 
inline Node *getNode() const { assert(e); return d ?
e->origin : e->target; }
inline Edge *getEdge() const { return e; }
inline Edge::Type getType() { return e ? e->getType() : Edge::UNKNOWN; }
 
private:
Graph::Edge *e;
Graph::Edge *t;
int d;
bool rev;
};
 
class Node
{
public:
Node(void *);
~Node() { cut(); }
 
void attach(Node *, Edge::Type);
bool detach(Node *);
void cut();
 
inline EdgeIterator outgoing(bool reverse = false) const;
inline EdgeIterator incident(bool reverse = false) const;
 
inline Node *parent() const; // returns NULL if count(incident edges) != 1
 
bool reachableBy(const Node *node, const Node *term) const;
 
inline bool visit(int);
inline int getSequence() const;
 
inline int incidentCountFwd() const; // count of incident non-back edges
inline int incidentCount() const { return inCount; }
inline int outgoingCount() const { return outCount; }
 
Graph *getGraph() const { return graph; }
 
void *data;
 
private:
Edge *in;
Edge *out;
Graph *graph;
 
int visited;
 
int16_t inCount;
int16_t outCount;
public:
int tag; // for temporary use
 
friend class Graph;
};
 
public:
Graph();
~Graph(); // does *not* free the nodes (make it an option ?)
 
inline Node *getRoot() const { return root; }
 
inline unsigned int getSize() const { return size; }
 
inline int nextSequence();
 
void insert(Node *node); // attach to or set as root
 
IteratorRef iteratorDFS(bool preorder = true);
IteratorRef iteratorCFG();
 
// safe iterators are unaffected by changes to the *edges* of the graph
IteratorRef safeIteratorDFS(bool preorder = true);
IteratorRef safeIteratorCFG();
 
void classifyEdges();
 
// @weights: indexed by Node::tag
int findLightestPathWeight(Node *, Node *, const std::vector<int>& weights);
 
private:
void classifyDFS(Node *, int&);
 
private:
Node *root;
unsigned int size;
int sequence;
};
 
// Advance the graph's sequence counter and return the new value.
int Graph::nextSequence()
{
   sequence += 1;
   return sequence;
}
 
// Return the origin of the only incident edge, or NULL when the node does
// not have exactly one incident edge.
Graph::Node *Graph::Node::parent() const
{
   if (inCount == 1) {
      assert(in);
      return in->origin;
   }
   return NULL;
}
 
// Mark the node with visit value @v. Returns true if the marker changed,
// false if the node already carried @v.
bool Graph::Node::visit(int v)
{
   const bool changed = (visited != v);
   if (changed)
      visited = v;
   return changed;
}
 
int Graph::Node::getSequence() const
{
return visited;
}
 
// Iterator over the edges leaving this node (list index 0).
Graph::EdgeIterator Graph::Node::outgoing(bool reverse) const
{
   EdgeIterator it(out, 0, reverse);
   return it;
}
 
// Iterator over the edges entering this node (list index 1).
Graph::EdgeIterator Graph::Node::incident(bool reverse) const
{
   EdgeIterator it(in, 1, reverse);
   return it;
}
 
// Count the incident edges whose type is not BACK.
int Graph::Node::incidentCountFwd() const
{
   int fwd = 0;
   EdgeIterator ei = incident();
   while (!ei.end()) {
      if (ei.getType() != Edge::BACK)
         fwd += 1;
      ei.next();
   }
   return fwd;
}
 
} // namespace nv50_ir
 
#endif // __NV50_IR_GRAPH_H__
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
0,0 → 1,420
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#ifndef __NV50_IR_INLINES_H__
#define __NV50_IR_INLINES_H__
 
// Remaps the low three bits of @cc through a table that swaps bit 0 with
// bit 2; all higher bits are passed through unchanged.
static inline CondCode reverseCondCode(CondCode cc)
{
   static const uint8_t swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   const int low = cc & 7;
   const int high = cc & ~7;
   return static_cast<CondCode>(swapped[low] | high);
}
 
// Flips the low three bits of @cc and leaves every other bit untouched.
static inline CondCode inverseCondCode(CondCode cc)
{
   const int lowBits = 7;
   return static_cast<CondCode>(cc ^ lowBits);
}
 
// True when @f lies in the enum range FILE_MEMORY_CONST..FILE_MEMORY_LOCAL
// (both ends inclusive).
static inline bool isMemoryFile(DataFile f)
{
   if (f < FILE_MEMORY_CONST)
      return false;
   return f <= FILE_MEMORY_LOCAL;
}
 
// True when @op lies in the enum range OP_TEX..OP_TEXPREP (inclusive).
// Unlike asTex(), this never covers SULD/SUST.
static inline bool isTextureOp(operation op)
{
   if (op < OP_TEX)
      return false;
   return op <= OP_TEXPREP;
}
 
// True when @op lies in the enum range OP_SULDB..OP_SULEA (inclusive).
static inline bool isSurfaceOp(operation op)
{
   if (op < OP_SULDB)
      return false;
   return op <= OP_SULEA;
}
 
// Size in bytes of a value of type @ty; 0 for any type not listed.
static inline unsigned int typeSizeof(DataType ty)
{
   unsigned int bytes = 0;

   switch (ty) {
   case TYPE_U8:
   case TYPE_S8:
      bytes = 1;
      break;
   case TYPE_F16:
   case TYPE_U16:
   case TYPE_S16:
      bytes = 2;
      break;
   case TYPE_F32:
   case TYPE_U32:
   case TYPE_S32:
      bytes = 4;
      break;
   case TYPE_F64:
   case TYPE_U64:
   case TYPE_S64:
      bytes = 8;
      break;
   case TYPE_B96:
      bytes = 12;
      break;
   case TYPE_B128:
      bytes = 16;
      break;
   default:
      break;
   }
   return bytes;
}
 
// Base-2 logarithm of the byte size of @ty: 1 for 16-bit, 2 for 32-bit and
// 3 for 64-bit types. Both TYPE_B96 and TYPE_B128 yield 4; 8-bit types and
// anything unlisted yield 0.
static inline unsigned int typeSizeofLog2(DataType ty)
{
   unsigned int shift = 0;

   switch (ty) {
   case TYPE_F16:
   case TYPE_U16:
   case TYPE_S16:
      shift = 1;
      break;
   case TYPE_F32:
   case TYPE_U32:
   case TYPE_S32:
      shift = 2;
      break;
   case TYPE_F64:
   case TYPE_U64:
   case TYPE_S64:
      shift = 3;
      break;
   case TYPE_B96:
   case TYPE_B128:
      shift = 4;
      break;
   default:
      // TYPE_U8, TYPE_S8 and every unlisted type
      break;
   }
   return shift;
}
 
// Picks a data type for a value of @size bytes.
// @flt selects the float variant for sizes 2, 4 and 8 (it takes precedence
// over @sgn); @sgn selects the signed integer variant otherwise. Sizes 12
// and 16 map to TYPE_B96 / TYPE_B128 regardless of the flags, and any other
// size yields TYPE_NONE.
static inline DataType typeOfSize(unsigned int size,
                                  bool flt = false, bool sgn = false)
{
   if (size == 12)
      return TYPE_B96;
   if (size == 16)
      return TYPE_B128;

   if (size == 1) {
      // there is no 8-bit float type, so @flt is ignored here
      if (sgn)
         return TYPE_S8;
      return TYPE_U8;
   }
   if (size == 2) {
      if (flt)
         return TYPE_F16;
      return sgn ? TYPE_S16 : TYPE_U16;
   }
   if (size == 4) {
      if (flt)
         return TYPE_F32;
      return sgn ? TYPE_S32 : TYPE_U32;
   }
   if (size == 8) {
      if (flt)
         return TYPE_F64;
      return sgn ? TYPE_S64 : TYPE_U64;
   }
   return TYPE_NONE;
}
 
// True when @ty lies in the enum range TYPE_F16..TYPE_F64 (inclusive).
static inline bool isFloatType(DataType ty)
{
   if (ty < TYPE_F16)
      return false;
   return ty <= TYPE_F64;
}
 
// True for TYPE_S8, TYPE_S16 and TYPE_S32 only.
static inline bool isSignedIntType(DataType ty)
{
   switch (ty) {
   case TYPE_S8:
   case TYPE_S16:
   case TYPE_S32:
      return true;
   default:
      return false;
   }
}
 
// Reports whether values of type @ty carry a sign.
// TYPE_NONE, the unsigned integer types and the opaque B96/B128 types are
// unsigned; every other type (signed integers, floats) is treated as signed.
static inline bool isSignedType(DataType ty)
{
   switch (ty) {
   case TYPE_NONE:
   case TYPE_U8:
   case TYPE_U16:
   case TYPE_U32:
   case TYPE_U64: // was missing: unsigned 64-bit fell through to "signed"
   case TYPE_B96:
   case TYPE_B128:
      return false;
   default:
      return true;
   }
}
 
// Maps TYPE_U8/U16/U32 to the signed type of the same width; any other
// type is returned unchanged.
static inline DataType intTypeToSigned(DataType ty)
{
   if (ty == TYPE_U32)
      return TYPE_S32;
   if (ty == TYPE_U16)
      return TYPE_S16;
   if (ty == TYPE_U8)
      return TYPE_S8;
   return ty;
}
 
// Returns the owning instruction's source that serves as the indirect
// reference for dimension @dim, or NULL when this reference is not indirect
// in that dimension.
const ValueRef *ValueRef::getIndirect(int dim) const
{
   if (!isIndirect(dim))
      return NULL;
   return &insn->src(indirect[dim]);
}
 
// Register file of the referenced value; FILE_NULL if no value is attached.
DataFile ValueRef::getFile() const
{
   if (!value)
      return FILE_NULL;
   return value->reg.file;
}
 
// reg.size of the referenced value; 0 if no value is attached.
unsigned int ValueRef::getSize() const
{
   if (!value)
      return 0;
   return value->reg.size;
}
 
// Returns the join field of the referenced value.
// A value must be attached (asserted).
Value *ValueRef::rep() const
{
   assert(value);
   Value *const joined = value->join;
   return joined;
}
 
// Returns the join field of the defined value.
// A value must be attached (asserted).
Value *ValueDef::rep() const
{
   assert(value);
   Value *const joined = value->join;
   return joined;
}
 
// Register file of the defined value; FILE_NULL if no value is attached.
DataFile ValueDef::getFile() const
{
   if (!value)
      return FILE_NULL;
   return value->reg.file;
}
 
// reg.size of the defined value; 0 if no value is attached.
unsigned int ValueDef::getSize() const
{
   if (!value)
      return 0;
   return value->reg.size;
}
 
// Remembers the currently attached value (as an LValue) in `origin`, then
// rebinds this definition to @lval.
void ValueDef::setSSA(LValue *lval)
{
   LValue *const previous = value->asLValue();
   origin = previous;
   set(lval);
}
 
// Returns the value recorded in `origin` (set by setSSA()).
const LValue *ValueDef::preSSA() const
{
return origin;
}
 
// Instruction owning the first recorded definition of this value, or NULL
// when the value has no definitions.
Instruction *Value::getInsn() const
{
   if (defs.empty())
      return NULL;
   return defs.front()->getInsn();
}
 
// Returns the instruction that defines exactly this value (not a value
// merely joined with it), or NULL when there are no definitions at all.
Instruction *Value::getUniqueInsn() const
{
   if (defs.empty())
      return NULL;

   // After regalloc the definitions of coalesced values are linked, so the
   // list may hold defs of other values: pick the one that is really ours.
   if (join != this) {
      DefCIterator d = defs.begin();
      while (d != defs.end()) {
         if ((*d)->get() == this)
            return (*d)->getInsn();
         ++d;
      }
      // not expected to get here; the assertion below will catch it
   }
#ifdef DEBUG
   if (reg.data.id < 0) {
      // count our own definitions only (joined values are ignored) and stop
      // as soon as a second one is seen
      int own = 0;
      for (DefCIterator d = defs.begin(); d != defs.end() && own < 2; ++d) {
         if ((*d)->get() == this)
            ++own;
      }
      if (own > 1)
         WARN("value %%%i not uniquely defined\n", id); // return NULL ?
   }
#endif
   assert(defs.front()->get() == this);
   return defs.front()->getInsn();
}
 
// True when the instruction has a definition in slot 1 or is an OP_UNION.
inline bool Instruction::constrainedDefs() const
{
   if (defExists(1))
      return true;
   return op == OP_UNION;
}
 
// Value used as the indirect reference of source @s in dimension @dim, or
// NULL when that source is not indirect in that dimension.
Value *Instruction::getIndirect(int s, int dim) const
{
   if (!srcs[s].isIndirect(dim))
      return NULL;
   return getSrc(srcs[s].indirect[dim]);
}
 
// Source value at index predSrc, or NULL when predSrc is negative.
Value *Instruction::getPredicate() const
{
   if (predSrc < 0)
      return NULL;
   return getSrc(predSrc);
}
 
// Attaches or detaches the flags definition.
// With a non-NULL @val: slot @d becomes the flags slot if none was set yet,
// and @val is stored in the (possibly pre-existing) flags slot.
// With a NULL @val: an existing flags slot is cleared and forgotten.
void Instruction::setFlagsDef(int d, Value *val)
{
   if (val) {
      if (flagsDef < 0)
         flagsDef = d;
      setDef(flagsDef, val);
      return;
   }

   if (flagsDef < 0)
      return; // nothing to remove
   setDef(flagsDef, NULL);
   flagsDef = -1;
}
 
// Records @s as the flags source slot and stores @val in that slot.
void Instruction::setFlagsSrc(int s, Value *val)
{
flagsSrc = s;
// the slot index is read back from flagsSrc rather than taken from @s
setSrc(flagsSrc, val);
}
 
// Source value at index tex.rIndirectSrc, or NULL when that index is
// negative.
Value *TexInstruction::getIndirectR() const
{
   if (tex.rIndirectSrc < 0)
      return NULL;
   return getSrc(tex.rIndirectSrc);
}
 
// Source value at index tex.sIndirectSrc, or NULL when that index is
// negative.
//
// This previously read tex.rIndirectSrc (copy-paste from getIndirectR()),
// so it returned the R indirect source instead of the S one.
// NOTE(review): assumes the tex descriptor declares sIndirectSrc next to
// rIndirectSrc - confirm against the TexInstruction declaration.
Value *TexInstruction::getIndirectS() const
{
   return tex.sIndirectSrc >= 0 ? getSrc(tex.sIndirectSrc) : NULL;
}
 
// Downcast to CmpInstruction for opcodes in OP_SET_AND..OP_SLCT, excluding
// OP_SELP; NULL for every other opcode.
CmpInstruction *Instruction::asCmp()
{
   const bool inRange = (op >= OP_SET_AND) && (op <= OP_SLCT);
   if (!inRange || op == OP_SELP)
      return NULL;
   return static_cast<CmpInstruction *>(this);
}

// Const overload of asCmp(); same opcode test.
const CmpInstruction *Instruction::asCmp() const
{
   const bool inRange = (op >= OP_SET_AND) && (op <= OP_SLCT);
   if (!inRange || op == OP_SELP)
      return NULL;
   return static_cast<const CmpInstruction *>(this);
}
 
// Downcast to FlowInstruction for opcodes in OP_BRA..OP_JOIN; NULL
// otherwise.
FlowInstruction *Instruction::asFlow()
{
   if (op < OP_BRA || op > OP_JOIN)
      return NULL;
   return static_cast<FlowInstruction *>(this);
}

// Const overload of asFlow(); same opcode test.
const FlowInstruction *Instruction::asFlow() const
{
   if (op < OP_BRA || op > OP_JOIN)
      return NULL;
   return static_cast<const FlowInstruction *>(this);
}
 
// Downcast to TexInstruction for opcodes in OP_TEX..OP_SULEA; NULL
// otherwise.
TexInstruction *Instruction::asTex()
{
   if (op < OP_TEX || op > OP_SULEA)
      return NULL;
   return static_cast<TexInstruction *>(this);
}

// Const overload of asTex(); same opcode test.
const TexInstruction *Instruction::asTex() const
{
   if (op < OP_TEX || op > OP_SULEA)
      return NULL;
   return static_cast<const TexInstruction *>(this);
}
 
// Clones @obj into function @ctx with a deep clone policy in which every
// source of @obj is pre-registered as its own clone.
static inline Instruction *cloneForward(Function *ctx, Instruction *obj)
{
   DeepClonePolicy<Function> policy(ctx);

   int s = 0;
   while (obj->srcExists(s)) {
      policy.set(obj->getSrc(s), obj->getSrc(s));
      ++s;
   }

   return obj->clone(policy);
}
 
// Downcast to LValue when reg.file lies in FILE_GPR..FILE_ADDRESS; NULL
// otherwise.
// XXX: use a virtual function so we're really really safe ?
LValue *Value::asLValue()
{
   if (reg.file < FILE_GPR || reg.file > FILE_ADDRESS)
      return NULL;
   return static_cast<LValue *>(this);
}
 
// Downcast to Symbol when reg.file is FILE_MEMORY_CONST or any later enum
// value; NULL otherwise.
Symbol *Value::asSym()
{
   if (reg.file < FILE_MEMORY_CONST)
      return NULL;
   return static_cast<Symbol *>(this);
}

// Const overload of asSym(); same file test.
const Symbol *Value::asSym() const
{
   if (reg.file < FILE_MEMORY_CONST)
      return NULL;
   return static_cast<const Symbol *>(this);
}
 
// Stores @offset in reg.data.offset.
void Symbol::setOffset(int32_t offset)
{
reg.data.offset = offset;
}
 
// Sets the symbol's address as a (base symbol, offset) pair.
void Symbol::setAddress(Symbol *base, int32_t offset)
{
   reg.data.offset = offset;
   baseSym = base;
}
 
// Stores the system value semantic @sv and its @index in reg.data.sv.
void Symbol::setSV(SVSemantic sv, uint32_t index)
{
   reg.data.sv.index = index;
   reg.data.sv.sv = sv;
}
 
// Downcast to ImmediateValue when reg.file is FILE_IMMEDIATE; NULL
// otherwise.
ImmediateValue *Value::asImm()
{
   if (reg.file != FILE_IMMEDIATE)
      return NULL;
   return static_cast<ImmediateValue *>(this);
}

// Const overload of asImm(); same file test.
const ImmediateValue *Value::asImm() const
{
   if (reg.file != FILE_IMMEDIATE)
      return NULL;
   return static_cast<const ImmediateValue *>(this);
}
 
// Reinterprets the iterator's current element as a Value pointer.
// No type check is performed; the caller must know the iterator walks
// Value objects.
Value *Value::get(Iterator &it)
{
return reinterpret_cast<Value *>(it.get());
}
 
bool BasicBlock::reachableBy(const BasicBlock *by, const BasicBlock *term)
{
return cfg.reachableBy(&by->cfg, &term->cfg);
}
 
// Reinterprets the iterator's current element as a BasicBlock pointer.
// No type check is performed; the caller must know the iterator walks
// BasicBlock objects.
BasicBlock *BasicBlock::get(Iterator &iter)
{
return reinterpret_cast<BasicBlock *>(iter.get());
}
 
// Recovers the BasicBlock stored in the data pointer of graph node @node.
// @node must not be NULL (asserted).
BasicBlock *BasicBlock::get(Graph::Node *node)
{
   assert(node);
   void *const payload = node->data;
   return reinterpret_cast<BasicBlock *>(payload);
}
 
// Recovers the Function stored in the data pointer of graph node @node.
// @node must not be NULL (asserted).
Function *Function::get(Graph::Node *node)
{
   assert(node);
   void *const payload = node->data;
   return reinterpret_cast<Function *>(payload);
}
 
// Looks up entry @id in allLValues. The index is asserted to be in range;
// the unsigned comparison also rejects negative ids.
LValue *Function::getLValue(int id)
{
   assert(static_cast<unsigned int>(id) <
          static_cast<unsigned int>(allLValues.getSize()));
   return reinterpret_cast<LValue *>(allLValues.get(id));
}
 
#endif // __NV50_IR_INLINES_H__
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
0,0 → 1,292
/*
* Copyright 2011 Christoph Bumiller
* 2014 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_build_util.h"
 
#include "codegen/nv50_ir_target_nvc0.h"
#include "codegen/nv50_ir_lowering_gm107.h"
 
#include <limits>
 
namespace nv50_ir {
 
// Two-bit per-lane operation selectors consumed by QUADOP() below.
#define QOP_ADD 0
#define QOP_SUBR 1
#define QOP_SUB 2
#define QOP_MOV2 3

// Packs four QOP_* selectors (given without the QOP_ prefix) into one byte,
// two bits per lane, first argument in the top two bits. Lane order
// (presumably upper-left, upper-right, lower-left, lower-right of the quad):
// UL UR LL LR
#define QUADOP(q, r, s, t) \
((QOP_##q << 6) | (QOP_##r << 4) | \
(QOP_##s << 2) | (QOP_##t << 0))
 
// Lowers a texture fetch with explicit derivatives by hand.
//
// For each of the four lanes l of a quad: the coordinates of lane l are
// shuffled to all lanes, dPdx / dPdy of lane l are combined into them with
// QUADOP using the per-lane selectors in qOps[l], a clone of the (now plain
// OP_TEX) instruction is issued on those coordinates, and its results are
// copied with a mov restricted to lane l. Finally the four per-lane results
// of every def are merged with OP_UNION into the original defs and the
// original instruction is removed from its block.
//
// Always returns true.
bool
GM107LoweringPass::handleManualTXD(TexInstruction *i)
{
// qOps[l][0] is used when applying dPdx, qOps[l][1] when applying dPdy
static const uint8_t qOps[4][2] =
{
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
};
Value *def[4][4]; // [def index][lane]
Value *crd[3]; // scratch coordinates, one per dimension
Value *tmp;
Instruction *tex, *add;
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
const int dim = i->tex.target.getDim();
// used as an offset into the sources: coordinates start at index `array`
const int array = i->tex.target.isArray();

i->op = OP_TEX; // no need to clone dPdx/dPdy later

for (c = 0; c < dim; ++c)
crd[c] = bld.getScratch();
tmp = bld.getScratch();

for (l = 0; l < 4; ++l) {
// mov coordinates from lane l to all lanes
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (c = 0; c < dim; ++c) {
bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), bld.mkImm(l));
add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero);
add->subOp = 0x00;
add->lanes = 1; /* abused for .ndv */
}

// add dPdx from lane l to lanes dx
for (c = 0; c < dim; ++c) {
bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l));
add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
add->subOp = qOps[l][0];
add->lanes = 1; /* abused for .ndv */
}

// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c) {
bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l));
add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
add->subOp = qOps[l][1];
add->lanes = 1; /* abused for .ndv */
}

// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
tex->setSrc(c + array, crd[c]);
bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);

// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;
def[c][l] = bld.getSSA();
mov = bld.mkMov(def[c][l], tex->getDef(c));
mov->fixed = 1;
mov->lanes = 1 << l; // only lane l keeps this iteration's result
}
}

// merge the four per-lane copies of every result into the original def
for (c = 0; i->defExists(c); ++c) {
Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
for (l = 0; l < 4; ++l)
u->setSrc(l, def[c][l]);
}

i->bb->remove(i);
return true;
}
 
// Rewrites OP_DFDX / OP_DFDY in place as an OP_QUADOP.
//
// A butterfly SHFL of source 0 (lane argument 1 for DFDX, 2 for DFDY) is
// emitted first; the instruction then becomes a QUADOP whose source 0 is
// the shuffled value and whose source 1 is the original source 0.
// Always returns true.
bool
GM107LoweringPass::handleDFDX(Instruction *insn)
{
   int quadOp = 0;
   int laneArg = 0;

   if (insn->op == OP_DFDX) {
      quadOp = QUADOP(SUB, SUBR, SUB, SUBR);
      laneArg = 1;
   } else if (insn->op == OP_DFDY) {
      quadOp = QUADOP(SUB, SUB, SUBR, SUBR);
      laneArg = 2;
   } else {
      assert(!"invalid dfdx opcode");
   }

   Value *const original = insn->getSrc(0);
   Instruction *const shuffle =
      bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(),
                original, bld.mkImm(laneArg));
   shuffle->subOp = NV50_IR_SUBOP_SHFL_BFLY;

   insn->op = OP_QUADOP;
   insn->subOp = quadOp;
   insn->lanes = 0; /* abused for !.ndv */
   insn->setSrc(1, original);
   insn->setSrc(0, shuffle->getDef(0));
   return true;
}
 
// Rewrites the sources of a PFETCH so that a single computed value is left
// in source 0 and source 1 is cleared.
//
// Emitted sequence (tmp0..tmp2 are scratch values):
//   tmp0 = RDSV SV_INVOCATION_INFO
//   tmp1 = (tmp0 >> 16) & 0xff
//   tmp0 = tmp0 & 0xff
//   tmp2 = src0 + src1   (or just src0 when there is no src1)
//   tmp0 = MAD(tmp0, tmp1, tmp2)   // presumably tmp0 * tmp1 + tmp2
// Always returns true.
bool
GM107LoweringPass::handlePFETCH(Instruction *i)
{
Value *tmp0 = bld.getScratch();
Value *tmp1 = bld.getScratch();
Value *tmp2 = bld.getScratch();
bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16));
bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff));
bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff));
if (i->getSrc(1))
bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1));
else
bld.mkOp1(OP_MOV , TYPE_U32, tmp2, i->getSrc(0));
bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2);
i->setSrc(0, tmp0);
i->setSrc(1, NULL);
return true;
}
 
// Folds the two sources of a POPCNT into one: an AND of source 0 and
// source 1 (in the instruction's source type) is emitted and becomes the
// only source; source 1 is cleared.
// Always returns true.
bool
GM107LoweringPass::handlePOPCNT(Instruction *i)
{
   Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(),
                           i->getSrc(0), i->getSrc(1));
   i->setSrc(0, tmp);
   i->setSrc(1, NULL);
   // use the C++ literal like every other handler, not the C macro TRUE
   return true;
}
 
//
// - add quadop dance for texturing
// - put FP outputs in GPRs
// - convert instruction sequences
//
// Per-instruction entry point of the pass. Positions the builder before @i,
// runs checkPredicate() on instructions whose condition code is not
// CC_ALWAYS, then dispatches on the opcode. Cases that delegate with
// `return handleX(...)` propagate the handler's result; every other case
// falls out of the switch and returns true.
bool
GM107LoweringPass::visit(Instruction *i)
{
bld.setPosition(i, false);

if (i->cc != CC_ALWAYS)
checkPredicate(i);

switch (i->op) {
case OP_TEX:
case OP_TXB:
case OP_TXL:
case OP_TXF:
case OP_TXG:
return handleTEX(i->asTex());
case OP_TXD:
return handleTXD(i->asTex());
case OP_TXLQ:
return handleTXLQ(i->asTex());
case OP_TXQ:
return handleTXQ(i->asTex());
case OP_EX2:
// insert a PREEX2 on the source; EX2 then consumes the PREEX2 result,
// which is written to (and read back from) the EX2's own def
bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
i->setSrc(0, i->getDef(0));
break;
case OP_POW:
return handlePOW(i);
case OP_DIV:
return handleDIV(i);
case OP_MOD:
return handleMOD(i);
case OP_SQRT:
return handleSQRT(i);
case OP_EXPORT:
return handleEXPORT(i);
case OP_PFETCH:
return handlePFETCH(i);
case OP_EMIT:
case OP_RESTART:
return handleOUT(i);
case OP_RDSV:
return handleRDSV(i);
case OP_WRSV:
return handleWRSV(i);
case OP_LOAD:
if (i->src(0).getFile() == FILE_SHADER_INPUT) {
if (prog->getType() == Program::TYPE_COMPUTE) {
// compute programs: redirect the load to constant memory, file index 0
i->getSrc(0)->reg.file = FILE_MEMORY_CONST;
i->getSrc(0)->reg.fileIndex = 0;
} else
if (prog->getType() == Program::TYPE_GEOMETRY &&
i->src(0).isIndirect(0)) {
// XXX: this assumes vec4 units
// the indirect index is shifted left by 4 (i.e. multiplied by 16)
Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
i->getIndirect(0, 0), bld.mkImm(4));
i->setIndirect(0, 0, ptr);
i->op = OP_VFETCH;
} else {
i->op = OP_VFETCH;
assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
}
} else if (i->src(0).getFile() == FILE_MEMORY_CONST) {
if (i->src(0).isIndirect(1)) {
// fold the dimension-1 indirect into the dimension-0 indirect:
// either inserted into the existing dimension-0 value with INSBF,
// or shifted left by 16 when there is none
Value *ptr;
if (i->src(0).isIndirect(0))
ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(),
i->getIndirect(0, 1), bld.mkImm(0x1010),
i->getIndirect(0, 0));
else
ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
i->getIndirect(0, 1), bld.mkImm(16));
i->setIndirect(0, 1, NULL);
i->setIndirect(0, 0, ptr);
i->subOp = NV50_IR_SUBOP_LDC_IS;
}
}
break;
case OP_ATOM:
{
// the file must be sampled before handleATOM(), which is given the
// chance to modify the instruction
const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL;
handleATOM(i);
handleCasExch(i, cctl);
}
break;
case OP_SULDB:
case OP_SULDP:
case OP_SUSTB:
case OP_SUSTP:
case OP_SUREDB:
case OP_SUREDP:
handleSurfaceOpNVE4(i->asTex());
break;
case OP_DFDX:
case OP_DFDY:
handleDFDX(i);
break;
case OP_POPCNT:
handlePOPCNT(i);
break;
default:
break;
}
return true;
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h
0,0 → 1,18
#include "codegen/nv50_ir_lowering_nvc0.h"
 
namespace nv50_ir {
 
// Lowering pass for GM107: reuses NVC0LoweringPass and replaces the
// per-instruction visitor and the manual TXD expansion, adding handlers for
// DFDX/DFDY, PFETCH and POPCNT.
class GM107LoweringPass : public NVC0LoweringPass
{
public:
GM107LoweringPass(Program *p) : NVC0LoweringPass(p) {}
private:
// per-instruction dispatch
virtual bool visit(Instruction *);

// declared virtual here; expected to override the base class version
virtual bool handleManualTXD(TexInstruction *);
bool handleDFDX(Instruction *);
bool handlePFETCH(Instruction *);
bool handlePOPCNT(Instruction *);
};
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
0,0 → 1,1394
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_build_util.h"
 
#include "codegen/nv50_ir_target_nv50.h"
 
namespace nv50_ir {
 
// nv50 doesn't support 32 bit integer multiplication
//
// ah al * bh bl = LO32: (al * bh + ah * bl) << 16 + (al * bl)
// -------------------
// al*bh 00 HI32: (al * bh + ah * bl) >> 16 + (ah * bh) +
// ah*bh 00 00 ( carry1) << 16 + ( carry2)
// al*bl
// ah*bl 00
//
// fffe0001 + fffe0001
//
// Note that this sort of splitting doesn't work for signed values, so we
// compute the sign on those manually and then perform an unsigned multiply.
//
// @bld: builder used to emit the replacement; it is repositioned after @mul.
// @mul: the multiply to expand. On success it is deleted, so callers must
//       not touch it afterwards.
// Returns false (and emits nothing) when the source type is neither 32 nor
// 64 bit integer; returns true once the expansion has been emitted.
static bool
expandIntegerMUL(BuildUtil *bld, Instruction *mul)
{
const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH;

// the arithmetic is always done on the unsigned type of the same width
DataType fTy; // full type
switch (mul->sType) {
case TYPE_S32: fTy = TYPE_U32; break;
case TYPE_S64: fTy = TYPE_U64; break;
default: fTy = mul->sType; break;
}

DataType hTy; // half type
switch (fTy) {
case TYPE_U32: hTy = TYPE_U16; break;
case TYPE_U64: hTy = TYPE_U32; break;
default:
return false;
}
unsigned int fullSize = typeSizeof(fTy);
unsigned int halfSize = typeSizeof(hTy);

// i[2..5] are the multiplies whose source type is narrowed at the end;
// the other slots only keep handles needed to attach flags / predicates
Instruction *i[9];

bld->setPosition(mul, true);

Value *s[2];
Value *a[2], *b[2];
Value *t[4];
for (int j = 0; j < 4; ++j)
t[j] = bld->getSSA(fullSize);

s[0] = mul->getSrc(0);
s[1] = mul->getSrc(1);

// signed input: multiply the absolute values, the sign is fixed up later
if (isSignedType(mul->sType)) {
s[0] = bld->getSSA(fullSize);
s[1] = bld->getSSA(fullSize);
bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0));
bld->mkOp1(OP_ABS, mul->sType, s[1], mul->getSrc(1));
}

// split sources into halves
i[0] = bld->mkSplit(a, halfSize, s[0]);
i[1] = bld->mkSplit(b, halfSize, s[1]);

// t[1] = al*bh + ah*bl (cross terms), t[3] = al*bl + (t[1] << half bits)
i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]);
i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
i[7] = bld->mkOp2(OP_SHL, fTy, t[2], t[1], bld->mkImm(halfSize * 8));
i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);

if (highResult) {
Value *c[2];
Value *r[5];
// NOTE(review): for the 64-bit case halfSize is 4, making this a shift of
// an int by 32 bits - confirm this path is never taken or widen the type
Value *imm = bld->loadImm(NULL, 1 << (halfSize * 8));
c[0] = bld->getSSA(1, FILE_FLAGS);
c[1] = bld->getSSA(1, FILE_FLAGS);
for (int j = 0; j < 5; ++j)
r[j] = bld->getSSA(fullSize);

// r[2] = (cross terms >> half bits), plus imm when the cross-term
// addition carried (selected through predicates on c[0])
i[8] = bld->mkOp2(OP_SHR, fTy, r[0], t[1], bld->mkImm(halfSize * 8));
i[6] = bld->mkOp2(OP_ADD, fTy, r[1], r[0], imm);
bld->mkMov(r[3], r[0])->setPredicate(CC_NC, c[0]);
bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[3]);
i[5] = bld->mkOp3(OP_MAD, fTy, r[4], a[1], b[1], r[2]);

// set carry defs / sources
i[3]->setFlagsDef(1, c[0]);
// actual result required in negative case, but ignored for
// unsigned. for some reason the compiler ends up dropping the whole
// instruction if the destination is unused but the flags are.
if (isSignedType(mul->sType))
i[4]->setFlagsDef(1, c[1]);
else
i[4]->setFlagsDef(0, c[1]);
i[6]->setPredicate(CC_C, c[0]);
i[5]->setFlagsSrc(3, c[1]);

if (isSignedType(mul->sType)) {
Value *cc[2];
Value *rr[7];
Value *one = bld->getSSA(fullSize);
bld->loadImm(one, 1);
for (int j = 0; j < 7; j++)
rr[j] = bld->getSSA(fullSize);

// NOTE: this logic uses predicates because splitting basic blocks is
// ~impossible during the SSA phase. The RA relies on a correlation
// between edge order and phi node sources.

// Set the sign of the result based on the inputs
bld->mkOp2(OP_XOR, fTy, NULL, mul->getSrc(0), mul->getSrc(1))
->setFlagsDef(0, (cc[0] = bld->getSSA(1, FILE_FLAGS)));

// 1s complement of 64-bit value
bld->mkOp1(OP_NOT, fTy, rr[0], r[4])
->setPredicate(CC_S, cc[0]);
bld->mkOp1(OP_NOT, fTy, rr[1], t[3])
->setPredicate(CC_S, cc[0]);

// add to low 32-bits, keep track of the carry
Instruction *n = bld->mkOp2(OP_ADD, fTy, NULL, rr[1], one);
n->setPredicate(CC_S, cc[0]);
n->setFlagsDef(0, (cc[1] = bld->getSSA(1, FILE_FLAGS)));

// If there was a carry, add 1 to the upper 32 bits
// XXX: These get executed even if they shouldn't be
bld->mkOp2(OP_ADD, fTy, rr[2], rr[0], one)
->setPredicate(CC_C, cc[1]);
bld->mkMov(rr[3], rr[0])
->setPredicate(CC_NC, cc[1]);
bld->mkOp2(OP_UNION, fTy, rr[4], rr[2], rr[3]);

// Merge the results from the negative and non-negative paths
bld->mkMov(rr[5], rr[4])
->setPredicate(CC_S, cc[0]);
bld->mkMov(rr[6], r[4])
->setPredicate(CC_NS, cc[0]);
bld->mkOp2(OP_UNION, mul->sType, mul->getDef(0), rr[5], rr[6]);
} else {
bld->mkMov(mul->getDef(0), r[4]);
}
} else {
bld->mkMov(mul->getDef(0), t[3]);
}
// the original multiply is fully replaced; mul is dangling from here on
delete_Instruction(bld->getProgram(), mul);

// the partial products take half-width sources; i[5] only exists when the
// high result was requested
for (int j = 2; j <= (highResult ? 5 : 4); ++j)
if (i[j])
i[j]->sType = hTy;

return true;
}
 
// Two-bit per-lane operation selectors consumed by QUADOP() below.
#define QOP_ADD 0
#define QOP_SUBR 1
#define QOP_SUB 2
#define QOP_MOV2 3

// Packs four QOP_* selectors (given without the QOP_ prefix) into one byte,
// two bits per lane, first argument in the top two bits. Lane order
// (presumably upper-left, upper-right, lower-left, lower-right of the quad):
// UL UR LL LR
#define QUADOP(q, r, s, t) \
((QOP_##q << 6) | (QOP_##r << 4) | \
(QOP_##s << 2) | (QOP_##t << 0))
 
// Pass that runs after register allocation: removes no-ops, emulates PRERET
// on chipsets below 0xa0, splits 64-bit operations and replaces zero
// immediates with register 63.
class NV50LegalizePostRA : public Pass
{
private:
virtual bool visit(Function *);
virtual bool visit(BasicBlock *);

void handlePRERET(FlowInstruction *);
void replaceZero(Instruction *);

// GPR with id 63, created in visit(Function *); substituted for zero
// immediates by replaceZero()
LValue *r63;
};
 
// Per-function setup: creates the $r63 value used by replaceZero() and, if
// the program carries a list of deferred output writes in targetPriv,
// redirects the producer of each written value to the output itself.
// Always returns true.
bool
NV50LegalizePostRA::visit(Function *fn)
{
   typedef std::list<Instruction *> InsnList;

   Program *program = fn->getProgram();

   r63 = new_LValue(fn, FILE_GPR);
   r63->reg.data.id = 63;

   // this is actually per-program, but we can do it all on visiting main()
   InsnList *const pending = reinterpret_cast<InsnList *>(program->targetPriv);
   if (!pending)
      return true;

   for (InsnList::iterator it = pending->begin(); it != pending->end(); ++it) {
      Instruction *const store = *it;
      // the instruction defining the stored value now writes the store's
      // destination (source 0 of the saved export) directly
      Instruction *const producer = store->getSrc(1)->defs.front()->getInsn();
      producer->setDef(0, store->getSrc(0));
   }
   // instructions will be deleted on exit
   pending->clear();

   return true;
}
 
// Replaces every immediate source of @i whose 64-bit payload is zero with
// the r63 value.
void
NV50LegalizePostRA::replaceZero(Instruction *i)
{
   int s = 0;
   while (i->srcExists(s)) {
      ImmediateValue *const imm = i->getSrc(s)->asImm();
      const bool isZeroImm = imm && imm->reg.data.u64 == 0;
      if (isZeroImm)
         i->setSrc(s, r63);
      ++s;
   }
}
 
// Emulate PRERET: jump to the target and call to the origin from there
//
// WARNING: atm only works if BBs are affected by at most a single PRERET
//
// BB:0
// preret BB:3
// (...)
// BB:3
// (...)
// --->
// BB:0
// bra BB:3 + n0 (directly to the call; move to beginning of BB and fixate)
// (...)
// BB:3
// bra BB:3 + n1 (skip the call)
// call BB:0 + n2 (skip bra at beginning of BB:0)
// (...)
//
// The three emitted instructions keep opcode OP_PRERET and are told apart by
// their subOp: NV50_IR_SUBOP_EMU_PRERET + 0 (the moved original), + 1 (skip)
// and + 2 (call).
void
NV50LegalizePostRA::handlePRERET(FlowInstruction *pre)
{
BasicBlock *bbE = pre->bb; // block containing the PRERET
BasicBlock *bbT = pre->target.bb; // block the PRERET points at

// move the original to the head of its block
pre->subOp = NV50_IR_SUBOP_EMU_PRERET + 0;
bbE->remove(pre);
bbE->insertHead(pre);

Instruction *skip = new_FlowInstruction(func, OP_PRERET, bbT);
Instruction *call = new_FlowInstruction(func, OP_PRERET, bbE);

// inserted at the head in reverse, so the target block starts: skip, call
bbT->insertHead(call);
bbT->insertHead(skip);

// NOTE: maybe split blocks to prevent the instructions from moving ?

skip->subOp = NV50_IR_SUBOP_EMU_PRERET + 1;
call->subOp = NV50_IR_SUBOP_EMU_PRERET + 2;
}
 
// Per-block legalization after register allocation.
//
// Walks the block's instructions and
//  - removes no-ops,
//  - emulates PRERET on chipsets below 0xa0,
//  - splits operations with an 8-byte destination type; when the split
//    yields a second instruction the walk continues from it,
//  - replaces zero immediates with $r63, except on MOV, PFETCH, BAR and
//    instructions that write an address register.
// Always returns true.
bool
NV50LegalizePostRA::visit(BasicBlock *bb)
{
   Instruction *i, *next;

   // remove pseudo operations and non-fixed no-ops, split 64 bit operations
   for (i = bb->getFirst(); i; i = next) {
      // fetched up front: the current instruction may be removed or moved
      next = i->next;
      if (i->isNop()) {
         bb->remove(i);
      } else
      if (i->op == OP_PRERET && prog->getTarget()->getChipset() < 0xa0) {
         handlePRERET(i->asFlow());
      } else {
         // TODO: We will want to do this before register allocation,
         // since have to use a $c register for the carry flag.
         if (typeSizeof(i->dType) == 8) {
            Instruction *hi = BuildUtil::split64BitOpPostRA(func, i, r63, NULL);
            if (hi)
               next = hi;
         }

         if (i->op != OP_MOV && i->op != OP_PFETCH &&
             i->op != OP_BAR &&
             (!i->defExists(0) || i->def(0).getFile() != FILE_ADDRESS))
            replaceZero(i);
      }
   }

   // The former "if (!bb->getEntry()) return true;" was dropped: both paths
   // returned the same value.
   return true;
}
 
// Pass that legalizes instructions for nv50: integer DIV / MOD / MUL / MAD
// are expanded, address-register definitions are fixed up and, when enabled,
// EXPORTs are recorded for later propagation into their producers.
class NV50LegalizeSSA : public Pass
{
public:
NV50LegalizeSSA(Program *);

virtual bool visit(BasicBlock *bb);

private:
void propagateWriteToOutput(Instruction *);
void handleDIV(Instruction *);
void handleMOD(Instruction *);
void handleMUL(Instruction *);
void handleAddrDef(Instruction *);

inline bool isARL(const Instruction *) const;

BuildUtil bld;

// list stored in Program::targetPriv that collects removed EXPORTs; NULL
// when output-write propagation is disabled for this program
std::list<Instruction *> *outWrites;
};
 
// Binds the builder to @prog and decides whether output-write propagation
// is active: only at optimisation level 2 or higher, and only for geometry
// and vertex programs. Otherwise outWrites stays NULL.
NV50LegalizeSSA::NV50LegalizeSSA(Program *prog)
{
   bld.setProgram(prog);

   outWrites = NULL;
   if (prog->optLevel < 2)
      return;

   if (prog->getType() == Program::TYPE_GEOMETRY ||
       prog->getType() == Program::TYPE_VERTEX) {
      outWrites =
         reinterpret_cast<std::list<Instruction *> *>(prog->targetPriv);
   }
}
 
// Tries to fold the export @st into the instruction that produces the
// exported value.
//
// The export is left alone when its address is indirect, when the exported
// value has other users, when the producer is a pseudo or texture
// instruction or has more than one def, or when the producer reads an
// immediate. In geometry programs the producer must additionally be in the
// same block with no EMIT / RESTART between it and the export.
//
// On success the export is removed from its block and queued in outWrites;
// the producer's def is only rewritten after register allocation.
// Must only be called while outWrites is non-NULL.
void
NV50LegalizeSSA::propagateWriteToOutput(Instruction *st)
{
   if (st->src(0).isIndirect(0) || st->getSrc(1)->refCount() != 1)
      return;

   // check def instruction can store
   Instruction *di = st->getSrc(1)->defs.front()->getInsn();

   // TODO: move exports (if beneficial) in common opt pass
   if (di->isPseudo() || isTextureOp(di->op) || di->defCount(0xff, true) > 1)
      return;

   for (int s = 0; di->srcExists(s); ++s)
      if (di->src(s).getFile() == FILE_IMMEDIATE)
         return;

   if (prog->getType() == Program::TYPE_GEOMETRY) {
      // Only propagate output writes in geometry shaders when we can be sure
      // that we are propagating to the same output vertex.
      if (di->bb != st->bb)
         return;
      Instruction *i;
      // Stop at the end of the list as well: previously a missing st made
      // the loop dereference NULL before the assertion could ever fire.
      for (i = di; i && i != st; i = i->next) {
         if (i->op == OP_EMIT || i->op == OP_RESTART)
            return;
      }
      assert(i); // st after di
      if (!i)
         return; // builds without assertions: st not found, do not propagate
   }

   // We cannot set defs to non-lvalues before register allocation, so
   // save & remove (to save registers) the exports and replace later.
   outWrites->push_back(st);
   st->bb->remove(st);
}
 
bool
NV50LegalizeSSA::isARL(const Instruction *i) const
{
ImmediateValue imm;
 
if (i->op != OP_SHL || i->src(0).getFile() != FILE_GPR)
return false;
if (!i->src(1).getImmediate(imm))
return false;
return imm.isInteger(0);
}
 
// Legalizes an instruction whose first def is an address register.
//
// The def is narrowed to 2 bytes. PFETCH, SHL(GPR, IMM) and ADD(ADDR, IMM)
// may write an address register directly and are left as they are. For
// anything else, address-register sources are replaced by GPR values, and
// unless the instruction ends up as a SHL by an immediate, its result is
// redirected to a fresh value that a newly inserted "SHL by 0" then copies
// into the address register.
void
NV50LegalizeSSA::handleAddrDef(Instruction *i)
{
Instruction *arl;

i->getDef(0)->reg.size = 2; // $aX are only 16 bit

// PFETCH can always write to $a
if (i->op == OP_PFETCH)
return;
// only ADDR <- SHL(GPR, IMM) and ADDR <- ADD(ADDR, IMM) are valid
if (i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE) {
if (i->op == OP_SHL && i->src(0).getFile() == FILE_GPR)
return;
if (i->op == OP_ADD && i->src(0).getFile() == FILE_ADDRESS)
return;
}

// turn $a sources into $r sources (can't operate on $a)
for (int s = 0; i->srcExists(s); ++s) {
Value *a = i->getSrc(s);
Value *r;
if (a->reg.file == FILE_ADDRESS) {
if (a->getInsn() && isARL(a->getInsn())) {
// the address was loaded from a GPR by a shift of 0: use that GPR
i->setSrc(s, a->getInsn()->getSrc(0));
} else {
// otherwise copy the address register into a new value first
bld.setPosition(i, false);
r = bld.getSSA();
bld.mkMov(r, a);
i->setSrc(s, r);
}
}
}
// a shift by an immediate can write the address register itself
if (i->op == OP_SHL && i->src(1).getFile() == FILE_IMMEDIATE)
return;

// turn result back into $a
bld.setPosition(i, true);
// the new shift takes over the original def; its fresh source value
// becomes the def of i
arl = bld.mkOp2(OP_SHL, TYPE_U32, i->getDef(0), bld.getSSA(), bld.mkImm(0));
i->setDef(0, arl->getSrc(0));
}
 
// Expands an integer MUL or MAD wider than 16 bits via expandIntegerMUL().
//
// Float multiplies and types of at most 2 bytes are left untouched. A MAD is
// first split into a MUL (into a shallow clone of the def) followed by an
// ADD that reuses the original instruction. A predicate on the instruction
// is removed for the expansion and re-applied afterwards to whichever
// instruction then defines the original result.
void
NV50LegalizeSSA::handleMUL(Instruction *mul)
{
if (isFloatType(mul->sType) || typeSizeof(mul->sType) <= 2)
return;
// remembered up front: mul may be replaced or deleted below
Value *def = mul->getDef(0);
Value *pred = mul->getPredicate();
CondCode cc = mul->cc;
if (pred)
mul->setPredicate(CC_ALWAYS, NULL);

if (mul->op == OP_MAD) {
Instruction *add = mul;
bld.setPosition(add, false);
Value *res = cloneShallow(func, mul->getDef(0));
mul = bld.mkOp2(OP_MUL, add->sType, res, add->getSrc(0), add->getSrc(1));
// the original instruction becomes ADD(product, old source 2)
add->op = OP_ADD;
add->setSrc(0, mul->getDef(0));
add->setSrc(1, add->getSrc(2));
for (int s = 2; add->srcExists(s); ++s)
add->setSrc(s, NULL);
// the subOp moves to the multiply
mul->subOp = add->subOp;
add->subOp = 0;
}
expandIntegerMUL(&bld, mul);
if (pred)
def->getInsn()->setPredicate(cc, pred);
}
 
// Use f32 division: first compute an approximate result, use it to reduce
// the dividend, which should then be representable as f32, divide the reduced
// dividend, and add the quotients.
//
// Only 32-bit integer divisions (TYPE_U32 / TYPE_S32) are rewritten; any
// other type is left alone. The new instructions are inserted before @div,
// which is itself turned into the final operation of the sequence.
void
NV50LegalizeSSA::handleDIV(Instruction *div)
{
const DataType ty = div->sType;

if (ty != TYPE_U32 && ty != TYPE_S32)
return;

Value *q, *q0, *qf, *aR, *aRf, *qRf, *qR, *t, *s, *m, *cond;

bld.setPosition(div, false);

// a, b: integer operands (absolute values when signed); af, bf: as f32
Value *a, *af = bld.getSSA();
Value *b, *bf = bld.getSSA();

bld.mkCvt(OP_CVT, TYPE_F32, af, ty, div->getSrc(0));
bld.mkCvt(OP_CVT, TYPE_F32, bf, ty, div->getSrc(1));

if (isSignedType(ty)) {
// work on magnitudes; the sign is restored at the end
af->getInsn()->src(0).mod = Modifier(NV50_IR_MOD_ABS);
bf->getInsn()->src(0).mod = Modifier(NV50_IR_MOD_ABS);
a = bld.getSSA();
b = bld.getSSA();
bld.mkOp1(OP_ABS, ty, a, div->getSrc(0));
bld.mkOp1(OP_ABS, ty, b, div->getSrc(1));
} else {
a = div->getSrc(0);
b = div->getSrc(1);
}

// bf = 1 / b, then an integer add of -2 on the float's bit pattern
// (presumably to keep the estimate from exceeding the true reciprocal)
bf = bld.mkOp1v(OP_RCP, TYPE_F32, bld.getSSA(), bf);
bf = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), bf, bld.mkImm(-2));

// q0: first quotient estimate
bld.mkOp2(OP_MUL, TYPE_F32, (qf = bld.getSSA()), af, bf)->rnd = ROUND_Z;
bld.mkCvt(OP_CVT, ty, (q0 = bld.getSSA()), TYPE_F32, qf)->rnd = ROUND_Z;

// get error of 1st result
expandIntegerMUL(&bld,
bld.mkOp2(OP_MUL, TYPE_U32, (t = bld.getSSA()), q0, b));
bld.mkOp2(OP_SUB, TYPE_U32, (aRf = bld.getSSA()), a, t);

bld.mkCvt(OP_CVT, TYPE_F32, (aR = bld.getSSA()), TYPE_U32, aRf);

// qR: quotient of the remaining dividend
bld.mkOp2(OP_MUL, TYPE_F32, (qRf = bld.getSSA()), aR, bf)->rnd = ROUND_Z;
bld.mkCvt(OP_CVT, TYPE_U32, (qR = bld.getSSA()), TYPE_F32, qRf)
->rnd = ROUND_Z;
bld.mkOp2(OP_ADD, ty, (q = bld.getSSA()), q0, qR); // add quotients

// correction: if modulus >= divisor, add 1
// (done as q - s; presumably the comparison result s is -1 when true)
expandIntegerMUL(&bld,
bld.mkOp2(OP_MUL, TYPE_U32, (t = bld.getSSA()), q, b));
bld.mkOp2(OP_SUB, TYPE_U32, (m = bld.getSSA()), a, t);
bld.mkCmp(OP_SET, CC_GE, TYPE_U32, (s = bld.getSSA()), TYPE_U32, m, b);
if (!isSignedType(ty)) {
// unsigned: the division itself becomes the correcting subtraction
div->op = OP_SUB;
div->setSrc(0, q);
div->setSrc(1, s);
} else {
t = q;
bld.mkOp2(OP_SUB, TYPE_U32, (q = bld.getSSA()), t, s);
s = bld.getSSA();
t = bld.getSSA();
// fix the sign
// (flags of src0 ^ src1 select the negated or the plain quotient)
bld.mkOp2(OP_XOR, TYPE_U32, NULL, div->getSrc(0), div->getSrc(1))
->setFlagsDef(0, (cond = bld.getSSA(1, FILE_FLAGS)));
bld.mkOp1(OP_NEG, ty, s, q)->setPredicate(CC_S, cond);
bld.mkOp1(OP_MOV, ty, t, q)->setPredicate(CC_NS, cond);

// the division becomes the merge of the two predicated results
div->op = OP_UNION;
div->setSrc(0, s);
div->setSrc(1, t);
}
}
 
// Rewrites a 32-bit integer MOD as  src0 - (src0 / src1) * src1.
//
// A DIV is emitted before @mod and expanded right away with handleDIV(); the
// product of the quotient and the divisor is expanded with
// expandIntegerMUL(); @mod itself becomes the final SUB. Other destination
// types are left untouched.
void
NV50LegalizeSSA::handleMOD(Instruction *mod)
{
if (mod->dType != TYPE_U32 && mod->dType != TYPE_S32)
return;
bld.setPosition(mod, false);

Value *q = bld.getSSA(); // quotient
Value *m = bld.getSSA(); // quotient * divisor

bld.mkOp2(OP_DIV, mod->dType, q, mod->getSrc(0), mod->getSrc(1));
handleDIV(q->getInsn());

// handleDIV() moved the builder; go back to just before mod
bld.setPosition(mod, false);
expandIntegerMUL(&bld, bld.mkOp2(OP_MUL, TYPE_U32, m, q, mod->getSrc(1)));

mod->op = OP_SUB;
mod->setSrc(1, m);
}
 
// Per-block driver: walks the instructions starting at the block's entry
// (so PHIs are never handed to handleAddrDef), fixes up address-register
// definitions and dispatches EXPORT / DIV / MOD / MAD / MUL to their
// handlers. Always returns true.
bool
NV50LegalizeSSA::visit(BasicBlock *bb)
{
   Instruction *cur = bb->getEntry();

   while (cur) {
      // taken before the handlers run: they may replace or remove cur
      Instruction *const following = cur->next;

      if (cur->defExists(0) && cur->getDef(0)->reg.file == FILE_ADDRESS)
         handleAddrDef(cur);

      switch (cur->op) {
      case OP_EXPORT:
         if (outWrites)
            propagateWriteToOutput(cur);
         break;
      case OP_DIV:
         handleDIV(cur);
         break;
      case OP_MOD:
         handleMOD(cur);
         break;
      case OP_MAD:
      case OP_MUL:
         handleMUL(cur);
         break;
      default:
         break;
      }

      cur = following;
   }
   return true;
}
 
// Lowering pass run for CG_STAGE_PRE_SSA on NV50 (see
// TargetNV50::runLegalizePass).  visit(Instruction *) dispatches on the
// opcode to the handle*() members below, which rewrite the instruction
// in place and/or emit replacement instructions through the builder.
class NV50LoweringPreSSA : public Pass
{
public:
NV50LoweringPreSSA(Program *);

private:
// Pass hooks: per-instruction dispatch and per-function setup.
virtual bool visit(Instruction *);
virtual bool visit(Function *);

// System value reads / writes.
bool handleRDSV(Instruction *);
bool handleWRSV(Instruction *);

// Shader input / output access.
bool handlePFETCH(Instruction *);
bool handleEXPORT(Instruction *);
bool handleLOAD(Instruction *);

// Arithmetic that is expanded into other operations.
bool handleDIV(Instruction *);
bool handleSQRT(Instruction *);
bool handlePOW(Instruction *);

// Compare / select.
bool handleSET(Instruction *);
bool handleSLCT(CmpInstruction *);
bool handleSELP(Instruction *);

// Texturing.  handleTEX holds the common part and is called by the others.
bool handleTEX(TexInstruction *);
bool handleTXB(TexInstruction *); // I really
bool handleTXL(TexInstruction *); // hate
bool handleTXD(TexInstruction *); // these 3
bool handleTXLQ(TexInstruction *);

// Control flow.
bool handleCALL(Instruction *);
bool handlePRECONT(Instruction *);
bool handleCONT(Instruction *);

// Helpers: predicate legalization and multisample info loads.
void checkPredicate(Instruction *);
void loadTexMsInfo(uint32_t off, Value **ms, Value **ms_x, Value **ms_y);
void loadMsInfo(Value *ms, Value *s, Value **dx, Value **dy);

private:
// Target of the program being lowered (set in the constructor).
const Target *const targ;

// Instruction builder; its insert position is set before every emission.
BuildUtil bld;

// Copy of the implicit "thread id" argument; only set for compute
// programs (see visit(Function *)), NULL otherwise.
Value *tid;
};
 
// Remember the program's target, start without a thread-id value and
// attach the builder to the program.
NV50LoweringPreSSA::NV50LoweringPreSSA(Program *prog)
   : targ(prog->getTarget()),
     tid(NULL)
{
   bld.setProgram(prog);
}
 
// Per-function setup.  Compute programs receive an extra input value bound
// to register id 0, which is copied into `tid` for later use.
bool
NV50LoweringPreSSA::visit(Function *f)
{
   BasicBlock *entry = BasicBlock::get(func->cfg.getRoot());

   // Nothing to prepare for non-compute programs.
   if (prog->getType() != Program::TYPE_COMPUTE)
      return true;

   // Add implicit "thread id" argument in $r0 to the function
   Value *tidArg = new_LValue(func, FILE_GPR);
   tidArg->reg.data.id = 0;
   f->ins.push_back(tidArg);

   // Copy the argument into a scratch value inside the root block.
   bld.setPosition(entry, false);
   Instruction *copy = bld.mkMov(bld.getScratch(), tidArg, TYPE_U32);
   tid = copy->getDef(0);

   return true;
}
 
// Load the texture-indexed ms setting from the constant buffer.
//
// off:   byte offset of the texture's entry; rebased below.
// ms_x:  receives the u32 loaded at the rebased offset.
// ms_y:  receives the u32 loaded 4 bytes after it.
// ms:    receives *ms_x + *ms_y.
void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms,
                                       Value **ms_x, Value **ms_y) {
   Value *sum = new_LValue(func, FILE_GPR);
   const uint8_t cbSlot = prog->driver->io.resInfoCBSlot;

   // Rebase: table start, plus one 16 * 2 * 4 byte step for program types
   // past VERTEX and a second one for types past GEOMETRY.
   off += prog->driver->io.suInfoBase;
   if (prog->getType() > Program::TYPE_VERTEX)
      off += 16 * 2 * 4;
   if (prog->getType() > Program::TYPE_GEOMETRY)
      off += 16 * 2 * 4;

   *ms_x = bld.mkLoadv(TYPE_U32,
                       bld.mkSymbol(FILE_MEMORY_CONST, cbSlot, TYPE_U32, off + 0),
                       NULL);
   *ms_y = bld.mkLoadv(TYPE_U32,
                       bld.mkSymbol(FILE_MEMORY_CONST, cbSlot, TYPE_U32, off + 4),
                       NULL);
   *ms = bld.mkOp2v(OP_ADD, TYPE_U32, sum, *ms_x, *ms_y);
}
 
// Given a MS level and a sample id, load the delta x/y for that sample.
//
// ms: MS level value, s: sample id value.
// dx / dy: receive the u32 values loaded from msInfoBase and msInfoBase + 4,
//          indexed by the computed address.
void NV50LoweringPreSSA::loadMsInfo(Value *ms, Value *s, Value **dx, Value **dy) {
   const uint8_t cbSlot = prog->driver->io.msInfoCBSlot;
   Value *addr = new_LValue(func, FILE_ADDRESS);
   Value *tmp = new_LValue(func, FILE_GPR);

   // The required information is at mslevel * 16 * 4 + sample * 8
   // = (mslevel * 8 + sample) * 8
   Value *level8 = bld.mkOp2v(OP_SHL, TYPE_U32, tmp, ms, bld.mkImm(3));
   Value *index = bld.mkOp2v(OP_ADD, TYPE_U32, tmp, level8, s);
   bld.mkOp2(OP_SHL, TYPE_U32, addr, index, bld.mkImm(3));

   *dx = bld.mkLoadv(TYPE_U32,
                     bld.mkSymbol(FILE_MEMORY_CONST, cbSlot, TYPE_U32,
                                  prog->driver->io.msInfoBase),
                     addr);
   *dy = bld.mkLoadv(TYPE_U32,
                     bld.mkSymbol(FILE_MEMORY_CONST, cbSlot, TYPE_U32,
                                  prog->driver->io.msInfoBase + 4),
                     addr);
}
 
// Common lowering for texture instructions; also called first by the
// TXB/TXL/TXD/TXLQ handlers.  In order it:
//  - resolves multisample targets into plain coordinates,
//  - swaps dref and bias/lod sources for shadow TXB/TXL,
//  - converts and clamps the array layer (except for TXF),
//  - turns cube arrays into 2D arrays through a TEXPREP instruction,
//  - moves immediate texel offsets into the instruction's offset fields.
// Always returns true.
bool
NV50LoweringPreSSA::handleTEX(TexInstruction *i)
{
// Source indices: dref sits right after the coordinate arguments, lod/bias
// after dref when the target is a shadow target.
const int arg = i->tex.target.getArgCount();
const int dref = arg;
const int lod = i->tex.target.isShadow() ? (arg + 1) : arg;

// handle MS, which means looking up the MS params for this texture, and
// adjusting the input coordinates to point at the right sample.
if (i->tex.target.isMS()) {
Value *x = i->getSrc(0);
Value *y = i->getSrc(1);
Value *s = i->getSrc(arg - 1);
Value *tx = new_LValue(func, FILE_GPR), *ty = new_LValue(func, FILE_GPR),
*ms, *ms_x, *ms_y, *dx, *dy;

i->tex.target.clearMS();

loadTexMsInfo(i->tex.r * 4 * 2, &ms, &ms_x, &ms_y);
loadMsInfo(ms, s, &dx, &dy);

// tx = (x << ms_x) + dx, ty = (y << ms_y) + dy
bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
bld.mkOp2(OP_ADD, TYPE_U32, tx, tx, dx);
bld.mkOp2(OP_ADD, TYPE_U32, ty, ty, dy);
i->setSrc(0, tx);
i->setSrc(1, ty);
// the sample index source is replaced by a constant 0
i->setSrc(arg - 1, bld.loadImm(NULL, 0));
}

// dref comes before bias/lod
if (i->tex.target.isShadow())
if (i->op == OP_TXB || i->op == OP_TXL)
i->swapSources(dref, lod);

if (i->tex.target.isArray()) {
if (i->op != OP_TXF) {
// array index must be converted to u32, but it's already an integer
// for TXF
Value *layer = i->getSrc(arg - 1);
LValue *src = new_LValue(func, FILE_GPR);
bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer);
// clamp the converted layer index to at most 511
bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511));
i->setSrc(arg - 1, src);
}
if (i->tex.target.isCube() && i->srcCount() > 4) {
// Cube array: feed the first four sources to a TEXPREP that yields
// three values, then sample as a 2D array with those.
std::vector<Value *> acube, a2d;
int c;

acube.resize(4);
for (c = 0; c < 4; ++c)
acube[c] = i->getSrc(c);
a2d.resize(4);
for (c = 0; c < 3; ++c)
a2d[c] = new_LValue(func, FILE_GPR);
a2d[3] = NULL;

bld.mkTex(OP_TEXPREP, TEX_TARGET_CUBE_ARRAY, i->tex.r, i->tex.s,
a2d, acube)->asTex()->tex.mask = 0x7;

// replace sources 0..2, then shift the remaining sources down by one
// since four inputs became three
for (c = 0; c < 3; ++c)
i->setSrc(c, a2d[c]);
for (; i->srcExists(c + 1); ++c)
i->setSrc(c, i->getSrc(c + 1));
i->setSrc(c, NULL);
assert(c <= 4);

i->tex.target = i->tex.target.isShadow() ?
TEX_TARGET_2D_ARRAY_SHADOW : TEX_TARGET_2D_ARRAY;
}
}

// texel offsets are 3 immediate fields in the instruction,
// nv50 cannot do textureGatherOffsets
assert(i->tex.useOffsets <= 1);
if (i->tex.useOffsets) {
for (int c = 0; c < 3; ++c) {
ImmediateValue val;
if (!i->offset[0][c].getImmediate(val))
assert(!"non-immediate offset");
// store the immediate in the instruction and drop the source reference
i->tex.offset[c] = val.reg.data.u32;
i->offset[0][c].set(NULL);
}
}

return true;
}
 
// Bias must be equal for all threads of a quad or lod calculation will fail.
//
// The lanes of a quad are grouped by the bit in the condition register they
// have set, which is selected by differing bias values.
// Move the input values for TEX into a new register set for each group and
// execute TEX only for a specific group.
// We always need to use 4 new registers for the inputs/outputs because the
// implicitly calculated derivatives must be correct.
//
// TODO: move to SSA phase so we can easily determine whether bias is constant
//
// Returns true; when the bias is not uniform the original instruction is
// deleted and replaced by four predicated clones whose results are merged
// with OP_UNION.
bool
NV50LoweringPreSSA::handleTXB(TexInstruction *i)
{
// condition code that selects each of the four groups
const CondCode cc[4] = { CC_EQU, CC_S, CC_C, CC_O };
int l, d;

// We can't actually apply bias *and* do a compare for a cube
// texture. Since the compare has to be done before the filtering, just
// drop the bias on the floor.
if (i->tex.target == TEX_TARGET_CUBE_SHADOW) {
i->op = OP_TEX;
// source 4 takes the place of source 3, then slot 4 is cleared
i->setSrc(3, i->getSrc(4));
i->setSrc(4, NULL);
return handleTEX(i);
}

handleTEX(i);
// after handleTEX the bias is the source following the coordinate arguments
Value *bias = i->getSrc(i->tex.target.getArgCount());
if (bias->isUniform())
return true;

// cond collects the group bits: source 0 is the constant 1, sources 1..3
// are filled in by the loop below
Instruction *cond = bld.mkOp1(OP_UNION, TYPE_U32, bld.getScratch(),
bld.loadImm(NULL, 1));
bld.setPosition(cond, false);

for (l = 1; l < 4; ++l) {
const uint8_t qop = QUADOP(SUBR, SUBR, SUBR, SUBR);
Value *bit = bld.getSSA();
Value *pred = bld.getScratch(1, FILE_FLAGS);
Value *imm = bld.loadImm(NULL, (1 << l));
// quadop against lane l's bias; its flags result gates the mov of bit l
bld.mkQuadop(qop, pred, l, bias, bias)->flagsDef = 0;
bld.mkMov(bit, imm)->setPredicate(CC_EQ, pred);
cond->setSrc(l, bit);
}
// convert the collected bits into a flags value, placed after cond
Value *flags = bld.getScratch(1, FILE_FLAGS);
bld.setPosition(cond, true);
bld.mkCvt(OP_CVT, TYPE_U8, flags, TYPE_U32, cond->getDef(0));

// one predicated clone of the texture instruction per group
Instruction *tex[4];
for (l = 0; l < 4; ++l) {
(tex[l] = cloneForward(func, i))->setPredicate(cc[l], flags);
bld.insert(tex[l]);
}

// res[l][d]: result d of group l.  Group 0 uses its clone's defs directly,
// groups 1..3 get predicated moves into shallow copies of group 0's values.
Value *res[4][4];
for (d = 0; i->defExists(d); ++d)
res[0][d] = tex[0]->getDef(d);
for (l = 1; l < 4; ++l) {
for (d = 0; tex[l]->defExists(d); ++d) {
res[l][d] = cloneShallow(func, res[0][d]);
bld.mkMov(res[l][d], tex[l]->getDef(d))->setPredicate(cc[l], flags);
}
}

// merge the per-group results into the original destinations
for (d = 0; i->defExists(d); ++d) {
Instruction *dst = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(d));
for (l = 0; l < 4; ++l)
dst->setSrc(l, res[l][d]);
}
delete_Instruction(prog, i);
return true;
}
 
// LOD must be equal for all threads of a quad.
// Unlike with TXB, here we can just diverge since there's no LOD calculation
// that would require all 4 threads' sources to be set up properly.
//
// Returns true.  When the lod is not uniform, the block is split around the
// instruction and a chain of per-lane test blocks branching to it is built.
bool
NV50LoweringPreSSA::handleTXL(TexInstruction *i)
{
handleTEX(i);
// after handleTEX the lod is the source following the coordinate arguments
Value *lod = i->getSrc(i->tex.target.getArgCount());
if (lod->isUniform())
return true;

// Split so that the texture instruction sits in its own block (texiBB),
// with the preceding code in currBB and the following code in joinBB.
BasicBlock *currBB = i->bb;
BasicBlock *texiBB = i->bb->splitBefore(i, false);
BasicBlock *joinBB = i->bb->splitAfter(i);

bld.setPosition(currBB, true);
currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);

// One quadop + conditional branch to texiBB per lane; lanes 0..2 each get
// a fresh block for the next lane's test.
for (int l = 0; l <= 3; ++l) {
const uint8_t qop = QUADOP(SUBR, SUBR, SUBR, SUBR);
Value *pred = bld.getScratch(1, FILE_FLAGS);
bld.setPosition(currBB, true);
bld.mkQuadop(qop, pred, l, lod, lod)->flagsDef = 0;
bld.mkFlow(OP_BRA, texiBB, CC_EQ, pred)->fixed = 1;
currBB->cfg.attach(&texiBB->cfg, Graph::Edge::FORWARD);
if (l <= 2) {
BasicBlock *laneBB = new BasicBlock(func);
currBB->cfg.attach(&laneBB->cfg, Graph::Edge::TREE);
currBB = laneBB;
}
}
// the JOIN goes at the start of the block following the texture instruction
bld.setPosition(joinBB, false);
bld.mkOp(OP_JOIN, TYPE_NONE, NULL);
return true;
}
 
// Lower TXD (texture fetch with explicit derivatives): for each of the four
// quad lanes, build coordinates from that lane's coordinates and dPdx/dPdy
// via quadops, issue a clone of the texture instruction, and keep the result
// for that lane only.  The per-lane results are merged with OP_UNION and the
// original instruction is removed from its block.  Returns true.
bool
NV50LoweringPreSSA::handleTXD(TexInstruction *i)
{
// qOps[l][0] is the quadop used when adding dPdx for lane l, qOps[l][1]
// the one used when adding dPdy.
static const uint8_t qOps[4][2] =
{
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
};
// def[c][l]: result component c as produced for lane l
Value *def[4][4];
Value *crd[3];
Instruction *tex;
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
const int dim = i->tex.target.getDim();

handleTEX(i);
i->op = OP_TEX; // no need to clone dPdx/dPdy later

for (c = 0; c < dim; ++c)
crd[c] = bld.getScratch();

// everything between QUADON and QUADPOP is emitted once per lane
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (l = 0; l < 4; ++l) {
// mov coordinates from lane l to all lanes
for (c = 0; c < dim; ++c)
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
// add dPdx from lane l to lanes dx
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
tex->setSrc(c, crd[c]);
// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;
def[c][l] = bld.getSSA();
mov = bld.mkMov(def[c][l], tex->getDef(c));
// the mov is marked fixed and restricted to lane l
mov->fixed = 1;
mov->lanes = 1 << l;
}
}
bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);

// merge the four per-lane results into each original destination
for (c = 0; i->defExists(c); ++c) {
Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
for (l = 0; l < 4; ++l)
u->setSrc(l, def[c][l]);
}

i->bb->remove(i);
return true;
}
 
// Lower TXLQ: run the common texture lowering, then fix up the first two
// results in place right after the instruction.
bool
NV50LoweringPreSSA::handleTXLQ(TexInstruction *i)
{
   handleTEX(i);
   bld.setPosition(i, true);

   /* The returned values are not quite what we want:
    * (a) convert from s32 to f32
    * (b) multiply by 1/256
    */
   for (int d = 0; d < 2; ++d) {
      if (i->defExists(d)) {
         Value *res = i->getDef(d);
         bld.mkCvt(OP_CVT, TYPE_F32, res, TYPE_S32, res);
         bld.mkOp2(OP_MUL, TYPE_F32, res, res, bld.loadImm(NULL, 1.0f / 256));
      }
   }
   return true;
}
 
// Lower a SET with a float result: the SET itself is retyped to U32 and its
// result is post-processed by ABS (s32) followed by an s32 -> f32 convert.
// SETs with any other result type are left alone.
bool
NV50LoweringPreSSA::handleSET(Instruction *i)
{
   if (i->dType != TYPE_F32)
      return true;

   Value *res = i->getDef(0);

   bld.setPosition(i, true);
   i->dType = TYPE_U32;
   bld.mkOp1(OP_ABS, TYPE_S32, res, res);
   bld.mkCvt(OP_CVT, TYPE_F32, res, TYPE_S32, res);
   return true;
}
 
// Lower SLCT: the instruction becomes a SET comparing its third source with
// 0 into a flags value, and two oppositely predicated moves of the first two
// sources are merged into the original destination with OP_UNION.
bool
NV50LoweringPreSSA::handleSLCT(CmpInstruction *i)
{
   Value *pick0 = bld.getSSA();
   Value *pick1 = bld.getSSA();
   Value *flags = bld.getScratch(1, FILE_FLAGS);

   Value *opnd0 = i->getSrc(0);
   Value *opnd1 = i->getSrc(1);
   // XXX: these probably shouldn't be immediates in the first place ...
   if (opnd0->asImm())
      opnd0 = bld.mkMov(bld.getSSA(), opnd0)->getDef(0);
   if (opnd1->asImm())
      opnd1 = bld.mkMov(bld.getSSA(), opnd1)->getDef(0);

   // After i: select between the two operands based on the flags.
   // The union uses i->dType as it is before the retyping below.
   bld.setPosition(i, true);
   Instruction *mov0 = bld.mkMov(pick0, opnd0);
   mov0->setPredicate(CC_NE, flags);
   Instruction *mov1 = bld.mkMov(pick1, opnd1);
   mov1->setPredicate(CC_EQ, flags);
   bld.mkOp2(OP_UNION, i->dType, i->getDef(0), pick0, pick1);

   // Before i: turn the SLCT itself into the flag-producing SET.
   bld.setPosition(i, false);
   i->op = OP_SET;
   i->setFlagsDef(0, flags);
   i->dType = TYPE_U8;
   i->setSrc(0, i->getSrc(2));
   i->setSrc(2, NULL);
   i->setSrc(1, bld.loadImm(NULL, 0));

   return true;
}
 
// Lower SELP into two moves predicated on the third source (CC_NE for the
// first operand, CC_EQ for the second), merged with OP_UNION.  The original
// instruction is deleted.
bool
NV50LoweringPreSSA::handleSELP(Instruction *i)
{
   Value *pick0 = bld.getSSA();
   Value *pick1 = bld.getSSA();

   // Immediates are first copied into fresh values.
   Value *opnd0 = i->getSrc(0);
   Value *opnd1 = i->getSrc(1);
   if (opnd0->asImm())
      opnd0 = bld.mkMov(bld.getSSA(), opnd0)->getDef(0);
   if (opnd1->asImm())
      opnd1 = bld.mkMov(bld.getSSA(), opnd1)->getDef(0);

   Instruction *mov0 = bld.mkMov(pick0, opnd0);
   mov0->setPredicate(CC_NE, i->getSrc(2));
   Instruction *mov1 = bld.mkMov(pick1, opnd1);
   mov1->setPredicate(CC_EQ, i->getSrc(2));
   bld.mkOp2(OP_UNION, i->dType, i->getDef(0), pick0, pick1);

   delete_Instruction(prog, i);
   return true;
}
 
// Lower a system value write into an EXPORT to the corresponding shader
// output.  Returns false when the target reports an address >= 0x400.
bool
NV50LoweringPreSSA::handleWRSV(Instruction *i)
{
   // these are all shader outputs, $sreg are not writeable
   const uint32_t outAddr =
      targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym());
   if (outAddr >= 0x400)
      return false;

   Symbol *dst = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, outAddr);
   bld.mkStore(OP_EXPORT, i->dType, dst, i->getIndirect(0, 0), i->getSrc(1));

   // the WRSV itself is no longer needed
   bld.getBB()->remove(i);
   return true;
}
 
// In compute programs every call gets `tid` appended as an extra source.
bool
NV50LoweringPreSSA::handleCALL(Instruction *i)
{
   const bool isCompute = (prog->getType() == Program::TYPE_COMPUTE);

   // Add implicit "thread id" argument in $r0 to the function
   if (isCompute)
      i->setSrc(i->srcCount(), tid);

   return true;
}
 
// PRECONT is simply dropped by this lowering.
bool
NV50LoweringPreSSA::handlePRECONT(Instruction *i)
{
   delete_Instruction(prog, i);

   return true;
}
 
// CONT is rewritten in place into a plain branch.
bool
NV50LoweringPreSSA::handleCONT(Instruction *i)
{
   i->op = OP_BRA;

   return true;
}
 
// Lower a system value read.  When the target maps the value to an address
// >= 0x400 the instruction is kept as is; otherwise a replacement sequence
// depending on the system value is emitted and the RDSV is removed from its
// block.  Always returns true.
bool
NV50LoweringPreSSA::handleRDSV(Instruction *i)
{
Symbol *sym = i->getSrc(0)->asSym();
uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym);
Value *def = i->getDef(0);
SVSemantic sv = sym->reg.data.sv.sv;
int idx = sym->reg.data.sv.index;

if (addr >= 0x400) // mov $sreg
return true;

switch (sv) {
case SV_POSITION:
assert(prog->getType() == Program::TYPE_FRAGMENT);
bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL);
break;
case SV_FACE:
bld.mkInterp(NV50_IR_INTERP_FLAT, def, addr, NULL);
if (i->dType == TYPE_F32) {
// float result: set the low bit, negate as s32, then convert to f32
bld.mkOp2(OP_OR, TYPE_U32, def, def, bld.mkImm(0x00000001));
bld.mkOp1(OP_NEG, TYPE_S32, def, def);
bld.mkCvt(OP_CVT, TYPE_F32, def, TYPE_S32, def);
}
break;
case SV_NCTAID:
case SV_CTAID:
case SV_NTID:
// components beyond the ones handled by the load below are constants:
// 1 for the counts (NCTAID, NTID), 0 for the id (CTAID)
if ((sv == SV_NCTAID && idx >= 2) ||
(sv == SV_NTID && idx >= 3)) {
bld.mkMov(def, bld.mkImm(1));
} else if (sv == SV_CTAID && idx >= 2) {
bld.mkMov(def, bld.mkImm(0));
} else {
// load a 16-bit value from shared memory at addr and widen it to u32
Value *x = bld.getSSA(2);
bld.mkOp1(OP_LOAD, TYPE_U16, x,
bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U16, addr));
bld.mkCvt(OP_CVT, TYPE_U32, def, TYPE_U16, x);
}
break;
case SV_TID:
// unpack from tid: component 0 = bits 0..15, component 1 = bits 16..25,
// component 2 = bits 26 and up, anything else is 0
if (idx == 0) {
bld.mkOp2(OP_AND, TYPE_U32, def, tid, bld.mkImm(0x0000ffff));
} else if (idx == 1) {
bld.mkOp2(OP_AND, TYPE_U32, def, tid, bld.mkImm(0x03ff0000));
bld.mkOp2(OP_SHR, TYPE_U32, def, def, bld.mkImm(16));
} else if (idx == 2) {
bld.mkOp2(OP_SHR, TYPE_U32, def, tid, bld.mkImm(26));
} else {
bld.mkMov(def, bld.mkImm(0));
}
break;
case SV_SAMPLE_POS: {
// read the sample index, scale it by 8 into an address value, and load
// component idx from the constant buffer at sampleInfoBase
Value *off = new_LValue(func, FILE_ADDRESS);
bld.mkOp1(OP_RDSV, TYPE_U32, def, bld.mkSysVal(SV_SAMPLE_INDEX, 0));
bld.mkOp2(OP_SHL, TYPE_U32, off, def, bld.mkImm(3));
bld.mkLoad(TYPE_F32,
def,
bld.mkSymbol(
FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot,
TYPE_U32, prog->driver->io.sampleInfoBase + 4 * idx),
off);
break;
}
default:
// everything else is fetched as a regular shader input
bld.mkFetch(i->getDef(0), i->dType,
FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), NULL);
break;
}
bld.getBB()->remove(i);
return true;
}
 
// Lower a floating point DIV into  a * rcp(b).  Divisions with a non-float
// result type are not touched here.
bool
NV50LoweringPreSSA::handleDIV(Instruction *i)
{
   if (isFloatType(i->dType)) {
      // emit the reciprocal of the divisor in front of i ...
      bld.setPosition(i, false);
      Instruction *recip =
         bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1));

      // ... and multiply by it instead of dividing
      i->op = OP_MUL;
      i->setSrc(1, recip->getDef(0));
   }
   return true;
}
 
// Lower SQRT as  sqrt(x) = rcp(rsq(x)).
//
// The previous expansion x * rsq(x) breaks at the edges of the domain:
// for x = 0 it evaluates 0 * inf and for x = +inf it evaluates inf * 0,
// both of which are NaN under IEEE rules.  rcp(rsq(x)) gives 0 and +inf
// there instead.
//
// The instruction itself becomes the RSQ and an RCP on its result is
// appended right after it, reusing the same destination value (as
// handleSET and handleTXLQ do in this pre-SSA pass).  Always returns true.
bool
NV50LoweringPreSSA::handleSQRT(Instruction *i)
{
   bld.setPosition(i, true);
   i->op = OP_RSQ;
   bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0));

   return true;
}
 
// Lower POW as  x^y = ex2(preex2(y * lg2(x))).  The three helper
// instructions are emitted through the builder and share one scratch value;
// the POW itself becomes the final EX2.
bool
NV50LoweringPreSSA::handlePOW(Instruction *i)
{
   LValue *tmp = bld.getScratch();

   bld.mkOp1(OP_LG2, TYPE_F32, tmp, i->getSrc(0));

   Instruction *mul = bld.mkOp2(OP_MUL, TYPE_F32, tmp, i->getSrc(1), tmp);
   mul->dnz = 1;

   bld.mkOp1(OP_PREEX2, TYPE_F32, tmp, tmp);

   i->op = OP_EX2;
   i->setSrc(0, tmp);
   i->setSrc(1, NULL);

   return true;
}
 
// In fragment programs, turn an EXPORT into a final MOV to the GPR whose id
// is the output offset in 32-bit units.  Indirect exports are not supported
// there (returns false).  Other program types are left untouched.
bool
NV50LoweringPreSSA::handleEXPORT(Instruction *i)
{
   if (prog->getType() != Program::TYPE_FRAGMENT)
      return true;

   // TODO: redirect to l[] here, load to GPRs at exit
   if (i->getIndirect(0, 0))
      return false;

   const int reg = i->getSrc(0)->reg.data.offset / 4; // in 32 bit reg units

   // the exported value (source 1) becomes the only source of the MOV
   i->op = OP_MOV;
   i->subOp = NV50_IR_SUBOP_MOV_FINAL;
   i->src(0).set(i->src(1));
   i->setSrc(1, NULL);

   // destination: a new GPR value bound to register id `reg`
   i->setDef(0, new_LValue(func, FILE_GPR));
   i->getDef(0)->reg.data.id = reg;

   prog->maxGPR = MAX2(prog->maxGPR, reg);
   return true;
}
 
// Handle indirect addressing in geometry shaders:
//
// ld $r0 a[$a1][$a2+k] ->
// ld $r0 a[($a1 + $a2 * $vstride) + k], where k *= $vstride is implicit
//
// Loads whose source has no second-dimension indirection are left alone.
bool
NV50LoweringPreSSA::handleLOAD(Instruction *i)
{
   ValueRef src = i->src(0);

   if (!src.isIndirect(1))
      return true;

   assert(prog->getType() == Program::TYPE_GEOMETRY);
   Value *addr = i->getIndirect(0, 1);

   if (src.isIndirect(0)) {
      // base address is in an address register, so move to a GPR
      Value *base = bld.getScratch();
      bld.mkMov(base, addr);

      Symbol *strideSym = bld.mkSysVal(SV_VERTEX_STRIDE, 0);
      Value *vstride = bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getSSA(), strideSym);
      Value *attrib = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
                                 i->getIndirect(0, 0), bld.mkImm(2));

      // Calculate final address: addr = base + attr*vstride; use 16-bit
      // multiplication since 32-bit would be lowered to multiple
      // instructions, and we only need the low 16 bits of the result
      Value *attribHalves[2], *strideHalves[2];
      bld.mkSplit(attribHalves, 2, attrib);
      bld.mkSplit(strideHalves, 2, vstride);
      Value *sum = bld.mkOp3v(OP_MAD, TYPE_U16, bld.getSSA(),
                              attribHalves[0], strideHalves[0], base);

      // move address from GPR into an address register
      addr = bld.getSSA(2, FILE_ADDRESS);
      bld.mkMov(addr, sum);
   }

   // collapse both indirections into the single first-dimension one
   i->setIndirect(0, 1, NULL);
   i->setIndirect(0, 0, addr);

   return true;
}
 
// Lower PFETCH in geometry programs.  With a second source (indirect vertex
// index) the fetch is re-emitted through an address value and the original
// instruction becomes a shift by 0 of the fetched value; a direct PFETCH is
// left as is.
bool
NV50LoweringPreSSA::handlePFETCH(Instruction *i)
{
   assert(prog->getType() == Program::TYPE_GEOMETRY);

   // NOTE: cannot use getImmediate here, not in SSA form yet, move to
   // later phase if that assertion ever triggers:

   ImmediateValue *imm = i->getSrc(0)->asImm();
   assert(imm);

   assert(imm->reg.data.u32 <= 127); // TODO: use address reg if that happens

   if (!i->srcExists(1))
      return true;

   // indirect addressing of vertex in primitive space

   LValue *fetched = bld.getScratch();
   Value *ptr = bld.getSSA(2, FILE_ADDRESS);
   bld.mkOp2v(OP_SHL, TYPE_U32, ptr, i->getSrc(1), bld.mkImm(2));
   bld.mkOp2v(OP_PFETCH, TYPE_U32, fetched, imm, ptr);

   // NOTE: PFETCH directly to an $aX only works with direct addressing
   i->op = OP_SHL;
   i->setSrc(0, fetched);
   i->setSrc(1, bld.mkImm(0));

   return true;
}
 
// Set flags according to predicate and make the instruction read $cX.
// Predicates that already live in FILE_FLAGS or FILE_PREDICATE need no work.
void
NV50LoweringPreSSA::checkPredicate(Instruction *insn)
{
   Value *pred = insn->getPredicate();
   if (!pred)
      return;

   // FILE_PREDICATE will simply be changed to FLAGS on conversion to SSA
   if (pred->reg.file == FILE_FLAGS || pred->reg.file == FILE_PREDICATE)
      return;

   // flags = (0 != pred), then predicate the instruction on those flags
   Value *flags = bld.getSSA(1, FILE_FLAGS);

   bld.mkCmp(OP_SET, CC_NEU, insn->dType, flags, insn->dType, bld.loadImm(NULL, 0), pred);

   insn->setPredicate(insn->cc, flags);
}
 
//
// Per-instruction entry point of the pre-SSA lowering:
// - add quadop dance for texturing
// - put FP outputs in GPRs
// - convert instruction sequences
//
// Returns whatever the selected handler returns; instructions without a
// handler yield true.
bool
NV50LoweringPreSSA::visit(Instruction *i)
{
   bool ok = true;

   bld.setPosition(i, false);

   if (i->cc != CC_ALWAYS)
      checkPredicate(i);

   // Note: some handlers delete i, so it must not be touched after the
   // dispatch below.
   switch (i->op) {
   case OP_TEX:
   case OP_TXF:
   case OP_TXG:
      ok = handleTEX(i->asTex());
      break;
   case OP_TXB:
      ok = handleTXB(i->asTex());
      break;
   case OP_TXL:
      ok = handleTXL(i->asTex());
      break;
   case OP_TXD:
      ok = handleTXD(i->asTex());
      break;
   case OP_TXLQ:
      ok = handleTXLQ(i->asTex());
      break;
   case OP_EX2:
      // EX2 reads the result of a PREEX2 emitted in front of it
      bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
      i->setSrc(0, i->getDef(0));
      break;
   case OP_SET:
      ok = handleSET(i);
      break;
   case OP_SLCT:
      ok = handleSLCT(i->asCmp());
      break;
   case OP_SELP:
      ok = handleSELP(i);
      break;
   case OP_POW:
      ok = handlePOW(i);
      break;
   case OP_DIV:
      ok = handleDIV(i);
      break;
   case OP_SQRT:
      ok = handleSQRT(i);
      break;
   case OP_EXPORT:
      ok = handleEXPORT(i);
      break;
   case OP_LOAD:
      ok = handleLOAD(i);
      break;
   case OP_RDSV:
      ok = handleRDSV(i);
      break;
   case OP_WRSV:
      ok = handleWRSV(i);
      break;
   case OP_CALL:
      ok = handleCALL(i);
      break;
   case OP_PRECONT:
      ok = handlePRECONT(i);
      break;
   case OP_CONT:
      ok = handleCONT(i);
      break;
   case OP_PFETCH:
      ok = handlePFETCH(i);
      break;
   default:
      break;
   }
   return ok;
}
 
// Run the NV50 legalization pass that belongs to the given codegen stage.
//
// prog:  program to legalize.
// stage: CG_STAGE_PRE_SSA, CG_STAGE_SSA or CG_STAGE_POST_RA; any other
//        stage runs nothing.
// Returns the result of the pass that was run, false if none was.
//
// prog->targetPriv holds a std::list<Instruction *> that is allocated (if
// not yet present) for the SSA stage and released after the post-RA stage.
bool
TargetNV50::runLegalizePass(Program *prog, CGStage stage) const
{
   bool ret = false;

   if (stage == CG_STAGE_PRE_SSA) {
      NV50LoweringPreSSA pass(prog);
      ret = pass.run(prog, false, true);
   } else
   if (stage == CG_STAGE_SSA) {
      if (!prog->targetPriv)
         prog->targetPriv = new std::list<Instruction *>();
      NV50LegalizeSSA pass(prog);
      ret = pass.run(prog, false, true);
   } else
   if (stage == CG_STAGE_POST_RA) {
      NV50LegalizePostRA pass;
      ret = pass.run(prog, false, true);
      if (prog->targetPriv) {
         delete reinterpret_cast<std::list<Instruction *> *>(prog->targetPriv);
         // Don't leave a dangling pointer behind: the SSA stage tests
         // targetPriv for NULL before allocating, and a second post-RA run
         // would otherwise free the list twice.
         prog->targetPriv = NULL;
      }
   }
   return ret;
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
0,0 → 1,1814
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_build_util.h"
 
#include "codegen/nv50_ir_target_nvc0.h"
#include "codegen/nv50_ir_lowering_nvc0.h"
 
#include <limits>
 
namespace nv50_ir {
 
// 2-bit operation codes, one per lane of a quad, as packed by QUADOP().
#define QOP_ADD 0
#define QOP_SUBR 1
#define QOP_SUB 2
#define QOP_MOV2 3

// Pack four QOP_* names (given without the QOP_ prefix) into one byte:
// q in bits 7:6, r in bits 5:4, s in bits 3:2, t in bits 1:0.
// Lane order of the arguments:
// UL UR LL LR
#define QUADOP(q, r, s, t) \
((QOP_##q << 6) | (QOP_##r << 4) | \
(QOP_##s << 2) | (QOP_##t << 0))
 
// Lower 32-bit integer DIV / MOD into a call to the matching built-in
// division routine.
//
// The operands are moved into registers 0 and 1, the builtin is called, and
// the result is copied from register 0 (DIV) or register 1 (MOD) into the
// original destination.  The registers and predicates not carrying the
// result are marked clobbered, and the original instruction is deleted.
//
// Instructions with any other result type are left completely untouched:
// the type is validated before anything is emitted, so no stray moves into
// the fixed argument registers are left behind for unsupported types.
void
NVC0LegalizeSSA::handleDIV(Instruction *i)
{
   FlowInstruction *call;
   int builtin;
   Value *def[2];

   switch (i->dType) {
   case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break;
   case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break;
   default:
      return;
   }

   bld.setPosition(i, false);
   def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0);
   def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0);

   call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);
   // DIV reads the quotient from def[0], MOD the modulus from def[1]
   bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]);
   // GPR clobber mask leaves out the register that carries the result
   bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2);
   // the signed variant clobbers four predicates, the unsigned one two
   bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0);

   call->fixed = 1;
   call->absolute = call->builtin = 1;
   call->target.builtin = builtin;
   delete_Instruction(prog, i);
}
 
// Lower a 64-bit RCP/RSQ to the variant that works on the high 32 bits only.
void
NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
{
   assert(i->dType == TYPE_F64);
   // There are instructions that will compute the high 32 bits of the 64-bit
   // float. We will just stick 0 in the bottom 32 bits.

   bld.setPosition(i, false);

   // 1. Take the source and split it up.
   Value *in[2], *out[2], *result = i->getDef(0);
   bld.mkSplit(in, 4, i->getSrc(0));

   // 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
   out[0] = bld.loadImm(NULL, 0);
   out[1] = bld.getSSA();

   // 3. The new version of the instruction takes the high 32 bits of the
   // source and outputs the high 32 bits of the destination.
   i->setSrc(0, in[1]);
   i->setDef(0, out[1]);
   i->setType(TYPE_F32);
   i->subOp = NV50_IR_SUBOP_RCPRSQ_64H;

   // 4. Recombine the two halves back into the original destination,
   // right after the instruction.
   bld.setPosition(i, true);
   bld.mkOp2(OP_MERGE, TYPE_U64, result, out[0], out[1]);
}
 
// Set the ftz flag on an instruction with F32 sources, unless it already
// has dnz set or its operation class cannot flush.
void
NVC0LegalizeSSA::handleFTZ(Instruction *i)
{
   // Only want to flush float inputs
   assert(i->sType == TYPE_F32);

   // If we're already flushing denorms (and NaN's) to zero, no need for this.
   if (i->dnz)
      return;

   // Only certain classes of operations can flush
   OpClass cls = prog->getTarget()->getOpClass(i->op);
   const bool canFlush = (cls == OPCLASS_ARITH ||
                          cls == OPCLASS_COMPARE ||
                          cls == OPCLASS_CONVERT);
   if (canFlush)
      i->ftz = true;
}
 
// Per-function setup: attach the builder to the function's program.
bool
NVC0LegalizeSSA::visit(Function *fn)
{
   bld.setProgram(fn->getProgram());

   return true;
}
 
// Walk the block from its entry instruction and legalize each instruction:
// F32-sourced instructions only get flush-to-zero handling (and only outside
// compute programs); all others have DIV/MOD and 64-bit RCP/RSQ lowered.
bool
NVC0LegalizeSSA::visit(BasicBlock *bb)
{
   Instruction *cur = bb->getEntry();

   while (cur) {
      // Latch the successor first; the handlers may delete or rewrite cur.
      Instruction *const following = cur->next;

      if (cur->sType == TYPE_F32) {
         if (prog->getType() != Program::TYPE_COMPUTE)
            handleFTZ(cur);
      } else if (cur->op == OP_DIV || cur->op == OP_MOD) {
         handleDIV(cur);
      } else if (cur->op == OP_RCP || cur->op == OP_RSQ) {
         if (cur->dType == TYPE_F64)
            handleRCPRSQ(cur);
      }

      cur = following;
   }
   return true;
}
 
// Start without zero/carry values; texture barriers are needed when the
// target's chipset is 0xe0 or higher.
NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog)
   : rZero(NULL),
     carry(NULL),
     needTexBar(prog->getTarget()->getChipset() >= 0xe0)
{
   // nothing else to set up
}
 
// Returns whether `later` is dominated by `early`: within one block this is
// decided by serial order, across blocks by block dominance.
bool
NVC0LegalizePostRA::insnDominatedBy(const Instruction *later,
                                    const Instruction *early) const
{
   const bool sameBlock = (early->bb == later->bb);

   if (!sameBlock)
      return later->bb->dominatedBy(early->bb);

   return early->serial < later->serial;
}
 
void
NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses,
Instruction *usei, const Instruction *insn)
{
bool add = true;
for (std::list<TexUse>::iterator it = uses.begin();
it != uses.end();) {
if (insnDominatedBy(usei, it->insn)) {
add = false;
break;
}
if (insnDominatedBy(it->insn, usei))
it = uses.erase(it);
else
++it;
}
if (add)
uses.push_back(TexUse(usei, insn));
}
 
void
NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi,
Instruction *insn,
const BasicBlock *term,
std::list<TexUse> &uses)
{
while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0)))
insn = insn->getSrc(0)->getUniqueInsn();
 
if (!insn->bb->reachableBy(texi->bb, term))
return;
 
switch (insn->op) {
/* Values not connected to the tex's definition through any of these should
* not be conflicting.
*/
case OP_SPLIT:
case OP_MERGE:
case OP_PHI:
case OP_UNION:
/* recurse again */
for (int s = 0; insn->srcExists(s); ++s)
findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term,
uses);
break;
default:
// if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ?
addTexUse(uses, insn, texi);
break;
}
}
 
// Collect into `uses` the first real uses of the values defined by `insn`,
// on behalf of the texture instruction `texi`.
//
// texi:    the texture instruction the search was started for.
// insn:    instruction whose definitions are followed; texi itself on the
//          initial call, a pass-through instruction on recursive calls.
// uses:    receives the uses (via addTexUse / findOverwritingDefs).
// visited: using instructions already processed; prevents endless recursion
//          on use cycles.
void
NVC0LegalizePostRA::findFirstUses(
const Instruction *texi,
const Instruction *insn,
std::list<TexUse> &uses,
std::tr1::unordered_set<const Instruction *>& visited)
{
for (int d = 0; insn->defExists(d); ++d) {
Value *v = insn->getDef(d);
for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) {
Instruction *usei = (*u)->getInsn();

// NOTE: In case of a loop that overwrites a value but never uses
// it, it can happen that we have a cycle of uses that consists only
// of phis and no-op moves and will thus cause an infinite loop here
// since these are not considered actual uses.
// The most obvious (and perhaps the only) way to prevent this is to
// remember which instructions we've already visited.

if (visited.find(usei) != visited.end())
continue;

visited.insert(usei);

if (usei->op == OP_PHI || usei->op == OP_UNION) {
// need a barrier before WAW cases
for (int s = 0; usei->srcExists(s); ++s) {
Instruction *defi = usei->getSrc(s)->getUniqueInsn();
// only look at the other sources, not the one that is this use
if (defi && &usei->src(s) != *u)
findOverwritingDefs(texi, defi, usei->bb, uses);
}
}

if (usei->op == OP_SPLIT ||
usei->op == OP_MERGE ||
usei->op == OP_PHI ||
usei->op == OP_UNION) {
// these uses don't manifest in the machine code
findFirstUses(texi, usei, uses, visited);
} else
if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) &&
usei->subOp != NV50_IR_SUBOP_MOV_FINAL) {
// a move onto itself (other than a final MOV) is followed through too
findFirstUses(texi, usei, uses, visited);
} else {
addTexUse(uses, usei, insn);
}
}
}
}
 
// Texture barriers:
// This pass is a bit long and ugly and can probably be optimized.
//
// 1. obtain a list of TEXes and their outputs' first use(s)
// 2. calculate the barrier level of each first use (minimal number of TEXes,
// over all paths, between the TEX and the use in question)
// 3. for each barrier, if all paths from the source TEX to that barrier
// contain a barrier of lesser level, it can be culled
//
// Returns false when the function contains no texture instructions, true
// otherwise.  Step 3 only runs when the program's optLevel is at least 3.
bool
NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
{
std::list<TexUse> *uses;
std::vector<Instruction *> texes;
// per block id: index into texes of the block's first TEX
std::vector<int> bbFirstTex;
std::vector<int> bbFirstUse;
// per block id: number of TEXes in the block
std::vector<int> texCounts;
std::vector<TexUse> useVec;
ArrayList insns;

fn->orderInstructions(insns);

texCounts.resize(fn->allBBlocks.getSize(), 0);
bbFirstTex.resize(fn->allBBlocks.getSize(), insns.getSize());
bbFirstUse.resize(fn->allBBlocks.getSize(), insns.getSize());

// tag BB CFG nodes by their id for later
for (ArrayList::Iterator i = fn->allBBlocks.iterator(); !i.end(); i.next()) {
BasicBlock *bb = reinterpret_cast<BasicBlock *>(i.get());
if (bb)
bb->cfg.tag = bb->getId();
}

// gather the first uses for each TEX
for (int i = 0; i < insns.getSize(); ++i) {
Instruction *tex = reinterpret_cast<Instruction *>(insns.get(i));
if (isTextureOp(tex->op)) {
texes.push_back(tex);
if (!texCounts.at(tex->bb->getId()))
bbFirstTex[tex->bb->getId()] = texes.size() - 1;
texCounts[tex->bb->getId()]++;
}
}
insns.clear();
if (texes.empty())
return false;
// one use list per TEX, indexed like texes
uses = new std::list<TexUse>[texes.size()];
// NOTE(review): a plain new[] expression reports failure by throwing, so
// this check presumably never fires -- confirm before relying on it.
if (!uses)
return false;
for (size_t i = 0; i < texes.size(); ++i) {
std::tr1::unordered_set<const Instruction *> visited;
findFirstUses(texes[i], texes[i], uses[i], visited);
}

// determine the barrier level at each use
for (size_t i = 0; i < texes.size(); ++i) {
for (std::list<TexUse>::iterator u = uses[i].begin(); u != uses[i].end();
++u) {
BasicBlock *tb = texes[i]->bb;
BasicBlock *ub = u->insn->bb;
if (tb == ub) {
// same block: count the TEXes issued between this TEX and the use
u->level = 0;
for (size_t j = i + 1; j < texes.size() &&
texes[j]->bb == tb && texes[j]->serial < u->insn->serial;
++j)
u->level++;
} else {
// different blocks: lightest path through the CFG, weighted by the
// per-block TEX counts
u->level = fn->cfg.findLightestPathWeight(&tb->cfg,
&ub->cfg, texCounts);
if (u->level < 0) {
WARN("Failed to find path TEX -> TEXBAR\n");
u->level = 0;
continue;
}
// this counted all TEXes in the origin block, correct that
u->level -= i - bbFirstTex.at(tb->getId()) + 1 /* this TEX */;
// and did not count the TEXes in the destination block, add those
for (size_t j = bbFirstTex.at(ub->getId()); j < texes.size() &&
texes[j]->bb == ub && texes[j]->serial < u->insn->serial;
++j)
u->level++;
}
assert(u->level >= 0);
useVec.push_back(*u);
}
}
delete[] uses;

// insert the barriers
for (size_t i = 0; i < useVec.size(); ++i) {
Instruction *prev = useVec[i].insn->prev;
if (useVec[i].level < 0)
continue;
if (prev && prev->op == OP_TEXBAR) {
// a barrier already sits right before the use: lower its level if
// needed and attach this TEX's result as another source
if (prev->subOp > useVec[i].level)
prev->subOp = useVec[i].level;
prev->setSrc(prev->srcCount(), useVec[i].tex->getDef(0));
} else {
Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE);
bar->fixed = 1;
bar->subOp = useVec[i].level;
// make use explicit to ease latency calculation
bar->setSrc(bar->srcCount(), useVec[i].tex->getDef(0));
useVec[i].insn->bb->insertBefore(useVec[i].insn, bar);
}
}

if (fn->getProgram()->optLevel < 3)
return true;

// per block id: min/max outstanding TEXes
std::vector<Limits> limitT, limitB, limitS; // entry, exit, single

limitT.resize(fn->allBBlocks.getSize(), Limits(0, 0));
limitB.resize(fn->allBBlocks.getSize(), Limits(0, 0));
limitS.resize(fn->allBBlocks.getSize());

// cull unneeded barriers (should do that earlier, but for simplicity)
IteratorRef bi = fn->cfg.iteratorCFG();
// first calculate min/max outstanding TEXes for each BB
for (bi->reset(); !bi->end(); bi->next()) {
Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get());
BasicBlock *bb = BasicBlock::get(n);
int min = 0;
// max stays at the int maximum for as long as no barrier was seen
int max = std::numeric_limits<int>::max();
for (Instruction *i = bb->getFirst(); i; i = i->next) {
if (isTextureOp(i->op)) {
min++;
if (max < std::numeric_limits<int>::max())
max++;
} else
if (i->op == OP_TEXBAR) {
min = MIN2(min, i->subOp);
max = MIN2(max, i->subOp);
}
}
// limits when looking at an isolated block
limitS[bb->getId()].min = min;
limitS[bb->getId()].max = max;
}
// propagate the min/max values
for (unsigned int l = 0; l <= fn->loopNestingBound; ++l) {
for (bi->reset(); !bi->end(); bi->next()) {
Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get());
BasicBlock *bb = BasicBlock::get(n);
const int bbId = bb->getId();
// entry limits: maximum over the exit limits of all incident blocks
for (Graph::EdgeIterator ei = n->incident(); !ei.end(); ei.next()) {
BasicBlock *in = BasicBlock::get(ei.getNode());
const int inId = in->getId();
limitT[bbId].min = MAX2(limitT[bbId].min, limitB[inId].min);
limitT[bbId].max = MAX2(limitT[bbId].max, limitB[inId].max);
}
// I just hope this is correct ...
if (limitS[bbId].max == std::numeric_limits<int>::max()) {
// no barrier
limitB[bbId].min = limitT[bbId].min + limitS[bbId].min;
limitB[bbId].max = limitT[bbId].max + limitS[bbId].min;
} else {
// block contained a barrier
limitB[bbId].min = MIN2(limitS[bbId].max,
limitT[bbId].min + limitS[bbId].min);
limitB[bbId].max = MIN2(limitS[bbId].max,
limitT[bbId].max + limitS[bbId].min);
}
}
}
// finally delete unnecessary barriers
for (bi->reset(); !bi->end(); bi->next()) {
Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get());
BasicBlock *bb = BasicBlock::get(n);
Instruction *prev = NULL;
Instruction *next;
// running upper bound of outstanding TEXes while walking the block
int max = limitT[bb->getId()].max;
for (Instruction *i = bb->getFirst(); i; i = next) {
next = i->next;
if (i->op == OP_TEXBAR) {
if (i->subOp >= max) {
// barrier level is not below the current bound: remove it
delete_Instruction(prog, i);
i = NULL;
} else {
max = i->subOp;
// the previous non-nop instruction is a barrier with a level that
// is not below this one's: remove that earlier barrier
if (prev && prev->op == OP_TEXBAR && prev->subOp >= max) {
delete_Instruction(prog, prev);
prev = NULL;
}
}
} else
if (isTextureOp(i->op)) {
max++;
}
if (i && !i->isNop())
prev = i;
}
}
return true;
}
 
// Per-function setup for the post-RA legalization pass.
// Inserts texture barriers when needTexBar is set, then creates the two
// helper values used by later rewrites: rZero (a GPR whose id equals the
// target's GPR file size) and carry (a flags value with id 0).
// Always returns true.
bool
NVC0LegalizePostRA::visit(Function *fn)
{
   if (needTexBar)
      insertTextureBarriers(fn);

   rZero = new_LValue(fn, FILE_GPR);
   rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR);

   carry = new_LValue(fn, FILE_FLAGS);
   carry->reg.data.id = 0;

   return true;
}
 
// Replaces every immediate source of i whose 64-bit value is 0 with rZero.
// Source 2 of an OP_SUCLAMP instruction is never touched.
void
NVC0LegalizePostRA::replaceZero(Instruction *i)
{
   for (int s = 0; i->srcExists(s); ++s) {
      const bool isSuclampSrc2 = (i->op == OP_SUCLAMP) && (s == 2);
      if (isSuclampSrc2)
         continue;

      ImmediateValue *const imm = i->getSrc(s)->asImm();
      if (!imm || imm->reg.data.u64 != 0)
         continue;

      i->setSrc(s, rZero);
   }
}
 
// Turns a single unconditional CONT into a plain BRA.
// Applies only when bb has exactly two incoming edges, starts with PRECONT,
// and one of its first two incoming edges is a back edge whose source block
// ends in an unpredicated CONT. On success the CONT becomes a BRA, the
// PRECONT is removed from bb and true is returned; otherwise nothing is
// changed and false is returned.
bool
NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb)
{
   if (bb->cfg.incidentCount() != 2)
      return false;
   if (bb->getEntry()->op != OP_PRECONT)
      return false;

   // locate the back edge among the first two incoming edges
   Graph::EdgeIterator ei = bb->cfg.incident();
   if (ei.getType() != Graph::Edge::BACK)
      ei.next();
   if (ei.getType() != Graph::Edge::BACK)
      return false;

   BasicBlock *contBB = BasicBlock::get(ei.getNode());
   Instruction *const tail = contBB->getExit();
   if (!tail || tail->op != OP_CONT || tail->getPredicate())
      return false;

   tail->op = OP_BRA;
   bb->remove(bb->getEntry()); // delete PRECONT

   ei.next();
   assert(ei.end() || ei.getType() != Graph::Edge::BACK);
   return true;
}
 
// Replaces branches into a join block with JOIN ops.
// Does nothing unless bb starts with an OP_JOIN whose flow 'limit' is unset.
// For every incoming edge: a predecessor without a terminator gets a new JOIN
// appended (with a warning); a predecessor ending in BRA has that BRA turned
// into a JOIN marked with limit = 1. Finally the entry JOIN of bb is removed.
void
NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
{
   Instruction *const head = bb->getEntry();
   if (head->op != OP_JOIN || head->asFlow()->limit)
      return;

   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      BasicBlock *pred = BasicBlock::get(ei.getNode());
      Instruction *term = pred->getExit();

      if (!term) {
         pred->insertTail(new FlowInstruction(func, OP_JOIN, bb));
         // there should always be a terminator instruction
         WARN("inserted missing terminator in BB:%i\n", pred->getId());
         continue;
      }
      if (term->op == OP_BRA) {
         term->op = OP_JOIN;
         term->asFlow()->limit = 1; // must-not-propagate marker
      }
   }
   bb->remove(bb->getEntry());
}
 
// Per-block post-RA cleanup: removes pseudo operations and non-fixed no-ops,
// splits 64 bit operations, and replaces zero immediates with rZero.
// Afterwards, if the block still has an entry instruction, tries the
// CONT -> BRA replacement and, failing that, JOIN propagation.
// Always returns true.
bool
NVC0LegalizePostRA::visit(BasicBlock *bb)
{
   Instruction *next;

   for (Instruction *insn = bb->getFirst(); insn; insn = next) {
      // fetched up front: insn may be removed or split below
      next = insn->next;

      if (insn->op == OP_EMIT || insn->op == OP_RESTART) {
         if (!insn->getDef(0)->refCount())
            insn->setDef(0, NULL);
         if (insn->src(0).getFile() == FILE_IMMEDIATE)
            insn->setSrc(0, rZero); // initial value must be 0
         replaceZero(insn);
         continue;
      }

      if (insn->isNop()) {
         bb->remove(insn);
         continue;
      }

      // TODO: Move this to before register allocation for operations that
      // need the $c register !
      if (typeSizeof(insn->dType) == 8) {
         Instruction *hi =
            BuildUtil::split64BitOpPostRA(func, insn, rZero, carry);
         if (hi)
            next = hi; // continue with the newly created high half
      }

      if (insn->op != OP_MOV && insn->op != OP_PFETCH)
         replaceZero(insn);
   }

   if (!bb->getEntry())
      return true;

   if (!tryReplaceContWithBra(bb))
      propagateJoin(bb);

   return true;
}
 
// Binds the pass to the program's target, clears the cached global-memory
// base symbol and attaches the builder to prog.
NVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget())
{
   gMemBase = NULL;
   bld.setProgram(prog);
}
 
// Per-function setup. Only geometry programs need work: an immediate 0 is
// loaded at the function's root block into gpEmitAddress and, when the
// function has an exit block, that value is moved to register 0 before the
// exit instruction. Always returns true.
bool
NVC0LoweringPass::visit(Function *fn)
{
   if (prog->getType() != Program::TYPE_GEOMETRY)
      return true;

   assert(!strncmp(fn->getName(), "MAIN", 4));
   // TODO: when we generate actual functions pass this value along somehow
   bld.setPosition(BasicBlock::get(fn->cfg.getRoot()), false);
   gpEmitAddress = bld.loadImm(NULL, 0)->asLValue();

   if (!fn->cfgExit)
      return true;

   bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false);
   bld.mkMovToReg(0, gpEmitAddress);
   return true;
}
 
// Per-block hook of the lowering pass. No per-block work is done here;
// always returns true.
bool
NVC0LoweringPass::visit(BasicBlock *bb)
{
return true;
}
 
// Emits a 32-bit load of a texture handle from the constant buffer selected
// by io.resInfoCBSlot, at byte offset io.texBindBase + slot * 4, optionally
// indexed by ptr. Returns the loaded value.
inline Value *
NVC0LoweringPass::loadTexHandle(Value *ptr, unsigned int slot)
{
   const uint32_t off = prog->driver->io.texBindBase + slot * 4;
   const uint8_t b = prog->driver->io.resInfoCBSlot;

   Symbol *const sym = bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off);
   return bld.mkLoadv(TYPE_U32, sym, ptr);
}
 
// Rearranges the sources of a texture instruction into the order the
// hardware expects (see the table in the body).
// For chipsets >= NVISA_GK104_CHIPSET the texture/sampler handle is resolved
// (loaded through loadTexHandle or combined with INSBF) and, for array
// targets, the layer is converted to u16 and moved into place. For older
// chipsets a single packed source holding the array index and optional
// tic/tsc indirections is built and placed in slot 0. Texel offsets are then
// packed and inserted. Always returns true.
// move array source to first slot, convert to u16, add indirections
bool
NVC0LoweringPass::handleTEX(TexInstruction *i)
{
const int dim = i->tex.target.getDim() + i->tex.target.isCube();
const int arg = i->tex.target.getArgCount();
// index of the layer source: last argument, or second to last for MS targets
const int lyr = arg - (i->tex.target.isMS() ? 2 : 1);
const int chipset = prog->getTarget()->getChipset();

// Arguments to the TEX instruction are a little insane. Even though the
// encoding is identical between SM20 and SM30, the arguments mean
// different things between Fermi and Kepler+. A lot of arguments are
// optional based on flags passed to the instruction. This summarizes the
// order of things.
//
// Fermi:
// array/indirect
// coords
// sample
// lod bias
// depth compare
// offsets:
// - tg4: 8 bits each, either 2 (1 offset reg) or 8 (2 offset reg)
// - other: 4 bits each, single reg
//
// Kepler+:
// indirect handle
// array (+ offsets for txd in upper 16 bits)
// coords
// sample
// lod bias
// depth compare
// offsets (same as fermi, except txd which takes it with array)
//
// Maxwell (tex):
// array
// coords
// indirect handle
// sample
// lod bias
// depth compare
// offsets
//
// Maxwell (txd):
// indirect handle
// coords
// array + offsets
// derivatives

if (chipset >= NVISA_GK104_CHIPSET) {
if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
// XXX this ignores tsc, and assumes a 1:1 mapping
assert(i->tex.rIndirectSrc >= 0);
// the handle is loaded from slot tex.r, indexed by (indirect index << 2)
Value *hnd = loadTexHandle(
bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
i->getIndirectR(), bld.mkImm(2)),
i->tex.r);
i->tex.r = 0xff;
i->tex.s = 0x1f;
i->setIndirectR(hnd);
i->setIndirectS(NULL);
} else if (i->tex.r == i->tex.s) {
i->tex.r += prog->driver->io.texBindBase / 4;
i->tex.s = 0; // only a single cX[] value possible here
} else {
// distinct texture and sampler slots: merge both handles into one value
Value *hnd = bld.getScratch();
Value *rHnd = loadTexHandle(NULL, i->tex.r);
Value *sHnd = loadTexHandle(NULL, i->tex.s);

bld.mkOp3(OP_INSBF, TYPE_U32, hnd, rHnd, bld.mkImm(0x1400), sHnd);

i->tex.r = 0; // not used for indirect tex
i->tex.s = 0;
i->setIndirectR(hnd);
}
if (i->tex.target.isArray()) {
LValue *layer = new_LValue(func, FILE_GPR);
Value *src = i->getSrc(lyr);
// TXF takes an integer layer (converted with saturation), others a float
const int sat = (i->op == OP_TXF) ? 1 : 0;
DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
if (i->op != OP_TXD || chipset < NVISA_GM107_CHIPSET) {
// shift sources 0..dim-1 up by one and put the layer first
for (int s = dim; s >= 1; --s)
i->setSrc(s, i->getSrc(s - 1));
i->setSrc(0, layer);
} else {
i->setSrc(dim, layer);
}
}
// Move the indirect reference to the first place
if (i->tex.rIndirectSrc >= 0 && (
i->op == OP_TXD || chipset < NVISA_GM107_CHIPSET)) {
Value *hnd = i->getIndirectR();

i->setIndirectR(NULL);
i->moveSources(0, 1);
i->setSrc(0, hnd);
i->tex.rIndirectSrc = 0;
i->tex.sIndirectSrc = -1;
}
} else
// (nvc0) generate and move the tsc/tic/array source to the front
if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa

Value *ticRel = i->getIndirectR();
Value *tscRel = i->getIndirectS();

// detach the indirect sources; a non-zero static index is added to them
if (ticRel) {
i->setSrc(i->tex.rIndirectSrc, NULL);
if (i->tex.r)
ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(),
ticRel, bld.mkImm(i->tex.r));
}
if (tscRel) {
i->setSrc(i->tex.sIndirectSrc, NULL);
if (i->tex.s)
tscRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(),
tscRel, bld.mkImm(i->tex.s));
}

Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL;
for (int s = dim; s >= 1; --s)
i->setSrc(s, i->getSrc(s - 1));
i->setSrc(0, arrayIndex);

if (arrayIndex) {
int sat = (i->op == OP_TXF) ? 1 : 0;
DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
bld.mkCvt(OP_CVT, TYPE_U16, src, sTy, arrayIndex)->saturate = sat;
} else {
bld.loadImm(src, 0);
}

// insert the relative tic/tsc indices into the packed source
if (ticRel)
bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src);
if (tscRel)
bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src);

i->setSrc(0, src);
}

// For nvc0, the sample id has to be in the second operand, as the offset
// does. Right now we don't know how to pass both in, and this case can't
// happen with OpenGL. On nve0, the sample id is part of the texture
// coordinate argument.
assert(chipset >= NVISA_GK104_CHIPSET ||
!i->tex.useOffsets || !i->tex.target.isMS());

// offset is between lod and dc
if (i->tex.useOffsets) {
int n, c;
int s = i->srcCount(0xff, true);
if (i->op != OP_TXD || chipset < NVISA_GK104_CHIPSET) {
if (i->tex.target.isShadow())
s--;
if (i->srcExists(s)) // move potential predicate out of the way
i->moveSources(s, 1);
if (i->tex.useOffsets == 4 && i->srcExists(s + 1))
i->moveSources(s + 1, 1);
}
if (i->op == OP_TXG) {
// Either there is 1 offset, which goes into the 2 low bytes of the
// first source, or there are 4 offsets, which go into 2 sources (8
// values, 1 byte each).
Value *offs[2] = {NULL, NULL};
for (n = 0; n < i->tex.useOffsets; n++) {
for (c = 0; c < 2; ++c) {
if ((n % 2) == 0 && c == 0)
offs[n / 2] = i->offset[n][c].get();
else
bld.mkOp3(OP_INSBF, TYPE_U32,
offs[n / 2],
i->offset[n][c].get(),
bld.mkImm(0x800 | ((n * 16 + c * 8) % 32)),
offs[n / 2]);
}
}
i->setSrc(s, offs[0]);
if (offs[1])
i->setSrc(s + 1, offs[1]);
} else {
// pack up to three immediate offsets, 4 bits each, into one word
unsigned imm = 0;
assert(i->tex.useOffsets == 1);
for (c = 0; c < 3; ++c) {
ImmediateValue val;
if (!i->offset[0][c].getImmediate(val))
assert(!"non-immediate offset passed to non-TXG");
imm |= (val.reg.data.u32 & 0xf) << (c * 4);
}
if (i->op == OP_TXD && chipset >= NVISA_GK104_CHIPSET) {
// The offset goes into the upper 16 bits of the array index. So
// create it if it's not already there, and INSBF it if it already
// is.
s = (i->tex.rIndirectSrc >= 0) ? 1 : 0;
if (chipset >= NVISA_GM107_CHIPSET)
s += dim;
if (i->tex.target.isArray()) {
bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(s),
bld.loadImm(NULL, imm), bld.mkImm(0xc10),
i->getSrc(s));
} else {
i->moveSources(s, 1);
i->setSrc(s, bld.loadImm(NULL, imm << 16));
}
} else {
i->setSrc(s, bld.loadImm(NULL, imm));
}
}
}

if (chipset >= NVISA_GK104_CHIPSET) {
//
// If TEX requires more than 4 sources, the 2nd register tuple must be
// aligned to 4, even if it consists of just a single 4-byte register.
//
// XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case.
//
int s = i->srcCount(0xff, true);
if (s > 4 && s < 7) {
if (i->srcExists(s)) // move potential predicate out of the way
i->moveSources(s, 7 - s);
while (s < 7)
i->setSrc(s++, bld.loadImm(NULL, 0));
}
}

return true;
}
 
// Emulates a texture fetch with explicit derivatives.
// The instruction is turned into OP_TEX and cloned once per quad lane
// (4 times). For each lane l the coordinates of lane l are broadcast and
// dPdx/dPdy are added or subtracted with quad ops selected from qOps, and
// the result of each clone is copied out for lane l only. The per-lane
// results are finally merged with OP_UNION into the original defs and the
// original instruction is removed from its block. Always returns true.
bool
NVC0LoweringPass::handleManualTXD(TexInstruction *i)
{
// per lane: { quad op applied with dPdx, quad op applied with dPdy }
static const uint8_t qOps[4][2] =
{
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
};
Value *def[4][4]; // indexed [def component][lane]
Value *crd[3];
Instruction *tex;
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
const int dim = i->tex.target.getDim();
// 1 for array targets; used below as the index of the first coordinate
const int array = i->tex.target.isArray();

i->op = OP_TEX; // no need to clone dPdx/dPdy later

for (c = 0; c < dim; ++c)
crd[c] = bld.getScratch();

bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (l = 0; l < 4; ++l) {
// mov coordinates from lane l to all lanes
for (c = 0; c < dim; ++c)
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero);
// add dPdx from lane l to lanes dx
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
tex->setSrc(c + array, crd[c]);
// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;
def[c][l] = bld.getSSA();
mov = bld.mkMov(def[c][l], tex->getDef(c));
mov->fixed = 1;
// restrict the copy to lane l
mov->lanes = 1 << l;
}
}
bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);

// merge the four per-lane copies back into each original def
for (c = 0; i->defExists(c); ++c) {
Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
for (l = 0; l < 4; ++l)
u->setSrc(l, def[c][l]);
}

i->bb->remove(i);
return true;
}
 
// Lowers a TXD (texture fetch with explicit derivatives).
// The number of sources the instruction is expected to have after handleTEX
// is computed first. If that exceeds 4, or the target has more than 2
// dimensions, is a shadow target or a cube, the op is switched to OP_TEX and
// the fetch is emulated by handleManualTXD. Otherwise dPdx/dPdy are appended
// as interleaved sources after the existing ones. Returns true, or the
// result of handleManualTXD on the manual path.
bool
NVC0LoweringPass::handleTXD(TexInstruction *txd)
{
   const int chipset = prog->getTarget()->getChipset();
   const bool isArray = txd->tex.target.isArray();
   const bool hasIndirect =
      txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0;

   int dim = txd->tex.target.getDim();
   unsigned arg = txd->tex.target.getArgCount();
   unsigned expected_args = arg;

   if (chipset >= NVISA_GK104_CHIPSET) {
      if (!isArray && txd->tex.useOffsets)
         expected_args++;
      if (hasIndirect)
         expected_args++;
   } else {
      if (txd->tex.useOffsets)
         expected_args++;
      if (!isArray && hasIndirect)
         expected_args++;
   }

   const bool needManual = expected_args > 4 ||
                           dim > 2 ||
                           txd->tex.target.isShadow() ||
                           txd->tex.target.isCube();
   if (needManual)
      txd->op = OP_TEX;

   handleTEX(txd);

   // find the first free source slot
   while (txd->srcExists(arg))
      ++arg;

   txd->tex.derivAll = true;
   if (txd->op == OP_TEX)
      return handleManualTXD(txd);

   assert(arg == expected_args);
   for (int c = 0; c < dim; ++c) {
      const unsigned slot = arg + c * 2;
      txd->setSrc(slot + 0, txd->dPdx[c]);
      txd->setSrc(slot + 1, txd->dPdy[c]);
      txd->dPdx[c].set(NULL);
      txd->dPdy[c].set(NULL);
   }
   return true;
}
 
// Lowering hook for texture queries. Currently leaves the instruction
// untouched and always returns true.
bool
NVC0LoweringPass::handleTXQ(TexInstruction *txq)
{
// TODO: indirect resource/sampler index
return true;
}
 
// Lowers a TXLQ instruction.
// The component mask bits 0 and 1 are exchanged when exactly one of them is
// set, the sources are arranged by handleTEX, and each existing result is
// converted from 16-bit integer to f32 and scaled by 1/256. When both
// components were requested the two results are swapped at the end.
// Always returns true.
bool
NVC0LoweringPass::handleTXLQ(TexInstruction *i)
{
   /* The outputs are inverted compared to what the TGSI instruction
    * expects. Take that into account in the mask.
    */
   assert((i->tex.mask & ~3) == 0);
   switch (i->tex.mask) {
   case 1:
      i->tex.mask = 2;
      break;
   case 2:
      i->tex.mask = 1;
      break;
   default:
      break;
   }

   handleTEX(i);
   bld.setPosition(i, true);

   /* The returned values are not quite what we want:
    * (a) convert from s16/u16 to f32
    * (b) multiply by 1/256
    */
   for (int d = 0; d < 2; ++d) {
      if (!i->defExists(d))
         continue;

      const DataType srcTy =
         (i->tex.mask == 2 || d > 0) ? TYPE_U16 : TYPE_S16;
      Value *const res = i->getDef(d);

      bld.mkCvt(OP_CVT, TYPE_F32, res, srcTy, res);
      bld.mkOp2(OP_MUL, TYPE_F32, res, res, bld.loadImm(NULL, 1.0f / 256));
   }

   if (i->tex.mask == 3) {
      // exchange the two results through a temporary
      LValue *tmp = new_LValue(func, FILE_GPR);
      bld.mkMov(tmp, i->getDef(0));
      bld.mkMov(i->getDef(0), i->getDef(1));
      bld.mkMov(i->getDef(1), tmp);
   }
   return true;
}
 
 
// Rewrites an atomic on local or shared memory into one on global memory.
// The base of the local/shared window is read with RDSV (SV_LBASE or
// SV_SBASE), any existing indirect address is added to it, and the sum
// becomes the new indirect address of source 0, whose file is changed to
// FILE_MEMORY_GLOBAL on a shallow clone. Atomics on other files (asserted to
// be global) are left untouched. Always returns true.
bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
   SVSemantic sv;
   const DataFile file = atom->src(0).getFile();

   if (file == FILE_MEMORY_LOCAL) {
      sv = SV_LBASE;
   } else if (file == FILE_MEMORY_SHARED) {
      sv = SV_SBASE;
   } else {
      assert(file == FILE_MEMORY_GLOBAL);
      return true;
   }

   Value *addr =
      bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0));
   Value *const oldPtr = atom->getIndirect(0, 0);

   atom->setSrc(0, cloneShallow(func, atom->getSrc(0)));
   atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
   if (oldPtr)
      addr = bld.mkOp2v(OP_ADD, TYPE_U32, addr, addr, oldPtr);
   atom->setIndirect(0, 0, addr);

   return true;
}
 
// Extra lowering for atomic CAS and EXCH.
// Returns false without changes for any other atomic sub-operation.
// When needCctl is set, a fixed CCTL (sub-op CCTL_IV) on the same address is
// emitted after the atomic, carrying the atomic's predicate if it has one.
// For a CAS with a result, sources 1 and 2 are merged into one 64-bit value
// placed before the atomic, and that value replaces source 1.
bool
NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
{
   const bool isCas = cas->subOp == NV50_IR_SUBOP_ATOM_CAS;
   const bool isExch = cas->subOp == NV50_IR_SUBOP_ATOM_EXCH;
   if (!isCas && !isExch)
      return false;

   bld.setPosition(cas, true);

   if (needCctl) {
      Instruction *cctl = bld.mkOp1(OP_CCTL, TYPE_NONE, NULL, cas->getSrc(0));
      cctl->setIndirect(0, 0, cas->getIndirect(0, 0));
      cctl->subOp = NV50_IR_SUBOP_CCTL_IV;
      cctl->fixed = 1;
      if (cas->isPredicated())
         cctl->setPredicate(cas->cc, cas->getPredicate());
   }

   if (isCas && cas->defExists(0)) {
      // CAS is crazy. Its 2nd source is a double reg, and the 3rd source
      // should be set to the high part of the double reg or bad things will
      // happen elsewhere in the universe.
      // Also, it sometimes returns the new value instead of the old one
      // under mysterious circumstances.
      Value *merged = bld.getSSA(8);
      bld.setPosition(cas, false);
      bld.mkOp2(OP_MERGE, TYPE_U64, merged, cas->getSrc(1), cas->getSrc(2));
      cas->setSrc(1, merged);
   }

   return true;
}
 
// Emits a 32-bit load from the constant buffer selected by
// io.resInfoCBSlot, at byte offset io.suInfoBase + off, optionally indexed
// by ptr. Returns the loaded value.
inline Value *
NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
{
   const uint8_t b = prog->driver->io.resInfoCBSlot;
   off += prog->driver->io.suInfoBase;

   Symbol *const sym = bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off);
   return bld.mkLoadv(TYPE_U32, sym, ptr);
}
 
// Emits a 32-bit load from the constant buffer selected by io.msInfoCBSlot,
// at byte offset io.msInfoBase + off, optionally indexed by ptr.
// Returns the loaded value.
inline Value *
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
{
   const uint8_t b = prog->driver->io.msInfoCBSlot;
   off += prog->driver->io.msInfoBase;

   Symbol *const sym = bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off);
   return bld.mkLoadv(TYPE_U32, sym, ptr);
}
 
/* On nvc0, surface info is obtained via the surface binding points passed
* to the SULD/SUST instructions.
* On nve4, surface info is stored in c[] and is used by various special
* instructions, e.g. for clamping coordinates or generating an address.
* They couldn't just have added an equivalent to TIC now, couldn't they ?
*/
// Byte offsets of the individual fields inside one surface-info record.
// Users in this file add them to (surface index * NVE4_SU_INFO__STRIDE) and
// load the field with loadResInfo32().
#define NVE4_SU_INFO_ADDR 0x00
#define NVE4_SU_INFO_FMT 0x04
#define NVE4_SU_INFO_DIM_X 0x08
#define NVE4_SU_INFO_PITCH 0x0c
#define NVE4_SU_INFO_DIM_Y 0x10
#define NVE4_SU_INFO_ARRAY 0x14
#define NVE4_SU_INFO_DIM_Z 0x18
#define NVE4_SU_INFO_UNK1C 0x1c
#define NVE4_SU_INFO_WIDTH 0x20
#define NVE4_SU_INFO_HEIGHT 0x24
#define NVE4_SU_INFO_DEPTH 0x28
#define NVE4_SU_INFO_TARGET 0x2c
#define NVE4_SU_INFO_CALL 0x30
#define NVE4_SU_INFO_RAW_X 0x34
#define NVE4_SU_INFO_MS_X 0x38
#define NVE4_SU_INFO_MS_Y 0x3c

// size in bytes of one surface-info record
#define NVE4_SU_INFO__STRIDE 0x40

// Indexed accessors: DIM(0..2) yields DIM_X/DIM_Y/DIM_Z, SIZE(0..2) yields
// WIDTH/HEIGHT/DEPTH and MS(0..1) yields MS_X/MS_Y.
#define NVE4_SU_INFO_DIM(i) (0x08 + (i) * 8)
#define NVE4_SU_INFO_SIZE(i) (0x20 + (i) * 4)
#define NVE4_SU_INFO_MS(i) (0x38 + (i) * 4)
 
// Selects the SUCLAMP sub-operation for coordinate c of a surface access,
// based on the surface's texture target. Unknown targets assert and yield 0.
static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
{
   switch (su->tex.target.getEnum()) {
   case TEX_TARGET_BUFFER:
      return NV50_IR_SUBOP_SUCLAMP_PL(0, 1);

   case TEX_TARGET_1D_ARRAY:
      // coordinate 1 of a 1D array is clamped differently from coordinate 0
      if (c == 1)
         return NV50_IR_SUBOP_SUCLAMP_PL(0, 2);
      return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);

   case TEX_TARGET_2D:
   case TEX_TARGET_2D_MS:
      return NV50_IR_SUBOP_SUCLAMP_BL(0, 2);

   case TEX_TARGET_RECT:
   case TEX_TARGET_1D:
   case TEX_TARGET_2D_ARRAY:
   case TEX_TARGET_2D_MS_ARRAY:
   case TEX_TARGET_3D:
   case TEX_TARGET_CUBE:
   case TEX_TARGET_CUBE_ARRAY:
      return NV50_IR_SUBOP_SUCLAMP_SD(0, 2);

   default:
      assert(0);
      return 0;
   }
}
 
// Converts a multisample surface access into an access on the matching
// non-multisample target (2D_MS -> 2D, 2D_MS_ARRAY -> 2D_ARRAY); any other
// target is left untouched. x and y are shifted left by the per-surface MS
// values, a per-sample offset pair looked up with loadMsInfo32 (indexed by
// (sample & 7) << 3) is added, and the sample source is dropped by moving
// the sources from index arg down by one.
void
NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
{
   // taken before the target is rewritten below
   const int arg = tex->tex.target.getArgCount();
   const uint16_t base = tex->tex.r * NVE4_SU_INFO__STRIDE;

   if (tex->tex.target == TEX_TARGET_2D_MS) {
      tex->tex.target = TEX_TARGET_2D;
   } else if (tex->tex.target == TEX_TARGET_2D_MS_ARRAY) {
      tex->tex.target = TEX_TARGET_2D_ARRAY;
   } else {
      return;
   }

   Value *const coordX = tex->getSrc(0);
   Value *const coordY = tex->getSrc(1);
   Value *const sample = tex->getSrc(arg - 1);

   Value *tx = bld.getSSA();
   Value *ty = bld.getSSA();
   Value *ts = bld.getSSA();

   Value *const shiftX = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(0));
   Value *const shiftY = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(1));

   bld.mkOp2(OP_SHL, TYPE_U32, tx, coordX, shiftX);
   bld.mkOp2(OP_SHL, TYPE_U32, ty, coordY, shiftY);

   // ts = (sample & 7) << 3
   bld.mkOp2v(OP_AND, TYPE_U32, ts, sample, bld.loadImm(NULL, 0x7));
   bld.mkOp2v(OP_SHL, TYPE_U32, ts, ts, bld.mkImm(3));

   Value *const dx = loadMsInfo32(ts, 0x0);
   Value *const dy = loadMsInfo32(ts, 0x4);

   bld.mkOp2(OP_ADD, TYPE_U32, tx, tx, dx);
   bld.mkOp2(OP_ADD, TYPE_U32, ty, ty, dy);

   tex->setSrc(0, tx);
   tex->setSrc(1, ty);
   tex->moveSources(arg, -1);
}
 
// Sets 64-bit "generic address", predicate and format sources for SULD/SUST.
// They're computed from the coordinates using the surface info in c[] space.
//
// On return the instruction's coordinate sources have been replaced by:
//   src 0: the 64-bit address, src 1: the format word (immediate 0 for raw
//   access), src 2: the predicate produced by the clamp/address ops.
// New instructions are inserted before su.
void
NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
{
Instruction *insn;
const bool atom = su->op == OP_SUREDB || su->op == OP_SUREDP;
const bool raw =
su->op == OP_SULDB || su->op == OP_SUSTB || su->op == OP_SUREDB;
const int idx = su->tex.r;
const int dim = su->tex.target.getDim();
// number of coordinate sources: dimensions plus one for the array layer
const int arg = dim + (su->tex.target.isArray() ? 1 : 0);
// byte offset of this surface's info record
const uint16_t base = idx * NVE4_SU_INFO__STRIDE;
int c;
Value *zero = bld.mkImm(0);
Value *p1 = NULL;
Value *v;
Value *src[3];
Value *bf, *eau, *off;
Value *addr, *pred;

off = bld.getScratch(4);
bf = bld.getScratch(4);
addr = bld.getSSA(8);
pred = bld.getScratch(1, FILE_PREDICATE);

bld.setPosition(su, false);

adjustCoordinatesMS(su);

// calculate clamped coordinates
for (c = 0; c < arg; ++c) {
src[c] = bld.getScratch();
if (c == 0 && raw)
v = loadResInfo32(NULL, base + NVE4_SU_INFO_RAW_X);
else
v = loadResInfo32(NULL, base + NVE4_SU_INFO_DIM(c));
bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero)
->subOp = getSuClampSubOp(su, c);
}
// unused coordinate slots read as zero
for (; c < 3; ++c)
src[c] = zero;

// set predicate output
if (su->tex.target == TEX_TARGET_BUFFER) {
src[0]->getInsn()->setFlagsDef(1, pred);
} else
if (su->tex.target.isArray()) {
// the layer clamp writes its own predicate, OR-ed into pred further down
p1 = bld.getSSA(1, FILE_PREDICATE);
src[dim]->getInsn()->setFlagsDef(1, p1);
}

// calculate pixel offset
if (dim == 1) {
if (su->tex.target != TEX_TARGET_BUFFER)
bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff));
} else
if (dim == 3) {
v = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1])
->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l

v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH);
bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0])
->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l
} else {
assert(dim == 2);
v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH);
bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0])
->subOp = su->tex.target.isArray() ?
NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
}

// calculate effective address part 1
if (su->tex.target == TEX_TARGET_BUFFER) {
if (raw) {
bf = src[0];
} else {
v = loadResInfo32(NULL, base + NVE4_SU_INFO_FMT);
bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero)
->subOp = NV50_IR_SUBOP_V1(7,6,8|2);
}
} else {
Value *y = src[1];
Value *z = src[2];
uint16_t subOp = 0;

switch (dim) {
case 1:
y = zero;
z = zero;
break;
case 2:
z = off;
if (!su->tex.target.isArray()) {
z = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C);
subOp = NV50_IR_SUBOP_SUBFM_3D;
}
break;
default:
subOp = NV50_IR_SUBOP_SUBFM_3D;
assert(dim == 3);
break;
}
insn = bld.mkOp3(OP_SUBFM, TYPE_U32, bf, src[0], y, z);
insn->subOp = subOp;
insn->setFlagsDef(1, pred);
}

// part 2
v = loadResInfo32(NULL, base + NVE4_SU_INFO_ADDR);

if (su->tex.target == TEX_TARGET_BUFFER) {
eau = v;
} else {
eau = bld.mkOp3v(OP_SUEAU, TYPE_U32, bld.getScratch(4), off, bf, v);
}
// add array layer offset
if (su->tex.target.isArray()) {
v = loadResInfo32(NULL, base + NVE4_SU_INFO_ARRAY);
if (dim == 1)
bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau)
->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32
else
bld.mkOp3(OP_MADSP, TYPE_U32, eau, v, src[2], eau)
->subOp = NV50_IR_SUBOP_MADSP(0,0,0); // u32 u24 u32
// combine predicates
assert(p1);
bld.mkOp2(OP_OR, TYPE_U8, pred, pred, p1);
}

if (atom) {
Value *lo = bf;
if (su->tex.target == TEX_TARGET_BUFFER) {
// keep the buffer offset in off; it is added to the address after MERGE
lo = zero;
bld.mkMov(off, bf);
}
// bf == g[] address & 0xff
// eau == g[] address >> 8
bld.mkOp3(OP_PERMT, TYPE_U32, bf, lo, bld.loadImm(NULL, 0x6540), eau);
bld.mkOp3(OP_PERMT, TYPE_U32, eau, zero, bld.loadImm(NULL, 0x0007), eau);
} else
if (su->op == OP_SULDP && su->tex.target == TEX_TARGET_BUFFER) {
// Convert from u32 to u8 address format, which is what the library code
// doing SULDP currently uses.
// XXX: can SUEAU do this ?
// XXX: does it matter that we don't mask high bytes in bf ?
// Grrr.
bld.mkOp2(OP_SHR, TYPE_U32, off, bf, bld.mkImm(8));
bld.mkOp2(OP_ADD, TYPE_U32, eau, eau, off);
}

// addr = 64-bit value built from bf (first MERGE source) and eau (second)
bld.mkOp2(OP_MERGE, TYPE_U64, addr, bf, eau);

if (atom && su->tex.target == TEX_TARGET_BUFFER)
bld.mkOp2(OP_ADD, TYPE_U64, addr, addr, off);

// let's just set it 0 for raw access and hope it works
v = raw ?
bld.mkImm(0) : loadResInfo32(NULL, base + NVE4_SU_INFO_FMT);

// get rid of old coordinate sources, make space for fmt info and predicate
su->moveSources(arg, 3 - arg);
// set 64 bit address and 32-bit format sources
su->setSrc(0, addr);
su->setSrc(1, v);
su->setSrc(2, pred);
}
 
// Lowers a surface instruction on NVE4.
// The coordinate sources are first replaced by address / format / predicate
// (processSurfaceCoordsNVE4). Then:
//  - OP_SULDP is replaced by an indirect, absolute call whose target is read
//    from the surface-info record (NVE4_SU_INFO_CALL); arguments and results
//    are passed in fixed registers and the original instruction is deleted.
//  - OP_SUREDB / OP_SUREDP are replaced by a predicated OP_ATOM on global
//    memory and the original instruction is deleted.
//  - every other op only gets its source type set.
//
// Once su has been deleted it must not be touched again, so the SULDP path
// returns immediately after the deletion.
void
NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
{
   processSurfaceCoordsNVE4(su);

   // Who do we hate more ? The person who decided that nvc0's SULD doesn't
   // have to support conversion or the person who decided that, in OpenCL,
   // you don't have to specify the format here like you do in OpenGL ?

   if (su->op == OP_SULDP) {
      // We don't patch shaders. Ever.
      // You get an indirect call to our library blob here.
      // But at least it's uniform.
      FlowInstruction *call;
      LValue *p[3];
      LValue *r[5];
      uint16_t base = su->tex.r * NVE4_SU_INFO__STRIDE + NVE4_SU_INFO_CALL;

      // pin the call's arguments/results to fixed register ids
      for (int i = 0; i < 4; ++i)
         (r[i] = bld.getScratch(4, FILE_GPR))->reg.data.id = i;
      for (int i = 0; i < 3; ++i)
         (p[i] = bld.getScratch(1, FILE_PREDICATE))->reg.data.id = i;
      (r[4] = bld.getScratch(8, FILE_GPR))->reg.data.id = 4;

      bld.mkMov(p[1], bld.mkImm((su->cache == CACHE_CA) ? 1 : 0), TYPE_U8);
      bld.mkMov(p[2], bld.mkImm((su->cache == CACHE_CG) ? 1 : 0), TYPE_U8);
      bld.mkMov(p[0], su->getSrc(2), TYPE_U8);
      bld.mkMov(r[4], su->getSrc(0), TYPE_U64);
      bld.mkMov(r[2], su->getSrc(1), TYPE_U32);

      call = bld.mkFlow(OP_CALL, NULL, su->cc, su->getPredicate());

      call->indirect = 1;
      call->absolute = 1;
      call->setSrc(0, bld.mkSymbol(FILE_MEMORY_CONST,
                                   prog->driver->io.resInfoCBSlot, TYPE_U32,
                                   prog->driver->io.suInfoBase + base));
      call->setSrc(1, r[2]);
      call->setSrc(2, r[4]);
      for (int i = 0; i < 3; ++i)
         call->setSrc(3 + i, p[i]);
      for (int i = 0; i < 4; ++i) {
         call->setDef(i, r[i]);
         bld.mkMov(su->getDef(i), r[i]);
      }
      call->setDef(4, p[1]);
      delete_Instruction(bld.getProgram(), su);
      // su is gone: do not fall through to the code below, which would read
      // su->op and write su->sType on the deleted instruction.
      return;
   }

   if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
      // FIXME: for out of bounds access, destination value will be undefined !
      Value *pred = su->getSrc(2);
      CondCode cc = CC_NOT_P;
      if (su->getPredicate()) {
         // combine the instruction's own predicate with the bounds predicate
         pred = bld.getScratch(1, FILE_PREDICATE);
         cc = su->cc;
         if (cc == CC_NOT_P) {
            bld.mkOp2(OP_OR, TYPE_U8, pred, su->getPredicate(), su->getSrc(2));
         } else {
            bld.mkOp2(OP_AND, TYPE_U8, pred, su->getPredicate(), su->getSrc(2));
            pred->getInsn()->src(1).mod = Modifier(NV50_IR_MOD_NOT);
         }
      }
      Instruction *red = bld.mkOp(OP_ATOM, su->dType, su->getDef(0));
      red->subOp = su->subOp;
      if (!gMemBase)
         gMemBase = bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, 0);
      red->setSrc(0, gMemBase);
      red->setSrc(1, su->getSrc(3));
      if (su->subOp == NV50_IR_SUBOP_ATOM_CAS)
         red->setSrc(2, su->getSrc(4));
      red->setIndirect(0, 0, su->getSrc(0));
      red->setPredicate(cc, pred);
      delete_Instruction(bld.getProgram(), su);
      handleCasExch(red, true);
   } else {
      su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8;
   }
}
 
// Lowers a system-value write into an EXPORT store to the shader output
// address the target assigns to that system value. Returns false (leaving
// the instruction in place) when that address is >= 0x400; otherwise the
// store is emitted, the original instruction is removed and true is returned.
bool
NVC0LoweringPass::handleWRSV(Instruction *i)
{
   // must replace, $sreg are not writeable
   const uint32_t addr =
      targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym());
   if (addr >= 0x400)
      return false;

   Symbol *const sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr);

   Instruction *const st = bld.mkStore(OP_EXPORT, i->dType, sym,
                                       i->getIndirect(0, 0), i->getSrc(1));
   st->perPatch = i->perPatch;

   bld.getBB()->remove(i);
   return true;
}
 
// Emits code that reads component c of the tessellation coordinate into dst.
// Components 0 and 1 are fetched from shader output addresses 0x2f0 and
// 0x2f4, indexed by the lane id. Component 2 is computed as
// 1.0 - (component 0 + component 1).
void
NVC0LoweringPass::readTessCoord(LValue *dst, int c)
{
   Value *laneid = bld.getSSA();
   bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0));

   Value *x = NULL;
   Value *y = NULL;
   switch (c) {
   case 0:
      x = dst;
      break;
   case 1:
      y = dst;
      break;
   default:
      assert(c == 2);
      x = bld.getSSA();
      y = bld.getSSA();
      break;
   }

   if (x)
      bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid);
   if (y)
      bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid);

   if (c != 2)
      return;

   bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y);
   bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst);
}
 
// Lowers a system-value read (RDSV).
// When the target maps the system value to an address >= 0x400 the
// instruction is kept as is, except that reads of component index 3 become a
// MOV of an immediate. Otherwise the read is replaced by the appropriate
// interpolation, fetch, constant-buffer load or PIXLD, and the original
// instruction is removed from the current block. Always returns true.
bool
NVC0LoweringPass::handleRDSV(Instruction *i)
{
Symbol *sym = i->getSrc(0)->asSym();
const SVSemantic sv = sym->reg.data.sv.sv;
Value *vtx = NULL;
Instruction *ld;
uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym);

if (addr >= 0x400) {
// mov $sreg
if (sym->reg.data.sv.index == 3) {
// TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID
i->op = OP_MOV;
// the 4th component reads as 1 for NTID/NCTAID and as 0 otherwise
i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0));
}
return true;
}

switch (sv) {
case SV_POSITION:
assert(prog->getType() == Program::TYPE_FRAGMENT);
if (i->srcExists(1)) {
// Pass offset through to the interpolation logic
ld = bld.mkInterp(NV50_IR_INTERP_LINEAR | NV50_IR_INTERP_OFFSET,
i->getDef(0), addr, NULL);
ld->setSrc(1, i->getSrc(1));
} else {
bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL);
}
break;
case SV_FACE:
{
Value *face = i->getDef(0);
bld.mkInterp(NV50_IR_INTERP_FLAT, face, addr, NULL);
// for a float destination: face = float(-(face | 1))
if (i->dType == TYPE_F32) {
bld.mkOp2(OP_OR, TYPE_U32, face, face, bld.mkImm(0x00000001));
bld.mkOp1(OP_NEG, TYPE_S32, face, face);
bld.mkCvt(OP_CVT, TYPE_F32, face, TYPE_S32, face);
}
}
break;
case SV_TESS_COORD:
assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL);
readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index);
break;
case SV_NTID:
case SV_NCTAID:
case SV_GRIDID:
assert(targ->getChipset() >= NVISA_GK104_CHIPSET); // mov $sreg otherwise
if (sym->reg.data.sv.index == 3) {
i->op = OP_MOV;
i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1));
// instruction is kept (now a MOV), so skip the removal at the end
return true;
}
// otherwise load the value from constant buffer 0 at gridInfoBase + addr
addr += prog->driver->prop.cp.gridInfoBase;
bld.mkLoad(TYPE_U32, i->getDef(0),
bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL);
break;
case SV_SAMPLE_INDEX:
// TODO: Properly pass source as an address in the PIX address space
// (which can be of the form [r0+offset]). But this is currently
// unnecessary.
ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
break;
case SV_SAMPLE_POS: {
// read the sample id, then load from sampleInfoBase + 4 * component,
// indexed by (sample id << 3)
Value *off = new_LValue(func, FILE_GPR);
ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3));
bld.mkLoad(TYPE_F32,
i->getDef(0),
bld.mkSymbol(
FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot,
TYPE_U32, prog->driver->io.sampleInfoBase +
4 * sym->reg.data.sv.index),
off);
break;
}
case SV_SAMPLE_MASK:
ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK;
break;
default:
// generic case: fetch from shader input; tessellation evaluation programs
// additionally pass the result of a PFETCH of immediate 0 to the fetch
if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
ld = bld.mkFetch(i->getDef(0), i->dType,
FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
ld->perPatch = i->perPatch;
break;
}
bld.getBB()->remove(i);
return true;
}
 
// Lower a floating point DIV into a multiplication by the reciprocal of
// source 1. Non-float divisions are left untouched. Always returns true.
bool
NVC0LoweringPass::handleDIV(Instruction *i)
{
   if (isFloatType(i->dType)) {
      bld.setPosition(i, false);

      // rcp = 1 / src1, emitted in front of the division
      Instruction *const recip =
         bld.mkOp1(OP_RCP, i->dType, bld.getSSA(typeSizeof(i->dType)),
                   i->getSrc(1));

      // i = src0 * rcp
      i->op = OP_MUL;
      i->setSrc(1, recip->getDef(0));
   }
   return true;
}
 
// Lower a floating point MOD as  a - b * trunc(a * rcp(b)).
// Non-float MODs are left untouched. Always returns true.
bool
NVC0LoweringPass::handleMOD(Instruction *i)
{
   if (isFloatType(i->dType)) {
      LValue *const tmp = bld.getScratch(typeSizeof(i->dType));

      bld.mkOp1(OP_RCP, i->dType, tmp, i->getSrc(1));         // 1 / b
      bld.mkOp2(OP_MUL, i->dType, tmp, i->getSrc(0), tmp);    // a / b
      bld.mkOp1(OP_TRUNC, i->dType, tmp, tmp);                // trunc(a / b)
      bld.mkOp2(OP_MUL, i->dType, tmp, i->getSrc(1), tmp);    // b * trunc(a / b)

      // the original instruction becomes the final subtraction
      i->op = OP_SUB;
      i->setSrc(1, tmp);
   }
   return true;
}
 
// Lower SQRT as  x * rsq(x), predicated on the source being greater than
// zero. When the comparison "src <= 0" holds, the destination is written
// with zero instead. Always returns true.
bool
NVC0LoweringPass::handleSQRT(Instruction *i)
{
   Value *const notPositive = bld.getSSA(1, FILE_PREDICATE);
   Value *zeroVal = bld.getSSA();

   // build a zero of the right width (two merged 32-bit halves for f64)
   bld.mkOp1(OP_MOV, TYPE_U32, zeroVal, bld.mkImm(0));
   if (i->dType == TYPE_F64)
      zeroVal = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), zeroVal, zeroVal);

   // notPositive = (src <= 0)
   bld.mkCmp(OP_SET, CC_LE, i->dType, notPositive, i->dType, i->getSrc(0),
             zeroVal);

   // if (notPositive) dst = 0
   Instruction *const movZero =
      bld.mkOp1(OP_MOV, i->dType, i->getDef(0), zeroVal);
   movZero->setPredicate(CC_P, notPositive);

   // if (!notPositive) dst = src * rsq(src)
   Instruction *const rsq =
      bld.mkOp1(OP_RSQ, i->dType, bld.getSSA(typeSizeof(i->dType)),
                i->getSrc(0));
   rsq->setPredicate(CC_NOT_P, notPositive);

   i->op = OP_MUL;
   i->setSrc(1, rsq->getDef(0));
   i->setPredicate(CC_NOT_P, notPositive);

   return true;
}
 
// Lower POW(x, y) as EX2(PREEX2(y * LG2(x))). The multiplication has its dnz
// flag set. The original instruction is turned into the final EX2.
// Always returns true.
bool
NVC0LoweringPass::handlePOW(Instruction *i)
{
   LValue *const tmp = bld.getScratch();

   bld.mkOp1(OP_LG2, TYPE_F32, tmp, i->getSrc(0));

   Instruction *const scale = bld.mkOp2(OP_MUL, TYPE_F32, tmp, i->getSrc(1), tmp);
   scale->dnz = 1;

   bld.mkOp1(OP_PREEX2, TYPE_F32, tmp, tmp);

   // reuse the POW as the single-source EX2
   i->op = OP_EX2;
   i->setSrc(0, tmp);
   i->setSrc(1, NULL);

   return true;
}
 
// Lower an EXPORT.
//
// Fragment programs: the export becomes a final MOV into the GPR whose id is
// the export offset divided by 4, and prog->maxGPR is raised accordingly.
// Indirectly addressed fragment exports are not handled (returns false).
//
// Geometry programs: gpEmitAddress is attached as indirect address.
//
// Other program types are left unchanged.
bool
NVC0LoweringPass::handleEXPORT(Instruction *i)
{
   if (prog->getType() != Program::TYPE_FRAGMENT) {
      if (prog->getType() == Program::TYPE_GEOMETRY)
         i->setIndirect(0, 1, gpEmitAddress);
      return true;
   }

   const int outReg = i->getSrc(0)->reg.data.offset / 4;

   if (i->src(0).isIndirect(0)) // TODO, ugly
      return false;

   // MOV $r<outReg>, <exported value>
   i->op = OP_MOV;
   i->subOp = NV50_IR_SUBOP_MOV_FINAL;
   i->src(0).set(i->src(1));
   i->setSrc(1, NULL);
   i->setDef(0, new_LValue(func, FILE_GPR));
   i->getDef(0)->reg.data.id = outReg;

   prog->maxGPR = MAX2(prog->maxGPR, outReg);
   return true;
}
 
// Lower EMIT / RESTART.
//
// A RESTART directly following an EMIT on the same stream is folded into
// that EMIT (as EMIT_RESTART sub-operation) and deleted. Any other
// instruction gets gpEmitAddress as both its definition and its first
// source, with the stream id moved to source 1. Always returns true.
bool
NVC0LoweringPass::handleOUT(Instruction *i)
{
   Instruction *const before = i->prev;
   ImmediateValue stream, prevStream;

   // Only merge if the stream ids match. Also, note that the previous
   // instruction would have already been lowered, so we take arg1 from it.
   const bool mergeWithEmit =
      i->op == OP_RESTART && before && before->op == OP_EMIT &&
      i->src(0).getImmediate(stream) &&
      before->src(1).getImmediate(prevStream) &&
      stream.reg.data.u32 == prevStream.reg.data.u32;

   if (mergeWithEmit) {
      before->subOp = NV50_IR_SUBOP_EMIT_RESTART;
      delete_Instruction(prog, i);
      return true;
   }

   assert(gpEmitAddress);
   i->setDef(0, gpEmitAddress);
   i->setSrc(1, i->getSrc(0));
   i->setSrc(0, gpEmitAddress);
   return true;
}
 
// Make sure the predicate of @insn lives in the predicate register file.
// If it does not (e.g. it is an f32 value), emit a "pred != 0" comparison
// into a fresh predicate register and predicate the instruction on that.
void
NVC0LoweringPass::checkPredicate(Instruction *insn)
{
   Value *const oldPred = insn->getPredicate();

   if (oldPred && oldPred->reg.file != FILE_PREDICATE) {
      Value *const newPred = new_LValue(func, FILE_PREDICATE);

      // CAUTION: don't use newPred->getInsn, the definition might not be
      // unique, delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass

      bld.mkCmp(OP_SET, CC_NEU, insn->dType, newPred, insn->dType,
                bld.mkImm(0), oldPred);

      insn->setPredicate(insn->cc, newPred);
   }
}
 
//
// Per-instruction entry point of the lowering pass:
// - add quadop dance for texturing
// - put FP outputs in GPRs
// - convert instruction sequences
//
// The builder is positioned in front of @i before dispatching on the opcode.
// Returns the handler's result, or true for opcodes handled inline.
//
bool
NVC0LoweringPass::visit(Instruction *i)
{
   bld.setPosition(i, false);

   // conditionally executed instructions need a real predicate register
   if (i->cc != CC_ALWAYS)
      checkPredicate(i);

   switch (i->op) {
   // texturing
   case OP_TEX:
   case OP_TXB:
   case OP_TXL:
   case OP_TXF:
   case OP_TXG:
      return handleTEX(i->asTex());
   case OP_TXD:
      return handleTXD(i->asTex());
   case OP_TXLQ:
      return handleTXLQ(i->asTex());
   case OP_TXQ:
      return handleTXQ(i->asTex());

   // arithmetic that has to be expanded
   case OP_EX2: {
      // insert a PREEX2 writing the same destination and feed it to the EX2
      Value *const dst = i->getDef(0);
      bld.mkOp1(OP_PREEX2, TYPE_F32, dst, i->getSrc(0));
      i->setSrc(0, dst);
      break;
   }
   case OP_POW:
      return handlePOW(i);
   case OP_DIV:
      return handleDIV(i);
   case OP_MOD:
      return handleMOD(i);
   case OP_SQRT:
      return handleSQRT(i);

   // shader outputs and system values
   case OP_EXPORT:
      return handleEXPORT(i);
   case OP_EMIT:
   case OP_RESTART:
      return handleOUT(i);
   case OP_RDSV:
      return handleRDSV(i);
   case OP_WRSV:
      return handleWRSV(i);

   case OP_LOAD:
      if (i->src(0).getFile() == FILE_SHADER_INPUT) {
         if (prog->getType() == Program::TYPE_COMPUTE) {
            // compute inputs are read from constant buffer 0
            i->getSrc(0)->reg.file = FILE_MEMORY_CONST;
            i->getSrc(0)->reg.fileIndex = 0;
         } else {
            if (prog->getType() == Program::TYPE_GEOMETRY &&
                i->src(0).isIndirect(0)) {
               // XXX: this assumes vec4 units
               Value *const scaled =
                  bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
                             i->getIndirect(0, 0), bld.mkImm(4));
               i->setIndirect(0, 0, scaled);
            } else {
               assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
            }
            i->op = OP_VFETCH;
         }
      } else if (i->src(0).getFile() == FILE_MEMORY_CONST &&
                 i->src(0).isIndirect(1)) {
         // fold the second indirect value (shifted to bit 16) and the
         // optional first one into a single address register
         Value *combined;
         if (i->src(0).isIndirect(0))
            combined = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(),
                                  i->getIndirect(0, 1), bld.mkImm(0x1010),
                                  i->getIndirect(0, 0));
         else
            combined = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
                                  i->getIndirect(0, 1), bld.mkImm(16));
         i->setIndirect(0, 1, NULL);
         i->setIndirect(0, 0, combined);
         i->subOp = NV50_IR_SUBOP_LDC_IS;
      }
      break;

   case OP_ATOM: {
      // sample the memory file before handleATOM() gets to modify @i
      const bool isGlobal = i->src(0).getFile() == FILE_MEMORY_GLOBAL;
      handleATOM(i);
      handleCasExch(i, isGlobal);
      break;
   }

   // surface access
   case OP_SULDB:
   case OP_SULDP:
   case OP_SUSTB:
   case OP_SUSTP:
   case OP_SUREDB:
   case OP_SUREDP:
      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
         handleSurfaceOpNVE4(i->asTex());
      break;

   default:
      break;
   }
   return true;
}
 
// Run the legalization pass that belongs to the given code generation stage:
//   CG_STAGE_PRE_SSA -> NVC0LoweringPass
//   CG_STAGE_SSA     -> NVC0LegalizeSSA
//   CG_STAGE_POST_RA -> NVC0LegalizePostRA
// Returns the result of the pass, or false for any other stage.
bool
TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const
{
   switch (stage) {
   case CG_STAGE_PRE_SSA: {
      NVC0LoweringPass lowering(prog);
      return lowering.run(prog, false, true);
   }
   case CG_STAGE_POST_RA: {
      NVC0LegalizePostRA postRA(prog);
      return postRA.run(prog, false, true);
   }
   case CG_STAGE_SSA: {
      NVC0LegalizeSSA legalizeSSA;
      return legalizeSSA.run(prog, false, true);
   }
   default:
      break;
   }
   return false;
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
0,0 → 1,138
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include <tr1/unordered_set>
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_build_util.h"
 
namespace nv50_ir {
 
// Legalization pass that TargetNVC0::runLegalizePass() runs for
// CG_STAGE_SSA. It visits functions and basic blocks and owns a BuildUtil
// for emitting replacement code.
class NVC0LegalizeSSA : public Pass
{
private:
// Pass traversal hooks.
virtual bool visit(BasicBlock *);
virtual bool visit(Function *);

// we want to insert calls to the builtin library only after optimization
void handleDIV(Instruction *); // integer division, modulus
void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
void handleFTZ(Instruction *);

private:
// Instruction builder used by the handlers above.
BuildUtil bld;
};
 
// Legalization pass that TargetNVC0::runLegalizePass() runs for
// CG_STAGE_POST_RA.
class NVC0LegalizePostRA : public Pass
{
public:
NVC0LegalizePostRA(const Program *);

private:
// Pass traversal hooks.
virtual bool visit(Function *);
virtual bool visit(BasicBlock *);

void replaceZero(Instruction *);
bool tryReplaceContWithBra(BasicBlock *);
void propagateJoin(BasicBlock *);

// Records an instruction (insn) together with the texture instruction
// (or split / mov) it is associated with. 'level' starts out as -1.
struct TexUse
{
TexUse(Instruction *use, const Instruction *tex)
: insn(use), tex(tex), level(-1) { }
Instruction *insn;
const Instruction *tex; // or split / mov
int level;
};
// A [min, max] pair of ints. The default constructor leaves both members
// uninitialized.
struct Limits
{
Limits() { }
Limits(int min, int max) : min(min), max(max) { }
int min, max;
};
// Helpers for texture barrier insertion.
bool insertTextureBarriers(Function *);
inline bool insnDominatedBy(const Instruction *, const Instruction *) const;
void findFirstUses(const Instruction *tex, const Instruction *def,
std::list<TexUse>&,
std::tr1::unordered_set<const Instruction *>&);
void findOverwritingDefs(const Instruction *tex, Instruction *insn,
const BasicBlock *term,
std::list<TexUse>&);
void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *);
const Instruction *recurseDef(const Instruction *);

private:
// NOTE(review): presumably the values substituted by replaceZero() and a
// carry flag register -- confirm against the implementation.
LValue *rZero;
LValue *carry;
// Constant for the lifetime of the pass; must be set by the constructor.
const bool needTexBar;
};
 
// Lowering pass that TargetNVC0::runLegalizePass() runs for
// CG_STAGE_PRE_SSA. visit(Instruction *) dispatches on the opcode to the
// handle*() members below.
class NVC0LoweringPass : public Pass
{
public:
NVC0LoweringPass(Program *);

protected:
// Per-opcode lowering handlers. The bool result of most of them is
// returned directly from visit(Instruction *).
bool handleRDSV(Instruction *);
bool handleWRSV(Instruction *);
bool handleEXPORT(Instruction *);
bool handleOUT(Instruction *);
bool handleDIV(Instruction *);
bool handleMOD(Instruction *);
bool handleSQRT(Instruction *);
bool handlePOW(Instruction *);
bool handleTEX(TexInstruction *);
bool handleTXD(TexInstruction *);
bool handleTXQ(TexInstruction *);
virtual bool handleManualTXD(TexInstruction *);
bool handleTXLQ(TexInstruction *);
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);
void handleSurfaceOpNVE4(TexInstruction *);

// Replaces a predicate that is not in the predicate register file by the
// result of a comparison against zero.
void checkPredicate(Instruction *);

private:
// Pass traversal hooks.
virtual bool visit(Function *);
virtual bool visit(BasicBlock *);
virtual bool visit(Instruction *);

// Emits code that reads component c of the tessellation coordinate into dst.
void readTessCoord(LValue *dst, int c);

Value *loadResInfo32(Value *ptr, uint32_t off);
Value *loadMsInfo32(Value *ptr, uint32_t off);
Value *loadTexHandle(Value *ptr, unsigned int slot);

void adjustCoordinatesMS(TexInstruction *);
void processSurfaceCoordsNVE4(TexInstruction *);

protected:
// Instruction builder; visit(Instruction *) positions it in front of the
// instruction being lowered.
BuildUtil bld;

private:
const Target *const targ;

Symbol *gMemBase;
// Used by handleEXPORT() as indirect address of geometry program exports
// and by handleOUT() as definition and first source of EMIT / RESTART.
LValue *gpEmitAddress;
};
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
0,0 → 1,2740
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_target.h"
#include "codegen/nv50_ir_build_util.h"
 
extern "C" {
#include "util/u_math.h"
}
 
namespace nv50_ir {
 
bool
Instruction::isNop() const
{
if (op == OP_PHI || op == OP_SPLIT || op == OP_MERGE || op == OP_CONSTRAINT)
return true;
if (terminator || join) // XXX: should terminator imply flow ?
return false;
if (op == OP_ATOM)
return false;
if (!fixed && op == OP_NOP)
return true;
 
if (defExists(0) && def(0).rep()->reg.data.id < 0) {
for (int d = 1; defExists(d); ++d)
if (def(d).rep()->reg.data.id >= 0)
WARN("part of vector result is unused !\n");
return true;
}
 
if (op == OP_MOV || op == OP_UNION) {
if (!getDef(0)->equals(getSrc(0)))
return false;
if (op == OP_UNION)
if (!def(0).rep()->equals(getSrc(1)))
return false;
return true;
}
 
return false;
}
 
bool Instruction::isDead() const
{
if (op == OP_STORE ||
op == OP_EXPORT ||
op == OP_ATOM ||
op == OP_SUSTB || op == OP_SUSTP || op == OP_SUREDP || op == OP_SUREDB ||
op == OP_WRSV)
return false;
 
for (int d = 0; defExists(d); ++d)
if (getDef(d)->refCount() || getDef(d)->reg.data.id >= 0)
return false;
 
if (terminator || asFlow())
return false;
if (fixed)
return false;
 
return true;
};
 
// =============================================================================
 
// Pass that forwards the sources of MOV instructions to the users of their
// results, one basic block at a time.
class CopyPropagation : public Pass
{
private:
virtual bool visit(BasicBlock *);
};
 
// Propagate all MOVs forward to make subsequent optimization easier, except if
// the sources stem from a phi, in which case we don't want to mess up potential
// swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def.
//
// Only unpredicated, non-fixed MOVs between LValues of the same register
// file whose destination has no register assigned yet are considered.
// Always returns true.
bool
CopyPropagation::visit(BasicBlock *bb)
{
   Instruction *insn = bb->getEntry();

   while (insn) {
      // remember the successor, insn may get deleted below
      Instruction *const following = insn->next;

      const bool candidate =
         insn->op == OP_MOV && !insn->fixed &&
         insn->getSrc(0)->asLValue() &&
         !insn->getPredicate() &&
         insn->def(0).getFile() == insn->src(0).getFile();

      if (candidate) {
         Instruction *const srcDef = insn->getSrc(0)->getInsn();

         if (insn->getDef(0)->reg.data.id < 0 && srcDef &&
             srcDef->op != OP_PHI) {
            // propagate
            insn->def(0).replace(insn->getSrc(0), false);
            delete_Instruction(prog, insn);
         }
      }
      insn = following;
   }
   return true;
}
 
// =============================================================================
 
// Pass that removes 64-bit MERGE instructions which merely recombine the
// results of a single SPLIT, one basic block at a time.
class MergeSplits : public Pass
{
private:
virtual bool visit(BasicBlock *);
};
 
// For SPLIT / MERGE pairs that operate on the same registers, replace the
// post-merge def with the SPLIT's source.
//
// Only MERGEs producing an 8 byte value whose two sources are both defined
// by the same SPLIT are handled. Sources without a defining instruction are
// skipped. Always returns true.
bool
MergeSplits::visit(BasicBlock *bb)
{
   Instruction *i, *next, *si;

   for (i = bb->getEntry(); i; i = next) {
      // remember the successor, i may get deleted below
      next = i->next;
      if (i->op != OP_MERGE || typeSizeof(i->dType) != 8)
         continue;
      si = i->getSrc(0)->getInsn();
      // getInsn() yields NULL for values without a defining instruction
      if (!si || si->op != OP_SPLIT || si != i->getSrc(1)->getInsn())
         continue;
      i->def(0).replace(si->getSrc(0), false);
      delete_Instruction(prog, i);
   }

   return true;
}
 
// =============================================================================
 
// Pass that replaces sources defined by LOAD / MOV instructions with the
// loaded operand itself wherever the target allows it.
class LoadPropagation : public Pass
{
private:
virtual bool visit(BasicBlock *);

// Possibly exchanges sources 0 and 1 depending on what defines them.
void checkSwapSrc01(Instruction *);

// Classification helpers; all of them accept a NULL instruction.
bool isCSpaceLoad(Instruction *);
bool isImmd32Load(Instruction *);
bool isAttribOrSharedLoad(Instruction *);
};
 
// True if @ld is a LOAD from constant memory. NULL yields false.
bool
LoadPropagation::isCSpaceLoad(Instruction *ld)
{
   if (!ld || ld->op != OP_LOAD)
      return false;
   return ld->src(0).getFile() == FILE_MEMORY_CONST;
}
 
// True if @ld is a 4 byte wide MOV of an immediate. NULL yields false.
bool
LoadPropagation::isImmd32Load(Instruction *ld)
{
   const bool isMov32 =
      ld && ld->op == OP_MOV && typeSizeof(ld->dType) == 4;

   return isMov32 && ld->src(0).getFile() == FILE_IMMEDIATE;
}
 
// True if @ld is a VFETCH, or a LOAD from shader inputs or shared memory.
// NULL yields false.
bool
LoadPropagation::isAttribOrSharedLoad(Instruction *ld)
{
   if (!ld)
      return false;
   if (ld->op == OP_VFETCH)
      return true;
   if (ld->op != OP_LOAD)
      return false;
   return ld->src(0).getFile() == FILE_SHADER_INPUT ||
          ld->src(0).getFile() == FILE_MEMORY_SHARED;
}
 
// Exchange sources 0 and 1 of @insn where that moves a load into a better
// position:
//  - a constant space load in source 0 is moved to source 1, unless source 1
//    is a constant space load as well
//  - a 32-bit immediate load in source 0 is moved to source 1, unless source 1
//    is a constant space or immediate load
//  - an attribute / shared memory load in source 1 is moved to source 0,
//    unless source 0 is one as well
// Only commutative operations, SET and SLCT with a GPR in source 1 are
// touched; the condition code is adjusted to match the new operand order.
void
LoadPropagation::checkSwapSrc01(Instruction *insn)
{
   const bool commutes = prog->getTarget()->getOpInfo(insn).commutative;

   if (!commutes && insn->op != OP_SET && insn->op != OP_SLCT)
      return;
   if (insn->src(1).getFile() != FILE_GPR)
      return;

   Instruction *const def0 = insn->getSrc(0)->getInsn();
   Instruction *const def1 = insn->getSrc(1)->getInsn();

   bool doSwap;
   if (isCSpaceLoad(def0))
      doSwap = !isCSpaceLoad(def1);
   else if (isImmd32Load(def0))
      doSwap = !isCSpaceLoad(def1) && !isImmd32Load(def1);
   else if (isAttribOrSharedLoad(def1))
      doSwap = !isAttribOrSharedLoad(def0);
   else
      doSwap = false;

   if (!doSwap)
      return;
   insn->swapSources(0, 1);

   // keep the comparison meaning the same with swapped operands
   switch (insn->op) {
   case OP_SET:
   case OP_SET_AND:
   case OP_SET_OR:
   case OP_SET_XOR:
      insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond);
      break;
   case OP_SLCT:
      insn->asCmp()->setCond = inverseCondCode(insn->asCmp()->setCond);
      break;
   default:
      break;
   }
}
 
// Walk the block and, for every source defined by a non-fixed LOAD or MOV
// that the target can fold into the using instruction, read the loaded
// operand (and its indirect address, if any) directly. Loads left without
// users are deleted. CALL and PFETCH are skipped. Always returns true.
bool
LoadPropagation::visit(BasicBlock *bb)
{
   const Target *targ = prog->getTarget();

   Instruction *user = bb->getEntry();
   while (user) {
      Instruction *const following = user->next;

      // calls have args as sources, they must be in regs
      // pfetch expects arg1 to be a reg
      if (user->op != OP_CALL && user->op != OP_PFETCH) {
         if (user->srcExists(1))
            checkSwapSrc01(user);

         for (int s = 0; user->srcExists(s); ++s) {
            Instruction *const load = user->getSrc(s)->getInsn();

            if (!load || load->fixed)
               continue;
            if (load->op != OP_LOAD && load->op != OP_MOV)
               continue;
            if (!targ->insnCanLoad(user, s, load))
               continue;

            // propagate !
            user->setSrc(s, load->getSrc(0));
            if (load->src(0).isIndirect(0))
               user->setIndirect(s, 0, load->getIndirect(0, 0));

            if (load->getDef(0)->refCount() == 0)
               delete_Instruction(prog, load);
         }
      }
      user = following;
   }
   return true;
}
 
// =============================================================================
 
// Evaluate constant expressions.
//
// Instructions whose sources are all immediates are evaluated by expr();
// instructions with a single immediate source are simplified by opnd().
class ConstantFolding : public Pass
{
public:
// Runs the pass over the program, repeating it a limited number of times
// while folds still happen. Returns false if a run fails.
bool foldAll(Program *);

private:
virtual bool visit(BasicBlock *);

// Fold an instruction whose first two / first three sources are immediates.
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
// Simplify an instruction whose source s is the given immediate.
void opnd(Instruction *, ImmediateValue&, int s);

// Fold a single-source f32 operation on an immediate into a MOV.
void unary(Instruction *, const ImmediateValue&);

void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);

// TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
CmpInstruction *findOriginForTestWithZero(Value *);

// Number of folds in the current run; reset by foldAll() before each run.
unsigned int foldCount;

BuildUtil bld;
};
 
// Run constant folding over @prog. A second run is made if the first one
// folded anything; no more than two runs are made in total.
// Returns false as soon as a run fails, true otherwise.
//
// TODO: remember generated immediates and only revisit these
bool
ConstantFolding::foldAll(Program *prog)
{
   unsigned int rerunCount = 0;

   for (;;) {
      foldCount = 0;
      if (!run(prog))
         return false;
      // stop when nothing changed or the rerun budget is used up
      if (!foldCount || ++rerunCount >= 2)
         break;
   }
   return true;
}
 
// Try to fold every instruction of the block except MOV and CALL.
// The most specific form that applies is chosen: three immediate sources,
// then two, then a single immediate in source 0, then one in source 1.
// Always returns true.
bool
ConstantFolding::visit(BasicBlock *bb)
{
   Instruction *insn = bb->getEntry();

   while (insn) {
      // the handlers may delete insn, so fetch its successor first
      Instruction *const following = insn->next;

      if (insn->op != OP_MOV && insn->op != OP_CALL) {
         ImmediateValue src0, src1, src2;

         if (insn->srcExists(2) &&
             insn->src(0).getImmediate(src0) &&
             insn->src(1).getImmediate(src1) &&
             insn->src(2).getImmediate(src2)) {
            expr(insn, src0, src1, src2);
         } else if (insn->srcExists(1) &&
                    insn->src(0).getImmediate(src0) &&
                    insn->src(1).getImmediate(src1)) {
            expr(insn, src0, src1);
         } else if (insn->srcExists(0) && insn->src(0).getImmediate(src0)) {
            opnd(insn, src0, 0);
         } else if (insn->srcExists(1) && insn->src(1).getImmediate(src1)) {
            opnd(insn, src1, 1);
         }
      }
      insn = following;
   }
   return true;
}
 
// Follow the definition chain of @value backwards through NEG, ABS, CVT and
// MOV instructions until an OP_SET is found.
//
// NEG / ABS / CVT are only looked through if their source type equals the
// result type of the instruction defining their source. Returns the SET as
// a CmpInstruction, or NULL if @value is NULL, the chain ends in a value
// without a defining instruction, or any other operation is encountered.
CmpInstruction *
ConstantFolding::findOriginForTestWithZero(Value *value)
{
   if (!value)
      return NULL;
   Instruction *insn = value->getInsn();

   while (insn && insn->op != OP_SET) {
      Instruction *next = NULL;
      switch (insn->op) {
      case OP_NEG:
      case OP_ABS:
      case OP_CVT:
         next = insn->getSrc(0)->getInsn();
         // the source may have no defining instruction at all
         if (!next || insn->sType != next->dType)
            return NULL;
         break;
      case OP_MOV:
         next = insn->getSrc(0)->getInsn();
         break;
      default:
         return NULL;
      }
      insn = next;
   }
   return insn ? insn->asCmp() : NULL;
}
 
// Apply this modifier to the immediate @imm in place.
//
// ABS is applied first, then NEG; floats are clamped to [0, 1] for SAT,
// integers are complemented for NOT. NOT on a float type trips an assertion.
// An unhandled immediate type trips an assertion and zeroes the value.
// Nothing happens if no modifier bit is set.
void
Modifier::applyTo(ImmediateValue& imm) const
{
   if (!bits) // avoid failure if imm.reg.type is unhandled (e.g. b128)
      return;

   const bool wantAbs = (bits & NV50_IR_MOD_ABS) != 0;
   const bool wantNeg = (bits & NV50_IR_MOD_NEG) != 0;
   const bool wantSat = (bits & NV50_IR_MOD_SAT) != 0;
   const bool wantNot = (bits & NV50_IR_MOD_NOT) != 0;

   switch (imm.reg.type) {
   case TYPE_F32:
      if (wantAbs)
         imm.reg.data.f32 = fabsf(imm.reg.data.f32);
      if (wantNeg)
         imm.reg.data.f32 = -imm.reg.data.f32;
      if (wantSat) {
         if (imm.reg.data.f32 < 0.0f) {
            imm.reg.data.f32 = 0.0f;
         } else if (imm.reg.data.f32 > 1.0f) {
            imm.reg.data.f32 = 1.0f;
         }
      }
      assert(!wantNot);
      break;

   case TYPE_S8: // NOTE: will be extended
   case TYPE_S16:
   case TYPE_S32:
   case TYPE_U8: // NOTE: treated as signed
   case TYPE_U16:
   case TYPE_U32:
      if (wantAbs && !(imm.reg.data.s32 >= 0))
         imm.reg.data.s32 = -imm.reg.data.s32;
      if (wantNeg)
         imm.reg.data.s32 = -imm.reg.data.s32;
      if (wantNot)
         imm.reg.data.s32 = ~imm.reg.data.s32;
      break;

   case TYPE_F64:
      if (wantAbs)
         imm.reg.data.f64 = fabs(imm.reg.data.f64);
      if (wantNeg)
         imm.reg.data.f64 = -imm.reg.data.f64;
      if (wantSat) {
         if (imm.reg.data.f64 < 0.0) {
            imm.reg.data.f64 = 0.0;
         } else if (imm.reg.data.f64 > 1.0) {
            imm.reg.data.f64 = 1.0;
         }
      }
      assert(!wantNot);
      break;

   default:
      assert(!"invalid/unhandled type");
      imm.reg.data.u64 = 0;
      break;
   }
}
 
// Return the operation that is equivalent to this modifier: the matching
// single-source operation when exactly one of ABS / NEG / SAT / NOT is set,
// OP_MOV when no bit is set, and OP_CVT for any combination.
operation
Modifier::getOp() const
{
   if (bits == NV50_IR_MOD_ABS)
      return OP_ABS;
   if (bits == NV50_IR_MOD_NEG)
      return OP_NEG;
   if (bits == NV50_IR_MOD_SAT)
      return OP_SAT;
   if (bits == NV50_IR_MOD_NOT)
      return OP_NOT;
   return bits == 0 ? OP_MOV : OP_CVT;
}
 
// Fold an instruction whose sources 0 and 1 are the immediates imm0 and imm1.
//
// The result is computed on the host according to the operation and the
// destination type. Unsupported operation / type combinations return early
// and leave the instruction untouched. On success foldCount is incremented
// and the instruction is rewritten in place:
//  - MAD / FMA become an ADD of the third source and the folded product
//    (which is folded again if the third source is an immediate as well)
//  - PFETCH keeps its opcode with the two arguments combined into one
//  - everything else becomes a MOV of the result, or a SAT if the
//    instruction saturates
void
ConstantFolding::expr(Instruction *i,
ImmediateValue &imm0, ImmediateValue &imm1)
{
struct Storage *const a = &imm0.reg, *const b = &imm1.reg;
struct Storage res;

// start from an all-zero result; some paths below rely on this
memset(&res.data, 0, sizeof(res.data));

switch (i->op) {
case OP_MAD:
case OP_FMA:
case OP_MUL:
// with dnz set, non-finite f32 operands are treated as zero
if (i->dnz && i->dType == TYPE_F32) {
if (!isfinite(a->data.f32))
a->data.f32 = 0.0f;
if (!isfinite(b->data.f32))
b->data.f32 = 0.0f;
}
switch (i->dType) {
case TYPE_F32:
// the post-multiply factor is a power-of-two exponent
res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor);
break;
case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
case TYPE_S32:
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
res.data.s32 = ((int64_t)a->data.s32 * b->data.s32) >> 32;
break;
}
/* fallthrough */
case TYPE_U32:
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32) >> 32;
break;
}
res.data.u32 = a->data.u32 * b->data.u32; break;
default:
return;
}
break;
case OP_DIV:
// A zero divisor leaves the switch with the zeroed result, i.e. the
// division is folded to 0.
// NOTE(review): only data.u32 is tested, for every type. Assuming data is
// a union, an f64 divisor whose low 32 bits are zero (e.g. 1.0) would take
// this path too, and an f32 +0.0 divisor yields 0 rather than inf/nan --
// confirm this is intended.
if (b->data.u32 == 0)
break;
switch (i->dType) {
case TYPE_F32: res.data.f32 = a->data.f32 / b->data.f32; break;
case TYPE_F64: res.data.f64 = a->data.f64 / b->data.f64; break;
// NOTE(review): INT_MIN / -1 is not guarded against here.
case TYPE_S32: res.data.s32 = a->data.s32 / b->data.s32; break;
case TYPE_U32: res.data.u32 = a->data.u32 / b->data.u32; break;
default:
return;
}
break;
case OP_ADD:
switch (i->dType) {
case TYPE_F32: res.data.f32 = a->data.f32 + b->data.f32; break;
case TYPE_F64: res.data.f64 = a->data.f64 + b->data.f64; break;
case TYPE_S32:
case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break;
default:
return;
}
break;
case OP_POW:
switch (i->dType) {
case TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break;
case TYPE_F64: res.data.f64 = pow(a->data.f64, b->data.f64); break;
default:
return;
}
break;
case OP_MAX:
switch (i->dType) {
case TYPE_F32: res.data.f32 = MAX2(a->data.f32, b->data.f32); break;
case TYPE_F64: res.data.f64 = MAX2(a->data.f64, b->data.f64); break;
case TYPE_S32: res.data.s32 = MAX2(a->data.s32, b->data.s32); break;
case TYPE_U32: res.data.u32 = MAX2(a->data.u32, b->data.u32); break;
default:
return;
}
break;
case OP_MIN:
switch (i->dType) {
case TYPE_F32: res.data.f32 = MIN2(a->data.f32, b->data.f32); break;
case TYPE_F64: res.data.f64 = MIN2(a->data.f64, b->data.f64); break;
case TYPE_S32: res.data.s32 = MIN2(a->data.s32, b->data.s32); break;
case TYPE_U32: res.data.u32 = MIN2(a->data.u32, b->data.u32); break;
default:
return;
}
break;
// bitwise operations are done on the full 64 bits regardless of type
case OP_AND:
res.data.u64 = a->data.u64 & b->data.u64;
break;
case OP_OR:
res.data.u64 = a->data.u64 | b->data.u64;
break;
case OP_XOR:
res.data.u64 = a->data.u64 ^ b->data.u64;
break;
// NOTE(review): the shift counts below are not range checked; a count of 32
// or more is undefined behaviour in C++ -- confirm callers never produce one.
case OP_SHL:
res.data.u32 = a->data.u32 << b->data.u32;
break;
case OP_SHR:
switch (i->dType) {
case TYPE_S32: res.data.s32 = a->data.s32 >> b->data.u32; break;
case TYPE_U32: res.data.u32 = a->data.u32 >> b->data.u32; break;
default:
return;
}
break;
case OP_SLCT:
// can only be folded if both choices are the same value
if (a->data.u32 != b->data.u32)
return;
res.data.u32 = a->data.u32;
break;
case OP_EXTBF: {
// source 1 encodes the bitfield: offset in bits 0-7, width in bits 8-15
int offset = b->data.u32 & 0xff;
int width = (b->data.u32 >> 8) & 0xff;
int rshift = offset;
int lshift = 0;
if (width == 0) {
res.data.u32 = 0;
break;
}
// shift the field up to the top bit, then back down so that the typed
// right shift performs the sign or zero extension
if (width + offset < 32) {
rshift = 32 - width;
lshift = 32 - width - offset;
}
if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
res.data.u32 = util_bitreverse(a->data.u32);
else
res.data.u32 = a->data.u32;
switch (i->dType) {
case TYPE_S32: res.data.s32 = (res.data.s32 << lshift) >> rshift; break;
case TYPE_U32: res.data.u32 = (res.data.u32 << lshift) >> rshift; break;
default:
return;
}
break;
}
case OP_POPCNT:
res.data.u32 = util_bitcount(a->data.u32 & b->data.u32);
break;
case OP_PFETCH:
// The two arguments to pfetch are logically added together. Normally
// the second argument will not be constant, but that can happen.
res.data.u32 = a->data.u32 + b->data.u32;
break;
default:
return;
}
++foldCount;

// the modifiers and the post factor were applied during evaluation
i->src(0).mod = Modifier(0);
i->src(1).mod = Modifier(0);
i->postFactor = 0;

i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32));
i->setSrc(1, NULL);

// copy the complete result, the constructor above only took the low 32 bits
i->getSrc(0)->reg.data = res.data;

switch (i->op) {
case OP_MAD:
case OP_FMA: {
i->op = OP_ADD;

// ADD <old source 2>, <folded product>
i->setSrc(1, i->getSrc(0));
i->src(1).mod = i->src(2).mod;
i->setSrc(0, i->getSrc(2));
i->setSrc(2, NULL);

// if the addend is an immediate too, fold the ADD as well
ImmediateValue src0;
if (i->src(0).getImmediate(src0))
expr(i, src0, *i->getSrc(1)->asImm());
// load the product into a register if the target cannot saturate here
if (i->saturate && !prog->getTarget()->isSatSupported(i)) {
bld.setPosition(i, false);
i->setSrc(1, bld.loadImm(NULL, res.data.u32));
}
break;
}
case OP_PFETCH:
// Leave PFETCH alone... we just folded its 2 args into 1.
break;
default:
i->op = i->saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */
break;
}
i->subOp = 0;
}
 
// Fold an instruction whose three sources are the immediates imm0, imm1 and
// imm2. Only OP_INSBF is supported; other operations are left untouched.
//
// For INSBF, source 1 encodes the bitfield (offset in bits 0-7, width in
// bits 8-15); the low bits of source 0 are inserted at that position into
// source 2. Descriptors that do not fit a 32-bit value (offset >= 32 or
// width > 32) are not folded.
//
// On success foldCount is incremented and the instruction becomes a MOV of
// the result.
void
ConstantFolding::expr(Instruction *i,
                      ImmediateValue &imm0,
                      ImmediateValue &imm1,
                      ImmediateValue &imm2)
{
   struct Storage *const a = &imm0.reg, *const b = &imm1.reg, *const c = &imm2.reg;
   struct Storage res;

   memset(&res.data, 0, sizeof(res.data));

   switch (i->op) {
   case OP_INSBF: {
      const unsigned int offset = b->data.u32 & 0xff;
      const unsigned int width = (b->data.u32 >> 8) & 0xff;
      // Shifting a 32-bit value by 32 or more is undefined, so leave such
      // bitfields to be evaluated at run time.
      if (offset >= 32 || width > 32)
         return;
      // Build the mask in unsigned 64-bit arithmetic: (1 << width) overflows
      // a 32-bit int for width 32, and so does shifting e.g. 0xffff by 16.
      const uint32_t bitmask =
         (uint32_t)((((uint64_t)1 << width) - 1) << offset);
      res.data.u32 = ((a->data.u32 << offset) & bitmask) | (c->data.u32 & ~bitmask);
      break;
   }
   default:
      return;
   }

   ++foldCount;
   i->src(0).mod = Modifier(0);
   i->src(1).mod = Modifier(0);
   i->src(2).mod = Modifier(0);

   i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32));
   i->setSrc(1, NULL);
   i->setSrc(2, NULL);

   // copy the complete result storage into the new immediate
   i->getSrc(0)->reg.data = res.data;

   i->op = OP_MOV;
}
 
// Evaluate a single-source f32 operation on the immediate @imm and turn the
// instruction into a MOV of the result. Instructions with another
// destination type or an unsupported operation are left untouched.
void
ConstantFolding::unary(Instruction *i, const ImmediateValue &imm)
{
   if (i->dType != TYPE_F32)
      return;

   const float x = imm.reg.data.f32;
   float folded;

   switch (i->op) {
   case OP_NEG:
      folded = -x;
      break;
   case OP_ABS:
      folded = fabsf(x);
      break;
   case OP_SAT:
      folded = CLAMP(x, 0.0f, 1.0f);
      break;
   case OP_RCP:
      folded = 1.0f / x;
      break;
   case OP_RSQ:
      folded = 1.0f / sqrtf(x);
      break;
   case OP_LG2:
      folded = log2f(x);
      break;
   case OP_EX2:
      folded = exp2f(x);
      break;
   case OP_SIN:
      folded = sinf(x);
      break;
   case OP_COS:
      folded = cosf(x);
      break;
   case OP_SQRT:
      folded = sqrtf(x);
      break;
   case OP_PRESIN:
   case OP_PREEX2:
      // these should be handled in subsequent OP_SIN/COS/EX2
      folded = x;
      break;
   default:
      return;
   }

   i->op = OP_MOV;
   i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), folded));
   i->src(0).mod = Modifier(0);
}
 
// Try to merge the multiplication by the immediate imm2 (source s of the f32
// MUL mul2) into a neighbouring f32 MUL.
//
// Case 1: the other source of mul2 has a single use, carries no modifier and
// is defined by a non-saturating f32 MUL (mul1).
//   - If mul1 has an immediate source, the two immediates are multiplied and
//     mul1 takes over the result of mul2.
//   - Otherwise, if the target supports the factor as a post-multiply, it is
//     stored in mul1->postFactor and mul1 takes over the result of mul2.
//
// Case 2: the result of mul2 has a single use and mul2 does not saturate.
// If the user is an f32 MUL that reads the value without modifier and has no
// immediate in its other source, and the target supports the factor as a
// post-multiply, the user reads the non-immediate source of mul2 directly
// and gets the factor as its postFactor.
//
// A negative factor is expressed through a NEG source modifier.
void
ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
                                        const int s, ImmediateValue& imm2)
{
   const int t = s ? 0 : 1;
   Instruction *insn;
   Instruction *mul1 = NULL; // mul1 before mul2
   int e = 0;
   // effective factor of mul2, including its own post-multiply exponent
   float f = imm2.reg.data.f32 * exp2f(mul2->postFactor);
   ImmediateValue imm1;

   assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32);

   if (mul2->getSrc(t)->refCount() == 1) {
      insn = mul2->getSrc(t)->getInsn();
      // getInsn() yields NULL for values without a defining instruction
      if (insn && !mul2->src(t).mod &&
          insn->op == OP_MUL && insn->dType == TYPE_F32)
         mul1 = insn;
      if (mul1 && !mul1->saturate) {
         int s1;

         if (mul1->src(s1 = 0).getImmediate(imm1) ||
             mul1->src(s1 = 1).getImmediate(imm1)) {
            bld.setPosition(mul1, false);
            // a = mul r, imm1
            // d = mul a, imm2 -> d = mul r, (imm1 * imm2)
            mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32));
            mul1->src(s1).mod = Modifier(0);
            mul2->def(0).replace(mul1->getDef(0), false);
            mul1->saturate = mul2->saturate;
         } else
         if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
            // c = mul a, b
            // d = mul c, imm   -> d = mul_x_imm a, b
            mul1->postFactor = e;
            mul2->def(0).replace(mul1->getDef(0), false);
            if (f < 0)
               mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG);
            mul1->saturate = mul2->saturate;
         }
         return;
      }
   }
   if (mul2->getDef(0)->refCount() == 1 && !mul2->saturate) {
      // b = mul a, imm
      // d = mul b, c   -> d = mul_x_imm a, c
      int s2, t2;
      insn = (*mul2->getDef(0)->uses.begin())->getInsn();
      if (!insn)
         return;
      // from here on mul1 is the multiplication by the immediate and mul2
      // (if found) is its single user
      mul1 = mul2;
      mul2 = NULL;
      s2 = insn->getSrc(0) == mul1->getDef(0) ? 0 : 1;
      t2 = s2 ? 0 : 1;
      if (insn->op == OP_MUL && insn->dType == TYPE_F32)
         if (!insn->src(s2).mod && !insn->src(t2).getImmediate(imm1))
            mul2 = insn;
      if (mul2 && prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
         mul2->postFactor = e;
         mul2->setSrc(s2, mul1->src(t));
         if (f < 0)
            mul2->src(s2).mod *= Modifier(NV50_IR_MOD_NEG);
      }
   }
}
 
void
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
const int t = !s;
const operation op = i->op;
Instruction *newi = i;
 
switch (i->op) {
case OP_MUL:
if (i->dType == TYPE_F32)
tryCollapseChainedMULs(i, s, imm0);
 
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
assert(!isFloatType(i->sType));
if (imm0.isInteger(1) && i->dType == TYPE_S32) {
bld.setPosition(i, false);
// Need to set to the sign value, which is a compare.
newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
TYPE_S32, i->getSrc(t), bld.mkImm(0));
delete_Instruction(prog, i);
} else if (imm0.isInteger(0) || imm0.isInteger(1)) {
// The high bits can't be set in this case (either mul by 0 or
// unsigned by 1)
i->op = OP_MOV;
i->subOp = 0;
i->setSrc(0, new_ImmediateValue(prog, 0u));
i->src(0).mod = Modifier(0);
i->setSrc(1, NULL);
} else if (!imm0.isNegative() && imm0.isPow2()) {
// Translate into a shift
imm0.applyLog2();
i->op = OP_SHR;
i->subOp = 0;
imm0.reg.data.u32 = 32 - imm0.reg.data.u32;
i->setSrc(0, i->getSrc(t));
i->src(0).mod = i->src(t).mod;
i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
i->src(1).mod = 0;
}
} else
if (imm0.isInteger(0)) {
i->op = OP_MOV;
i->setSrc(0, new_ImmediateValue(prog, 0u));
i->src(0).mod = Modifier(0);
i->postFactor = 0;
i->setSrc(1, NULL);
} else
if (!i->postFactor && (imm0.isInteger(1) || imm0.isInteger(-1))) {
if (imm0.isNegative())
i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
i->op = i->src(t).mod.getOp();
if (s == 0) {
i->setSrc(0, i->getSrc(1));
i->src(0).mod = i->src(1).mod;
i->src(1).mod = 0;
}
if (i->op != OP_CVT)
i->src(0).mod = 0;
i->setSrc(1, NULL);
} else
if (!i->postFactor && (imm0.isInteger(2) || imm0.isInteger(-2))) {
if (imm0.isNegative())
i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
i->op = OP_ADD;
i->setSrc(s, i->getSrc(t));
i->src(s).mod = i->src(t).mod;
} else
if (!isFloatType(i->sType) && !imm0.isNegative() && imm0.isPow2()) {
i->op = OP_SHL;
imm0.applyLog2();
i->setSrc(0, i->getSrc(t));
i->src(0).mod = i->src(t).mod;
i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
i->src(1).mod = 0;
}
break;
case OP_MAD:
if (imm0.isInteger(0)) {
i->setSrc(0, i->getSrc(2));
i->src(0).mod = i->src(2).mod;
i->setSrc(1, NULL);
i->setSrc(2, NULL);
i->op = i->src(0).mod.getOp();
if (i->op != OP_CVT)
i->src(0).mod = 0;
} else
if (imm0.isInteger(1) || imm0.isInteger(-1)) {
if (imm0.isNegative())
i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
if (s == 0) {
i->setSrc(0, i->getSrc(1));
i->src(0).mod = i->src(1).mod;
}
i->setSrc(1, i->getSrc(2));
i->src(1).mod = i->src(2).mod;
i->setSrc(2, NULL);
i->op = OP_ADD;
}
break;
case OP_ADD:
if (i->usesFlags())
break;
if (imm0.isInteger(0)) {
if (s == 0) {
i->setSrc(0, i->getSrc(1));
i->src(0).mod = i->src(1).mod;
}
i->setSrc(1, NULL);
i->op = i->src(0).mod.getOp();
if (i->op != OP_CVT)
i->src(0).mod = Modifier(0);
}
break;
 
case OP_DIV:
if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32))
break;
bld.setPosition(i, false);
if (imm0.reg.data.u32 == 0) {
break;
} else
if (imm0.reg.data.u32 == 1) {
i->op = OP_MOV;
i->setSrc(1, NULL);
} else
if (i->dType == TYPE_U32 && imm0.isPow2()) {
i->op = OP_SHR;
i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32)));
} else
if (i->dType == TYPE_U32) {
Instruction *mul;
Value *tA, *tB;
const uint32_t d = imm0.reg.data.u32;
uint32_t m;
int r, s;
uint32_t l = util_logbase2(d);
if (((uint32_t)1 << l) < d)
++l;
m = (((uint64_t)1 << 32) * (((uint64_t)1 << l) - d)) / d + 1;
r = l ? 1 : 0;
s = l ? (l - 1) : 0;
 
tA = bld.getSSA();
tB = bld.getSSA();
mul = bld.mkOp2(OP_MUL, TYPE_U32, tA, i->getSrc(0),
bld.loadImm(NULL, m));
mul->subOp = NV50_IR_SUBOP_MUL_HIGH;
bld.mkOp2(OP_SUB, TYPE_U32, tB, i->getSrc(0), tA);
tA = bld.getSSA();
if (r)
bld.mkOp2(OP_SHR, TYPE_U32, tA, tB, bld.mkImm(r));
else
tA = tB;
tB = s ? bld.getSSA() : i->getDef(0);
newi = bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
if (s)
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
 
delete_Instruction(prog, i);
} else
if (imm0.reg.data.s32 == -1) {
i->op = OP_NEG;
i->setSrc(1, NULL);
} else {
LValue *tA, *tB;
LValue *tD;
const int32_t d = imm0.reg.data.s32;
int32_t m;
int32_t l = util_logbase2(static_cast<unsigned>(abs(d)));
if ((1 << l) < abs(d))
++l;
if (!l)
l = 1;
m = ((uint64_t)1 << (32 + l - 1)) / abs(d) + 1 - ((uint64_t)1 << 32);
 
tA = bld.getSSA();
tB = bld.getSSA();
bld.mkOp3(OP_MAD, TYPE_S32, tA, i->getSrc(0), bld.loadImm(NULL, m),
i->getSrc(0))->subOp = NV50_IR_SUBOP_MUL_HIGH;
if (l > 1)
bld.mkOp2(OP_SHR, TYPE_S32, tB, tA, bld.mkImm(l - 1));
else
tB = tA;
tA = bld.getSSA();
bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, TYPE_S32, i->getSrc(0), bld.mkImm(0));
tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue();
newi = bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
if (d < 0)
bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
 
delete_Instruction(prog, i);
}
break;
 
case OP_MOD:
if (i->sType == TYPE_U32 && imm0.isPow2()) {
bld.setPosition(i, false);
i->op = OP_AND;
i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1));
}
break;
 
case OP_SET: // TODO: SET_AND,OR,XOR
{
CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
CondCode cc, ccZ;
if (i->src(t).mod != Modifier(0))
return;
if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
return;
cc = si->setCond;
ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
if (s == 0)
ccZ = reverseCondCode(ccZ);
switch (ccZ) {
case CC_LT: cc = CC_FL; break;
case CC_GE: cc = CC_TR; break;
case CC_EQ: cc = inverseCondCode(cc); break;
case CC_LE: cc = inverseCondCode(cc); break;
case CC_GT: break;
case CC_NE: break;
default:
return;
}
i->asCmp()->setCond = cc;
i->setSrc(0, si->src(0));
i->setSrc(1, si->src(1));
i->sType = si->sType;
}
break;
 
case OP_SHL:
{
if (s != 1 || i->src(0).mod != Modifier(0))
break;
// try to concatenate shifts
Instruction *si = i->getSrc(0)->getInsn();
if (!si || si->op != OP_SHL)
break;
ImmediateValue imm1;
if (si->src(1).getImmediate(imm1)) {
bld.setPosition(i, false);
i->setSrc(0, si->getSrc(0));
i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32));
}
}
break;
 
case OP_ABS:
case OP_NEG:
case OP_SAT:
case OP_LG2:
case OP_RCP:
case OP_SQRT:
case OP_RSQ:
case OP_PRESIN:
case OP_SIN:
case OP_COS:
case OP_PREEX2:
case OP_EX2:
unary(i, imm0);
break;
case OP_BFIND: {
int32_t res;
switch (i->dType) {
case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break;
case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break;
default:
return;
}
if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0)
res = 31 - res;
bld.setPosition(i, false); /* make sure bld is init'ed */
i->setSrc(0, bld.mkImm(res));
i->setSrc(1, NULL);
i->op = OP_MOV;
i->subOp = 0;
break;
}
case OP_POPCNT: {
// Only deal with 1-arg POPCNT here
if (i->srcExists(1))
break;
uint32_t res = util_bitcount(imm0.reg.data.u32);
i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res));
i->setSrc(1, NULL);
i->op = OP_MOV;
break;
}
default:
return;
}
if (newi->op != op)
foldCount++;
}
 
// =============================================================================
 
// Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed.
// The visit() implementation additionally merges an OP_SAT instruction into
// the instruction producing its input when the target allows it.
class ModifierFolding : public Pass
{
private:
// Per-basic-block entry point of the pass.
virtual bool visit(BasicBlock *);
};
 
// Walk all instructions of @bb.  For each of the first three sources of an
// instruction, if the source is produced by an instruction whose opcode maps
// to a modifier, try to read that producer's input directly and express the
// producer as a modifier on the source reference instead.
// Afterwards, a SAT whose input has at most one reference is merged into the
// producer as a saturate flag (if the target supports that).
// Always returns true.
bool
ModifierFolding::visit(BasicBlock *bb)
{
   const Target *const targ = prog->getTarget();

   Instruction *insn = bb->getEntry();
   while (insn) {
      // insn may get deleted at the end of this iteration
      Instruction *const following = insn->next;

      if (0 && insn->op == OP_SUB) {
         // turn "sub" into "add neg" (do we really want this ?)
         insn->op = OP_ADD;
         insn->src(0).mod = insn->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
      }

      for (int s = 0; s < 3; ++s) {
         if (!insn->srcExists(s))
            break;

         Instruction *const def = insn->getSrc(s)->getInsn();
         if (!def)
            continue;
         // skip predicated producers and values with more than 8 references
         if (def->predSrc >= 0 || def->getDef(0)->refCount() > 8)
            continue;

         if (insn->sType == TYPE_U32 && def->dType == TYPE_S32) {
            // an S32 result read as U32 is only accepted for ABS/NEG
            // feeding an ADD/MUL
            const bool userIsAddOrMul =
               insn->op == OP_ADD || insn->op == OP_MUL;
            const bool defIsAbsOrNeg =
               def->op == OP_ABS || def->op == OP_NEG;
            if (!(userIsAddOrMul && defIsAbsOrNeg))
               continue;
         } else if (insn->sType != def->dType) {
            continue;
         }

         Modifier folded = Modifier(def->op);
         if (folded == Modifier(0))
            continue; // producer's opcode is not a modifier operation
         folded *= def->src(0).mod;

         if (insn->op == OP_ABS || insn->src(s).mod.abs()) {
            // the user takes the absolute value anyway: drop NEG and ABS
            folded = folded & Modifier(~(NV50_IR_MOD_NEG | NV50_IR_MOD_ABS));
         } else if (insn->op == OP_NEG && folded.neg()) {
            assert(s == 0);
            // NEG must not appear both as opcode and as source modifier,
            // so cancel the two negations and pick a new opcode.
            folded = folded & Modifier(~NV50_IR_MOD_NEG);
            insn->op = folded.getOp();
            folded = folded & Modifier(~NV50_IR_MOD_ABS);
            // NOTE(review): if only ABS was left above, the opcode chosen
            // by getOp() is overwritten with OP_MOV here because the
            // remaining modifier is empty - confirm this is intended.
            if (folded == Modifier(0))
               insn->op = OP_MOV;
         }

         if (targ->isModSupported(insn, s, folded)) {
            insn->setSrc(s, def->getSrc(0));
            insn->src(s).mod *= folded;
         }
      }

      if (insn->op == OP_SAT) {
         Instruction *const producer = insn->getSrc(0)->getInsn();
         if (producer &&
             producer->getDef(0)->refCount() <= 1 &&
             targ->isSatSupported(producer)) {
            // let the producer saturate and write the SAT's result directly
            producer->saturate = 1;
            producer->setDef(0, insn->getDef(0));
            delete_Instruction(prog, insn);
         }
      }

      insn = following;
   }

   return true;
}
 
// =============================================================================
 
// Algebraic simplifications on single instructions, dispatched by opcode
// from visit():
// MUL + ADD -> MAD/FMA
// MIN/MAX(a, a) -> a, etc.
// SLCT(a, b, const) -> cc(const) ? a : b
// RCP(RCP(a)) -> a
// MUL(MUL(a, b), const) -> MUL_Xconst(a, b)
class AlgebraicOpt : public Pass
{
private:
// Per-basic-block entry point; calls the handle* function matching each
// instruction's opcode.
virtual bool visit(BasicBlock *);

// ABS(SUB(a, b)) -> SAD(a, b, 0)
void handleABS(Instruction *);
// ADD -> MAD or SAD, see tryADDToMADOrSAD(); returns true if rewritten
bool handleADD(Instruction *);
bool tryADDToMADOrSAD(Instruction *, operation toOp);
// MIN/MAX with two identical operands
void handleMINMAX(Instruction *);
// RCP(RCP(a))
void handleRCP(Instruction *);
// SLCT with immediate selector or identical operands
void handleSLCT(Instruction *);
// AND/OR/XOR of identical operands or of SET results
void handleLOGOP(Instruction *);
// F2I(NEG(SET)) chains
void handleCVT(Instruction *);
// SUCLAMP fed by an ADD with immediate
void handleSUCLAMP(Instruction *);

// helper used by the handlers to create immediates and temporaries
BuildUtil bld;
};
 
// Turn ABS(SUB(a, b)) or ABS(ADD(a, NEG(b))) into SAD(a, b, 0) when the
// target supports OP_SAD for the type of @abs.
//
// The rewrite is only done if
//  - the value consumed by @abs is produced by an ADD or SUB whose two
//    operands are unmodified GPR values,
//  - there is no hidden conversion (source and destination type of @abs are
//    equal and match the signed variant of the producer's type),
//  - for ADD, one operand is produced by a NEG that does not convert types.
void
AlgebraicOpt::handleABS(Instruction *abs)
{
   Instruction *sub = abs->getSrc(0)->getInsn();
   if (!sub ||
       !prog->getTarget()->isOpSupported(OP_SAD, abs->dType))
      return;
   // Make sure the producer is a two-operand ADD/SUB *before* looking at its
   // second source; previously src(1) was inspected for any kind of
   // producer, including ones that may not have a second source at all.
   if (sub->op != OP_ADD && sub->op != OP_SUB)
      return;
   // expect not to have mods yet, if we do, bail
   if (sub->src(0).mod || sub->src(1).mod)
      return;
   // hidden conversion ?
   const DataType ty = intTypeToSigned(sub->dType);
   if (abs->dType != abs->sType || ty != abs->sType)
      return;
   if (sub->src(0).getFile() != FILE_GPR ||
       sub->src(1).getFile() != FILE_GPR)
      return;

   Value *src0 = sub->getSrc(0);
   Value *src1 = sub->getSrc(1);

   if (sub->op == OP_ADD) {
      // look for ADD(a, NEG(b)); if the second operand has a producer that
      // is not a NEG, try ADD(NEG(b), a) instead
      Instruction *neg = sub->getSrc(1)->getInsn();
      if (neg && neg->op != OP_NEG) {
         neg = sub->getSrc(0)->getInsn();
         src0 = sub->getSrc(1);
      }
      if (!neg || neg->op != OP_NEG ||
          neg->dType != neg->sType || neg->sType != ty)
         return;
      src1 = neg->getSrc(0);
   }

   // found ABS(SUB)
   abs->moveSources(1, 2); // move sources >=1 up by 2
   abs->op = OP_SAD;
   abs->setType(sub->dType);
   abs->setSrc(0, src0);
   abs->setSrc(1, src1);
   // third SAD operand is a zero of matching size
   bld.setPosition(abs, false);
   abs->setSrc(2, bld.loadImm(bld.getSSA(typeSizeof(ty)), 0));
}
 
// Try to fuse @add with the instruction producing one of its operands.
// Both operands must be GPR values.  MAD is attempted first, then SAD, each
// only if the target supports the opcode for the ADD's destination type.
// Returns true if @add was rewritten.
bool
AlgebraicOpt::handleADD(Instruction *add)
{
   Value *const lhs = add->getSrc(0);
   Value *const rhs = add->getSrc(1);

   const bool bothGPR =
      lhs->reg.file == FILE_GPR && rhs->reg.file == FILE_GPR;
   if (!bothGPR)
      return false;

   if (prog->getTarget()->isOpSupported(OP_MAD, add->dType) &&
       tryADDToMADOrSAD(add, OP_MAD))
      return true;

   if (prog->getTarget()->isOpSupported(OP_SAD, add->dType) &&
       tryADDToMADOrSAD(add, OP_SAD))
      return true;

   return false;
}
 
// ADD(SAD(a,b,0), c) -> SAD(a,b,c)
// ADD(MUL(a,b), c) -> MAD(a,b,c)
//
// @add:  the ADD instruction to rewrite in place
// @toOp: OP_MAD (fold a MUL operand) or OP_SAD (fold a SAD operand)
//
// The operand to fold must have exactly one reference, a unique defining
// instruction of the expected opcode, and live in the same basic block as
// @add.  Returns true if @add was turned into @toOp, false if it was left
// untouched.
bool
AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
{
   Value *src0 = add->getSrc(0);
   Value *src1 = add->getSrc(1);
   Value *src;
   int s;
   const operation srcOp = toOp == OP_SAD ? OP_SAD : OP_MUL;
   // for MAD only NEG is tolerated on the involved sources, for SAD nothing
   const Modifier modBad = Modifier(~((toOp == OP_MAD) ? NV50_IR_MOD_NEG : 0));
   Modifier mod[4];

   // pick the operand (s) that is produced by a single-use MUL/SAD
   if (src0->refCount() == 1 &&
       src0->getUniqueInsn() && src0->getUniqueInsn()->op == srcOp)
      s = 0;
   else
   if (src1->refCount() == 1 &&
       src1->getUniqueInsn() && src1->getUniqueInsn()->op == srcOp)
      s = 1;
   else
      return false;

   if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) ||
       (src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb))
      return false;

   src = add->getSrc(s);

   // A saturating producer clamps the intermediate product/difference; the
   // fused instruction has no way to express that, so folding it would
   // silently drop the clamp.  Same for a post-multiply factor.
   if (src->getInsn()->saturate || src->getInsn()->postFactor)
      return false;
   if (toOp == OP_SAD) {
      // only a SAD with a zero accumulator can absorb the addend
      ImmediateValue imm;
      if (!src->getInsn()->src(2).getImmediate(imm))
         return false;
      if (!imm.isInteger(0))
         return false;
   }

   mod[0] = add->src(0).mod;
   mod[1] = add->src(1).mod;
   mod[2] = src->getUniqueInsn()->src(0).mod;
   mod[3] = src->getUniqueInsn()->src(1).mod;

   if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad)
      return false;

   add->op = toOp;
   add->subOp = src->getInsn()->subOp; // potentially mul-high

   // the operand that is not folded becomes the third source
   add->setSrc(2, add->src(s ? 0 : 1));

   // a negation of the folded operand is moved onto the first factor
   add->setSrc(0, src->getInsn()->getSrc(0));
   add->src(0).mod = mod[2] ^ mod[s];
   add->setSrc(1, src->getInsn()->getSrc(1));
   add->src(1).mod = mod[3];

   return true;
}
 
// Simplify MIN/MAX whose two operands are the same GPR value with equal
// source modifiers: either forward the operand to all users of the result
// and remove the instruction, or degrade it to a one-operand CVT.
void
AlgebraicOpt::handleMINMAX(Instruction *minmax)
{
   Value *const lhs = minmax->getSrc(0);
   Value *const rhs = minmax->getSrc(1);

   if (lhs != rhs)
      return;
   if (lhs->reg.file != FILE_GPR)
      return;

   if (!(minmax->src(0).mod == minmax->src(1).mod)) {
      // TODO:
      // min(x, -x) = -abs(x)
      // min(x, -abs(x)) = -abs(x)
      // min(x, abs(x)) = x
      // max(x, -abs(x)) = x
      // max(x, abs(x)) = abs(x)
      // max(x, -x) = abs(x)
      return;
   }

   if (!minmax->def(0).mayReplace(minmax->src(0))) {
      // result cannot simply be substituted, keep a conversion instead
      minmax->op = OP_CVT;
      minmax->setSrc(1, NULL);
      return;
   }

   minmax->def(0).replace(minmax->src(0), false);
   minmax->bb->remove(minmax);
}
 
// RCP(RCP(a)): read a directly and replace the opcode by the operation that
// corresponds to the product of both source modifiers.
void
AlgebraicOpt::handleRCP(Instruction *rcp)
{
   Instruction *const inner = rcp->getSrc(0)->getUniqueInsn();
   if (!inner)
      return;
   if (inner->op != OP_RCP)
      return;

   Modifier combined = rcp->src(0).mod * inner->src(0).mod;
   rcp->op = combined.getOp();
   rcp->setSrc(0, inner->getSrc(0));
}
 
// Reduce SLCT to a MOV when the outcome is known: either the selector
// (source 2) is an immediate, in which case it is compared against 0.0f with
// the instruction's condition to pick the operand, or both selectable
// operands are the same value.
void
AlgebraicOpt::handleSLCT(Instruction *slct)
{
   const bool constSelector =
      slct->getSrc(2)->reg.file == FILE_IMMEDIATE;

   if (!constSelector && slct->getSrc(0) != slct->getSrc(1))
      return;

   if (constSelector &&
       slct->getSrc(2)->asImm()->compare(slct->asCmp()->setCond, 0.0f))
      slct->setSrc(0, slct->getSrc(1));

   slct->op = OP_MOV;
   slct->setSrc(1, NULL);
   slct->setSrc(2, NULL);
}
 
// Simplify AND/OR/XOR of two GPR operands:
//  - AND(a, a) / OR(a, a): forward a to the users and delete the instruction
//  - LOGOP(SETx, SET): clone both comparisons after @logop, make the first
//    one write a predicate and chain it into the second one, which becomes a
//    SET_AND/SET_OR/SET_XOR producing the result of @logop
void
AlgebraicOpt::handleLOGOP(Instruction *logop)
{
   Value *const lhs = logop->getSrc(0);
   Value *const rhs = logop->getSrc(1);

   if (lhs->reg.file != FILE_GPR || rhs->reg.file != FILE_GPR)
      return;

   if (lhs == rhs) {
      const bool idempotent = logop->op == OP_AND || logop->op == OP_OR;
      if (idempotent && logop->def(0).mayReplace(logop->src(0))) {
         logop->def(0).replace(logop->src(0), false);
         delete_Instruction(prog, logop);
      }
      return;
   }

   // try AND(SET, SET) -> SET_AND(SET)
   Instruction *first = lhs->getInsn();
   Instruction *second = rhs->getInsn();

   if (!first || first->fixed)
      return;
   if (!second || second->fixed)
      return;

   // the instruction that gets turned into the combining SET must be a
   // plain SET; exchange the roles if only the other one qualifies
   if (second->op != OP_SET) {
      if (first->op != OP_SET)
         return;
      Instruction *const tmp = second;
      second = first;
      first = tmp;
   }

   operation redOp;
   if (logop->op == OP_AND)
      redOp = OP_SET_AND;
   else if (logop->op == OP_XOR)
      redOp = OP_SET_XOR;
   else
      redOp = OP_SET_OR;

   if (!prog->getTarget()->isOpSupported(redOp, second->sType))
      return;

   switch (first->op) {
   case OP_SET:
   case OP_SET_AND:
   case OP_SET_OR:
   case OP_SET_XOR:
      break;
   default:
      return;
   }

   // bail only if both results have further users
   if (first->getDef(0)->refCount() > 1 &&
       second->getDef(0)->refCount() > 1)
      return;
   if (first->getPredicate() || second->getPredicate())
      return;
   // check that they don't source each other
   for (int s = 0; s < 2; ++s) {
      if (first->getSrc(s) == second->getDef(0))
         return;
      if (second->getSrc(s) == first->getDef(0))
         return;
   }

   first = cloneForward(func, first);
   second = cloneShallow(func, second);
   // inserting second first leaves the order: logop, first, second
   logop->bb->insertAfter(logop, second);
   logop->bb->insertAfter(logop, first);

   // the first comparison now produces a predicate ...
   first->dType = TYPE_U8;
   first->getDef(0)->reg.file = FILE_PREDICATE;
   first->getDef(0)->reg.size = 1;
   // ... which the second one combines with its own result
   second->setSrc(2, first->getDef(0));
   second->op = redOp;
   second->setDef(0, logop->getDef(0));
   delete_Instruction(prog, logop);
}
 
// F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0
// nv50:
//  F2I(NEG(I2F(ABS(SET))))
//
// When the chain is recognized, a shallow clone of the SET with U32
// destination type is inserted after @cvt and takes over its result; @cvt
// itself is deleted.
void
AlgebraicOpt::handleCVT(Instruction *cvt)
{
   const bool isF2I = cvt->sType == TYPE_F32 && cvt->dType == TYPE_S32;
   if (!isF2I || cvt->src(0).mod != Modifier(0))
      return;

   Instruction *const neg = cvt->getSrc(0)->getInsn();
   if (!neg)
      return;
   if (neg->op != OP_NEG || neg->dType != TYPE_F32)
      return;
   if (neg->src(0).mod != Modifier(0))
      return;

   Instruction *set = neg->getSrc(0)->getInsn();
   if (!set)
      return;

   // nvc0 has an f32 SET; on nv50 the SET yields an integer that is
   // converted via ABS and I2F first
   DataType setType = TYPE_F32;
   if (set->op == OP_CVT &&
       set->dType == TYPE_F32 &&
       set->sType == TYPE_S32) {
      Instruction *const absInsn = set->getSrc(0)->getInsn();
      if (!absInsn || absInsn->op != OP_ABS ||
          absInsn->sType != TYPE_S32 || absInsn->src(0).mod)
         return;
      set = absInsn->getSrc(0)->getInsn();
      if (!set)
         return;
      setType = TYPE_U32;
   }
   if (set->op != OP_SET || set->dType != setType)
      return;

   Instruction *const bset = cloneShallow(func, set);
   bset->dType = TYPE_U32;
   bset->setDef(0, cvt->getDef(0));
   cvt->bb->insertAfter(cvt, bset);
   delete_Instruction(prog, cvt);
}
 
// SUCLAMP dst, (ADD b imm), k, 0 -> SUCLAMP dst, b, k, imm (if imm fits s6)
//
// The immediate of the ADD is accumulated onto the value already present in
// source 2; the fold is skipped when the sum leaves the range [-32, 31],
// when the ADD result has other users, or when the remaining addend is not
// an unmodified GPR value.
void
AlgebraicOpt::handleSUCLAMP(Instruction *insn)
{
   ImmediateValue addend;
   int32_t bias = insn->getSrc(2)->asImm()->reg.data.s32;

   assert(insn->srcExists(0) && insn->src(0).getFile() == FILE_GPR);

   // look for ADD (TODO: only count references by non-SUCLAMP)
   if (insn->getSrc(0)->refCount() > 1)
      return;
   Instruction *const add = insn->getSrc(0)->getInsn();
   if (!add || add->op != OP_ADD)
      return;
   if (add->dType != TYPE_U32 && add->dType != TYPE_S32)
      return;

   // find which operand is the immediate; 'other' indexes the remaining one
   int other;
   if (add->src(0).getImmediate(addend))
      other = 1;
   else if (add->src(1).getImmediate(addend))
      other = 0;
   else
      return;

   // determine if immediate fits
   bias += addend.reg.data.s32;
   if (bias > 31 || bias < -32)
      return;
   // determine if other addend fits
   if (add->src(other).getFile() != FILE_GPR ||
       add->src(other).mod != Modifier(0))
      return;

   bld.setPosition(insn, false); // make sure bld is init'ed
   // replace sources
   insn->setSrc(2, bld.mkImm(bias));
   insn->setSrc(0, add->getSrc(other));
}
 
// Dispatch every instruction of @bb to the handler for its opcode.
// Always returns true.
bool
AlgebraicOpt::visit(BasicBlock *bb)
{
   Instruction *insn = bb->getEntry();

   while (insn) {
      // handlers may remove insn, so fetch the successor up front
      Instruction *const following = insn->next;

      switch (insn->op) {
      case OP_ABS:     handleABS(insn);     break;
      case OP_ADD:     handleADD(insn);     break;
      case OP_RCP:     handleRCP(insn);     break;
      case OP_SLCT:    handleSLCT(insn);    break;
      case OP_CVT:     handleCVT(insn);     break;
      case OP_SUCLAMP: handleSUCLAMP(insn); break;
      case OP_MIN:
      case OP_MAX:
         handleMINMAX(insn);
         break;
      case OP_AND:
      case OP_OR:
      case OP_XOR:
         handleLOGOP(insn);
         break;
      default:
         break;
      }

      insn = following;
   }

   return true;
}
 
// =============================================================================
 
// Set the offset of the symbol addressed by @ldst (source 0) to @offset.
// If the symbol is referenced more than once it is cloned first, so the
// other references keep their offset.  Nothing happens if the offset
// already matches.
static inline void
updateLdStOffset(Instruction *ldst, int32_t offset, Function *fn)
{
   if (ldst->getSrc(0)->reg.data.offset == offset)
      return;

   const bool shared = ldst->getSrc(0)->refCount() > 1;
   if (shared)
      ldst->setSrc(0, cloneShallow(fn, ldst->getSrc(0)));

   ldst->getSrc(0)->reg.data.offset = offset;
}
 
// Combine loads and stores, forward stores to loads where possible.
// Previously seen loads and stores are tracked per data file in linked
// lists of Record objects.
class MemoryOpt : public Pass
{
private:
// Describes one tracked load or store instruction.
class Record
{
public:
Record *next; // following record in the list (see link/unlink)
Instruction *insn; // the tracked load/store
const Value *rel[2]; // indirect sources of the access (getIndirect(0, 0/1))
const Value *base; // base of the accessed symbol (getBase())
int32_t offset; // offset of the accessed symbol
int8_t fileIndex; // file index of the accessed symbol
uint8_t size; // access size in bytes (typeSizeof of the type)
bool locked; // set by lockStores(); findRecord() then skips the record
// unless the querying instruction is an OP_LOAD
Record *prev; // preceding record in the list, NULL for the head

// true if the access of @ldst may touch the memory described here
bool overlaps(const Instruction *ldst) const;

// insert at the head of / remove from the list with the given head
inline void link(Record **);
inline void unlink(Record **);
// fill the address related fields from @ldst
inline void set(const Instruction *ldst);
};

public:
MemoryOpt();

// list heads, indexed by the data file of the access
Record *loads[DATA_FILE_COUNT];
Record *stores[DATA_FILE_COUNT];

// allocator for Record objects
MemoryPool recordPool;

private:
// runs runOpt() on the block, and once more if the first run returned true
virtual bool visit(BasicBlock *);
bool runOpt(BasicBlock *);

// list head (loads or stores) that the instruction belongs to
Record **getList(const Instruction *);

// look up a record that overlaps or adjoins the access of the instruction;
// @isAdjacent reports which of the two was found
Record *findRecord(const Instruction *, bool load, bool& isAdjacent) const;

// merge @insn into load/store instruction from @rec
bool combineLd(Record *rec, Instruction *ld);
bool combineSt(Record *rec, Instruction *st);

// replace a load by values of an earlier load / store, or merge a store
// with an earlier overlapping store
bool replaceLdFromLd(Instruction *ld, Record *ldRec);
bool replaceLdFromSt(Instruction *ld, Record *stRec);
bool replaceStFromSt(Instruction *restrict st, Record *stRec);

void addRecord(Instruction *ldst);
void purgeRecords(Instruction *const st, DataFile);
void lockStores(Instruction *const ld);
void reset();

private:
// initialized to NULL by the constructor
Record *prevRecord;
};
 
// Set up the record pool for objects of sizeof(Record) and start with empty
// load/store lists for every data file.
MemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record), 6)
{
   int file = 0;
   while (file < DATA_FILE_COUNT) {
      loads[file] = NULL;
      stores[file] = NULL;
      ++file;
   }
   prevRecord = NULL;
}
 
void
MemoryOpt::reset()
{
for (unsigned int i = 0; i < DATA_FILE_COUNT; ++i) {
Record *it, *next;
for (it = loads[i]; it; it = next) {
next = it->next;
recordPool.release(it);
}
loads[i] = NULL;
for (it = stores[i]; it; it = next) {
next = it->next;
recordPool.release(it);
}
stores[i] = NULL;
}
}
 
// Merge the load @ld into the load tracked by @rec so that a single, wider
// load remains.  The definitions of both loads end up on rec->insn, ordered
// by offset, and @ld is deleted.
// Returns false (nothing changed) if the target does not support an access
// of the combined size in that file, or if the combined access would be
// misaligned.
bool
MemoryOpt::combineLd(Record *rec, Instruction *ld)
{
int32_t offRc = rec->offset;
int32_t offLd = ld->getSrc(0)->reg.data.offset;
int sizeRc = rec->size;
int sizeLd = typeSizeof(ld->dType);
int size = sizeRc + sizeLd;
int d, j;

if (!prog->getTarget()->
isAccessSupported(ld->getSrc(0)->reg.file, typeOfSize(size)))
return false;
// no unaligned loads
// (8 byte result: lower offset must be 8 byte aligned,
//  12 byte result: lower offset must be 16 byte aligned)
if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) ||
((size == 0xc) && (MIN2(offLd, offRc) & 0xf)))
return false;

assert(sizeRc + sizeLd <= 16 && offRc != offLd);

// count the definitions of rec->insn that make up its size -> j
for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j);

if (offLd < offRc) {
// @ld comes first in memory: make room at the low definition slots
int sz;
for (sz = 0, d = 0; sz < sizeLd; sz += ld->getDef(d)->reg.size, ++d);
// d: nr of definitions in ld
// j: nr of definitions in rec->insn, move:
for (d = d + j - 1; j > 0; --j, --d)
rec->insn->setDef(d, rec->insn->getDef(j - 1));

// the combined load starts at @ld's offset; do not modify a symbol
// that is referenced elsewhere
if (rec->insn->getSrc(0)->refCount() > 1)
rec->insn->setSrc(0, cloneShallow(func, rec->insn->getSrc(0)));
rec->offset = rec->insn->getSrc(0)->reg.data.offset = offLd;

d = 0;
} else {
// @ld comes second: append after the existing definitions
d = j;
}
// move definitions of @ld to @rec->insn
for (j = 0; sizeLd; ++j, ++d) {
sizeLd -= ld->getDef(j)->reg.size;
rec->insn->setDef(d, ld->getDef(j));
}

// widen record, symbol and instruction type to the combined size
rec->size = size;
rec->insn->getSrc(0)->reg.size = size;
rec->insn->setType(typeOfSize(size));

delete_Instruction(prog, ld);

return true;
}
 
// Merge the store tracked by @rec into the store @st so that a single, wider
// store remains.  @st receives the values of both stores ordered by offset,
// the recorded store is deleted and @rec is updated to track @st.
// Returns false (nothing changed) if the target does not support an access
// of the combined size in that file, or if an 8 byte result would not be
// 8 byte aligned.
bool
MemoryOpt::combineSt(Record *rec, Instruction *st)
{
int32_t offRc = rec->offset;
int32_t offSt = st->getSrc(0)->reg.data.offset;
int sizeRc = rec->size;
int sizeSt = typeSizeof(st->dType);
// (this initial value is overwritten in both branches below before use)
int s = sizeSt / 4;
int size = sizeRc + sizeSt;
int j, k;
Value *src[4]; // no modifiers in ValueRef allowed for st
Value *extra[3];

if (!prog->getTarget()->
isAccessSupported(st->getSrc(0)->reg.file, typeOfSize(size)))
return false;
if (size == 8 && MIN2(offRc, offSt) & 0x7)
return false;

st->takeExtraSources(0, extra); // save predicate and indirect address

if (offRc < offSt) {
// recorded store comes first in memory
// save values from @st
for (s = 0; sizeSt; ++s) {
sizeSt -= st->getSrc(s + 1)->reg.size;
src[s] = st->getSrc(s + 1);
}
// set record's values as low sources of @st
for (j = 1; sizeRc; ++j) {
sizeRc -= rec->insn->getSrc(j)->reg.size;
st->setSrc(j, rec->insn->getSrc(j));
}
// set saved values as high sources of @st
for (k = j, j = 0; j < s; ++j)
st->setSrc(k++, src[j]);

// the combined store starts at the record's offset
updateLdStOffset(st, offRc, func);
} else {
// @st comes first: skip its own values, then append the record's
for (j = 1; sizeSt; ++j)
sizeSt -= st->getSrc(j)->reg.size;
for (s = 1; sizeRc; ++j, ++s) {
sizeRc -= rec->insn->getSrc(s)->reg.size;
st->setSrc(j, rec->insn->getSrc(s));
}
rec->offset = offSt;
}
st->putExtraSources(0, extra); // restore pointer and predicate

// @st replaces the recorded store
delete_Instruction(prog, rec->insn);
rec->insn = st;
rec->size = size;
rec->insn->getSrc(0)->reg.size = size;
rec->insn->setType(typeOfSize(size));
return true;
}
 
void
MemoryOpt::Record::set(const Instruction *ldst)
{
const Symbol *mem = ldst->getSrc(0)->asSym();
fileIndex = mem->reg.fileIndex;
rel[0] = ldst->getIndirect(0, 0);
rel[1] = ldst->getIndirect(0, 1);
offset = mem->reg.data.offset;
base = mem->getBase();
size = typeSizeof(ldst->sType);
}
 
// Insert this record at the front of the list whose head pointer is @list.
void
MemoryOpt::Record::link(Record **list)
{
   Record *const oldHead = *list;

   next = oldHead;
   if (oldHead)
      oldHead->prev = this;
   prev = NULL;

   *list = this;
}
 
// Take this record out of the list whose head pointer is @list.  The
// record's own next/prev pointers are not reset.
void
MemoryOpt::Record::unlink(Record **list)
{
   if (next)
      next->prev = prev;

   if (!prev) {
      // this was the head of the list
      *list = next;
      return;
   }
   prev->next = next;
}
 
// Return the address of the list head @insn belongs to: the load list of its
// data file for OP_LOAD/OP_VFETCH, the store list otherwise.
MemoryOpt::Record **
MemoryOpt::getList(const Instruction *insn)
{
   const bool isLoad = insn->op == OP_LOAD || insn->op == OP_VFETCH;
   Record **const heads = isLoad ? loads : stores;

   return &heads[insn->src(0).getFile()];
}
 
// Start tracking the load/store @i: allocate a record from the pool, put it
// at the front of the matching list and describe the access in it.
void
MemoryOpt::addRecord(Instruction *i)
{
   Record **const head = getList(i);
   Record *const rec = reinterpret_cast<Record *>(recordPool.allocate());

   rec->link(head);
   rec->set(i);
   rec->locked = false;
   rec->insn = i;
}
 
// Search the load list (@load == true) or store list of @insn's data file
// for a record that overlaps or directly adjoins the access of @insn.
//
// A record that overlaps the access from below, or that starts at the same
// offset and is at least as large, is returned immediately with @isAdj set
// to false.  Otherwise the last suitable candidate found is returned, or
// NULL if there is none.
// NOTE(review): @isAdj is written for every record that reaches the offset
// comparison, so on the candidate path it reflects the last such record,
// which is not necessarily the one returned - confirm callers are fine
// with that.
MemoryOpt::Record *
MemoryOpt::findRecord(const Instruction *insn, bool load, bool& isAdj) const
{
const Symbol *sym = insn->getSrc(0)->asSym();
const int size = typeSizeof(insn->sType);
Record *rec = NULL;
Record *it = load ? loads[sym->reg.file] : stores[sym->reg.file];

for (; it; it = it->next) {
// locked records are only visible to loads
if (it->locked && insn->op != OP_LOAD)
continue;
// must lie in the same 16 byte window and use the same file index and
// indirect sources
if ((it->offset >> 4) != (sym->reg.data.offset >> 4) ||
it->rel[0] != insn->getIndirect(0, 0) ||
it->fileIndex != sym->reg.fileIndex ||
it->rel[1] != insn->getIndirect(0, 1))
continue;

if (it->offset < sym->reg.data.offset) {
// record starts below the access
if (it->offset + it->size >= sym->reg.data.offset) {
isAdj = (it->offset + it->size == sym->reg.data.offset);
if (!isAdj)
return it;
// adjacent: only a candidate if the record is 8 byte aligned
if (!(it->offset & 0x7))
rec = it;
}
} else {
// record starts at or above the access
isAdj = it->offset != sym->reg.data.offset;
if (size <= it->size && !isAdj)
return it;
else
// only a candidate if the access is 8 byte aligned and reaches the
// start of the record
if (!(sym->reg.data.offset & 0x7))
if (it->offset - size <= sym->reg.data.offset)
rec = it;
}
}
return rec;
}
 
// Try to satisfy the load @ld with the values written by the store tracked
// in @rec.  The store's sources are walked until the offset of @ld is
// reached; from there on each definition of @ld is replaced by the stored
// value, as long as sizes match and the stored value lives in a GPR.
// Returns true and removes @ld on success.  On a mismatch false is returned;
// replacements made for earlier definitions are not undone.
bool
MemoryOpt::replaceLdFromSt(Instruction *ld, Record *rec)
{
   Instruction *const st = rec->insn;
   const int32_t offLd = ld->getSrc(0)->reg.data.offset;
   int32_t offSt = rec->offset;

   // source 0 of the store is the address, values start at index 1
   int s = 1;
   while (offSt != offLd && st->srcExists(s)) {
      offSt += st->getSrc(s)->reg.size;
      ++s;
   }
   if (offSt != offLd)
      return false;

   int d = 0;
   while (ld->defExists(d) && st->srcExists(s)) {
      if (ld->getDef(d)->reg.size != st->getSrc(s)->reg.size)
         return false;
      if (st->getSrc(s)->reg.file != FILE_GPR)
         return false;
      ld->def(d).replace(st->src(s), false);
      ++d;
      ++s;
   }

   ld->bb->remove(ld);
   return true;
}
 
// Try to satisfy the load @ldE with the results of the earlier load tracked
// in @rec.  The earlier load's definitions are walked until the offset of
// @ldE is reached; from there on each definition of @ldE is replaced by the
// corresponding earlier definition, as long as the sizes match.
// Returns true and deletes @ldE on success.  On a mismatch false is
// returned; replacements made for earlier definitions are not undone.
bool
MemoryOpt::replaceLdFromLd(Instruction *ldE, Record *rec)
{
   Instruction *const ldR = rec->insn;
   const int32_t offE = ldE->getSrc(0)->reg.data.offset;
   int32_t offR = rec->offset;

   assert(offR <= offE);

   int dR = 0;
   while (offR < offE && ldR->defExists(dR)) {
      offR += ldR->getDef(dR)->reg.size;
      ++dR;
   }
   if (offR != offE)
      return false;

   int dE = 0;
   while (ldE->defExists(dE) && ldR->defExists(dR)) {
      if (ldE->getDef(dE)->reg.size != ldR->getDef(dR)->reg.size)
         return false;
      ldE->def(dE).replace(ldR->getDef(dR), false);
      ++dE;
      ++dR;
   }

   delete_Instruction(prog, ldE);
   return true;
}
 
// Merge the earlier store tracked by @rec into the overlapping store @st.
// Values of the earlier store that @st does not overwrite are added to @st,
// then the earlier store is deleted and @rec is updated to track @st with
// the size of the union of both ranges.  Always returns true.
bool
MemoryOpt::replaceStFromSt(Instruction *restrict st, Record *rec)
{
const Instruction *const ri = rec->insn;
Value *extra[3];

int32_t offS = st->getSrc(0)->reg.data.offset;
int32_t offR = rec->offset;
int32_t endS = offS + typeSizeof(st->dType);
int32_t endR = offR + typeSizeof(ri->dType);

// size of the union of both stored ranges
rec->size = MAX2(endS, endR) - MIN2(offS, offR);

// set aside the extra sources of @st while its value sources are rearranged
st->takeExtraSources(0, extra);

if (offR < offS) {
// the recorded store starts below @st
Value *vals[10];
int s, n;
int k = 0;
// get non-replaced sources of ri
for (s = 1; offR < offS; offR += ri->getSrc(s)->reg.size, ++s)
vals[k++] = ri->getSrc(s);
n = s;
// get replaced sources of st
for (s = 1; st->srcExists(s); offS += st->getSrc(s)->reg.size, ++s)
vals[k++] = st->getSrc(s);
// skip replaced sources of ri
for (s = n; offR < endS; offR += ri->getSrc(s)->reg.size, ++s);
// get non-replaced sources after values covered by st
for (; offR < endR; offR += ri->getSrc(s)->reg.size, ++s)
vals[k++] = ri->getSrc(s);
// NOTE(review): this bound is only checked after vals[] was filled
assert((unsigned int)k <= Elements(vals));
for (s = 0; s < k; ++s)
st->setSrc(s + 1, vals[s]);
// the merged store uses the symbol of the recorded store
st->setSrc(0, ri->getSrc(0));
} else
if (endR > endS) {
// the recorded store extends beyond @st: append its trailing values
int j, s;
for (j = 1; offR < endS; offR += ri->getSrc(j++)->reg.size);
for (s = 1; offS < endS; offS += st->getSrc(s++)->reg.size);
for (; offR < endR; offR += ri->getSrc(j++)->reg.size)
st->setSrc(s++, ri->getSrc(j));
}
st->putExtraSources(0, extra);

delete_Instruction(prog, rec->insn);

rec->insn = st;
rec->offset = st->getSrc(0)->reg.data.offset;

st->setType(typeOfSize(rec->size));

return true;
}
 
bool
MemoryOpt::Record::overlaps(const Instruction *ldst) const
{
Record that;
that.set(ldst);
 
if (this->fileIndex != that.fileIndex)
return false;
 
if (this->rel[0] || that.rel[0])
return this->base == that.base;
return
(this->offset < that.offset + that.size) &&
(this->offset + this->size > that.offset);
}
 
// Stores that affect the result of @ld must not be eliminated when a later
// store to the same location shows up, and they may no longer be merged
// with later stores.  Mark every recorded store overlapping @ld as locked.
// A locked store can still be used to determine the value returned by
// future loads.
void
MemoryOpt::lockStores(Instruction *const ld)
{
   Record *rec = stores[ld->src(0).getFile()];

   while (rec) {
      if (!rec->locked && rec->overlaps(ld))
         rec->locked = true;
      rec = rec->next;
   }
}
 
// Prior loads from the location of @st are no longer valid, and stores to
// that location may no longer be used to derive its value nor be coalesced
// into later stores: drop all records overlapping @st from the load and
// store lists of its data file.
// If @st is NULL, all records of data file @f are dropped instead (@f is
// ignored when @st is given).
void
MemoryOpt::purgeRecords(Instruction *const st, DataFile f)
{
   if (st)
      f = st->src(0).getFile();

   Record **const heads[2] = { &loads[f], &stores[f] };

   for (int l = 0; l < 2; ++l) {
      for (Record *rec = *heads[l]; rec; rec = rec->next) {
         if (st && !rec->overlaps(st))
            continue;
         rec->unlink(heads[l]);
      }
   }
}
 
// Run the optimization on @bb, and a second time if the first run returned
// true: a single run won't combine 4 32 bit ld/st to a single 128 bit ld/st
// where 96 bit memory operations are forbidden.
bool
MemoryOpt::visit(BasicBlock *bb)
{
   if (!runOpt(bb))
      return false;

   return runOpt(bb);
}
 
// One optimization run over @bb.  Loads and stores are processed in order;
// each one is either eliminated / merged using the records of earlier
// accesses, or recorded itself.  Other instructions that act as barriers
// invalidate the records of the affected data files.  All records are
// released at the end of the block.  Always returns true.
bool
MemoryOpt::runOpt(BasicBlock *bb)
{
Instruction *ldst, *next;
Record *rec;
bool isAdjacent = true;

for (ldst = bb->getEntry(); ldst; ldst = next) {
bool keep = true;
bool isLoad = true;
// ldst may be deleted below
next = ldst->next;

if (ldst->op == OP_LOAD || ldst->op == OP_VFETCH) {
if (ldst->isDead()) {
// might have been produced by earlier optimization
delete_Instruction(prog, ldst);
continue;
}
} else
if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) {
isLoad = false;
} else {
// not a load/store: only check whether it invalidates records
// TODO: maybe have all fixed ops act as barrier ?
if (ldst->op == OP_CALL ||
ldst->op == OP_BAR ||
ldst->op == OP_MEMBAR) {
purgeRecords(NULL, FILE_MEMORY_LOCAL);
purgeRecords(NULL, FILE_MEMORY_GLOBAL);
purgeRecords(NULL, FILE_MEMORY_SHARED);
purgeRecords(NULL, FILE_SHADER_OUTPUT);
} else
if (ldst->op == OP_ATOM || ldst->op == OP_CCTL) {
if (ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) {
purgeRecords(NULL, FILE_MEMORY_LOCAL);
purgeRecords(NULL, FILE_MEMORY_GLOBAL);
purgeRecords(NULL, FILE_MEMORY_SHARED);
} else {
purgeRecords(NULL, ldst->src(0).getFile());
}
} else
if (ldst->op == OP_EMIT || ldst->op == OP_RESTART) {
purgeRecords(NULL, FILE_SHADER_OUTPUT);
}
continue;
}
if (ldst->getPredicate()) // TODO: handle predicated ld/st
continue;

if (isLoad) {
DataFile file = ldst->src(0).getFile();

// if ld l[]/g[] look for previous store to eliminate the reload
if (file == FILE_MEMORY_GLOBAL || file == FILE_MEMORY_LOCAL) {
// TODO: shared memory ?
rec = findRecord(ldst, false, isAdjacent);
if (rec && !isAdjacent)
keep = !replaceLdFromSt(ldst, rec);
}

// or look for ld from the same location and replace this one
rec = keep ? findRecord(ldst, true, isAdjacent) : NULL;
if (rec) {
if (!isAdjacent)
keep = !replaceLdFromLd(ldst, rec);
else
// or combine a previous load with this one
keep = !combineLd(rec, ldst);
}
// the load stays: stores it depends on must be preserved
if (keep)
lockStores(ldst);
} else {
// store: merge with an overlapping or adjacent earlier store
rec = findRecord(ldst, false, isAdjacent);
if (rec) {
if (!isAdjacent)
keep = !replaceStFromSt(ldst, rec);
else
keep = !combineSt(rec, ldst);
}
// the store stays as is: records overlapping it are now stale
// (the data file argument is ignored when a store is passed)
if (keep)
purgeRecords(ldst, DATA_FILE_COUNT);
}
// remember this access for the following instructions
if (keep)
addRecord(ldst);
}
reset();

return true;
}
 
// =============================================================================
 
// Turn control flow into predicated instructions (after register allocation !).
// TODO:
// Could move this to before register allocation on NVC0 and also handle nested
// constructs.
class FlatteningPass : public Pass
{
private:
// Per-basic-block entry point of the pass.
virtual bool visit(BasicBlock *);

// predicate the branches of a simple conditional started by the block;
// returns true if that was done
bool tryPredicateConditional(BasicBlock *);
// put all non-nop instructions of the block under predicate @pred with
// condition @cc and remove the flow instruction at its end
void predicateInstructions(BasicBlock *, Value *pred, CondCode cc);
// replace branches to single-instruction blocks by that instruction
void tryPropagateBranch(BasicBlock *);
// true if @pred comes from a SET comparing only immediates / constant
// buffer values
inline bool isConstantCondition(Value *pred);
// true if the instruction can be put under predicate @pred
inline bool mayPredicate(const Instruction *, const Value *pred) const;
// delete a BRA/JOIN and, if it becomes dead, the SET computing its predicate
inline void removeFlow(Instruction *);
};
 
// Tell whether the predicate @pred is computed by a two-operand SET whose
// operands all come from immediates or constant memory, either directly or
// through a non-indirect MOV/LOAD.
bool
FlatteningPass::isConstantCondition(Value *pred)
{
   Instruction *const cmp = pred->getUniqueInsn();
   assert(cmp);

   if (cmp->op != OP_SET)
      return false;
   if (cmp->srcExists(2))
      return false;

   for (int s = 0; s < 2; ++s) {
      if (!cmp->srcExists(s))
         break;

      DataFile file;
      Instruction *const producer = cmp->getSrc(s)->getUniqueInsn();

      if (!producer) {
         file = cmp->src(s).getFile();
         // catch $r63 on NVC0
         if (file == FILE_GPR && cmp->getSrc(s)->reg.data.id > prog->maxGPR)
            file = FILE_IMMEDIATE;
      } else {
         if (producer->op != OP_MOV && producer->op != OP_LOAD)
            return false;
         if (producer->src(0).isIndirect(0))
            return false;
         file = producer->src(0).getFile();
      }

      if (!(file == FILE_IMMEDIATE || file == FILE_MEMORY_CONST))
         return false;
   }
   return true;
}
 
// Delete the flow instruction @insn if it is a JOIN, or a BRA whose first
// outgoing edge of its block is neither a cross nor a back edge.  Anything
// else (including NULL and non-flow instructions) is left alone.
//
// If the deleted instruction held the last reference to its predicate, the
// predicate's register is released and the instruction computing it is
// deleted as well once it is dead.
void
FlatteningPass::removeFlow(Instruction *insn)
{
   FlowInstruction *term = insn ? insn->asFlow() : NULL;
   if (!term)
      return;
   Graph::Edge::Type ty = term->bb->cfg.outgoing().getType();

   if (term->op == OP_BRA) {
      // TODO: this might get more difficult when we get arbitrary BRAs
      if (ty == Graph::Edge::CROSS || ty == Graph::Edge::BACK)
         return;
   } else
   if (term->op != OP_JOIN)
      return;

   Value *pred = term->getPredicate();

   delete_Instruction(prog, term);

   if (pred && pred->refCount() == 0) {
      Instruction *pSet = pred->getUniqueInsn();
      pred->join->reg.data.id = -1; // deallocate
      // getUniqueInsn() results are null-checked elsewhere in this file;
      // don't dereference a predicate without a unique defining instruction
      if (pSet && pSet->isDead())
         delete_Instruction(prog, pSet);
   }
}
 
// Make every instruction of @bb that is not a nop conditional on @pred with
// condition code @cc, then remove the flow instruction ending the block (if
// removeFlow() accepts it).
void
FlatteningPass::predicateInstructions(BasicBlock *bb, Value *pred, CondCode cc)
{
   Instruction *insn = bb->getEntry();

   while (insn) {
      if (!insn->isNop()) {
         assert(!insn->getPredicate());
         insn->setPredicate(cc, pred);
      }
      insn = insn->next;
   }

   removeFlow(bb->getExit());
}
 
bool
FlatteningPass::mayPredicate(const Instruction *insn, const Value *pred) const
{
if (insn->isPseudo())
return true;
// TODO: calls where we don't know which registers are modified
 
if (!prog->getTarget()->mayPredicate(insn, pred))
return false;
for (int d = 0; insn->defExists(d); ++d)
if (insn->getDef(d)->equals(pred))
return false;
return true;
}
 
// If a branch targets a block that consists of nothing but an unpredicated
// BRA/JOIN/EXIT, replace the branch with that instruction.
// NOTE: The CFG is not updated here anymore !
//
// TODO: Handle cases where we skip over a branch (maybe do that elsewhere ?):
//  BB:0
//   @p0 bra BB:2 -> @!p0 bra BB:3 iff (!) BB:2 immediately adjoins BB:1
//  BB1:
//   bra BB:3
//  BB2:
//   ...
//  BB3:
//   ...
void
FlatteningPass::tryPropagateBranch(BasicBlock *bb)
{
   // walk backwards over the BRA instructions at the end of the block
   for (Instruction *i = bb->getExit(); i && i->op == OP_BRA; i = i->prev) {
      FlowInstruction *const bra = i->asFlow();
      BasicBlock *const dest = bra->target.bb;

      if (dest->getInsnCount() == 1) {
         FlowInstruction *const rep = dest->getExit()->asFlow();

         const bool usable =
            rep && !rep->getPredicate() &&
            (rep->op == OP_BRA || rep->op == OP_JOIN || rep->op == OP_EXIT);

         if (usable) {
            // TODO: If there are multiple branches to @rep, only the first
            // would be replaced, so only remove them after this pass is
            // done ?
            // Also, need to check all incident blocks for fall-through
            // exits and add the branch there.
            bra->op = rep->op;
            bra->target.bb = rep->target.bb;
            if (dest->cfg.incidentCount() == 1)
               dest->remove(rep);
         }
      }
   }
}
 
// Process one basic block: first try to predicate a simple conditional
// starting at @bb.  If that does not apply and the target has a join flag,
// try to fold an unpredicated JOIN at the end of the block into the
// instruction before it.  Otherwise try to shortcut branches.
// Always returns true.
bool
FlatteningPass::visit(BasicBlock *bb)
{
   if (tryPredicateConditional(bb))
      return true;

   // try to attach join to previous instruction
   if (prog->getTarget()->hasJoin) {
      Instruction *const join = bb->getExit();

      if (join && join->op == OP_JOIN && !join->getPredicate()) {
         Instruction *const carrier = join->prev;

         bool attach = carrier != NULL;
         if (attach)
            attach = !carrier->getPredicate() && !carrier->asFlow();
         if (attach)
            attach = carrier->op != OP_TEXBAR &&
                     carrier->op != OP_LINTERP && // probably just nve4
                     carrier->op != OP_PINTERP;   // probably just nve4
         if (attach)
            attach = !isTextureOp(carrier->op) && // probably just nve4
                     !isSurfaceOp(carrier->op);   // not confirmed
         // loads and stores only qualify up to 4 bytes
         if (attach && (carrier->op == OP_LOAD || carrier->op == OP_STORE))
            attach = typeSizeof(carrier->dType) <= 4;
         if (attach)
            attach = !carrier->isNop();

         if (attach) {
            carrier->join = 1;
            bb->remove(bb->getExit());
            return true;
         }
      }
   }

   tryPropagateBranch(bb);

   return true;
}
 
// If @bb starts a simple conditional, replace the branching by predication:
// the instructions of the conditional blocks are put under the branch
// predicate (the second block under the inverse condition) and the flow
// instructions involved are removed.
//
// Gives up (returns false, nothing modified) if @bb does not start a simple
// conditional, if any instruction of a conditional block cannot be
// predicated, or if a block has more instructions than the limit (12, or 4
// when the condition is constant).  Returns true after flattening.
bool
FlatteningPass::tryPredicateConditional(BasicBlock *bb)
{
   const unsigned int mask = bb->initiatesSimpleConditional();
   if (!mask)
      return false;

   assert(bb->getExit());
   Value *pred = bb->getExit()->getPredicate();
   assert(pred);

   const unsigned int limit = isConstantCondition(pred) ? 4 : 12;

   BasicBlock *left = NULL;
   BasicBlock *right = NULL;

   Graph::EdgeIterator ei = bb->cfg.outgoing();

   if (mask & 1) {
      left = BasicBlock::get(ei.getNode());
      unsigned int count = 0;
      Instruction *insn = left->getEntry();
      while (insn) {
         if (!mayPredicate(insn, pred))
            return false;
         insn = insn->next;
         ++count;
      }
      if (count > limit)
         return false; // too long, do a real branch
   }
   ei.next();

   if (mask & 2) {
      right = BasicBlock::get(ei.getNode());
      unsigned int count = 0;
      Instruction *insn = right->getEntry();
      while (insn) {
         if (!mayPredicate(insn, pred))
            return false;
         insn = insn->next;
         ++count;
      }
      if (count > limit)
         return false; // too long, do a real branch
   }

   if (left)
      predicateInstructions(left, pred, bb->getExit()->cc);
   if (right)
      predicateInstructions(right, pred, inverseCondCode(bb->getExit()->cc));

   if (bb->joinAt) {
      bb->remove(bb->joinAt);
      bb->joinAt = NULL;
   }
   removeFlow(bb->getExit()); // delete the branch/join at the fork point

   // remove potential join operations at the end of the conditional
   if (prog->getTarget()->joinAnterior) {
      BasicBlock *const taken = left ? left : right;
      bb = BasicBlock::get(taken->cfg.outgoing().getNode());
      if (bb->getEntry() && bb->getEntry()->op == OP_JOIN)
         removeFlow(bb->getEntry());
   }

   return true;
}
 
// =============================================================================
 
// Fold Immediate into MAD; must be done after register allocation due to
// constraint SDST == SSRC2
// TODO:
// Does NVC0+ have other situations where this pass makes sense?
// Pass working on one basic block at a time; see visit() for the actual
// folding.
class NV50PostRaConstantFolding : public Pass
{
private:
   // Folds immediates loaded by a MOV into qualifying MAD instructions of the
   // block.
   virtual bool visit(BasicBlock *);
};
 
// For every floating point MAD in @bb whose destination and all three sources
// are GPRs and whose destination shares its register id with source 2:
// if source 1 is produced by a MOV of an immediate, use the immediate
// directly as source 1 and delete the MOV once nothing references its result
// any more.
//
// Always returns true.
bool
NV50PostRaConstantFolding::visit(BasicBlock *bb)
{
   Value *vtmp;
   Instruction *def;

   for (Instruction *i = bb->getFirst(); i; i = i->next) {
      switch (i->op) {
      case OP_MAD:
         if (i->def(0).getFile() != FILE_GPR ||
             i->src(0).getFile() != FILE_GPR ||
             i->src(1).getFile() != FILE_GPR ||
             i->src(2).getFile() != FILE_GPR ||
             i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id ||
             !isFloatType(i->dType))
            break;

         // A value need not have a defining instruction (e.g. a function
         // input), so getInsn() may return NULL and must be checked before
         // it is dereferenced.
         def = i->getSrc(1)->getInsn();
         if (def && def->op == OP_MOV &&
             def->src(0).getFile() == FILE_IMMEDIATE) {
            vtmp = i->getSrc(1);
            i->setSrc(1, def->getSrc(0));

            /* There's no post-RA dead code elimination, so do it here
             * XXX: if we add more code-removing post-RA passes, we might
             *      want to create a post-RA dead-code elim pass */
            if (vtmp->refCount() == 0)
               delete_Instruction(bb->getProgram(), def);
         }
         break;
      default:
         break;
      }
   }

   return true;
}
 
// =============================================================================
 
// Common subexpression elimination. Stupid O^2 implementation.
// Block-local pass; see visit() for the elimination loop.
class LocalCSE : public Pass
{
private:
   // Runs the elimination on one basic block until nothing is replaced.
   virtual bool visit(BasicBlock *);

   // Deletes *ptr and redirects its results to the given instruction if both
   // produce equal results; sets *ptr to NULL on success.
   inline bool tryReplace(Instruction **, Instruction *);

   // Instructions already visited in the current block, one list per opcode;
   // emptied at the end of every iteration of visit().
   DLList ops[OP_LAST + 1];
};
 
// Pass that pulls equal instructions feeding a PHI from the predecessor
// blocks into the block containing the PHI; see visit().
class GlobalCSE : public Pass
{
private:
   virtual bool visit(BasicBlock *);
};
 
bool
Instruction::isActionEqual(const Instruction *that) const
{
if (this->op != that->op ||
this->dType != that->dType ||
this->sType != that->sType)
return false;
if (this->cc != that->cc)
return false;
 
if (this->asTex()) {
if (memcmp(&this->asTex()->tex,
&that->asTex()->tex,
sizeof(this->asTex()->tex)))
return false;
} else
if (this->asCmp()) {
if (this->asCmp()->setCond != that->asCmp()->setCond)
return false;
} else
if (this->asFlow()) {
return false;
} else {
if (this->ipa != that->ipa ||
this->lanes != that->lanes ||
this->perPatch != that->perPatch)
return false;
if (this->postFactor != that->postFactor)
return false;
}
 
if (this->subOp != that->subOp ||
this->saturate != that->saturate ||
this->rnd != that->rnd ||
this->ftz != that->ftz ||
this->dnz != that->dnz ||
this->cache != that->cache ||
this->mask != that->mask)
return false;
 
return true;
}
 
bool
Instruction::isResultEqual(const Instruction *that) const
{
unsigned int d, s;
 
// NOTE: location of discard only affects tex with liveOnly and quadops
if (!this->defExists(0) && this->op != OP_DISCARD)
return false;
 
if (!isActionEqual(that))
return false;
 
if (this->predSrc != that->predSrc)
return false;
 
for (d = 0; this->defExists(d); ++d) {
if (!that->defExists(d) ||
!this->getDef(d)->equals(that->getDef(d), false))
return false;
}
if (that->defExists(d))
return false;
 
for (s = 0; this->srcExists(s); ++s) {
if (!that->srcExists(s))
return false;
if (this->src(s).mod != that->src(s).mod)
return false;
if (!this->getSrc(s)->equals(that->getSrc(s), true))
return false;
}
if (that->srcExists(s))
return false;
 
if (op == OP_LOAD || op == OP_VFETCH) {
switch (src(0).getFile()) {
case FILE_MEMORY_CONST:
case FILE_SHADER_INPUT:
return true;
default:
return false;
}
}
 
return true;
}
 
// pull through common expressions from different in-blocks
// For each PHI at the top of @bb: if every source is referenced only once and
// all sources are defined by instructions with equal results, move the
// instruction defining source 0 into @bb, let it define the PHI's result and
// delete the PHI. The instructions defining the other sources are not
// touched here.
// Always returns true.
bool
GlobalCSE::visit(BasicBlock *bb)
{
   Instruction *phi, *next, *ik;
   int s;

   // TODO: maybe do this with OP_UNION, too

   for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = next) {
      next = phi->next; // fetched up front, phi may get deleted below
      if (phi->getSrc(0)->refCount() > 1)
         continue;
      ik = phi->getSrc(0)->getInsn();
      if (!ik)
         continue; // probably a function input
      // stop at the first source that disqualifies the PHI
      for (s = 1; phi->srcExists(s); ++s) {
         if (phi->getSrc(s)->refCount() > 1)
            break;
         if (!phi->getSrc(s)->getInsn() ||
             !phi->getSrc(s)->getInsn()->isResultEqual(ik))
            break;
      }
      // the loop ran to completion, i.e. all sources qualified
      if (!phi->srcExists(s)) {
         Instruction *entry = bb->getEntry();
         ik->bb->remove(ik);
         // keep a leading JOIN in front of the moved instruction
         if (!entry || entry->op != OP_JOIN)
            bb->insertHead(ik);
         else
            bb->insertAfter(entry, ik);
         ik->setDef(0, phi->getDef(0));
         delete_Instruction(prog, phi);
      }
   }

   return true;
}
 
// Replace the instruction *ptr by @i if @i is unpredicated and both yield
// equal results: all definitions of *ptr are replaced by those of @i, *ptr is
// deleted and set to NULL. Returns whether the replacement took place.
bool
LocalCSE::tryReplace(Instruction **ptr, Instruction *i)
{
   Instruction *const victim = *ptr;

   // TODO: maybe relax this later (causes trouble with OP_UNION)
   if (i->isPredicated() || !victim->isResultEqual(i))
      return false;

   int d = 0;
   while (victim->defExists(d)) {
      victim->def(d).replace(i->getDef(d), false);
      ++d;
   }

   delete_Instruction(prog, victim);
   *ptr = NULL;
   return true;
}
 
// Eliminate common subexpressions inside @bb. The block is scanned repeatedly
// until a scan replaces nothing. Always returns true.
bool
LocalCSE::visit(BasicBlock *bb)
{
   unsigned int replaced;

   do {
      Instruction *ir, *next;

      replaced = 0;

      // will need to know the order of instructions
      int serial = 0;
      for (ir = bb->getFirst(); ir; ir = ir->next)
         ir->serial = serial++;

      for (ir = bb->getEntry(); ir; ir = next) {
         int s;
         Value *src = NULL;

         next = ir->next; // ir may be deleted by tryReplace()

         // fixed instructions are never replaced themselves, but are
         // recorded as candidates for later instructions
         if (ir->fixed) {
            ops[ir->op].insert(ir);
            continue;
         }

         // pick the LValue source with the lowest reference count
         for (s = 0; ir->srcExists(s); ++s)
            if (ir->getSrc(s)->asLValue())
               if (!src || ir->getSrc(s)->refCount() < src->refCount())
                  src = ir->getSrc(s);

         if (src) {
            // candidates: earlier instructions of this block using @src
            for (Value::UseIterator it = src->uses.begin();
                 it != src->uses.end(); ++it) {
               Instruction *ik = (*it)->getInsn();
               if (ik && ik->bb == ir->bb && ik->serial < ir->serial)
                  if (tryReplace(&ir, ik))
                     break;
            }
         } else {
            // no LValue source: candidates are the instructions with the
            // same opcode recorded so far
            DLLIST_FOR_EACH(&ops[ir->op], iter)
            {
               Instruction *ik = reinterpret_cast<Instruction *>(iter.get());
               if (tryReplace(&ir, ik))
                  break;
            }
         }

         // tryReplace() sets ir to NULL when it deleted the instruction
         if (ir)
            ops[ir->op].insert(ir);
         else
            ++replaced;
      }
      for (unsigned int i = 0; i <= OP_LAST; ++i)
         ops[i].clear();

   } while (replaced);

   return true;
}
 
// =============================================================================
 
// Remove computations of unused values.
// Pass deleting dead instructions and trimming partially dead loads.
class DeadCodeElim : public Pass
{
public:
   // Runs the pass over the program until a run deletes nothing; returns
   // false if a run fails.
   bool buryAll(Program *);

private:
   virtual bool visit(BasicBlock *);

   void checkSplitLoad(Instruction *ld); // for partially dead loads

   // number of instructions deleted during the current run
   unsigned int deadCount;
};
 
// Run the pass over @prog again and again until one run removes no
// instruction. Returns false as soon as a run fails, true otherwise.
bool
DeadCodeElim::buryAll(Program *prog)
{
   for (;;) {
      deadCount = 0;
      if (!this->run(prog, false, false))
         return false;
      if (!deadCount)
         return true;
   }
}
 
// Process all instructions of @bb:
//  - dead instructions are deleted (and counted in deadCount),
//  - loads / vertex fetches with more than one result are handed to
//    checkSplitLoad(),
//  - ATOM / SUREDP / SUREDB whose first result is unreferenced have that
//    result dropped.
// Always returns true.
bool
DeadCodeElim::visit(BasicBlock *bb)
{
   Instruction *insn = bb->getFirst();

   while (insn) {
      // remember the successor, insn might be deleted
      Instruction *const following = insn->next;

      if (insn->isDead()) {
         ++deadCount;
         delete_Instruction(prog, insn);
      } else if (insn->defExists(1) &&
                 (insn->op == OP_VFETCH || insn->op == OP_LOAD)) {
         checkSplitLoad(insn);
      } else if (insn->defExists(0) && !insn->getDef(0)->refCount() &&
                 (insn->op == OP_ATOM ||
                  insn->op == OP_SUREDP ||
                  insn->op == OP_SUREDB)) {
         insn->setDef(0, NULL);
      }

      insn = following;
   }
   return true;
}
 
// Shrink a load whose results are only partially used. Results that are
// unreferenced and have no register id assigned (id < 0) are dropped; the
// remaining results are served by @ld1 with adjusted offset and type and, if
// the live results do not form one run, by a second load cloned from @ld1 and
// inserted right after it.
void
DeadCodeElim::checkSplitLoad(Instruction *ld1)
{
   Instruction *ld2 = NULL; // can get at most 2 loads
   Value *def1[4];
   Value *def2[4];
   int32_t addr1, addr2;
   int32_t size1, size2;
   int d, n1, n2;
   uint32_t mask = 0xffffffff;

   // clear the mask bit of every result that can be dropped
   for (d = 0; ld1->defExists(d); ++d)
      if (!ld1->getDef(d)->refCount() && ld1->getDef(d)->reg.data.id < 0)
         mask &= ~(1 << d);
   if (mask == 0xffffffff)
      return; // nothing to drop

   addr1 = ld1->getSrc(0)->reg.data.offset;
   n1 = n2 = 0;
   size1 = size2 = 0;
   // first run of kept results: skip leading dropped results by advancing
   // the address, then collect kept results until a dropped one is met or
   // the (addr1 & 0x7) check stops the run
   for (d = 0; ld1->defExists(d); ++d) {
      if (mask & (1 << d)) {
         if (size1 && (addr1 & 0x7))
            break;
         def1[n1] = ld1->getDef(d);
         size1 += def1[n1++]->reg.size;
      } else
      if (!n1) {
         addr1 += ld1->getDef(d)->reg.size;
      } else {
         break;
      }
   }
   // second run: continues with result d where the first loop stopped
   for (addr2 = addr1 + size1; ld1->defExists(d); ++d) {
      if (mask & (1 << d)) {
         def2[n2] = ld1->getDef(d);
         size2 += def2[n2++]->reg.size;
      } else {
         // dropped results are only expected in front of the second run
         assert(!n2);
         addr2 += ld1->getDef(d)->reg.size;
      }
   }

   // rewrite the original load to cover the first run only
   updateLdStOffset(ld1, addr1, func);
   ld1->setType(typeOfSize(size1));
   for (d = 0; d < 4; ++d)
      ld1->setDef(d, (d < n1) ? def1[d] : NULL);

   if (!n2)
      return;

   // a second load serves the second run
   ld2 = cloneShallow(func, ld1);
   updateLdStOffset(ld2, addr2, func);
   ld2->setType(typeOfSize(size2));
   for (d = 0; d < 4; ++d)
      ld2->setDef(d, (d < n2) ? def2[d] : NULL);

   ld1->bb->insertAfter(ld1, ld2);
}
 
// =============================================================================
 
// RUN_PASS(l, n, f): if the enclosing function's `level` is at least @l,
// instantiate a pass of type @n and call its member @f on `this`; makes the
// enclosing function return false if the pass fails. The pass name is logged
// when verbose debugging is enabled.
//
// The body is wrapped in do { } while (0) so that the macro behaves like a
// single statement: it can be used as the body of an unbraced if/else without
// capturing a following `else`.
#define RUN_PASS(l, n, f)                       \
   do {                                         \
      if (level >= (l)) {                       \
         if (dbgFlags & NV50_IR_DEBUG_VERBOSE)  \
            INFO("PEEPHOLE: %s\n", #n);         \
         n pass;                                \
         if (!pass.f(this))                     \
            return false;                       \
      }                                         \
   } while (0)
 
// Run the SSA-form optimization passes in a fixed order. Each pass only runs
// if @level is at least the level given as first RUN_PASS argument. Returns
// false as soon as one pass fails, true otherwise.
bool
Program::optimizeSSA(int level)
{
   RUN_PASS(1, DeadCodeElim, buryAll);
   RUN_PASS(1, CopyPropagation, run);
   RUN_PASS(1, MergeSplits, run);
   RUN_PASS(2, GlobalCSE, run);
   RUN_PASS(1, LocalCSE, run);
   RUN_PASS(2, AlgebraicOpt, run);
   RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks
   RUN_PASS(1, ConstantFolding, foldAll);
   RUN_PASS(1, LoadPropagation, run);
   RUN_PASS(2, MemoryOpt, run);
   RUN_PASS(2, LocalCSE, run);
   RUN_PASS(0, DeadCodeElim, buryAll);

   return true;
}
 
// Run the passes operating on code after register allocation: flattening on
// all chipsets, immediate folding into MAD on chipsets below 0xc0 only.
// Both require @level >= 2. Returns false if a pass fails.
bool
Program::optimizePostRA(int level)
{
   RUN_PASS(2, FlatteningPass, run);

   const bool preNVC0 = getTarget()->getChipset() < 0xc0;
   if (preNVC0) {
      RUN_PASS(2, NV50PostRaConstantFolding, run);
   }

   return true;
}
 
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
0,0 → 1,706
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_target.h"
 
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
 
namespace nv50_ir {
 
// Index into the colour tables, one entry per kind of printed text.
enum TextStyle
{
   TXT_DEFAULT  = 0, // reset / plain text
   TXT_GPR      = 1, // general purpose registers
   TXT_REGISTER = 2, // predicate and address registers, system value names
   TXT_FLAGS    = 3, // flags registers
   TXT_MEM      = 4, // memory symbols
   TXT_IMMD     = 5, // immediates and offsets
   TXT_BRA      = 6, // branch / call targets
   TXT_INSN     = 7  // opcodes, modifiers and types
};
 
// Terminal escape sequences used when coloured output is enabled; indexed by
// TextStyle.
static const char *_colour[8] =
{
   "\x1b[00m", "\x1b[34m", // TXT_DEFAULT,  TXT_GPR
   "\x1b[35m", "\x1b[35m", // TXT_REGISTER, TXT_FLAGS
   "\x1b[36m", "\x1b[33m", // TXT_MEM,      TXT_IMMD
   "\x1b[37m", "\x1b[32m"  // TXT_BRA,      TXT_INSN
};
 
// Replacement for _colour when colours are disabled: every style maps to the
// empty string.
static const char *_nocolour[8] =
{
   "", "", "", "",
   "", "", "", ""
};
 
static const char **colour;
 
static void init_colours()
{
if (getenv("NV50_PROG_DEBUG_NO_COLORS") != NULL)
colour = _nocolour;
else
colour = _colour;
}
 
// Printable name of every operation, indexed by opcode; the final entry
// stands for OP_LAST.
const char *operationStr[OP_LAST + 1] =
{
   // pseudo ops and data movement
   "nop", "phi", "union", "split", "merge", "consec", "mov", "ld", "st",
   // arithmetic
   "add", "sub", "mul", "div", "mod", "mad", "fma", "sad",
   "abs", "neg", "not", "and", "or", "xor", "shl", "shr",
   "max", "min", "sat", "ceil", "floor", "trunc", "cvt",
   // comparison and selection
   "set and", "set or", "set xor", "set", "selp", "slct",
   // transcendentals
   "rcp", "rsq", "lg2", "sin", "cos", "ex2", "exp", "log",
   "presin", "preex2", "sqrt", "pow",
   // control flow
   "bra", "call", "ret", "cont", "break",
   "preret", "precont", "prebreak", "brkpt",
   "joinat", "join", "discard", "exit", "membar",
   // shader input / output
   "vfetch", "pfetch", "export", "linterp", "pinterp", "emit", "restart",
   // texturing
   "tex", "texbias", "texlod", "texfetch", "texquery", "texgrad",
   "texgather", "texquerylod", "texcsaa", "texprep",
   // surfaces
   "suldb", "suldp", "sustb", "sustp", "suredb", "suredp",
   "sulea", "subfm", "suclamp", "sueau", "madsp",
   // miscellaneous
   "texbar", "dfdx", "dfdy", "rdsv", "wrsv", "pixld",
   "quadop", "quadon", "quadpop",
   "popcnt", "insbf", "extbf", "bfind", "permt",
   "atom", "bar",
   // video ops
   "vadd", "vavg", "vmin", "vmax", "vsad", "vset", "vshr", "vshl", "vsel",
   "cctl", "shfl",
   "(invalid)"
};
 
// Names of the atomic sub-operations, indexed by the instruction's subOp.
static const char *atomSubOpStr[] =
{
   "add", "min", "max", "inc", "dec",
   "and", "or", "xor", "cas", "exch"
};
 
// Printable names of the data types, indexed by the instruction's
// dType / sType.
static const char *DataTypeStr[] =
{
   "-",                        // no type
   "u8", "s8", "u16", "s16",   // small integers
   "u32", "s32", "u64", "s64", // 32 and 64 bit integers
   "f16", "f32", "f64",        // floating point
   "b96", "b128"               // wide untyped data
};
 
// Printable names of the rounding modes, indexed by the instruction's rnd
// field.
static const char *RoundModeStr[] =
{
   "",
   "rm", "rz", "rp",
   "rni", "rmi", "rzi", "rpi"
};
 
// Printable names of the condition codes, indexed by condition code value.
static const char *CondCodeStr[] =
{
   // indices 0 - 7
   "never", "lt", "eq", "le", "gt", "ne", "ge", "",
   // indices 8 - 15
   "(invalid)", "ltu", "equ", "leu", "gtu", "neu", "geu", "",
   // indices 16 - 23
   "no", "nc", "ns", "na", "a", "s", "c", "o"
};
 
// Printable names of the system values, indexed by the sv field of a system
// value symbol; the final entry stands for SV_LAST.
static const char *SemanticStr[SV_LAST + 1] =
{
   "POSITION", "VERTEX_ID", "INSTANCE_ID", "INVOCATION_ID",
   "PRIMITIVE_ID", "VERTEX_COUNT", "LAYER", "VIEWPORT_INDEX",
   "Y_DIR", "FACE", "POINT_SIZE", "POINT_COORD",
   "CLIP_DISTANCE", "SAMPLE_INDEX", "SAMPLE_POS", "SAMPLE_MASK",
   "TESS_FACTOR", "TESS_COORD",
   "TID", "CTAID", "NTID", "GRIDID", "NCTAID",
   "LANEID", "PHYSID", "NPHYSID",
   "CLOCK", "LBASE", "SBASE",
   "VERTEX_STRIDE", "INVOCATION_INFO",
   "?",
   "(INVALID)"
};
 
// Printable names of the interpolation modes, indexed by the instruction's
// ipa field. Each row lists the four basic modes for one prefix.
static const char *interpStr[16] =
{
   "pass",      "mul",      "flat",      "sc",
   "cent pass", "cent mul", "cent flat", "cent sc",
   "off pass",  "off mul",  "off flat",  "off sc",
   "samp pass", "samp mul", "samp flat", "samp sc"
};
 
// Helpers for building a string piecewise. They expect the variables
// `char *buf`, `size_t size` (capacity of buf) and `size_t pos` (current
// write offset) in the enclosing scope.
//
// snprintf() returns the length the output would have had without
// truncation, so `pos` may end up beyond `size`. Because `size - pos` is
// unsigned, a further snprintf() at that point would be handed a huge bogus
// capacity and write outside of buf; therefore nothing is printed any more
// once the buffer is full. A negative (error) return value is ignored
// instead of being added to `pos`.

// Append formatted text.
#define PRINT(args...)                                             \
   do {                                                            \
      if (pos < size) {                                            \
         const int print_len_ = snprintf(&buf[pos], size - pos, args); \
         if (print_len_ > 0)                                       \
            pos += print_len_;                                     \
      }                                                            \
   } while(0)

// Append a blank if @cond holds (the caller's condition is expected to
// ensure there is room for it), then append formatted text.
#define SPACE_PRINT(cond, args...)                                 \
   do {                                                            \
      if (cond)                                                    \
         buf[pos++] = ' ';                                         \
      if (pos < size) {                                            \
         const int print_len_ = snprintf(&buf[pos], size - pos, args); \
         if (print_len_ > 0)                                       \
            pos += print_len_;                                     \
      }                                                            \
   } while(0)

// Append a single blank if there is room for it.
#define SPACE()                                 \
   do {                                         \
      if (pos < size)                           \
         buf[pos++] = ' ';                      \
   } while(0)
 
int Modifier::print(char *buf, size_t size) const
{
size_t pos = 0;
 
if (bits)
PRINT("%s", colour[TXT_INSN]);
 
size_t base = pos;
 
if (bits & NV50_IR_MOD_NOT)
PRINT("not");
if (bits & NV50_IR_MOD_SAT)
SPACE_PRINT(pos > base && pos < size, "sat");
if (bits & NV50_IR_MOD_NEG)
SPACE_PRINT(pos > base && pos < size, "neg");
if (bits & NV50_IR_MOD_ABS)
SPACE_PRINT(pos > base && pos < size, "abs");
 
return pos;
}
 
int LValue::print(char *buf, size_t size, DataType ty) const
{
const char *postFix = "";
size_t pos = 0;
int idx = join->reg.data.id >= 0 ? join->reg.data.id : id;
char p = join->reg.data.id >= 0 ? '$' : '%';
char r;
int col = TXT_DEFAULT;
 
switch (reg.file) {
case FILE_GPR:
r = 'r'; col = TXT_GPR;
if (reg.size == 2) {
if (p == '$') {
postFix = (idx & 1) ? "h" : "l";
idx /= 2;
} else {
postFix = "s";
}
} else
if (reg.size == 8) {
postFix = "d";
} else
if (reg.size == 16) {
postFix = "q";
} else
if (reg.size == 12) {
postFix = "t";
}
break;
case FILE_PREDICATE:
r = 'p'; col = TXT_REGISTER;
if (reg.size == 2)
postFix = "d";
else
if (reg.size == 4)
postFix = "q";
break;
case FILE_FLAGS:
r = 'c'; col = TXT_FLAGS;
break;
case FILE_ADDRESS:
r = 'a'; col = TXT_REGISTER;
break;
default:
assert(!"invalid file for lvalue");
r = '?';
break;
}
 
PRINT("%s%c%c%i%s", colour[col], p, r, idx, postFix);
 
return pos;
}
 
int ImmediateValue::print(char *buf, size_t size, DataType ty) const
{
size_t pos = 0;
 
PRINT("%s", colour[TXT_IMMD]);
 
switch (ty) {
case TYPE_F32: PRINT("%f", reg.data.f32); break;
case TYPE_F64: PRINT("%f", reg.data.f64); break;
case TYPE_U8: PRINT("0x%02x", reg.data.u8); break;
case TYPE_S8: PRINT("%i", reg.data.s8); break;
case TYPE_U16: PRINT("0x%04x", reg.data.u16); break;
case TYPE_S16: PRINT("%i", reg.data.s16); break;
case TYPE_U32: PRINT("0x%08x", reg.data.u32); break;
case TYPE_S32: PRINT("%i", reg.data.s32); break;
case TYPE_U64:
case TYPE_S64:
default:
PRINT("0x%016"PRIx64, reg.data.u64);
break;
}
return pos;
}
 
// Print the symbol without any indirect addressing operands; forwards to the
// five argument overload with both relative values set to NULL.
int Symbol::print(char *buf, size_t size, DataType ty) const
{
   return print(buf, size, NULL, NULL, ty);
}
 
// Print the symbol as a memory reference.
//
// System values are printed as sv[NAME:index] (followed by "+<rel>" if @rel
// is given). All other files are printed as <file letter>[<offset>], where
// constant memory additionally shows the file index after the letter,
// @dimRel (if given) is printed as an extra leading "[...]" dimension and
// @rel (if given) is printed in front of the signed offset.
// Returns the value of the running output position.
int Symbol::print(char *buf, size_t size,
                  Value *rel, Value *dimRel, DataType ty) const
{
   size_t pos = 0;
   char c;

   // NOTE(review): ty is assigned here but not read afterwards in this
   // function.
   if (ty == TYPE_NONE)
      ty = typeOfSize(reg.size);

   if (reg.file == FILE_SYSTEM_VALUE) {
      PRINT("%ssv[%s%s:%i%s", colour[TXT_MEM],
            colour[TXT_REGISTER],
            SemanticStr[reg.data.sv.sv], reg.data.sv.index, colour[TXT_MEM]);
      if (rel) {
         PRINT("%s+", colour[TXT_DEFAULT]);
         pos += rel->print(&buf[pos], size - pos);
      }
      PRINT("%s]", colour[TXT_MEM]);
      return pos;
   }

   // letter identifying the memory file
   switch (reg.file) {
   case FILE_MEMORY_CONST:  c = 'c'; break;
   case FILE_SHADER_INPUT:  c = 'a'; break;
   case FILE_SHADER_OUTPUT: c = 'o'; break;
   case FILE_MEMORY_GLOBAL: c = 'g'; break;
   case FILE_MEMORY_SHARED: c = 's'; break;
   case FILE_MEMORY_LOCAL:  c = 'l'; break;
   default:
      assert(!"invalid file");
      c = '?';
      break;
   }

   // only constant memory shows its file index
   if (c == 'c')
      PRINT("%s%c%i[", colour[TXT_MEM], c, reg.fileIndex);
   else
      PRINT("%s%c[", colour[TXT_MEM], c);

   if (dimRel) {
      pos += dimRel->print(&buf[pos], size - pos, TYPE_S32);
      PRINT("%s][", colour[TXT_MEM]);
   }

   if (rel) {
      pos += rel->print(&buf[pos], size - pos);
      PRINT("%s%c", colour[TXT_DEFAULT], (reg.data.offset < 0) ? '-' : '+');
   } else {
      // without a relative operand no sign is printed, so the offset is
      // expected to be non-negative
      assert(reg.data.offset >= 0);
   }
   PRINT("%s0x%x%s]", colour[TXT_IMMD], abs(reg.data.offset), colour[TXT_MEM]);

   return pos;
}
 
// Format the instruction into a local buffer and emit it through INFO(),
// followed by the encoding size in parentheses. The printed parts are, in
// order: join flag, predicate, saturate flag, opcode with its class specific
// attributes, rounding mode, definitions, set condition, source type,
// sources and the exit flag.
//
// NOTE(review): the nested print() calls receive `BUFSZ - pos` /
// `size - pos`, which are unsigned and would wrap around if pos ever
// exceeded the buffer size - confirm BUFSZ is large enough for all
// instructions.
void Instruction::print() const
{
   #define BUFSZ 512

   const size_t size = BUFSZ;

   char buf[BUFSZ];
   int s, d;
   size_t pos = 0;

   PRINT("%s", colour[TXT_INSN]);

   if (join)
      PRINT("join ");

   if (predSrc >= 0) {
      const size_t pre = pos;
      // predicate registers only distinguish plain and negated use, other
      // files print the condition code name
      if (getSrc(predSrc)->reg.file == FILE_PREDICATE) {
         if (cc == CC_NOT_P)
            PRINT("not");
      } else {
         PRINT("%s", CondCodeStr[cc]);
      }
      // separate the condition from the register if one was printed
      if (pos > pre)
         SPACE();
      pos += getSrc(predSrc)->print(&buf[pos], BUFSZ - pos);
      PRINT(" %s", colour[TXT_INSN]);
   }

   if (saturate)
      PRINT("sat ");

   if (asFlow()) {
      PRINT("%s", operationStr[op]);
      if (asFlow()->indirect)
         PRINT(" ind");
      if (asFlow()->absolute)
         PRINT(" abs");
      // target: builtin number, function name and label, or basic block id
      if (op == OP_CALL && asFlow()->builtin) {
         PRINT(" %sBUILTIN:%i", colour[TXT_BRA], asFlow()->target.builtin);
      } else
      if (op == OP_CALL && asFlow()->target.fn) {
         PRINT(" %s%s:%i", colour[TXT_BRA],
               asFlow()->target.fn->getName(),
               asFlow()->target.fn->getLabel());
      } else
      if (asFlow()->target.bb)
         PRINT(" %sBB:%i", colour[TXT_BRA], asFlow()->target.bb->getId());
   } else {
      PRINT("%s ", operationStr[op]);
      if (op == OP_LINTERP || op == OP_PINTERP)
         PRINT("%s ", interpStr[ipa]);
      switch (op) {
      case OP_SUREDP:
      case OP_ATOM:
         // named sub-operation; out of range values print nothing
         if (subOp < Elements(atomSubOpStr))
            PRINT("%s ", atomSubOpStr[subOp]);
         break;
      default:
         if (subOp)
            PRINT("(SUBOP:%u) ", subOp);
         break;
      }
      if (perPatch)
         PRINT("patch ");
      if (asTex())
         PRINT("%s %s$r%u $s%u %s", asTex()->tex.target.getName(),
               colour[TXT_MEM], asTex()->tex.r, asTex()->tex.s,
               colour[TXT_INSN]);
      if (postFactor)
         PRINT("x2^%i ", postFactor);
      PRINT("%s%s", dnz ? "dnz " : (ftz ? "ftz " : ""), DataTypeStr[dType]);
   }

   if (rnd != ROUND_N)
      PRINT(" %s", RoundModeStr[rnd]);

   // multiple definitions are enclosed in braces, a missing definition of a
   // non-flow instruction is shown as '#'
   if (defExists(1))
      PRINT(" {");
   for (d = 0; defExists(d); ++d) {
      SPACE();
      pos += getDef(d)->print(&buf[pos], size - pos);
   }
   if (d > 1)
      PRINT(" %s}", colour[TXT_INSN]);
   else
   if (!d && !asFlow())
      PRINT(" %s#", colour[TXT_INSN]);

   if (asCmp())
      PRINT(" %s%s", colour[TXT_INSN], CondCodeStr[asCmp()->setCond]);

   // the source type is only shown when it differs from the result type
   if (sType != dType)
      PRINT(" %s%s", colour[TXT_INSN], DataTypeStr[sType]);

   for (s = 0; srcExists(s); ++s) {
      // the predicate was printed up front; sources flagged usedAsPtr are
      // not listed separately
      if (s == predSrc || src(s).usedAsPtr)
         continue;
      const size_t pre = pos;
      SPACE();
      pos += src(s).mod.print(&buf[pos], BUFSZ - pos);
      // a modifier was printed: separate it from the operand
      if (pos > pre + 1)
         SPACE();
      if (src(s).isIndirect(0) || src(s).isIndirect(1))
         pos += getSrc(s)->asSym()->print(&buf[pos], BUFSZ - pos,
                                          getIndirect(s, 0),
                                          getIndirect(s, 1));
      else
         pos += getSrc(s)->print(&buf[pos], BUFSZ - pos, sType);
   }
   if (exit)
      PRINT("%s exit", colour[TXT_INSN]);

   PRINT("%s", colour[TXT_DEFAULT]);

   // terminate explicitly, pos may lie beyond the end of the buffer
   buf[MIN2(pos, BUFSZ - 1)] = 0;

   INFO("%s (%u)\n", buf, encSize);
}
 
// Pass that prints functions, basic blocks and instructions while walking
// over them.
class PrintPass : public Pass
{
public:
   PrintPass() : serial(0) { }

   virtual bool visit(Function *);
   virtual bool visit(BasicBlock *);
   virtual bool visit(Instruction *);

private:
   // running number printed in front of every instruction
   int serial;
};
 
// Print the function header: "<name>:<label> (out <values>, in <values>)".
// Always returns true.
bool
PrintPass::visit(Function *fn)
{
   // scratch buffer for one printed value
   char str[16];

   INFO("\n%s:%i (", fn->getName(), fn->getLabel());

   if (!fn->outs.empty())
      INFO("out");
   for (std::deque<ValueRef>::iterator it = fn->outs.begin();
        it != fn->outs.end();
        ++it) {
      it->get()->print(str, sizeof(str), typeOfSize(it->get()->reg.size));
      INFO(" %s", str);
   }

   // a comma separates the inputs from the outputs if both are present
   if (!fn->ins.empty())
      INFO("%s%sin", colour[TXT_DEFAULT], fn->outs.empty() ? "" : ", ");
   for (std::deque<ValueDef>::iterator it = fn->ins.begin();
        it != fn->ins.end();
        ++it) {
      it->get()->print(str, sizeof(str), typeOfSize(it->get()->reg.size));
      INFO(" %s", str);
   }
   INFO("%s)\n", colour[TXT_DEFAULT]);

   return true;
}
 
// Print the basic block header: id, instruction count, immediate dominator
// (if any), dominance frontier and one line per outgoing CFG edge.
// Always returns true.
bool
PrintPass::visit(BasicBlock *bb)
{
   // disabled: listing of the incident edges
#if 0
   INFO("---\n");
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next())
      INFO(" <- BB:%i (%s)\n",
           BasicBlock::get(ei.getNode())->getId(),
           ei.getEdge()->typeStr());
#endif
   INFO("BB:%i (%u instructions) - ", bb->getId(), bb->getInsnCount());

   if (bb->idom())
      INFO("idom = BB:%i, ", bb->idom()->getId());

   INFO("df = { ");
   for (DLList::Iterator df = bb->getDF().iterator(); !df.end(); df.next())
      INFO("BB:%i ", BasicBlock::get(df)->getId());

   INFO("}\n");

   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next())
      INFO(" -> BB:%i (%s)\n",
           BasicBlock::get(ei.getNode())->getId(),
           ei.getEdge()->typeStr());

   return true;
}
 
// Print one instruction, prefixed with its running number within this pass.
// Always returns true.
bool
PrintPass::visit(Instruction *insn)
{
   INFO("%3i: ", serial++);
   insn->print();
   return true;
}
 
void
Function::print()
{
PrintPass pass;
pass.run(this, true, false);
}
 
// Print the whole program; selects the colour table (coloured or plain)
// before anything is printed.
void
Program::print()
{
   PrintPass pass;
   init_colours();
   pass.run(this, true, false);
}
 
void
Function::printLiveIntervals() const
{
INFO("printing live intervals ...\n");
 
for (ArrayList::Iterator it = allLValues.iterator(); !it.end(); it.next()) {
const Value *lval = Value::get(it)->asLValue();
if (lval && !lval->livei.isEmpty()) {
INFO("livei(%%%i): ", lval->id);
lval->livei.print();
}
}
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
0,0 → 1,2146
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_target.h"
 
#include <stack>
#include <limits>
#include <tr1/unordered_set>
 
namespace nv50_ir {
 
// Upper bound on the size of a register file; asserted in
// RegisterSet::init().
#define MAX_REGISTER_FILE_SIZE 256
 
// Occupancy state of the register files of a target. Each file has a bit set
// in which occupied allocation units are marked, plus bookkeeping about the
// allocation granularity and the highest unit used so far.
class RegisterSet
{
public:
   RegisterSet(const Target *);

   // query file sizes and granularities from the target, allocate the bit sets
   void init(const Target *);
   // mark all units of a file as free; optionally forget the highest used one
   void reset(DataFile, bool resetMax = false);

   void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
   void intersect(DataFile f, const RegisterSet *);

   // find and reserve a free range; the start is returned in @reg
   bool assign(int32_t& reg, DataFile f, unsigned int size);
   void release(DataFile f, int32_t reg, unsigned int size);
   void occupy(DataFile f, int32_t reg, unsigned int size);
   void occupy(const Value *);
   void occupyMask(DataFile f, int32_t reg, uint8_t mask);
   bool isOccupied(DataFile f, int32_t reg, unsigned int size) const;
   // occupy the range unless it is occupied already; returns success
   bool testOccupy(const Value *);
   bool testOccupy(DataFile f, int32_t reg, unsigned int size);

   // highest unit used so far in file @f, -1 if none
   inline int getMaxAssigned(DataFile f) const { return fill[f]; }

   // Size of file @f; halved for 2 byte GPRs if the 16 bit GPR range is
   // restricted.
   inline unsigned int getFileSize(DataFile f, uint8_t regSize) const
   {
      if (restrictedGPR16Range && f == FILE_GPR && regSize == 2)
         return (last[f] + 1) / 2;
      return last[f] + 1;
   }

   // convert a size in bytes to allocation units of file @f
   inline unsigned int units(DataFile f, unsigned int size) const
   {
      return size >> unit[f];
   }
   // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary)
   inline unsigned int idToBytes(const Value *v) const
   {
      return v->reg.data.id * MIN2(v->reg.size, 4);
   }
   inline unsigned int idToUnits(const Value *v) const
   {
      return units(v->reg.file, idToBytes(v));
   }
   // inverse of idToBytes() for values of the same size as @v
   inline int bytesToId(Value *v, unsigned int bytes) const
   {
      if (v->reg.size < 4)
         return units(v->reg.file, bytes);
      return bytes / 4;
   }
   // convert a unit index to a register id; negative units map to -1
   inline int unitsToId(DataFile f, int u, uint8_t size) const
   {
      if (u < 0)
         return -1;
      return (size < 4) ? u : ((u << unit[f]) / 4);
   }

   void print() const;

private:
   // one bit per allocation unit; a set bit marks an occupied unit
   BitSet bits[LAST_REGISTER_FILE + 1];

   int unit[LAST_REGISTER_FILE + 1]; // log2 of allocation granularity

   int last[LAST_REGISTER_FILE + 1]; // file size as reported by target, minus 1
   int fill[LAST_REGISTER_FILE + 1]; // highest unit used so far, -1 if none

   // true for chipsets below 0xc0, see getFileSize()
   const bool restrictedGPR16Range;
};
 
// Clear all bits of file @f, i.e. mark every unit as free. If @resetMax is
// set, the record of the highest used unit is reset to -1 as well.
void
RegisterSet::reset(DataFile f, bool resetMax)
{
   bits[f].fill(0);
   if (resetMax)
      fill[f] = -1;
}
 
void
RegisterSet::init(const Target *targ)
{
for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
DataFile f = static_cast<DataFile>(rf);
last[rf] = targ->getFileSize(f) - 1;
unit[rf] = targ->getFileUnit(f);
fill[rf] = -1;
assert(last[rf] < MAX_REGISTER_FILE_SIZE);
bits[rf].allocate(last[rf] + 1, true);
}
}
 
// Create a register set for @targ with all units of all files free.
// The 16 bit GPR range is treated as restricted on chipsets below 0xc0.
RegisterSet::RegisterSet(const Target *targ)
   : restrictedGPR16Range(targ->getChipset() < 0xc0)
{
   init(targ);
   for (unsigned int i = 0; i <= LAST_REGISTER_FILE; ++i)
      reset(static_cast<DataFile>(i));
}
 
// Forward the @lock / @unlock patterns to BitSet::periodicMask32() of file
// @f.
// NOTE(review): presumably the 32 bit patterns are applied to every word of
// the bit set - confirm against BitSet::periodicMask32().
void
RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
{
   bits[f].periodicMask32(lock, unlock);
}
 
// Restrict the free units of file @f to those that are also free in @set.
// Set bits mark occupied units, so intersecting the free units amounts to
// OR-ing the occupancy bits.
void
RegisterSet::intersect(DataFile f, const RegisterSet *set)
{
   bits[f] |= set->bits[f];
}
 
// Print the occupancy bits of the GPR file (debugging aid); the other files
// are not printed.
void
RegisterSet::print() const
{
   INFO("GPR:");
   bits[FILE_GPR].print();
   INFO("\n");
}
 
// Look for a free range of @size units in file @f. On success the start of
// the range is stored in @reg, the highest used unit is updated and true is
// returned; otherwise @reg is negative and false is returned.
bool
RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size)
{
   reg = bits[f].findFreeRange(size);

   const bool found = !(reg < 0);
   if (found)
      fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
   return found;
}
 
// Test the range of @size units starting at @reg in file @f for occupancy.
// NOTE(review): presumably true if any unit of the range is occupied -
// confirm against BitSet::testRange().
bool
RegisterSet::isOccupied(DataFile f, int32_t reg, unsigned int size) const
{
   return bits[f].testRange(reg, size);
}
 
// Mark the units covered by value @v as occupied: the register id is
// converted to a unit index and the value's size to a unit count.
void
RegisterSet::occupy(const Value *v)
{
   occupy(v->reg.file, idToUnits(v), v->reg.size >> unit[v->reg.file]);
}
 
// Mark units of file @f as occupied according to @mask, whose bit 0
// corresponds to unit @reg: the mask is shifted to the position of @reg
// inside its 32 unit aligned group and applied to that group.
// Unlike occupy(), this does not update the highest used unit.
// NOTE(review): mask bits shifted beyond bit 31 are lost, i.e. the mask must
// not cross a 32 unit boundary - confirm callers guarantee this.
void
RegisterSet::occupyMask(DataFile f, int32_t reg, uint8_t mask)
{
   bits[f].setMask(reg & ~31, static_cast<uint32_t>(mask) << (reg % 32));
}
 
// Mark @size units starting at @reg in file @f as occupied and update the
// highest used unit of the file.
void
RegisterSet::occupy(DataFile f, int32_t reg, unsigned int size)
{
   bits[f].setRange(reg, size);

   INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size);

   fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
}
 
// Occupy the units covered by value @v unless they are occupied already.
// Returns true if the units were free and are now occupied, false otherwise.
bool
RegisterSet::testOccupy(const Value *v)
{
   return testOccupy(v->reg.file,
                     idToUnits(v), v->reg.size >> unit[v->reg.file]);
}
 
// Occupy @size units starting at @reg in file @f unless isOccupied() reports
// the range as taken. Returns whether the range was occupied by this call.
bool
RegisterSet::testOccupy(DataFile f, int32_t reg, unsigned int size)
{
   const bool available = !isOccupied(f, reg, size);

   if (available)
      occupy(f, reg, size);
   return available;
}
 
// Mark @size units starting at @reg in file @f as free again. The record of
// the highest used unit is left unchanged.
void
RegisterSet::release(DataFile f, int32_t reg, unsigned int size)
{
   bits[f].clrRange(reg, size);

   INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %u\n", f, reg, size);
}
 
// Register allocator for a program, together with the helper passes it runs.
class RegAlloc
{
public:
   RegAlloc(Program *program) : prog(program), sequence(0) { }

   bool exec();
   bool execFunc();

private:
   // Inserts MOVs for PHI operands into the predecessor blocks, splitting
   // edges where necessary.
   class PhiMovesPass : public Pass {
   private:
      virtual bool visit(BasicBlock *);
      inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
   };

   // Binds call arguments and results to the registers used by the callee.
   class ArgumentMovesPass : public Pass {
   private:
      virtual bool visit(BasicBlock *);
   };

   class BuildIntervalsPass : public Pass {
   private:
      virtual bool visit(BasicBlock *);
      void collectLiveValues(BasicBlock *);
      void addLiveRange(Value *, const BasicBlock *, int end);
   };

   class InsertConstraintsPass : public Pass {
   public:
      bool exec(Function *func);
   private:
      virtual bool visit(BasicBlock *);

      bool insertConstraintMoves();

      void condenseDefs(Instruction *);
      void condenseSrcs(Instruction *, const int first, const int last);

      void addHazard(Instruction *i, const ValueRef *src);
      void textureMask(TexInstruction *);
      void addConstraint(Instruction *, int s, int n);
      bool detectConflict(Instruction *, int s);

      // target specific functions, TODO: put in subclass or Target
      void texConstraintNV50(TexInstruction *);
      void texConstraintNVC0(TexInstruction *);
      void texConstraintNVE0(TexInstruction *);
      void texConstraintGM107(TexInstruction *);

      std::list<Instruction *> constrList;

      const Target *targ;
   };

   bool buildLiveSets(BasicBlock *);

private:
   Program *prog;
   Function *func;

   // instructions in control flow / chronological order
   ArrayList insns;

   int sequence; // for manual passes through CFG
};
 
// Pair of values; SpillCodeInserter::run() takes a list of these.
typedef std::pair<Value *, Value *> ValuePair;
 
// Inserts spill and unspill code for the values of a function and keeps track
// of the stack slots used for that purpose.
class SpillCodeInserter
{
public:
   SpillCodeInserter(Function *fn) : func(fn), stackSize(0), stackBase(0) { }

   bool run(const std::list<ValuePair>&);

   Symbol *assignSlot(const Interval&, const unsigned int size);
   Value *offsetSlot(Value *, const LValue *);
   inline int32_t getStackSize() const { return stackSize; }

private:
   Function *func;

   // one spill slot
   struct SpillSlot
   {
      Interval occup;
      std::list<Value *> residents; // needed to recalculate occup
      Symbol *sym;
      int32_t offset;
      // size of the slot, taken from its symbol
      inline uint8_t size() const { return sym->reg.size; }
   };
   std::list<SpillSlot> slots;
   int32_t stackSize;
   int32_t stackBase;

   LValue *unspill(Instruction *usei, LValue *, Value *slot);
   void spill(Instruction *defi, Value *slot, LValue *);
};
 
// Extend the live interval of @val by its live range inside @bb, which ends
// at serial number @end. The range begins at the serial of the instruction
// uniquely defining @val if that lies within the block, and at the block's
// entry otherwise.
void
RegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
                                           const BasicBlock *bb,
                                           int end)
{
   Instruction *insn = val->getUniqueInsn();

   // no unique defining instruction: fall back to the block's first one
   if (!insn)
      insn = bb->getFirst();

   assert(bb->getFirst()->serial <= bb->getExit()->serial);
   assert(bb->getExit()->serial + 1 >= end);

   int begin = insn->serial;
   // defined outside of this block: live from the block's entry on
   if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
      begin = bb->getEntry()->serial;

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
            val->id, begin, insn->serial, end);

   if (begin != end) // empty ranges are only added as hazards for fixed regs
      val->livei.extend(begin, end);
}
 
// Tell whether the edge from @p to @b has to be split by a new block:
// this is the case if @b has more than one incident edge and @p has exactly
// two outgoing TREE or FORWARD edges.
bool
RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
{
   if (b->cfg.incidentCount() <= 1)
      return false;

   int forwardEdges = 0;
   Graph::EdgeIterator ei = p->cfg.outgoing();
   while (!ei.end()) {
      if (ei.getType() == Graph::Edge::TREE ||
          ei.getType() == Graph::Edge::FORWARD)
         ++forwardEdges;
      ei.next();
   }
   return forwardEdges == 2;
}
 
// For each operand of each PHI in b, generate a new value by inserting a MOV
// at the end of the block it is coming from and replace the operand with its
// result. This eliminates liveness conflicts and enables us to let values be
// copied to the right register if such a conflict exists nonetheless.
//
// These MOVs are also crucial in making sure the live intervals of phi srces
// are extended until the end of the loop, since they are not included in the
// live-in sets.
bool
RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
{
   Instruction *phi, *mov;
   BasicBlock *pb, *pn;

   // predecessors whose edge to bb has to be split
   std::stack<BasicBlock *> stack;

   // collect first, the edges cannot be modified while iterating over them
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      pb = BasicBlock::get(ei.getNode());
      assert(pb);
      if (needNewElseBlock(bb, pb))
         stack.push(pb);
   }
   // split each collected edge pb -> bb into pb -> pn -> bb with a new,
   // empty block pn
   while (!stack.empty()) {
      pb = stack.top();
      pn = new BasicBlock(func);
      stack.pop();

      pb->cfg.detach(&bb->cfg);
      pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
      pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD);

      // redirect the branch of pb if it targeted bb
      assert(pb->getExit()->op != OP_CALL);
      if (pb->getExit()->asFlow()->target.bb == bb)
         pb->getExit()->asFlow()->target.bb = pn;
   }

   // insert MOVs (phi->src(j) should stem from j-th in-BB)
   int j = 0;
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      pb = BasicBlock::get(ei.getNode());
      // make sure there is an exit instruction to insert the MOVs before
      if (!pb->isTerminated())
         pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));

      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
         // the new value is created from the PHI's result value
         LValue *tmp = new_LValue(func, phi->getDef(0)->asLValue());
         mov = new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));

         mov->setSrc(0, phi->getSrc(j));
         mov->setDef(0, tmp);
         phi->setSrc(j, tmp);

         pb->insertBefore(pb->getExit(), mov);
      }
      ++j;
   }

   return true;
}
 
// Wrap every direct, non-builtin OP_CALL in bb with MOVs that copy its
// arguments into / its results out of temporaries carrying the register ids
// of the callee's inputs / outputs, and add defs for the callee's clobbers.
// When bb is the function's exit block, the function's own clobber list is
// extended from bb->defSet. Always returns true.
bool
RegAlloc::ArgumentMovesPass::visit(BasicBlock *bb)
{
// Bind function call inputs/outputs to the same physical register
// the callee uses, inserting moves as appropriate for the case a
// conflict arises.
for (Instruction *i = bb->getEntry(); i; i = i->next) {
FlowInstruction *cal = i->asFlow();
// TODO: Handle indirect calls.
// Right now they should only be generated for builtins.
if (!cal || cal->op != OP_CALL || cal->builtin || cal->indirect)
continue;
RegisterSet clobberSet(prog->getTarget());

// Bind input values.
// (indirect calls were skipped above, so the cal->indirect branches of the
// two ternaries below are not taken at present)
for (int s = cal->indirect ? 1 : 0; cal->srcExists(s); ++s) {
const int t = cal->indirect ? (s - 1) : s;
LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue());
tmp->reg.data.id = cal->target.fn->ins[t].rep()->reg.data.id;

// tmp = argument, placed right before the call; the call then reads tmp
Instruction *mov =
new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
mov->setDef(0, tmp);
mov->setSrc(0, cal->getSrc(s));
cal->setSrc(s, tmp);

bb->insertBefore(cal, mov);
}

// Bind output values.
for (int d = 0; cal->defExists(d); ++d) {
LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue());
tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id;

// the call now defines tmp; the original result is copied from it
// right after the call
Instruction *mov =
new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
mov->setSrc(0, tmp);
mov->setDef(0, cal->getDef(d));
cal->setDef(d, tmp);

bb->insertAfter(cal, mov);
clobberSet.occupy(tmp);
}

// Bind clobbered values.
// A def is appended for each callee clobber for which
// clobberSet.testOccupy() returns true.
// NOTE(review): presumably testOccupy() fails for registers already
// occupied by the outputs above - confirm against RegisterSet.
for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin();
it != cal->target.fn->clobbers.end();
++it) {
if (clobberSet.testOccupy(*it)) {
Value *tmp = new_LValue(func, (*it)->asLValue());
tmp->reg.data.id = (*it)->reg.data.id;
cal->setDef(cal->defCount(), tmp);
}
}
}

// Update the clobber set of the function.
if (BasicBlock::get(func->cfgExit) == bb) {
func->buildDefSets();
for (unsigned int i = 0; i < bb->defSet.getSize(); ++i)
if (bb->defSet.test(i))
func->clobbers.push_back(func->getLValue(i));
}

return true;
}
 
// Build the set of live-in variables of bb.
//
// Recurses into the successors of bb that cfg.visit(sequence) reports as not
// yet visited, merges their live sets into bb->liveSet and then walks bb's
// instructions backwards, clearing defs and setting LValue sources.
// Returns false only if a recursive call returned false.
bool
RegAlloc::buildLiveSets(BasicBlock *bb)
{
Function *f = bb->getFunction();
BasicBlock *bn;
Instruction *i;
unsigned int s, d;

INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());

bb->liveSet.allocate(func->allLValues.getSize(), false);

// n counts the successors merged so far in this call; the first one is
// copied unless liveSet.marker is already set, all others are OR'ed in.
int n = 0;
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
bn = BasicBlock::get(ei.getNode());
if (bn == bb)
continue;
if (bn->cfg.visit(sequence))
if (!buildLiveSets(bn))
return false;
if (n++ || bb->liveSet.marker)
bb->liveSet |= bn->liveSet;
else
bb->liveSet = bn->liveSet;
}
// no successor contributed and the marker is not set: start from an empty set
if (!n && !bb->liveSet.marker)
bb->liveSet.fill(0);
bb->liveSet.marker = true;

if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
INFO("BB:%i live set of out blocks:\n", bb->getId());
bb->liveSet.print();
}

// if (!bb->getEntry())
// return true;

// function outputs are live at the end of the exit block
if (bb == BasicBlock::get(f->cfgExit)) {
for (std::deque<ValueRef>::iterator it = f->outs.begin();
it != f->outs.end(); ++it) {
assert(it->get()->asLValue());
bb->liveSet.set(it->get()->id);
}
}

// backwards walk from the exit instruction down to the entry instruction
// NOTE(review): bb->getEntry() is dereferenced whenever getExit() is
// non-NULL; presumably both are set together - confirm.
for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
for (d = 0; i->defExists(d); ++d)
bb->liveSet.clr(i->getDef(d)->id);
for (s = 0; i->srcExists(s); ++s)
if (i->getSrc(s)->asLValue())
bb->liveSet.set(i->getSrc(s)->id);
}
// phi results are not live-in
for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
bb->liveSet.clr(i->getDef(0)->id);

if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
INFO("BB:%i live set after propagation:\n", bb->getId());
bb->liveSet.print();
}

return true;
}
 
// Set bb->liveSet to the union of the live sets of all successors reached
// through non-DUMMY edges. A block without outgoing edges but with incoming
// ones gets an empty live set; a block with neither is left untouched.
void
RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
{
BasicBlock *bbA = NULL, *bbB = NULL;

if (bb->cfg.outgoingCount()) {
// trickery to save a loop of OR'ing liveSets
// aliasing works fine with BitSet::setOr
//
// bbB always holds the most recent successor and bbA the one before it;
// from the third successor on, the pair is OR'ed into bb->liveSet and bb
// itself takes the place of bbA as the accumulated operand.
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
if (ei.getType() == Graph::Edge::DUMMY)
continue;
if (bbA) {
bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
bbA = bb;
} else {
bbA = bbB;
}
bbB = BasicBlock::get(ei.getNode());
}
// final OR with the last successor; with a single successor bbA is NULL
// NOTE(review): if every outgoing edge is DUMMY, bbB is still NULL here
// and &bbB->liveSet is formed from a null pointer - confirm this cannot
// happen.
bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
} else
if (bb->cfg.incidentCount()) {
bb->liveSet.fill(0);
}
}
 
// Build the live ranges contributed by bb.
//
// Starting from the live-out set computed by collectLiveValues(), the block
// is walked backwards; each value gets a range from its definition (or the
// block start, see addLiveRange()) to its last use in the block, or to one
// past the exit instruction if it is live-out. Always returns true.
bool
RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
{
collectLiveValues(bb);

INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());

// go through out blocks and delete phi sources that do not originate from
// the current block from the live set
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
BasicBlock *out = BasicBlock::get(ei.getNode());

for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
bb->liveSet.clr(i->getDef(0)->id);

for (int s = 0; i->srcExists(s); ++s) {
assert(i->src(s).getInsn());
if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
bb->liveSet.set(i->getSrc(s)->id);
else
bb->liveSet.clr(i->getSrc(s)->id);
}
}
}

// remaining live-outs are live until end
if (bb->getExit()) {
for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
if (bb->liveSet.test(j))
addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
}

// backwards over the non-phi instructions: a def ends liveness, the first
// (i.e. last in program order) use of a value not yet live starts a range
// that ends at that instruction
for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
for (int d = 0; i->defExists(d); ++d) {
bb->liveSet.clr(i->getDef(d)->id);
if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
i->getDef(d)->livei.extend(i->serial, i->serial);
}

for (int s = 0; i->srcExists(s); ++s) {
if (!i->getSrc(s)->asLValue())
continue;
if (!bb->liveSet.test(i->getSrc(s)->id)) {
bb->liveSet.set(i->getSrc(s)->id);
addLiveRange(i->getSrc(s), bb, i->serial);
}
}
}

// function inputs with a fixed register id get the range [0, 1) in the root block
if (bb == BasicBlock::get(func->cfg.getRoot())) {
for (std::deque<ValueDef>::iterator it = func->ins.begin();
it != func->ins.end(); ++it) {
if (it->get()->reg.data.id >= 0) // add hazard for fixed regs
it->get()->livei.extend(0, 1);
}
}

return true;
}
 
 
// Bit flags passed to GCRA::doCoalesce() selecting which kinds of
// instructions have their operands coalesced in that pass:
// PHI -> OP_PHI, UNION -> OP_UNION / OP_MERGE / OP_SPLIT, MOV -> OP_MOV,
// TEX -> the texture ops.
#define JOIN_MASK_PHI (1 << 0)
#define JOIN_MASK_UNION (1 << 1)
#define JOIN_MASK_MOV (1 << 2)
#define JOIN_MASK_TEX (1 << 3)
 
// Graph-colouring register allocator for a single Function.
//
// allocateRegisters() creates one RIG_Node per LValue of the function,
// coalesces values, builds the register interference graph (RIG), runs the
// simplify and select phases and, if selection fails, hands the values
// collected in mustSpill to the SpillCodeInserter.
class GCRA
{
public:
GCRA(Function *, SpillCodeInserter&);
~GCRA();

// Returns true if every node got a register; on false, spill code has been
// inserted for the values that could not be coloured (see the definition).
bool allocateRegisters(ArrayList& insns);

void printNodeInfo() const;

private:
// Node of the register interference graph; nodes[] is indexed by LValue id.
class RIG_Node : public Graph::Node
{
public:
RIG_Node();

void init(const RegisterSet&, LValue *);

void addInterference(RIG_Node *);
void addRegPreference(RIG_Node *);

// the LValue is stored in the Graph::Node data pointer
inline LValue *getValue() const
{
return reinterpret_cast<LValue *>(data);
}
inline void setValue(LValue *lval) { data = lval; }

// mask of `colors` consecutive bits, shifted by the low 3 bits of reg
inline uint8_t getCompMask() const
{
return ((1 << colors) - 1) << (reg & 7);
}

static inline RIG_Node *get(const Graph::EdgeIterator& ei)
{
return static_cast<RIG_Node *>(ei.getNode());
}

public:
uint32_t degree;
uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable
uint16_t colors; // number of register units needed; 0 if init() was not called

DataFile f; // register file of the value
int32_t reg; // assigned register in units, -1 while unassigned

float weight; // spill weight; infinity unless calculateSpillWeights() lowers it

// list pointers for simplify() phase
RIG_Node *next;
RIG_Node *prev;

// union of the live intervals of all coalesced values (we want to retain
// the separate intervals for testing interference of compound values)
Interval livei;

// nodes whose register is tried first in selectRegisters()
std::list<RIG_Node *> prefRegs;
};

private:
inline RIG_Node *getNode(const LValue *v) const { return &nodes[v->id]; }

void buildRIG(ArrayList&);
bool coalesce(ArrayList&);
bool doCoalesce(ArrayList&, unsigned int mask);
void calculateSpillWeights();
void simplify();
bool selectRegisters();
void cleanup(const bool success);

void simplifyEdge(RIG_Node *, RIG_Node *);
void simplifyNode(RIG_Node *);

bool coalesceValues(Value *, Value *, bool force);
void resolveSplitsAndMerges();
void makeCompound(Instruction *, bool isSplit);

inline void checkInterference(const RIG_Node *, Graph::EdgeIterator&);

inline void insertOrderedTail(std::list<RIG_Node *>&, RIG_Node *);
void checkList(std::list<RIG_Node *>&);

private:
// LValue ids pushed by simplifyNode(), popped by selectRegisters()
std::stack<uint32_t> stack;

// list headers for simplify() phase
// (lo[0] / lo[1]: nodes with degree < degreeLimit and value size <= 4 / > 4
// bytes; hi: all other nodes)
RIG_Node lo[2];
RIG_Node hi;

Graph RIG;
RIG_Node *nodes; // array of nodeCount nodes, allocated per allocateRegisters() call
unsigned int nodeCount;

Function *func;
Program *prog;

// filled in by the constructor for indices 1..16
static uint8_t relDegree[17][17];

RegisterSet regs;

// need to fixup register id for participants of OP_MERGE/SPLIT
std::list<Instruction *> merges;
std::list<Instruction *> splits;

SpillCodeInserter& spill;
// (value, spill slot) pairs collected by selectRegisters()
std::list<ValuePair> mustSpill;
};

uint8_t GCRA::relDegree[17][17];
 
// A fresh node is linked only to itself and has no colours yet; nodes with
// colors == 0 are skipped by printNodeInfo() and calculateSpillWeights().
GCRA::RIG_Node::RIG_Node() : Node(NULL), colors(0), next(this), prev(this)
{
}
 
void
GCRA::printNodeInfo() const
{
for (unsigned int i = 0; i < nodeCount; ++i) {
if (!nodes[i].colors)
continue;
INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X",
i,
nodes[i].f,nodes[i].reg,nodes[i].colors,
nodes[i].weight,
nodes[i].degree, nodes[i].degreeLimit);
 
for (Graph::EdgeIterator ei = nodes[i].outgoing(); !ei.end(); ei.next())
INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
for (Graph::EdgeIterator ei = nodes[i].incident(); !ei.end(); ei.next())
INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
INFO("\n");
}
}
 
void
GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval)
{
setValue(lval);
if (lval->reg.data.id >= 0)
lval->noSpill = lval->fixedReg = 1;
 
colors = regs.units(lval->reg.file, lval->reg.size);
f = lval->reg.file;
reg = -1;
if (lval->reg.data.id >= 0)
reg = regs.idToUnits(lval);
 
weight = std::numeric_limits<float>::infinity();
degree = 0;
degreeLimit = regs.getFileSize(f, lval->reg.size);
degreeLimit -= relDegree[1][colors] - 1;
 
livei.insert(lval->livei);
}
 
// Try to join the values dst and src so that they share one RIG node.
//
// dst, src: values to join; their current join representatives are used.
// force:    if true, the file, size, fixed-register and live-interval overlap
//           checks do not reject the join (mismatches are only warned about).
// Returns true if the values were joined, false if a check rejected it.
bool
GCRA::coalesceValues(Value *dst, Value *src, bool force)
{
LValue *rep = dst->join->asLValue();
LValue *val = src->join->asLValue();

// unless forced, a source that already has a register id becomes the
// representative
if (!force && val->reg.data.id >= 0) {
rep = src->join->asLValue();
val = dst->join->asLValue();
}
RIG_Node *nRep = &nodes[rep->id];
RIG_Node *nVal = &nodes[val->id];

if (src->reg.file != dst->reg.file) {
if (!force)
return false;
WARN("forced coalescing of values in different files !\n");
}
if (!force && dst->reg.size != src->reg.size)
return false;

// rep has a register id that differs from val's
if ((rep->reg.data.id >= 0) && (rep->reg.data.id != val->reg.data.id)) {
if (force) {
if (val->reg.data.id >= 0)
WARN("forced coalescing of values in different fixed regs !\n");
} else {
if (val->reg.data.id >= 0)
return false;
// make sure that there is no overlap with the fixed register of rep
for (ArrayList::Iterator it = func->allLValues.iterator();
!it.end(); it.next()) {
Value *reg = reinterpret_cast<Value *>(it.get())->asLValue();
assert(reg);
if (reg->interfers(rep) && reg->livei.overlaps(nVal->livei))
return false;
}
}
}

if (!force && nRep->livei.overlaps(nVal->livei))
return false;

INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n",
rep->id, rep->reg.data.id, val->id);

// set join pointer of all values joined with val
for (Value::DefIterator def = val->defs.begin(); def != val->defs.end();
++def)
(*def)->get()->join = rep;
assert(rep->join == rep && val->join == rep);

// add val's definitions to rep and extend the live interval of its RIG node
rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end());
nRep->livei.unify(nVal->livei);
return true;
}
 
// Run the coalescing passes in order: phis first, then (depending on the
// chipset family) unions / merges / splits and possibly texture operands,
// and finally plain moves. Returns false as soon as one pass fails.
bool
GCRA::coalesce(ArrayList& insns)
{
   if (!doCoalesce(insns, JOIN_MASK_PHI))
      return false;

   // Pick the mask for the second pass; unknown chipsets skip it.
   unsigned int secondPass = 0;
   switch (func->getProgram()->getTarget()->getChipset() & ~0xf) {
   case 0x50:
   case 0x80:
   case 0x90:
   case 0xa0:
      secondPass = JOIN_MASK_UNION | JOIN_MASK_TEX;
      break;
   case 0xc0:
   case 0xd0:
   case 0xe0:
   case 0xf0:
   case 0x100:
   case 0x110:
      secondPass = JOIN_MASK_UNION;
      break;
   default:
      break;
   }
   if (secondPass && !doCoalesce(insns, secondPass))
      return false;

   return doCoalesce(insns, JOIN_MASK_MOV);
}
 
// Build the 8-bit component mask of a sub-value occupying `size` units at
// unit offset `base` inside a compound value of `compSize` units.
// For compound sizes below 8 the pattern is replicated across the byte:
// size 1 always yields 0xff, size 2 repeats every 2 bits, sizes 3 and 4
// repeat every 4 bits. Larger sizes (asserted <= 8) use the mask as is.
static inline uint8_t makeCompMask(int compSize, int base, int size)
{
   uint8_t bits = ((1 << size) - 1) << base;

   if (compSize == 1)
      return 0xff;

   if (compSize == 2) {
      bits |= (bits << 2);
      return (bits << 4) | bits;
   }

   if (compSize == 3 || compSize == 4)
      return (bits << 4) | bits;

   assert(compSize <= 8);
   return bits;
}
 
// Used when coalescing moves. The non-compound value will become one, e.g.:
// mov b32 $r0 $r2 / merge b64 $r0d { $r0 $r1 }
// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d
//
// The compound flag and mask are copied from src to dst, except when only
// dst is compound, in which case the copy goes the other way.
static inline void copyCompound(Value *dst, Value *src)
{
   LValue *const a = dst->asLValue();
   LValue *const b = src->asLValue();

   const bool reversed = a->compound && !b->compound;
   LValue *const to = reversed ? b : a;
   LValue *const from = reversed ? a : b;

   to->compound = from->compound;
   to->compMask = from->compMask;
}
 
// Mark the operands of a SPLIT (split == true) or MERGE (split == false) as
// parts of a compound value and give each part the component mask that
// corresponds to its position inside the wide value.
void
GCRA::makeCompound(Instruction *insn, bool split)
{
   // the wide value: source of a split, result of a merge
   LValue *rep = (split ? insn->getSrc(0) : insn->getDef(0))->asLValue();

   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
      INFO("makeCompound(split = %i): ", split);
      insn->print();
   }

   const unsigned int size = getNode(rep)->colors;
   unsigned int base = 0;

   if (!rep->compound)
      rep->compMask = 0xff;
   rep->compound = 1;

   int c = 0;
   while (split ? insn->defExists(c) : insn->srcExists(c)) {
      Value *part = split ? insn->getDef(c) : insn->getSrc(c);
      LValue *val = part->asLValue();
      const unsigned int width = getNode(val)->colors;

      val->compound = 1;
      if (val->compMask == 0)
         val->compMask = 0xff;
      // narrow down to the units this part occupies
      val->compMask &= makeCompMask(size, base, width);
      assert(val->compMask);

      INFO_DBG(prog->dbgFlags, REG_ALLOC, "compound: %%%i:%02x <- %%%i:%02x\n",
               rep->id, rep->compMask, val->id, val->compMask);

      base += width;
      ++c;
   }
   // the parts must cover the wide value exactly
   assert(base == size);
}
 
// Walk all instructions and coalesce the operands of those kinds selected
// by mask (a combination of the JOIN_MASK_* flags).
//
// Returns false only if the operands of a phi could not be coalesced; all
// other failed joins are ignored.
bool
GCRA::doCoalesce(ArrayList& insns, unsigned int mask)
{
int c, n;

for (n = 0; n < insns.getSize(); ++n) {
Instruction *i;
Instruction *insn = reinterpret_cast<Instruction *>(insns.get(n));

switch (insn->op) {
case OP_PHI:
if (!(mask & JOIN_MASK_PHI))
break;
// every phi source must end up joined with the phi result
for (c = 0; insn->srcExists(c); ++c)
if (!coalesceValues(insn->getDef(0), insn->getSrc(c), false)) {
// this is bad
ERROR("failed to coalesce phi operands\n");
return false;
}
break;
case OP_UNION:
case OP_MERGE:
if (!(mask & JOIN_MASK_UNION))
break;
// forced join of all sources with the result
for (c = 0; insn->srcExists(c); ++c)
coalesceValues(insn->getDef(0), insn->getSrc(c), true);
if (insn->op == OP_MERGE) {
// remembered for resolveSplitsAndMerges()
merges.push_back(insn);
if (insn->srcExists(1))
makeCompound(insn, false);
}
break;
case OP_SPLIT:
if (!(mask & JOIN_MASK_UNION))
break;
// remembered for resolveSplitsAndMerges()
splits.push_back(insn);
for (c = 0; insn->defExists(c); ++c)
coalesceValues(insn->getSrc(0), insn->getDef(c), true);
makeCompound(insn, true);
break;
case OP_MOV:
if (!(mask & JOIN_MASK_MOV))
break;
i = NULL;
if (!insn->getDef(0)->uses.empty())
i = (*insn->getDef(0)->uses.begin())->getInsn();
// if this is a constraint-move there will only be a single use
if (i && i->op == OP_MERGE) // do we really still need this ?
break;
// only join if the source has a unique defining instruction without
// constrained defs
i = insn->getSrc(0)->getUniqueInsn();
if (i && !i->constrainedDefs()) {
if (coalesceValues(insn->getDef(0), insn->getSrc(0), false))
copyCompound(insn->getSrc(0), insn->getDef(0));
}
break;
case OP_TEX:
case OP_TXB:
case OP_TXL:
case OP_TXF:
case OP_TXQ:
case OP_TXD:
case OP_TXG:
case OP_TXLQ:
case OP_TEXCSAA:
case OP_TEXPREP:
if (!(mask & JOIN_MASK_TEX))
break;
// forced join of def c with source c, stopping at the predicate source
for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c)
coalesceValues(insn->getDef(c), insn->getSrc(c), true);
break;
default:
break;
}
}
return true;
}
 
// Record that this node and `node` interfere: both degrees grow by the
// relative degree of the other node, and a CROSS edge is added from this
// node to `node`.
void
GCRA::RIG_Node::addInterference(RIG_Node *node)
{
   const uint8_t costForThis = relDegree[node->colors][colors];
   const uint8_t costForOther = relDegree[colors][node->colors];

   degree += costForThis;
   node->degree += costForOther;

   attach(node, Graph::Edge::CROSS);
}
 
// Append `node` to the list of nodes whose register selectRegisters() tries
// first for this node.
void
GCRA::RIG_Node::addRegPreference(RIG_Node *node)
{
   this->prefRegs.insert(this->prefRegs.end(), node);
}
 
// Set up an allocator for function fn that reports unallocatable values to
// the given spill code inserter.
//
// nodes / nodeCount are explicitly initialised: the destructor releases
// nodes, and it can run without allocateRegisters() ever having been called
// (e.g. when an earlier pass fails), in which case the pointer would
// otherwise be indeterminate.
GCRA::GCRA(Function *fn, SpillCodeInserter& spill) :
   nodes(NULL),
   nodeCount(0),
   func(fn),
   prog(fn->getProgram()),
   regs(fn->getProgram()->getTarget()),
   spill(spill)
{
   // initialize relative degrees array - i takes away from j
   for (int i = 1; i <= 16; ++i)
      for (int j = 1; j <= 16; ++j)
         relDegree[i][j] = j * ((i + j - 1) / j);
}
 
// Release the node array if one is still allocated (cleanup() normally
// frees it and resets the pointer to NULL).
GCRA::~GCRA()
{
   // delete[] on a null pointer is a no-op, so no explicit check is needed
   delete[] nodes;
}
 
// Debug check: every node in lst must represent a join representative and
// the list must be ordered by ascending live interval start.
void
GCRA::checkList(std::list<RIG_Node *>& lst)
{
   typedef std::list<RIG_Node *>::iterator NodeIter;

   GCRA::RIG_Node *last = NULL;

   for (NodeIter cur = lst.begin(); cur != lst.end(); ++cur) {
      assert((*cur)->getValue()->join == (*cur)->getValue());
      assert(!last || last->livei.begin() <= (*cur)->livei.begin());
      last = *cur;
   }
}
 
// Insert node into list, keeping the list sorted by live interval start.
// The search runs from the tail because nodes mostly arrive in order; nodes
// with an empty live interval are not inserted at all.
void
GCRA::insertOrderedTail(std::list<RIG_Node *>& list, RIG_Node *node)
{
   if (node->livei.isEmpty())
      return;

   // only the intervals of joined values don't necessarily arrive in order
   std::list<RIG_Node *>::iterator pos = list.end();
   while (pos != list.begin()) {
      std::list<RIG_Node *>::iterator before = pos;
      --before;
      if ((*before)->livei.begin() <= node->livei.begin())
         break;
      pos = before;
   }
   list.insert(pos, node);
}
 
// Build the register interference graph.
//
// All join representatives (function inputs and instruction defs) are
// sorted by live interval start and then swept once: nodes whose interval
// has ended are dropped from the active list, the remaining ones of the
// same register file that overlap the current node interfere with it.
void
GCRA::buildRIG(ArrayList& insns)
{
   std::list<RIG_Node *> pending, active;

   std::deque<ValueDef>::iterator in = func->ins.begin();
   for (; in != func->ins.end(); ++in)
      insertOrderedTail(pending, getNode(in->get()->asLValue()));

   for (int n = 0; n < insns.getSize(); ++n) {
      Instruction *insn = reinterpret_cast<Instruction *>(insns.get(n));
      for (int d = 0; insn->defExists(d); ++d) {
         // coalesced values are represented by their rep only
         if (insn->getDef(d)->rep() != insn->getDef(d))
            continue;
         insertOrderedTail(pending, getNode(insn->getDef(d)->asLValue()));
      }
   }
   checkList(pending);

   for (; !pending.empty(); pending.pop_front()) {
      RIG_Node *cur = pending.front();

      std::list<RIG_Node *>::iterator it = active.begin();
      while (it != active.end()) {
         RIG_Node *other = *it;

         if (other->livei.end() <= cur->livei.begin()) {
            // expired, cannot interfere with cur or anything after it
            it = active.erase(it);
            continue;
         }
         if (other->f == cur->f && other->livei.overlaps(cur->livei))
            cur->addInterference(other);
         ++it;
      }
      active.push_back(cur);
   }
}
 
void
GCRA::calculateSpillWeights()
{
for (unsigned int i = 0; i < nodeCount; ++i) {
RIG_Node *const n = &nodes[i];
if (!nodes[i].colors || nodes[i].livei.isEmpty())
continue;
if (nodes[i].reg >= 0) {
// update max reg
regs.occupy(n->f, n->reg, n->colors);
continue;
}
LValue *val = nodes[i].getValue();
 
if (!val->noSpill) {
int rc = 0;
for (Value::DefIterator it = val->defs.begin();
it != val->defs.end();
++it)
rc += (*it)->get()->refCount();
 
nodes[i].weight =
(float)rc * (float)rc / (float)nodes[i].livei.extent();
}
 
if (nodes[i].degree < nodes[i].degreeLimit) {
int l = 0;
if (val->reg.size > 4)
l = 1;
DLLIST_ADDHEAD(&lo[l], &nodes[i]);
} else {
DLLIST_ADDHEAD(&hi, &nodes[i]);
}
}
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
printNodeInfo();
}
 
// Node a is being removed from the graph: lower the degree of its neighbour
// b accordingly and, if b thereby drops below its degree limit while still
// being on a list, move it to the matching lo list.
void
GCRA::simplifyEdge(RIG_Node *a, RIG_Node *b)
{
   const bool wasConstrained = b->degree >= b->degreeLimit;

   INFO_DBG(prog->dbgFlags, REG_ALLOC,
            "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n",
            a->getValue()->id, a->degree, a->degreeLimit,
            b->getValue()->id, b->degree, b->degreeLimit);

   b->degree -= relDegree[a->colors][b->colors];

   if (!wasConstrained || b->degree >= b->degreeLimit)
      return;
   if (DLLIST_EMPTY(b))
      return;

   const int l = (b->getValue()->reg.size > 4) ? 1 : 0;
   DLLIST_DEL(b);
   DLLIST_ADDTAIL(&lo[l], b);
}
 
// Remove node from the simplify lists: update all neighbours, unlink the
// node and push its value id on the select stack.
void
GCRA::simplifyNode(RIG_Node *node)
{
   Graph::EdgeIterator outEdge = node->outgoing();
   while (!outEdge.end()) {
      simplifyEdge(node, RIG_Node::get(outEdge));
      outEdge.next();
   }

   Graph::EdgeIterator inEdge = node->incident();
   while (!inEdge.end()) {
      simplifyEdge(node, RIG_Node::get(inEdge));
      inEdge.next();
   }

   DLLIST_DEL(node);
   stack.push(node->getValue()->id);

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "SIMPLIFY: pushed %%%i%s\n",
            node->getValue()->id,
            (node->degree < node->degreeLimit) ? "" : "(spill)");
}
 
void
GCRA::simplify()
{
for (;;) {
if (!DLLIST_EMPTY(&lo[0])) {
do {
simplifyNode(lo[0].next);
} while (!DLLIST_EMPTY(&lo[0]));
} else
if (!DLLIST_EMPTY(&lo[1])) {
simplifyNode(lo[1].next);
} else
if (!DLLIST_EMPTY(&hi)) {
RIG_Node *best = hi.next;
float bestScore = best->weight / (float)best->degree;
// spill candidate
for (RIG_Node *it = best->next; it != &hi; it = it->next) {
float score = it->weight / (float)it->degree;
if (score < bestScore) {
best = it;
bestScore = score;
}
}
if (isinf(bestScore)) {
ERROR("no viable spill candidates left\n");
break;
}
simplifyNode(best);
} else {
break;
}
}
}
 
// Mark in regs the registers that the neighbour reached through ei takes
// away from node. Neighbours without an assigned register are ignored.
//
// If neither value is compound, the neighbour's whole register range is
// occupied. Otherwise each pair of overlapping definitions of the two
// values is examined and only the components selected by the combined
// masks are occupied, within the aligned group of 8 units that contains
// the neighbour's register.
void
GCRA::checkInterference(const RIG_Node *node, Graph::EdgeIterator& ei)
{
const RIG_Node *intf = RIG_Node::get(ei);

if (intf->reg < 0)
return;
const LValue *vA = node->getValue();
const LValue *vB = intf->getValue();

// units occupied by the neighbour, relative to its aligned group of 8
const uint8_t intfMask = ((1 << intf->colors) - 1) << (intf->reg & 7);

if (vA->compound | vB->compound) {
// NOTE: this only works for >aligned< register tuples !
for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) {
for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) {
const LValue *vD = (*D)->get()->asLValue();
const LValue *vd = (*d)->get()->asLValue();

if (!vD->livei.overlaps(vd->livei)) {
INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no overlap\n",
vD->id, vd->id);
continue;
}

// start with the components of node's definition (all if it is not
// compound) and restrict them to those the neighbour's definition uses
uint8_t mask = vD->compound ? vD->compMask : ~0;
if (vd->compound) {
assert(vB->compound);
mask &= vd->compMask & vB->compMask;
} else {
mask &= intfMask;
}

INFO_DBG(prog->dbgFlags, REG_ALLOC,
"(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n",
vD->id,
vD->compound ? vD->compMask : 0xff,
vd->id,
vd->compound ? vd->compMask : intfMask,
vB->compMask, intf->reg & ~7, mask);
if (mask)
regs.occupyMask(node->f, intf->reg & ~7, mask);
}
}
} else {
INFO_DBG(prog->dbgFlags, REG_ALLOC,
"(%%%i) X (%%%i): $r%i + %u\n",
vA->id, vB->id, intf->reg, intf->colors);
regs.occupy(node->f, intf->reg, intf->colors);
}
}
 
bool
GCRA::selectRegisters()
{
INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nSELECT phase\n");
 
while (!stack.empty()) {
RIG_Node *node = &nodes[stack.top()];
stack.pop();
 
regs.reset(node->f);
 
INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nNODE[%%%i, %u colors]\n",
node->getValue()->id, node->colors);
 
for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
checkInterference(node, ei);
for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
checkInterference(node, ei);
 
if (!node->prefRegs.empty()) {
for (std::list<RIG_Node *>::const_iterator it = node->prefRegs.begin();
it != node->prefRegs.end();
++it) {
if ((*it)->reg >= 0 &&
regs.testOccupy(node->f, (*it)->reg, node->colors)) {
node->reg = (*it)->reg;
break;
}
}
}
if (node->reg >= 0)
continue;
LValue *lval = node->getValue();
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
regs.print();
bool ret = regs.assign(node->reg, node->f, node->colors);
if (ret) {
INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg);
lval->compMask = node->getCompMask();
} else {
INFO_DBG(prog->dbgFlags, REG_ALLOC, "must spill: %%%i (size %u)\n",
lval->id, lval->reg.size);
Symbol *slot = NULL;
if (lval->reg.file == FILE_GPR)
slot = spill.assignSlot(node->livei, lval->reg.size);
mustSpill.push_back(ValuePair(lval, slot));
}
}
if (!mustSpill.empty())
return false;
for (unsigned int i = 0; i < nodeCount; ++i) {
LValue *lval = nodes[i].getValue();
if (nodes[i].reg >= 0 && nodes[i].colors > 0)
lval->reg.data.id =
regs.unitsToId(nodes[i].f, nodes[i].reg, lval->reg.size);
}
return true;
}
 
// Run one complete allocation attempt over the ordered instruction list.
//
// Returns true if all values received a register. On false, either
// coalescing failed or spill code has been inserted for the values that
// could not be coloured. cleanup() is run before returning in both cases.
bool
GCRA::allocateRegisters(ArrayList& insns)
{
   INFO_DBG(prog->dbgFlags, REG_ALLOC,
            "allocateRegisters to %u instructions\n", insns.getSize());

   // one RIG node per LValue slot of the function
   nodeCount = func->allLValues.getSize();
   nodes = new RIG_Node[nodeCount];
   if (!nodes)
      return false;
   for (unsigned int idx = 0; idx < nodeCount; ++idx) {
      LValue *lval = reinterpret_cast<LValue *>(func->allLValues.get(idx));
      if (!lval)
         continue;
      nodes[idx].init(regs, lval);
      RIG.insert(&nodes[idx]);
   }

   // coalesce first, we use only 1 RIG node for a group of joined values
   bool ok = coalesce(insns);
   if (ok) {
      if (func->getProgram()->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
         func->printLiveIntervals();

      buildRIG(insns);
      calculateSpillWeights();
      simplify();

      ok = selectRegisters();
      if (ok) {
         prog->maxGPR = std::max(prog->maxGPR, regs.getMaxAssigned(FILE_GPR));
      } else {
         INFO_DBG(prog->dbgFlags, REG_ALLOC,
                  "selectRegisters failed, inserting spill code ...\n");
         regs.reset(FILE_GPR, true);
         spill.run(mustSpill);
         if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
            func->print();
      }
   }

   cleanup(ok);
   return ok;
}
 
void
GCRA::cleanup(const bool success)
{
mustSpill.clear();
 
for (ArrayList::Iterator it = func->allLValues.iterator();
!it.end(); it.next()) {
LValue *lval = reinterpret_cast<LValue *>(it.get());
 
lval->livei.clear();
 
lval->compound = 0;
lval->compMask = 0;
 
if (lval->join == lval)
continue;
 
if (success) {
lval->reg.data.id = lval->join->reg.data.id;
} else {
for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
++d)
lval->join->defs.remove(*d);
lval->join = lval;
}
}
 
if (success)
resolveSplitsAndMerges();
splits.clear(); // avoid duplicate entries on next coalesce pass
merges.clear();
 
delete[] nodes;
nodes = NULL;
}
 
// Return a local-memory symbol of `size` bytes that can hold a value with
// live interval livei.
//
// The loop below is written to reuse an existing slot whose occupancy does
// not overlap livei; if no slot symbol was found, a new slot is appended at
// the (size-aligned) end of the stack and stackSize grows accordingly.
Symbol *
SpillCodeInserter::assignSlot(const Interval &livei, const unsigned int size)
{
SpillSlot slot;
int32_t offsetBase = stackSize;
int32_t offset;
std::list<SpillSlot>::iterator pos = slots.end(), it = slots.begin();

// round offsetBase up to a multiple of size
if (offsetBase % size)
offsetBase += size - (offsetBase % size);

slot.sym = NULL;

// NOTE(review): offset starts at offsetBase, which is stackSize rounded up,
// so `offset < stackSize` is already false on entry and this loop body is
// not executed as written; a new slot is always created below. Possibly
// stackBase (assigned in run()) was meant as the start - confirm.
for (offset = offsetBase; offset < stackSize; offset += size) {
const int32_t entryEnd = offset + size;
// skip slots that lie before the candidate range [offset, entryEnd)
while (it != slots.end() && it->offset < offset)
++it;
if (it == slots.end()) // no slots left
break;
std::list<SpillSlot>::iterator bgn = it;

// look for an occupied slot inside the candidate range
while (it != slots.end() && it->offset < entryEnd) {
// NOTE(review): print() is called unconditionally here, not guarded by a
// debug flag - looks like leftover debugging output, confirm.
it->occup.print();
if (it->occup.overlaps(livei))
break;
++it;
}
if (it == slots.end() || it->offset >= entryEnd) {
// fits
for (; bgn != slots.end() && bgn->offset < entryEnd; ++bgn) {
bgn->occup.insert(livei);
if (bgn->size() == size)
slot.sym = bgn->sym;
}
break;
}
}
if (!slot.sym) {
stackSize = offset + size;
slot.offset = offset;
slot.sym = new_Symbol(func->getProgram(), FILE_MEMORY_LOCAL);
// without a stack pointer the symbol address is offset by func->tlsBase
if (!func->stackPtr)
offset += func->tlsBase;
slot.sym->setAddress(NULL, offset);
slot.sym->reg.size = size;
// pos is slots.end(), i.e. the new slot is appended
slots.insert(pos, slot)->occup.insert(livei);
}
return slot.sym;
}
 
// Return the slot to use for lval inside the spill location `base`.
//
// For a compound value whose component mask does not include bit 0, a
// shallow clone of base is returned whose offset is advanced by the index
// of the first set mask bit times the value size; otherwise base itself.
Value *
SpillCodeInserter::offsetSlot(Value *base, const LValue *lval)
{
   const bool needsOffset = lval->compound && !(lval->compMask & 0x1);
   if (!needsOffset)
      return base;

   Value *part = cloneShallow(func, base);
   const int firstComp = ffs(lval->compMask) - 1;

   part->reg.data.offset += firstComp * lval->reg.size;
   part->reg.size = lval->reg.size;

   return part;
}
 
// Insert, right after defi, the instruction that saves lval to its spill
// location: a STORE if the slot is in local memory (lval is then marked
// noSpill), otherwise a CVT into the slot value.
void
SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
{
   const DataType ty = typeOfSize(lval->reg.size);

   slot = offsetSlot(slot, lval);

   const bool toMemory = (slot->reg.file == FILE_MEMORY_LOCAL);
   Instruction *save = new_Instruction(func, toMemory ? OP_STORE : OP_CVT, ty);

   if (toMemory) {
      save->setSrc(0, slot);
      save->setSrc(1, lval);
      lval->noSpill = 1;
   } else {
      save->setDef(0, slot);
      save->setSrc(0, lval);
   }
   defi->bb->insertAfter(defi, save);
}
 
// Insert, right before usei, the instruction that reloads a spilled value
// from its slot into a fresh shallow clone of lval, and return that clone.
// A LOAD is used for local memory slots (the clone is then marked noSpill),
// a CVT otherwise.
LValue *
SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
{
   const DataType ty = typeOfSize(lval->reg.size);

   slot = offsetSlot(slot, lval);
   lval = cloneShallow(func, lval);

   const bool fromMemory = (slot->reg.file == FILE_MEMORY_LOCAL);
   if (fromMemory)
      lval->noSpill = 1;

   Instruction *reload =
      new_Instruction(func, fromMemory ? OP_LOAD : OP_CVT, ty);
   reload->setDef(0, lval);
   reload->setSrc(0, slot);

   usei->bb->insertBefore(usei, reload);
   return lval;
}
 
 
// For each value that is to be spilled, go through all its definitions.
// A value can have multiple definitions if it has been coalesced before.
// For each definition, first go through all its uses and insert an unspill
// instruction before it, then replace the use with the temporary register.
// Unspill can be either a load from memory or simply a move to another
// register file.
// For "Pseudo" instructions (like PHI, SPLIT, MERGE) we can erase the use
// if we have spilled to a memory location, or simply with the new register.
// No load or conversion instruction should be needed.
bool
SpillCodeInserter::run(const std::list<ValuePair>& lst)
{
for (std::list<ValuePair>::const_iterator it = lst.begin(); it != lst.end();
++it) {
LValue *lval = it->first->asLValue();
Symbol *mem = it->second ? it->second->asSym() : NULL;
 
// Keep track of which instructions to delete later. Deleting them
// inside the loop is unsafe since a single instruction may have
// multiple destinations that all need to be spilled (like OP_SPLIT).
std::tr1::unordered_set<Instruction *> to_del;
 
for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
++d) {
Value *slot = mem ?
static_cast<Value *>(mem) : new_LValue(func, FILE_GPR);
Value *tmp = NULL;
Instruction *last = NULL;
 
LValue *dval = (*d)->get()->asLValue();
Instruction *defi = (*d)->getInsn();
 
// Unspill at each use *before* inserting spill instructions,
// we don't want to have the spill instructions in the use list here.
while (!dval->uses.empty()) {
ValueRef *u = *dval->uses.begin();
Instruction *usei = u->getInsn();
assert(usei);
if (usei->isPseudo()) {
tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? NULL : slot;
last = NULL;
} else
if (!last || usei != last->next) { // TODO: sort uses
tmp = unspill(usei, dval, slot);
last = usei;
}
u->set(tmp);
}
 
assert(defi);
if (defi->isPseudo()) {
d = lval->defs.erase(d);
--d;
if (slot->reg.file == FILE_MEMORY_LOCAL)
to_del.insert(defi);
else
defi->setDef(0, slot);
} else {
spill(defi, slot, dval);
}
}
 
for (std::tr1::unordered_set<Instruction *>::const_iterator it = to_del.begin();
it != to_del.end(); ++it)
delete_Instruction(func->getProgram(), *it);
}
 
// TODO: We're not trying to reuse old slots in a potential next iteration.
// We have to update the slots' livei intervals to be able to do that.
stackBase = stackSize;
slots.clear();
return true;
}
 
bool
RegAlloc::exec()
{
for (IteratorRef it = prog->calls.iteratorDFS(false);
!it->end(); it->next()) {
func = Function::get(reinterpret_cast<Graph::Node *>(it->get()));
 
func->tlsBase = prog->tlsSize;
if (!execFunc())
return false;
prog->tlsSize += func->tlsSize;
}
return true;
}
 
bool
RegAlloc::execFunc()
{
InsertConstraintsPass insertConstr;
PhiMovesPass insertPhiMoves;
ArgumentMovesPass insertArgMoves;
BuildIntervalsPass buildIntervals;
SpillCodeInserter insertSpills(func);
 
GCRA gcra(func, insertSpills);
 
unsigned int i, retries;
bool ret;
 
if (!func->ins.empty()) {
// Insert a nop at the entry so inputs only used by the first instruction
// don't count as having an empty live range.
Instruction *nop = new_Instruction(func, OP_NOP, TYPE_NONE);
BasicBlock::get(func->cfg.getRoot())->insertHead(nop);
}
 
ret = insertConstr.exec(func);
if (!ret)
goto out;
 
ret = insertPhiMoves.run(func);
if (!ret)
goto out;
 
ret = insertArgMoves.run(func);
if (!ret)
goto out;
 
// TODO: need to fix up spill slot usage ranges to support > 1 retry
for (retries = 0; retries < 3; ++retries) {
if (retries && (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC))
INFO("Retry: %i\n", retries);
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
func->print();
 
// spilling to registers may add live ranges, need to rebuild everything
ret = true;
for (sequence = func->cfg.nextSequence(), i = 0;
ret && i <= func->loopNestingBound;
sequence = func->cfg.nextSequence(), ++i)
ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
// reset marker
for (ArrayList::Iterator bi = func->allBBlocks.iterator();
!bi.end(); bi.next())
BasicBlock::get(bi)->liveSet.marker = false;
if (!ret)
break;
func->orderInstructions(this->insns);
 
ret = buildIntervals.run(func);
if (!ret)
break;
ret = gcra.allocateRegisters(insns);
if (ret)
break; // success
}
INFO_DBG(prog->dbgFlags, REG_ALLOC, "RegAlloc done: %i\n", ret);
 
func->tlsSize = insertSpills.getStackSize();
out:
return ret;
}
 
// TODO: check if modifying Instruction::join here breaks anything
void
GCRA::resolveSplitsAndMerges()
{
for (std::list<Instruction *>::iterator it = splits.begin();
it != splits.end();
++it) {
Instruction *split = *it;
unsigned int reg = regs.idToBytes(split->getSrc(0));
for (int d = 0; split->defExists(d); ++d) {
Value *v = split->getDef(d);
v->reg.data.id = regs.bytesToId(v, reg);
v->join = v;
reg += v->reg.size;
}
}
splits.clear();
 
for (std::list<Instruction *>::iterator it = merges.begin();
it != merges.end();
++it) {
Instruction *merge = *it;
unsigned int reg = regs.idToBytes(merge->getDef(0));
for (int s = 0; merge->srcExists(s); ++s) {
Value *v = merge->getSrc(s);
v->reg.data.id = regs.bytesToId(v, reg);
v->join = v;
// If the value is defined by a phi/union node, we also need to
// perform the same fixup on that node's sources, since after RA
// their registers should be identical.
if (v->getInsn()->op == OP_PHI || v->getInsn()->op == OP_UNION) {
Instruction *phi = v->getInsn();
for (int phis = 0; phi->srcExists(phis); ++phis)
phi->getSrc(phis)->join = v;
}
reg += v->reg.size;
}
}
merges.clear();
}
 
bool Program::registerAllocation()
{
RegAlloc ra(this);
return ra.exec();
}
 
// Reset the constraint list, run the pass over `ir`, and on success insert
// the constraint moves. Returns false as soon as either step fails.
bool
RegAlloc::InsertConstraintsPass::exec(Function *ir)
{
   constrList.clear();

   if (!run(ir, true, true))
      return false;
   return insertConstraintMoves();
}
 
// TODO: make part of texture insn
//
// Drop unreferenced results of a texture instruction: rebuild tex.mask so it
// only contains components whose def has a non-zero refCount(), pack the
// surviving defs into the lowest def slots and clear the remaining slots
// (up to 4).
void
RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
{
   Value *kept[4];
   int nKept = 0;
   int defIdx = 0;
   uint8_t usedMask = 0;

   for (int comp = 0; comp < 4; ++comp) {
      if (tex->tex.mask & (1 << comp)) {
         // defs exist only for components enabled in the old mask
         Value *const result = tex->getDef(defIdx);
         ++defIdx;
         if (result->refCount()) {
            usedMask |= 1 << comp;
            kept[nKept++] = result;
         }
      }
   }
   tex->tex.mask = usedMask;

   int slot = 0;
   while (slot < nKept) {
      tex->setDef(slot, kept[slot]);
      ++slot;
   }
   while (slot < 4) {
      tex->setDef(slot, NULL);
      ++slot;
   }
}
 
// Decide whether source `s` of constraint instruction `cst` conflicts with
// another constraint. Returns true when
//  - the value is used by any instruction other than `cst`,
//  - the same value appears again in a later source slot of `cst`,
//  - the value has no defining instruction, or
//  - its defining instruction reports constrainedDefs().
bool
RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
{
   Value *const val = cst->getSrc(s);

   // current register allocation can't handle it if a value participates in
   // multiple constraints
   Value::UseIterator use = val->uses.begin();
   while (use != val->uses.end()) {
      if ((*use)->getInsn() != cst)
         return true;
      ++use;
   }

   // earlier slots were already checked by the calls made for them, so only
   // slots after s need to be compared
   int later = s + 1;
   while (cst->srcExists(later)) {
      if (cst->getSrc(later) == val)
         return true;
      ++later;
   }

   Instruction *const producer = val->getInsn();
   if (!producer)
      return true;
   return producer->constrainedDefs();
}
 
// Route sources [s, s + n) of `i` through an OP_CONSTRAINT instruction.
//
// An existing constraint from constrList is reused when its first n sources
// are exactly those values; the scan stops at the first listed constraint
// whose block does not dominate i's block. Otherwise a new constraint with
// fresh GPR defs is inserted right before `i` and appended to constrList.
void
RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
{
   typedef std::list<Instruction *>::iterator CstIter;

   // first, look for an existing identical constraint op
   for (CstIter it = constrList.begin(); it != constrList.end(); ++it) {
      Instruction *const known = *it;
      if (!i->bb->dominatedBy(known->bb))
         break;

      bool identical = true;
      for (int k = 0; identical && k < n; ++k)
         identical = (known->getSrc(k) == i->getSrc(s + k));
      if (!identical)
         continue;

      for (int k = 0; k < n; ++k)
         i->setSrc(s + k, known->getDef(k));
      return;
   }

   Instruction *const fresh = new_Instruction(func, OP_CONSTRAINT, i->dType);
   for (int k = 0; k < n; ++k) {
      fresh->setDef(k, new_LValue(func, FILE_GPR));
      fresh->setSrc(k, i->getSrc(s + k));
      i->setSrc(s + k, fresh->getDef(k));
   }
   i->bb->insertBefore(i, fresh);

   constrList.push_back(fresh);
}
 
// Add a dummy use of the pointer source of >= 8 byte loads after the load,
// to prevent the pointer from being assigned a register that overlaps the
// load's destination, which would produce random corruption.
//
// The dummy use is an OP_NOP placed directly after `i` whose only source is
// the value referenced by `src`.
void
RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
{
   Instruction *const dummyUse = new_Instruction(func, OP_NOP, TYPE_NONE);
   Value *const pointer = src->get();

   dummyUse->setSrc(0, pointer);
   i->bb->insertAfter(i, dummyUse);
}
 
// b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q
//
// Replace the leading run of GPR defs of `insn` by a single wide def and add
// an OP_SPLIT after `insn` that hands the pieces back to the original
// values. Nothing happens when fewer than two leading GPR defs exist. Any
// defs after the run are shifted down to follow the new wide def. The split
// inherits insn's predicate and is recorded in constrList.
void
RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn)
{
   uint8_t totalSize = 0;
   int nGpr = 0;
   while (insn->defExists(nGpr) && insn->def(nGpr).getFile() == FILE_GPR) {
      totalSize += insn->getDef(nGpr)->reg.size;
      ++nGpr;
   }
   if (nGpr < 2)
      return;

   LValue *const wide = new_LValue(func, FILE_GPR);
   wide->reg.size = totalSize;

   Instruction *const splitInsn =
      new_Instruction(func, OP_SPLIT, typeOfSize(totalSize));
   splitInsn->setSrc(0, wide);
   for (int d = 0; d < nGpr; ++d) {
      splitInsn->setDef(d, insn->getDef(d));
      insn->setDef(d, NULL);
   }
   insn->setDef(0, wide);

   // close the gap left by the condensed defs
   int dst = 1;
   for (int from = nGpr; insn->defExists(from); ++from) {
      insn->setDef(dst, insn->getDef(from));
      insn->setDef(from, NULL);
      ++dst;
   }

   // carry over predicate if any (mainly for OP_UNION uses)
   splitInsn->setPredicate(insn->cc, insn->getPredicate());

   insn->bb->insertAfter(insn, splitInsn);
   constrList.push_back(splitInsn);
}
// Replace sources [a, b] of `insn` by one wide value produced by an OP_MERGE
// inserted before `insn`. Does nothing if a >= b or the summed source size
// is zero. Sources after b are shifted down to follow the merged value. The
// instruction's extra sources are taken off before the rewrite and put back
// afterwards. The merge is recorded in constrList.
void
RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
                                              const int a, const int b)
{
   if (a >= b)
      return;

   uint8_t totalSize = 0;
   for (int idx = a; idx <= b; ++idx)
      totalSize += insn->getSrc(idx)->reg.size;
   if (totalSize == 0)
      return;

   LValue *const wide = new_LValue(func, FILE_GPR);
   wide->reg.size = totalSize;

   Value *extra[3];
   insn->takeExtraSources(0, extra);

   Instruction *const mergeInsn =
      new_Instruction(func, OP_MERGE, typeOfSize(totalSize));
   mergeInsn->setDef(0, wide);
   for (int idx = a; idx <= b; ++idx) {
      mergeInsn->setSrc(idx - a, insn->getSrc(idx));
      insn->setSrc(idx, NULL);
   }
   insn->setSrc(a, wide);

   // close the gap left by the condensed sources
   int dst = a + 1;
   for (int from = b + 1; insn->srcExists(from); ++from) {
      insn->setSrc(dst, insn->getSrc(from));
      insn->setSrc(from, NULL);
      ++dst;
   }
   insn->bb->insertBefore(insn, mergeInsn);

   insn->putExtraSources(0, extra);

   constrList.push_back(mergeInsn);
}
 
// Constraint set-up for texture/surface instructions on chipset family 0x110.
//
// Texture ops get their result mask trimmed first; defs are always condensed.
// SUSTB/SUSTP condense the sources starting at index 3 that cover the data
// type. Other texture ops condense a first source group and, if more than
// one source is left, a second group.
void
RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
{
   if (isTextureOp(tex->op))
      textureMask(tex);
   condenseDefs(tex);

   if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) {
      condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1);
      return;
   }
   if (!isTextureOp(tex->op))
      return;

   int first;  // number of sources in the first group
   int second; // number of sources left for the second group
   if (tex->op == OP_TXQ) {
      first = tex->srcCount(0xff);
      second = 0;
   } else {
      first = tex->tex.target.getArgCount() - tex->tex.target.isMS();
      if (tex->op == OP_TXD) {
         // Indirect handle belongs in the first arg
         if (tex->tex.rIndirectSrc >= 0)
            ++first;
         if (!tex->tex.target.isArray() && tex->tex.useOffsets)
            ++first;
      }
      second = tex->srcCount(0xff) - first;
   }

   if (first > 1)
      condenseSrcs(tex, 0, first - 1);
   // NOTE: the first call modified positions already, so the second group
   // now starts at index 1
   if (second > 1)
      condenseSrcs(tex, 1, second);
}
 
// Constraint set-up for texture/surface instructions on chipset families
// 0xe0, 0xf0 and 0x100.
//
// Texture ops get their result mask trimmed first; defs are always condensed.
// SUSTB/SUSTP condense the sources starting at index 3 that cover the data
// type. Other texture ops condense their sources in groups of at most four:
// the first four, then (if at least two remain) the rest.
void
RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
{
   if (isTextureOp(tex->op))
      textureMask(tex);
   condenseDefs(tex);

   if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) {
      condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1);
      return;
   }
   if (!isTextureOp(tex->op))
      return;

   const int nSrc = tex->srcCount(0xff, true);
   if (nSrc <= 4) {
      if (nSrc > 1)
         condenseSrcs(tex, 0, nSrc - 1);
      return;
   }

   condenseSrcs(tex, 0, 3);
   // NOTE: the first call collapsed four slots into one, so old indices
   // 4 .. nSrc-1 are now 1 .. nSrc-4
   if (nSrc > 5)
      condenseSrcs(tex, 1, nSrc - 4);
}
 
// Constraint set-up for texture instructions on chipset families 0xc0/0xd0.
//
// Trims the result mask, condenses a first source group and, if more than
// one source is left, a second group (asserted to hold at most 4 sources),
// then condenses the defs.
void
RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex)
{
   textureMask(tex);

   int first;  // number of sources in the first group
   int second; // number of sources left for the second group
   if (tex->op == OP_TXQ) {
      first = tex->srcCount(0xff);
      second = 0;
   } else {
      const bool hasIndirect =
         tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0;

      first = tex->tex.target.getArgCount() - tex->tex.target.isMS();
      if (hasIndirect && !tex->tex.target.isArray())
         ++first;
      if (tex->op == OP_TXD && tex->tex.useOffsets)
         ++first;
      second = tex->srcCount(0xff) - first;
      assert(second <= 4);
   }

   if (first > 1)
      condenseSrcs(tex, 0, first - 1);
   // NOTE: the first call modified positions already, so the second group
   // now starts at index 1
   if (second > 1)
      condenseSrcs(tex, 1, second);

   condenseDefs(tex);
}
 
// Constraint set-up for texture instructions on chipset families
// 0x50/0x80/0x90/0xa0.
//
// Trims the result mask, pads sources and defs with clones of source 0 /
// def 0 until both lists have the same length, then condenses all defs and
// all sources into one wide value each. The predicate is detached while
// this happens and re-attached before condensing.
// NOTE(review): presumably the predicate is detached so it is not seen as a
// source by srcExists() during padding - confirm.
void
RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex)
{
   Value *const savedPred = tex->getPredicate();
   if (savedPred)
      tex->setPredicate(tex->cc, NULL);

   textureMask(tex);

   assert(tex->defExists(0) && tex->srcExists(0));

   // make src and def count match
   int count = 0;
   while (tex->srcExists(count) || tex->defExists(count)) {
      if (!tex->srcExists(count))
         tex->setSrc(count, new_LValue(func, tex->getSrc(0)->asLValue()));
      if (!tex->defExists(count))
         tex->setDef(count, new_LValue(func, tex->getDef(0)->asLValue()));
      ++count;
   }

   if (savedPred)
      tex->setPredicate(tex->cc, savedPred);

   condenseDefs(tex);
   condenseSrcs(tex, 0, count - 1);
}
 
// Insert constraint markers for instructions whose multiple sources must be
// located in consecutive registers.
//
// Per instruction of `bb`:
//  - texture instructions go to the chipset-specific texConstraint*() helper
//    (unknown chipset families are left alone),
//  - EXPORT/STORE get the data sources (from index 1 on) condensed,
//  - LOAD/VFETCH get their defs condensed, plus a hazard marker for indirect
//    loads of at least 8 bytes,
//  - UNION/MERGE/SPLIT are just recorded in constrList.
// Always returns true.
bool
RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
{
   targ = bb->getProgram()->getTarget();

   Instruction *following;
   for (Instruction *insn = bb->getEntry(); insn; insn = following) {
      // remember the successor first: the helpers insert instructions
      // around insn
      following = insn->next;

      TexInstruction *const tex = insn->asTex();
      if (tex) {
         switch (targ->getChipset() & ~0xf) {
         case 0x50:
         case 0x80:
         case 0x90:
         case 0xa0:
            texConstraintNV50(tex);
            break;
         case 0xc0:
         case 0xd0:
            texConstraintNVC0(tex);
            break;
         case 0xe0:
         case 0xf0:
         case 0x100:
            texConstraintNVE0(tex);
            break;
         case 0x110:
            texConstraintGM107(tex);
            break;
         default:
            break;
         }
         continue;
      }

      switch (insn->op) {
      case OP_EXPORT:
      case OP_STORE: {
         // find the last source needed to cover the data type
         int remaining = typeSizeof(insn->dType);
         int s = 1;
         while (remaining > 0) {
            assert(insn->srcExists(s));
            remaining -= insn->getSrc(s)->reg.size;
            ++s;
         }
         condenseSrcs(insn, 1, s - 1);
         break;
      }
      case OP_LOAD:
      case OP_VFETCH:
         condenseDefs(insn);
         if (insn->src(0).isIndirect(0) && typeSizeof(insn->dType) >= 8)
            addHazard(insn, insn->src(0).getIndirect(0));
         break;
      case OP_UNION:
      case OP_MERGE:
      case OP_SPLIT:
         constrList.push_back(insn);
         break;
      default:
         break;
      }
   }
   return true;
}
 
// Insert extra moves so that, if multiple register constraints on a value are
// in conflict, these conflicts can be resolved.
//
// Walks constrList. For every MERGE/UNION source:
//  - a source without any definition gets an OP_NOP defining it, placed
//    before the constraint;
//  - a source that is only referenced once and whose defining instruction
//    has no constrained defs is left as is;
//  - every other source is copied through a new OP_MOV placed before the
//    constraint, and the constraint reads the copy instead.
// SPLIT entries are currently not processed (see the note on the first
// branch). Always returns true.
bool
RegAlloc::InsertConstraintsPass::insertConstraintMoves()
{
for (std::list<Instruction *>::iterator it = constrList.begin();
it != constrList.end();
++it) {
Instruction *cst = *it;
Instruction *mov;

// NOTE: the "&& 0" disables this branch; it is dead code kept for reference.
if (cst->op == OP_SPLIT && 0) {
// spilling splits is annoying, just make sure they're separate
for (int d = 0; cst->defExists(d); ++d) {
if (!cst->getDef(d)->refCount())
continue;
LValue *lval = new_LValue(func, cst->def(d).getFile());
const uint8_t size = cst->def(d).getSize();
lval->reg.size = size;

mov = new_Instruction(func, OP_MOV, typeOfSize(size));
mov->setSrc(0, lval);
mov->setDef(0, cst->getDef(d));
cst->setDef(d, mov->getSrc(0));
cst->bb->insertAfter(cst, mov);

cst->getSrc(0)->asLValue()->noSpill = 1;
mov->getSrc(0)->asLValue()->noSpill = 1;
}
} else
if (cst->op == OP_MERGE || cst->op == OP_UNION) {
for (int s = 0; cst->srcExists(s); ++s) {
const uint8_t size = cst->src(s).getSize();

// undefined source: give it a defining NOP ("mov" is reused as a
// scratch pointer here, the instruction is not a move)
if (!cst->getSrc(s)->defs.size()) {
mov = new_Instruction(func, OP_NOP, typeOfSize(size));
mov->setDef(0, cst->getSrc(s));
cst->bb->insertBefore(cst, mov);
continue;
}
assert(cst->getSrc(s)->defs.size() == 1); // still SSA

Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
// catch some cases where we don't really need MOVs
if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs())
continue;

LValue *lval = new_LValue(func, cst->src(s).getFile());
lval->reg.size = size;

// copy the source and let the constraint read the copy
mov = new_Instruction(func, OP_MOV, typeOfSize(size));
mov->setDef(0, lval);
mov->setSrc(0, cst->getSrc(s));
cst->setSrc(s, mov->getDef(0));
cst->bb->insertBefore(cst, mov);

cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help

// for a UNION the copy runs under the same predicate as the original
// definition
if (cst->op == OP_UNION)
mov->setPredicate(defi->cc, defi->getPredicate());
}
}
}

return true;
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp
0,0 → 1,552
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_target.h"
 
namespace nv50_ir {
 
// Converts nv50 IR generated from TGSI to SSA form.
 
// DominatorTree implements an algorithm for finding immediate dominators,
// as described by T. Lengauer & R. Tarjan.
//
// The object is itself a Graph: build() inserts the "dom" node of the CFG
// root's basic block as root and attaches the other blocks' "dom" nodes
// below their immediate dominator.
class DominatorTree : public Graph
{
public:
// Builds the tree for the given control flow graph.
DominatorTree(Graph *cfg);
~DominatorTree() { }

bool dominates(BasicBlock *, BasicBlock *);

// Fills in getDF() of every basic block in the tree.
void findDominanceFrontiers();

private:
void build();
// Depth-first walk that initialises SEMI and PARENT.
void buildDFS(Node *);

// Path compression helper used by eval().
void squash(int);
inline void link(int, int);
inline int eval(int);

// Dumps all five per-vertex arrays through INFO.
void debugPrint();

Graph *cfg;

// Scratch storage, allocated and released inside the constructor; only
// valid while build() runs.
Node **vert; // vert[i] is the node whose tag is i
int *data; // five consecutive arrays of `count` ints, see macros below
// NOTE: count is initialised from cfg, so it must stay declared after cfg.
const int count;

// Accessors for the five arrays packed into `data`, indexed by node tag.
#define SEMI(i) (data[(i) + 0 * count])
#define ANCESTOR(i) (data[(i) + 1 * count])
#define PARENT(i) (data[(i) + 2 * count])
#define LABEL(i) (data[(i) + 3 * count])
#define DOM(i) (data[(i) + 4 * count])
};
 
// Print SEMI, ANCESTOR, PARENT, LABEL and DOM of every vertex via INFO.
// Reads `data`, so it is only meaningful while that array is allocated.
void DominatorTree::debugPrint()
{
   int idx = 0;
   while (idx < count) {
      INFO("SEMI(%i) = %i\n", idx, SEMI(idx));
      INFO("ANCESTOR(%i) = %i\n", idx, ANCESTOR(idx));
      INFO("PARENT(%i) = %i\n", idx, PARENT(idx));
      INFO("LABEL(%i) = %i\n", idx, LABEL(idx));
      INFO("DOM(%i) = %i\n", idx, DOM(idx));
      ++idx;
   }
}
 
// Build the dominator tree of `cfgraph`.
//
// Numbers the CFG nodes in the order delivered by iteratorDFS(true) (the
// number is stored in Node::tag), initialises the per-vertex arrays, runs
// build() and releases the scratch arrays again, so `vert` and `data` must
// not be used after construction.
DominatorTree::DominatorTree(Graph *cfgraph) : cfg(cfgraph),
                                               count(cfg->getSize())
{
   vert = new Node * [count];
   data = new int[5 * count];

   int idx = 0;
   IteratorRef dfs = cfg->iteratorDFS(true);
   while (!dfs->end()) {
      Node *const node = reinterpret_cast<Node *>(dfs->get());

      vert[idx] = node;
      node->tag = idx;
      LABEL(idx) = idx;
      ANCESTOR(idx) = -1;
      SEMI(idx) = -1;

      dfs->next();
      ++idx;
   }

   build();

   delete[] vert;
   delete[] data;
}
 
// Recursive depth-first walk over outgoing edges starting at `node`.
// Sets SEMI of each reached node to its own tag (which also marks it as
// visited, since SEMI starts out negative) and records the node it was
// reached from in PARENT.
void DominatorTree::buildDFS(Graph::Node *node)
{
   SEMI(node->tag) = node->tag;

   for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) {
      Graph::Node *const succ = ei.getNode();
      if (SEMI(succ->tag) >= 0)
         continue; // already visited

      buildDFS(succ);
      PARENT(succ->tag) = node->tag;
   }
}
 
void DominatorTree::squash(int v)
{
if (ANCESTOR(ANCESTOR(v)) >= 0) {
squash(ANCESTOR(v));
 
if (SEMI(LABEL(ANCESTOR(v))) < SEMI(LABEL(v)))
LABEL(v) = LABEL(ANCESTOR(v));
ANCESTOR(v) = ANCESTOR(ANCESTOR(v));
}
}
 
// Returns v itself if it has no ancestor link yet; otherwise compresses v's
// ancestor path and returns LABEL(v).
int DominatorTree::eval(int v)
{
   if (ANCESTOR(v) >= 0) {
      squash(v);
      return LABEL(v);
   }
   return v;
}
 
// Record v as the ancestor of w in the ANCESTOR array used by eval() and
// squash().
void DominatorTree::link(int v, int w)
{
ANCESTOR(w) = v;
}
 
// Compute DOM for every vertex and attach the basic blocks' "dom" nodes to
// this graph accordingly.
//
// Steps:
//  1. buildDFS() from the CFG root initialises SEMI and PARENT.
//  2. Vertices are processed in decreasing tag order: the semidominator of
//     w is computed from its incoming edges, w is put in the bucket of its
//     semidominator and linked below its parent, and the parent's bucket is
//     drained, giving each entry a provisional DOM.
//  3. An increasing pass finalises DOM where it differs from SEMI.
//  4. The dom nodes are attached below their dominator's dom node,
//     repeating until a sweep attaches nothing.
void DominatorTree::build()
{
// one bucket per vertex, indexed by tag
DLList *bucket = new DLList[count];
Node *nv, *nw;
int p, u, v, w;

buildDFS(cfg->getRoot());

for (w = count - 1; w >= 1; --w) {
nw = vert[w];
assert(nw->tag == w);
// semidominator: minimum SEMI(eval(v)) over all predecessors v
for (Graph::EdgeIterator ei = nw->incident(); !ei.end(); ei.next()) {
nv = ei.getNode();
v = nv->tag;
u = eval(v);
if (SEMI(u) < SEMI(w))
SEMI(w) = SEMI(u);
}
p = PARENT(w);
bucket[SEMI(w)].insert(nw);
link(p, w);

// drain the parent's bucket (it.erase() is the loop step)
for (DLList::Iterator it = bucket[p].iterator(); !it.end(); it.erase()) {
v = reinterpret_cast<Node *>(it.get())->tag;
u = eval(v);
DOM(v) = (SEMI(u) < SEMI(v)) ? u : p;
}
}
// provisional dominators that differ from the semidominator are resolved
// through the dominator of the provisional entry
for (w = 1; w < count; ++w) {
if (DOM(w) != SEMI(w))
DOM(w) = DOM(DOM(w));
}
DOM(0) = 0;

// Build the tree graph: start with the root block's dom node, then keep
// sweeping; a node is attached once its dominator's node is already part
// of a graph. p counts the attachments made in the current sweep.
insert(&BasicBlock::get(cfg->getRoot())->dom);
do {
p = 0;
for (v = 1; v < count; ++v) {
nw = &BasicBlock::get(vert[DOM(v)])->dom;;
nv = &BasicBlock::get(vert[v])->dom;
if (nw->getGraph() && !nv->getGraph()) {
++p;
nw->attach(nv, Graph::Edge::TREE);
}
}
} while (p);

delete[] bucket;
}
 
#undef SEMI
#undef ANCESTOR
#undef PARENT
#undef LABEL
#undef DOM
 
void DominatorTree::findDominanceFrontiers()
{
BasicBlock *bb;
 
for (IteratorRef dtIt = iteratorDFS(false); !dtIt->end(); dtIt->next()) {
EdgeIterator succIt, chldIt;
 
bb = BasicBlock::get(reinterpret_cast<Node *>(dtIt->get()));
bb->getDF().clear();
 
for (succIt = bb->cfg.outgoing(); !succIt.end(); succIt.next()) {
BasicBlock *dfLocal = BasicBlock::get(succIt.getNode());
if (dfLocal->idom() != bb)
bb->getDF().insert(dfLocal);
}
 
for (chldIt = bb->dom.outgoing(); !chldIt.end(); chldIt.next()) {
BasicBlock *cb = BasicBlock::get(chldIt.getNode());
 
DLList::Iterator dfIt = cb->getDF().iterator();
for (; !dfIt.end(); dfIt.next()) {
BasicBlock *dfUp = BasicBlock::get(dfIt);
if (dfUp->idom() != bb)
bb->getDF().insert(dfUp);
}
}
}
}
 
// liveIn(bb) = usedBeforeAssigned(bb) U (liveOut(bb) - assigned(bb))
//
// Recursively computes bb->liveSet before SSA conversion. The live-out set
// is the union of the live sets of all CFG successors other than bb itself;
// successors not yet visited in sequence `seq` are processed first. The
// function's output values count as used at the end of the exit block.
void
Function::buildLiveSetsPreSSA(BasicBlock *bb, const int seq)
{
   Function *const fn = bb->getFunction();
   BitSet usedBeforeAssigned(allLValues.getSize(), true);
   BitSet assigned(allLValues.getSize(), true);

   bb->liveSet.allocate(allLValues.getSize(), false);

   // gather live-out from the successors
   bool haveSuccessor = false;
   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      BasicBlock *const succ = BasicBlock::get(ei.getNode());
      if (succ == bb)
         continue;
      if (succ->cfg.visit(seq))
         buildLiveSetsPreSSA(succ, seq);

      if (haveSuccessor) {
         bb->liveSet |= succ->liveSet;
      } else {
         bb->liveSet = succ->liveSet;
         haveSuccessor = true;
      }
   }
   if (!haveSuccessor && !bb->liveSet.marker)
      bb->liveSet.fill(0);
   bb->liveSet.marker = true;

   // scan the block in program order
   for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
      for (int s = 0; insn->srcExists(s); ++s) {
         Value *const src = insn->getSrc(s);
         if (src->asLValue() && !assigned.test(src->id))
            usedBeforeAssigned.set(src->id);
      }
      for (int d = 0; insn->defExists(d); ++d)
         assigned.set(insn->getDef(d)->id);
   }

   if (bb == BasicBlock::get(fn->cfgExit)) {
      typedef std::deque<ValueRef>::iterator OutIter;
      for (OutIter out = fn->outs.begin(); out != fn->outs.end(); ++out) {
         Value *const val = out->get();
         if (!assigned.test(val->id))
            usedBeforeAssigned.set(val->id);
      }
   }

   bb->liveSet.andNot(assigned);
   bb->liveSet |= usedBeforeAssigned;
}
 
// Recursively computes bb->defSet: the union of the def sets of all CFG
// predecessors plus every value defined by an instruction of bb.
// Predecessors not yet visited in sequence `seq` are processed first.
// The liveSet marker of bb is read for the allocate() call and then set.
void
Function::buildDefSetsPreSSA(BasicBlock *bb, const int seq)
{
   bb->defSet.allocate(allLValues.getSize(), !bb->liveSet.marker);
   bb->liveSet.marker = true;

   Graph::EdgeIterator ei = bb->cfg.incident();
   while (!ei.end()) {
      BasicBlock *const pred = BasicBlock::get(ei.getNode());

      if (pred->cfg.visit(seq))
         buildDefSetsPreSSA(pred, seq);
      bb->defSet |= pred->defSet;

      ei.next();
   }

   for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
      int d = 0;
      while (insn->defExists(d)) {
         bb->defSet.set(insn->getDef(d)->id);
         ++d;
      }
   }
}
 
// Renaming step of SSA construction for one function: gives every definition
// a fresh value and rewrites the sources to the definition that reaches
// them.
class RenamePass
{
public:
RenamePass(Function *);
~RenamePass();

// Renames the whole function; returns false only if the stack array is
// missing.
bool run();
// Renames one basic block and, recursively, the blocks it dominates.
void search(BasicBlock *);

// Current definition of a pre-SSA value, or 0 if there is none.
inline LValue *getStackTop(Value *);

// Creates a placeholder definition for a value that is read without
// having been defined.
LValue *mkUndefined(Value *);

private:
Stack *stack; // one definition stack per entry of func->allLValues
Function *func;
Program *prog;
};
 
bool
Program::convertToSSA()
{
for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
Function *fn = reinterpret_cast<Function *>(fi.get());
if (!fn->convertToSSA())
return false;
}
return true;
}
 
// XXX: add edge from entry to exit ?
 
// Efficiently Computing Static Single Assignment Form and
// the Control Dependence Graph,
// R. Cytron, J. Ferrante, B. K. Rosen, M. N. Wegman, F. K. Zadeck
//
// Converts this function to (pruned) SSA form: builds live sets and the
// dominator tree with dominance frontiers, inserts PHI instructions, then
// runs the RenamePass. Returns the result of the rename pass.
bool
Function::convertToSSA()
{
// 0. calculate live in variables (for pruned SSA)
buildLiveSets();

// 1. create the dominator tree
domTree = new DominatorTree(&cfg);
reinterpret_cast<DominatorTree *>(domTree)->findDominanceFrontiers();

// 2. insert PHI functions
DLList workList;
LValue *lval;
BasicBlock *bb;
int var;
int iterCount = 0;
// One allocation holds two per-block arrays, both indexed by block id:
// hasAlready[] (first half) and work[] (second half). An entry equal to the
// current iterCount means "already handled for this variable", so the
// arrays never need to be cleared between variables.
int *hasAlready = new int[allBBlocks.getSize() * 2];
int *work = &hasAlready[allBBlocks.getSize()];

memset(hasAlready, 0, allBBlocks.getSize() * 2 * sizeof(int));

// for each variable
for (var = 0; var < allLValues.getSize(); ++var) {
if (!allLValues.get(var))
continue;
lval = reinterpret_cast<Value *>(allLValues.get(var))->asLValue();
if (!lval || lval->defs.empty())
continue;
++iterCount;

// TODO: don't add phi functions for values that aren't used outside
// the BB they're defined in

// gather blocks with assignments to lval in workList
for (Value::DefIterator d = lval->defs.begin();
d != lval->defs.end(); ++d) {
bb = ((*d)->getInsn() ? (*d)->getInsn()->bb : NULL);
if (!bb)
continue; // XXX: instruction has likely been removed but not deleted

if (work[bb->getId()] == iterCount)
continue;
work[bb->getId()] = iterCount;
workList.insert(bb);
}

// for each block in workList, insert a phi for lval in the block's
// dominance frontier (if we haven't already done so)
for (DLList::Iterator wI = workList.iterator(); !wI.end(); wI.erase()) {
bb = BasicBlock::get(wI);

DLList::Iterator dfIter = bb->getDF().iterator();
for (; !dfIter.end(); dfIter.next()) {
Instruction *phi;
BasicBlock *dfBB = BasicBlock::get(dfIter);

if (hasAlready[dfBB->getId()] >= iterCount)
continue;
hasAlready[dfBB->getId()] = iterCount;

// pruned SSA: don't need a phi if the value is not live-in
if (!dfBB->liveSet.test(lval->id))
continue;

phi = new_Instruction(this, OP_PHI, typeOfSize(lval->reg.size));
dfBB->insertTail(phi);

// def and all sources still name the pre-SSA value; one source per
// incoming CFG edge
phi->setDef(0, lval);
for (int s = 0; s < dfBB->cfg.incidentCount(); ++s)
phi->setSrc(s, lval);

// the phi is a new assignment to lval, so dfBB has to be processed too
if (work[dfBB->getId()] < iterCount) {
work[dfBB->getId()] = iterCount;
wI.insert(dfBB);
}
}
}
}
// work[] lives in the same allocation and is released with it
delete[] hasAlready;

// 3. rename
RenamePass rename(this);
return rename.run();
}
 
// Set up the pass for `fn` with one (initially empty) definition stack per
// entry of the function's allLValues list.
RenamePass::RenamePass(Function *fn) : func(fn), prog(fn->getProgram())
{
   // `stack` is declared before `func`, so it is assigned here rather than
   // in the initializer list
   stack = new Stack[func->allLValues.getSize()];
}
 
// Release the definition stacks.
RenamePass::~RenamePass()
{
   // delete[] on a null pointer is a no-op, so no guard is needed
   delete[] stack;
}
 
// Return the value on top of the definition stack belonging to `val`
// (selected by val->id), or 0 if that stack is empty.
LValue *
RenamePass::getStackTop(Value *val)
{
   Stack &defs = stack[val->id];

   if (defs.getSize())
      return reinterpret_cast<LValue *>(defs.peek().u.p);
   return 0;
}
 
// Create a stand-in definition for a value that is read without a reaching
// definition: a new LValue cloned from `val`, defined by an OP_NOP that is
// inserted at the head of the function's root block. Returns the new value.
// `val` must be an LValue (asserted).
LValue *
RenamePass::mkUndefined(Value *val)
{
   LValue *const proto = val->asLValue();
   assert(proto);

   LValue *const undef = new_LValue(func, proto);
   Instruction *const def =
      new_Instruction(func, OP_NOP, typeOfSize(proto->reg.size));
   def->setDef(0, undef);

   BasicBlock *const root = BasicBlock::get(func->cfg.getRoot());
   root->insertHead(def);

   return undef;
}
 
// Rename the whole function, starting at the basic block that owns the root
// of the dominator tree. Returns false without doing anything if the stack
// array is missing, true otherwise.
bool RenamePass::run()
{
   if (stack) {
      search(BasicBlock::get(func->domTree->getRoot()));
      return true;
   }
   return false;
}
 
// Go through BBs in dominance order, create new values for each definition,
// and replace all sources with their current new values.
//
// NOTE: The values generated for function inputs/outputs have no connection
// to their corresponding outputs/inputs in other functions. Only allocation
// of physical registers will establish this connection.
//
// The function recurses over the dominator tree children of bb. Definitions
// pushed while processing bb are popped again before returning.
void RenamePass::search(BasicBlock *bb)
{
LValue *lval, *ssa;
int d, s;
const Target *targ = prog->getTarget();

// Put current definitions for function inputs values on the stack.
// They can be used before any redefinitions are pushed.
if (bb == BasicBlock::get(func->cfg.getRoot())) {
for (std::deque<ValueDef>::iterator it = func->ins.begin();
it != func->ins.end(); ++it) {
lval = it->get()->asLValue();
assert(lval);

// the SSA value keeps size and register id of the pre-SSA input
ssa = new_LValue(func, targ->nativeFile(lval->reg.file));
ssa->reg.size = lval->reg.size;
ssa->reg.data.id = lval->reg.data.id;

it->setSSA(ssa);
stack[lval->id].push(ssa);
}
}

for (Instruction *stmt = bb->getFirst(); stmt; stmt = stmt->next) {
// PHI sources get definitions from the passes through the incident BBs,
// so skip them here.
if (stmt->op != OP_PHI) {
for (s = 0; stmt->srcExists(s); ++s) {
lval = stmt->getSrc(s)->asLValue();
if (!lval)
continue;
// Values on the stack created in previously visited blocks, and
// function inputs, will be valid because they dominate this one.
lval = getStackTop(lval);
if (!lval)
lval = mkUndefined(stmt->getSrc(s));
stmt->setSrc(s, lval);
}
}
// Every def gets a fresh SSA value that copies size and register id of
// the pre-SSA value; it becomes the current definition of that value.
for (d = 0; stmt->defExists(d); ++d) {
lval = stmt->def(d).get()->asLValue();
assert(lval);
stmt->def(d).setSSA(
new_LValue(func, targ->nativeFile(lval->reg.file)));
stmt->def(d).get()->reg.size = lval->reg.size;
stmt->def(d).get()->reg.data.id = lval->reg.data.id;
stack[lval->id].push(stmt->def(d).get());
}
}

// Update sources of PHI ops corresponding to this BB in outgoing BBs.
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
Instruction *phi;
int p = 0;
BasicBlock *sb = BasicBlock::get(ei.getNode());

// which predecessor of sb is bb ?
// (this inner "ei" intentionally shadows the outer iterator)
for (Graph::EdgeIterator ei = sb->cfg.incident(); !ei.end(); ei.next()) {
if (ei.getNode() == &bb->cfg)
break;
++p;
}
assert(p < sb->cfg.incidentCount());

for (phi = sb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
lval = getStackTop(phi->getSrc(p));
if (!lval)
lval = mkUndefined(phi->getSrc(p));
phi->setSrc(p, lval);
}
}

// Visit the BBs we dominate.
for (Graph::EdgeIterator ei = bb->dom.outgoing(); !ei.end(); ei.next())
search(BasicBlock::get(ei.getNode()));

// Update function outputs to the last definitions of their pre-SSA values.
// I hope they're unique, i.e. that we get PHIs for all of them ...
if (bb == BasicBlock::get(func->cfgExit)) {
for (std::deque<ValueRef>::iterator it = func->outs.begin();
it != func->outs.end(); ++it) {
lval = it->get()->asLValue();
if (!lval)
continue;
lval = getStackTop(lval);
if (!lval)
lval = mkUndefined(it->get());
it->set(lval);
}
}

// Pop the values we created in this block from the stack because we will
// return to blocks that we do not dominate.
// NOPs are skipped: mkUndefined() puts NOP definitions into the root block
// without pushing them on any stack.
// NOTE(review): this assumes no NOP with a def was pushed by the def loop
// above - confirm.
for (Instruction *stmt = bb->getFirst(); stmt; stmt = stmt->next) {
if (stmt->op == OP_NOP)
continue;
for (d = 0; stmt->defExists(d); ++d)
stack[stmt->def(d).preSSA()->id].pop();
}
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
0,0 → 1,483
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_target.h"
 
namespace nv50_ir {
 
// Source count per operation; the trailing comments name the operations
// each row covers. Target::create() statically asserts that the table has
// OP_LAST + 1 entries, so rows must be kept in sync with the operation list.
const uint8_t Target::operationSrcNr[] =
{
0, 0, // NOP, PHI
0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
1, 1, 2, // MOV, LOAD, STORE
2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
1, 1, 1, // ABS, NEG, NOT
2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
2, 2, 1, // MAX, MIN, SAT
1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2
1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW
0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,
0, 0, 0, // PRERET,CONT,BREAK
0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
1, 1, // EMIT, RESTART
1, 1, 1, // TEX, TXB, TXL,
1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT
2, 2, // ATOM, BAR
2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
3, // SHFL
0 // extra final entry (index OP_LAST per the static assert)
};
 
// Operation class per operation; the comment above each group names the
// operations it covers. Target::create() statically asserts that the table
// has OP_LAST + 1 entries, so groups must be kept in sync with the operation
// list.
const OpClass Target::operationClass[] =
{
// NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
OPCLASS_OTHER,
OPCLASS_PSEUDO,
OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
// MOV; LOAD; STORE
OPCLASS_MOVE,
OPCLASS_LOAD,
OPCLASS_STORE,
// ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
OPCLASS_ARITH, OPCLASS_ARITH,
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
// ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
OPCLASS_CONVERT, OPCLASS_CONVERT,
OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
OPCLASS_SHIFT, OPCLASS_SHIFT,
// MAX, MIN
OPCLASS_COMPARE, OPCLASS_COMPARE,
// SAT, CEIL, FLOOR, TRUNC; CVT
OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
OPCLASS_CONVERT,
// SET(AND,OR,XOR), SET; SELP, SLCT
OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
OPCLASS_COMPARE, OPCLASS_COMPARE,
// RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
OPCLASS_SFU, OPCLASS_SFU,
// BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
// DISCARD, EXIT
OPCLASS_FLOW, OPCLASS_FLOW,
// MEMBAR
OPCLASS_CONTROL,
// VFETCH, PFETCH, EXPORT
OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
// LINTERP, PINTERP
OPCLASS_SFU, OPCLASS_SFU,
// EMIT, RESTART
OPCLASS_CONTROL, OPCLASS_CONTROL,
// TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
OPCLASS_TEXTURE, OPCLASS_TEXTURE,
// SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
// NOTE(review): the third entry (SUSTB) is OPCLASS_ATOMIC while its
// neighbours, including SUSTP, are OPCLASS_SURFACE - confirm this is
// intended.
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
// SUBFM, SUCLAMP, SUEAU, MADSP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
// TEXBAR
OPCLASS_OTHER,
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
// POPCNT, INSBF, EXTBF, BFIND; PERMT
OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
OPCLASS_BITFIELD,
// ATOM, BAR
OPCLASS_ATOMIC, OPCLASS_CONTROL,
// VADD, VAVG, VMIN, VMAX
OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
// VSAD, VSET, VSHR, VSHL
OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
// VSEL, CCTL
OPCLASS_VECTOR, OPCLASS_CONTROL,
// SHFL
OPCLASS_OTHER,
OPCLASS_PSEUDO // LAST
};
 
 
// Per-generation Target factories, defined outside this file and selected
// by Target::create() based on the chipset family.
extern Target *getTargetGM107(unsigned int chipset);
extern Target *getTargetNVC0(unsigned int chipset);
extern Target *getTargetNV50(unsigned int chipset);
 
// Create the Target for `chipset`, chosen by the chipset id with its low
// four bits masked off:
//   0x50, 0x80, 0x90, 0xa0         -> getTargetNV50
//   0xc0, 0xd0, 0xe0, 0xf0, 0x100  -> getTargetNVC0
//   0x110                          -> getTargetGM107
// Any other family is reported through ERROR and yields 0.
// Also checks at compile time that both per-operation tables have
// OP_LAST + 1 entries.
Target *Target::create(unsigned int chipset)
{
   STATIC_ASSERT(Elements(operationSrcNr) == OP_LAST + 1);
   STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1);

   Target *result = 0;

   switch (chipset & ~0xf) {
   case 0x50:
   case 0x80:
   case 0x90:
   case 0xa0:
      result = getTargetNV50(chipset);
      break;
   case 0xc0:
   case 0xd0:
   case 0xe0:
   case 0xf0:
   case 0x100:
      result = getTargetNVC0(chipset);
      break;
   case 0x110:
      result = getTargetGM107(chipset);
      break;
   default:
      ERROR("unsupported target: NV%x\n", chipset);
      break;
   }
   return result;
}
 
// Delete a Target object; counterpart of Target::create().
void Target::destroy(Target *targ)
{
delete targ;
}
 
// Remember the target this emitter produces code for. The output buffer is
// set separately through setCodeLocation().
CodeEmitter::CodeEmitter(const Target *target) : targ(target)
{
}
 
// Point the emitter at a new output buffer: `ptr` becomes the write
// position, `size` the size limit, and the emitted size restarts at zero.
void
CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
{
   codeSizeLimit = size;
   codeSize = 0;
   code = reinterpret_cast<uint32_t *>(ptr);
}
 
// Dump the emitted code through INFO as 32-bit hex words, eight per line.
// The dump starts codeSize bytes before the current `code` pointer.
void
CodeEmitter::printBinary() const
{
   uint32_t *const first = code - codeSize / 4;

   INFO("program binary (%u bytes)", codeSize);
   unsigned int word = 0;
   while (word < codeSize / 4) {
      if (word % 8 == 0)
         INFO("\n");
      INFO("%08x ", first[word]);
      ++word;
   }
   INFO("\n");
}
 
// Number of 56-byte bundles needed to hold `size` bytes, rounded up.
// NOTE(review): presumably 56 bytes = 7 instructions of 8 bytes sharing one
// scheduling word - confirm against the NVE4 encoding.
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t bundleBytes = 56;

   return (size + (bundleBytes - 1)) / bundleBytes;
}
 
// Lay out all functions of the program: each function starts at the current
// prog->binSize, gets its blocks sized by prepareEmission(Function *), and
// its final size is added to prog->binSize.
//
// On targets with software scheduling (hasSWSched) the block sizes and
// positions are enlarged afterwards to make room for scheduling data.
void
CodeEmitter::prepareEmission(Program *prog)
{
for (ArrayList::Iterator fi = prog->allFuncs.iterator();
!fi.end(); fi.next()) {
Function *func = reinterpret_cast<Function *>(fi.get());
func->binPos = prog->binSize;
prepareEmission(func);

// adjust sizes & positions for scheduling info:
if (prog->getTarget()->hasSWSched) {
uint32_t adjPos = func->binPos;
BasicBlock *bb = NULL;
for (int i = 0; i < func->bbCount; ++i) {
bb = func->bbArray[i];
int32_t adjSize = bb->binSize;
// If the block does not start on a 64-byte boundary, the bytes up to
// the next boundary are not counted when computing the number of
// additional bundles (clamped at zero).
if (adjPos % 64) {
adjSize -= 64 - adjPos % 64;
if (adjSize < 0)
adjSize = 0;
}
// 8 extra bytes per bundle on top of the original block size
adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
bb->binPos = adjPos;
bb->binSize = adjSize;
adjPos += adjSize;
}
// only overwrite the function size if at least one block was laid out
if (bb)
func->binSize = adjPos - func->binPos;
}

prog->binSize += func->binSize;
}
}
 
// Prepare one function for emission: allocate an (initially empty) block
// array with room for every CFG node, place the root block at the
// function's position, and size every block in the order delivered by the
// CFG iterator.
void
CodeEmitter::prepareEmission(Function *func)
{
   func->bbCount = 0;
   func->bbArray = new BasicBlock * [func->cfg.getSize()];

   BasicBlock *const root = BasicBlock::get(func->cfg.getRoot());
   root->binPos = func->binPos;

   IteratorRef it = func->cfg.iteratorCFG();
   while (!it->end()) {
      prepareEmission(BasicBlock::get(*it));
      it->next();
   }
}
 
// Prepare one basic block for emission.
//
// 1. Remove branches to bb from the blocks laid out directly before it:
//    they would jump to the next instruction anyway.
// 2. Append bb to the function's block array and set its position.
// 3. Choose an encoding size (4 or 8 bytes) for every instruction, pairing
//    up short instructions where possible, and accumulate the block and
//    function sizes.
void
CodeEmitter::prepareEmission(BasicBlock *bb)
{
Instruction *i, *next;
Function *func = bb->getFunction();
int j;
unsigned int nShort;

// skip trailing empty blocks: find the last laid-out block with code
for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);

for (; j >= 0; --j) {
BasicBlock *in = func->bbArray[j];
Instruction *exit = in->getExit();

if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
// drop the 8-byte branch and move all later blocks up accordingly
in->binSize -= 8;
func->binSize -= 8;

// NOTE: this reuses j; afterwards j == func->bbCount
for (++j; j < func->bbCount; ++j)
func->bbArray[j]->binPos -= 8;

in->remove(exit);
}
bb->binPos = in->binPos + in->binSize;
if (in->binSize) // no more no-op branches to bb
break;
}
func->bbArray[func->bbCount++] = bb;

// blocks without an exit instruction are not sized
if (!bb->getExit())
return;

// determine encoding size, try to group short instructions
// (nShort counts the short instructions of the current run; an odd count
// means one short instruction is still waiting for a partner)
nShort = 0;
for (i = bb->getEntry(); i; i = next) {
next = i->next;

// drop memory barriers the target cannot encode
if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
bb->remove(i);
continue;
}

i->encSize = getMinEncodingSize(i);
if (next && i->encSize < 8)
++nShort;
else
if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
// i is long, an unpaired short instruction precedes it and a short one
// follows: try to reorder so the two short ones become adjacent
if (i->isCommutationLegal(i->next)) {
bb->permuteAdjacent(i, next);
next->encSize = 4;
next = i;
i = i->prev;
++nShort;
} else
if (i->isCommutationLegal(i->prev) && next->next) {
bb->permuteAdjacent(i->prev, i);
next->encSize = 4;
next = next->next;
bb->binSize += 4;
++nShort;
} else {
// no legal reordering: widen the unpaired short instruction
i->encSize = 8;
i->prev->encSize = 8;
bb->binSize += 4;
nShort = 0;
}
} else {
i->encSize = 8;
if (nShort & 1) {
// widen the unpaired short instruction before i
i->prev->encSize = 8;
bb->binSize += 4;
}
nShort = 0;
}
bb->binSize += i->encSize;
}

// the exit instruction is always encoded with 8 bytes
if (bb->getExit()->encSize == 4) {
assert(nShort);
bb->getExit()->encSize = 8;
bb->binSize += 4;

if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
bb->binSize += 8;
bb->getExit()->prev->encSize = 8;
}
}
assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));

func->binSize += bb->binSize;
}
 
void
Program::emitSymbolTable(struct nv50_ir_prog_info *info)
{
unsigned int n = 0, nMax = allFuncs.getSize();
 
info->bin.syms =
(struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
 
for (ArrayList::Iterator fi = allFuncs.iterator();
!fi.end();
fi.next(), ++n) {
Function *f = (Function *)fi.get();
assert(n < nMax);
 
info->bin.syms[n].label = f->getLabel();
info->bin.syms[n].offset = f->binPos;
}
 
info->bin.numSyms = n;
}
 
bool
Program::emitBinary(struct nv50_ir_prog_info *info)
{
CodeEmitter *emit = target->getCodeEmitter(progType);
 
emit->prepareEmission(this);
 
if (dbgFlags & NV50_IR_DEBUG_BASIC)
this->print();
 
if (!binSize) {
code = NULL;
return false;
}
code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
if (!code)
return false;
emit->setCodeLocation(code, binSize);
 
for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
Function *fn = reinterpret_cast<Function *>(fi.get());
 
assert(emit->getCodeSize() == fn->binPos);
 
for (int b = 0; b < fn->bbCount; ++b) {
for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
emit->emitInstruction(i);
if (i->sType == TYPE_F64 || i->dType == TYPE_F64)
info->io.fp64 = true;
}
}
}
info->bin.relocData = emit->getRelocInfo();
 
emitSymbolTable(info);
 
// the nvc0 driver will print the binary iself together with the header
if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
emit->printBinary();
 
delete emit;
return true;
}
 
#define RELOC_ALLOC_INCREMENT 8
 
// Appends one relocation entry to relocInfo, growing the table in steps of
// RELOC_ALLOC_INCREMENT entries.
//
//   ty   - base address the relocation is relative to
//   w    - index of the 32-bit word to patch, relative to the current
//          emission position (codeSize)
//   data - constant added to the base address
//   m    - mask of the bits replaced in the target word
//   s    - shift applied to the value (left if >= 0, right if < 0)
//
// Returns false if the table could not be grown; in that case the entries
// recorded so far are kept rather than being dropped.
bool
CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
                      int s)
{
   unsigned int n = relocInfo ? relocInfo->count : 0;

   // the table is full (or absent) whenever the count is a multiple of the
   // allocation increment
   if (!(n % RELOC_ALLOC_INCREMENT)) {
      size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
      // reallocate into a temporary so a failure does not overwrite the
      // pointer to the existing table
      RelocInfo *grown = reinterpret_cast<RelocInfo *>(
         REALLOC(relocInfo, n ? size : 0,
                 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
      if (!grown)
         return false;
      relocInfo = grown;
      // first allocation: clear the header (base addresses and count)
      if (n == 0)
         memset(relocInfo, 0, sizeof(RelocInfo));
   }
   ++relocInfo->count;

   relocInfo->entry[n].data = data;
   relocInfo->entry[n].mask = m;
   relocInfo->entry[n].offset = codeSize + w * 4;
   relocInfo->entry[n].bitPos = s;
   relocInfo->entry[n].type = ty;

   return true;
}
 
void
RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
{
uint32_t value = 0;
 
switch (type) {
case TYPE_CODE: value = info->codePos; break;
case TYPE_BUILTIN: value = info->libPos; break;
case TYPE_DATA: value = info->dataPos; break;
default:
assert(0);
break;
}
value += data;
value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
 
binary[offset / 4] &= ~mask;
binary[offset / 4] |= value & mask;
}
 
} // namespace nv50_ir
 
 
#include "codegen/nv50_ir_driver.h"
 
extern "C" {
 
// Applies every recorded relocation to `code`, using the given base
// addresses for the program code, the builtin library and the data.
//
// relocData is the RelocInfo produced during emission. It is NULL when no
// relocation was recorded (CodeEmitter::getRelocInfo() returns NULL then),
// in which case there is nothing to do.
void
nv50_ir_relocate_code(void *relocData, uint32_t *code,
                      uint32_t codePos,
                      uint32_t libPos,
                      uint32_t dataPos)
{
   nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);

   if (!info)
      return;

   // the base addresses are stored in the table so each entry can read them
   info->codePos = codePos;
   info->libPos = libPos;
   info->dataPos = dataPos;

   for (unsigned int i = 0; i < info->count; ++i)
      info->entry[i].apply(code, info);
}
 
// Returns the builtin library code of the target for `chipset` through
// *code and *size. The Target object is only needed for the lookup and is
// destroyed before returning.
//
// If no target can be created for the chipset, an empty library
// (*code == NULL, *size == 0) is reported instead of dereferencing NULL.
void
nv50_ir_get_target_library(uint32_t chipset,
                           const uint32_t **code, uint32_t *size)
{
   // defined outputs even when target creation fails
   *code = NULL;
   *size = 0;

   nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
   if (!targ)
      return;

   targ->getBuiltinCode(code, size);
   nv50_ir::Target::destroy(targ);
}
 
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
0,0 → 1,236
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#ifndef __NV50_IR_TARGET_H__
#define __NV50_IR_TARGET_H__
 
#include "codegen/nv50_ir.h"
 
namespace nv50_ir {
 
struct RelocInfo;
 
// One patch to apply to the emitted binary once the final addresses are
// known; see apply().
struct RelocEntry
{
// selects which base address of RelocInfo the relocation is relative to
enum Type
{
TYPE_CODE,
TYPE_BUILTIN,
TYPE_DATA
};

// constant added to the selected base address
uint32_t data;
// bits of the target word that are replaced by the relocated value
uint32_t mask;
// byte offset of the 32-bit word to patch
uint32_t offset;
// shift applied to the value: left if >= 0, right by -bitPos if < 0
int8_t bitPos;
Type type;

inline void apply(uint32_t *binary, const RelocInfo *info) const;
};
 
// Relocation table: three base addresses followed by `count` entries.
struct RelocInfo
{
// base addresses used by RelocEntry::apply() for TYPE_CODE, TYPE_BUILTIN
// and TYPE_DATA respectively
uint32_t codePos;
uint32_t libPos;
uint32_t dataPos;

// number of valid elements in entry[]
uint32_t count;

// trailing array declared with zero length; the actual storage is
// allocated together with the header
RelocEntry entry[0];
};
 
// Base class of the per-target binary encoders: lays out programs,
// functions and basic blocks, encodes instructions into a caller supplied
// buffer and collects relocation entries.
class CodeEmitter
{
public:
CodeEmitter(const Target *);
// note: does not free relocInfo; the table is handed out via getRelocInfo()
virtual ~CodeEmitter() { }

// returns whether the instruction was encodable and written
virtual bool emitInstruction(Instruction *) = 0;

// smallest encoding size, in bytes, usable for the instruction
virtual uint32_t getMinEncodingSize(const Instruction *) const = 0;

// sets the output buffer and its size
void setCodeLocation(void *, uint32_t size);
inline void *getCodeLocation() const { return code; }
inline uint32_t getCodeSize() const { return codeSize; }

// records a relocation for the word `w` words past the current code size;
// returns false if the relocation table could not be grown
bool addReloc(RelocEntry::Type, int w, uint32_t data, uint32_t m,
int s);

// NULL if no relocation has been added
inline void *getRelocInfo() const { return relocInfo; }

// compute positions and sizes at program, function and block level
virtual void prepareEmission(Program *);
virtual void prepareEmission(Function *);
virtual void prepareEmission(BasicBlock *);

// dumps the emitted words for debugging
void printBinary() const;

protected:
const Target *targ;

// output buffer, number of bytes emitted so far, and buffer capacity
uint32_t *code;
uint32_t codeSize;
uint32_t codeSizeLimit;

RelocInfo *relocInfo;
};
 
 
// Coarse classification of operations, looked up per operation through
// Target::getOpClass(). The numeric values are explicit; 13 is not used.
enum OpClass
{
OPCLASS_MOVE = 0,
OPCLASS_LOAD = 1,
OPCLASS_STORE = 2,
OPCLASS_ARITH = 3,
OPCLASS_SHIFT = 4,
OPCLASS_SFU = 5,
OPCLASS_LOGIC = 6,
OPCLASS_COMPARE = 7,
OPCLASS_CONVERT = 8,
OPCLASS_ATOMIC = 9,
OPCLASS_TEXTURE = 10,
OPCLASS_SURFACE = 11,
OPCLASS_FLOW = 12,
OPCLASS_PSEUDO = 14,
OPCLASS_VECTOR = 15,
OPCLASS_BITFIELD = 16,
OPCLASS_CONTROL = 17,
OPCLASS_OTHER = 18
};
 
// Abstract description of a GPU target: which operations, modifiers and
// operand files it supports, plus factories for its code emitter and
// legalization passes.
class Target
{
public:
// m, j, s initialize hasJoin, joinAnterior and hasSWSched. chipset is not
// set here; the derived constructors assign it.
Target(bool m, bool j, bool s) : hasJoin(m), joinAnterior(j), hasSWSched(s) { }
virtual ~Target() { }

// factory / disposal of the target object for a chipset
static Target *create(uint32_t chipset);
static void destroy(Target *);

// 0x50 and 0x84 to 0xaf for nv50
// 0xc0 to 0xdf for nvc0
inline uint32_t getChipset() const { return chipset; }

// returns a new emitter; the caller deletes it
virtual CodeEmitter *getCodeEmitter(Program::Type) = 0;

// Drivers should upload this so we can use it from all programs.
// The address chosen is supplied to the relocation routine.
virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const = 0;

// optional hook to pick up driver supplied information; default is a no-op
virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) { }

// runs the target's legalization pass for the given code generation stage
virtual bool runLegalizePass(Program *, CGStage stage) const = 0;

public:
// static properties of one operation
struct OpInfo
{
OpInfo *variants;
operation op;
// bit masks of data types (1 << type)
uint16_t srcTypes;
uint16_t dstTypes;
uint32_t immdBits;
// number of sources
uint8_t srcNr;
// per-source and destination modifier masks (NV50_IR_MOD_*)
uint8_t srcMods[3];
uint8_t dstMods;
// per-source and destination register file masks (1 << file)
uint16_t srcFiles[3];
uint16_t dstFiles;
// smallest encoding size in bytes
unsigned int minEncSize : 4;
unsigned int vector : 1;
// whether the operation may be predicated
unsigned int predicate : 1;
unsigned int commutative : 1;
unsigned int pseudo : 1;
unsigned int flow : 1;
unsigned int hasDest : 1;
unsigned int terminator : 1;
};

// the Instruction overload clamps the operation to OP_LAST
inline const OpInfo& getOpInfo(const Instruction *) const;
inline const OpInfo& getOpInfo(const operation) const;

// maps a data file to the file the target actually uses for it
inline DataFile nativeFile(DataFile f) const;

// whether source s of insn can take its operand directly from the memory
// or immediate that ld loads
virtual bool insnCanLoad(const Instruction *insn, int s,
const Instruction *ld) const = 0;
virtual bool isOpSupported(operation, DataType) const = 0;
virtual bool isAccessSupported(DataFile, DataType) const = 0;
virtual bool isModSupported(const Instruction *,
int s, Modifier) const = 0;
virtual bool isSatSupported(const Instruction *) const = 0;
virtual bool isPostMultiplySupported(operation op, float f,
int& e) const { return false; }
virtual bool mayPredicate(const Instruction *,
const Value *) const = 0;

// whether @insn can be issued together with @next (order matters)
virtual bool canDualIssue(const Instruction *insn,
const Instruction *next) const { return false; }
// defaults of 1; targets override these with their own estimates
virtual int getLatency(const Instruction *) const { return 1; }
virtual int getThroughput(const Instruction *) const { return 1; }

virtual unsigned int getFileSize(DataFile) const = 0;
virtual unsigned int getFileUnit(DataFile) const = 0;

// address of a system value within the given shader file
virtual uint32_t getSVAddress(DataFile, const Symbol *) const = 0;

public:
const bool hasJoin; // true if instructions have a join modifier
const bool joinAnterior; // true if join is executed before the op
const bool hasSWSched; // true if code should provide scheduling data

// tables indexed by operation
static const uint8_t operationSrcNr[];
static const OpClass operationClass[];

static inline uint8_t getOpSrcNr(operation op)
{
return operationSrcNr[op];
}
static inline OpClass getOpClass(operation op)
{
return operationClass[op];
}

protected:
uint32_t chipset;

DataFile nativeFileMap[DATA_FILE_COUNT];

// one entry per operation, plus one extra slot at OP_LAST
OpInfo opInfo[OP_LAST + 1];
};
 
// Looks up the operation info of an instruction; the operation is clamped
// to OP_LAST, so larger values all map to the last table slot.
const Target::OpInfo& Target::getOpInfo(const Instruction *insn) const
{
   const OpInfo &entry = opInfo[MIN2(insn->op, OP_LAST)];
   return entry;
}
 
// Looks up the operation info for `op` directly, without clamping.
const Target::OpInfo& Target::getOpInfo(const operation op) const
{
   const OpInfo &entry = opInfo[op];
   return entry;
}
 
// Translates a data file through the target's nativeFileMap.
inline DataFile Target::nativeFile(DataFile f) const
{
   const DataFile mapped = nativeFileMap[f];
   return mapped;
}
 
} // namespace nv50_ir
 
#endif // __NV50_IR_TARGET_H__
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
0,0 → 1,100
/*
* Copyright 2011 Christoph Bumiller
* 2014 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir_target_gm107.h"
#include "codegen/nv50_ir_lowering_gm107.h"
 
namespace nv50_ir {
 
// Creates the target description for a GM107 chipset; the caller owns the
// returned object.
Target *getTargetGM107(unsigned int chipset)
{
   Target *const target = new TargetGM107(chipset);
   return target;
}
 
// BUILTINS / LIBRARY FUNCTIONS:
 
// laziness -> will just hardcode everything for the time being
 
#include "lib/gm107.asm.h"
 
// Reports the GM107 builtin library: a pointer to its first word and its
// size in bytes.
void
TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
{
   const uint32_t *const firstWord = (const uint32_t *)&gm107_builtin_code[0];
   const uint32_t byteCount = sizeof(gm107_builtin_code);

   *code = firstWord;
   *size = byteCount;
}
 
// Returns the offset of the given builtin (one of the NVC0_BUILTIN_*
// indices) within the GM107 builtin library.
uint32_t
TargetGM107::getBuiltinOffset(int builtin) const
{
   // builtin is signed: reject negative indices as well as too large ones
   assert(builtin >= 0 && builtin < NVC0_BUILTIN_COUNT);
   return gm107_builtin_offsets[builtin];
}
 
// Tells whether the target supports operation `op` on data type `ty`.
bool
TargetGM107::isOpSupported(operation op, DataType ty) const
{
   // multiply-add is only accepted for 32-bit floats
   if (op == OP_MAD || op == OP_FMA)
      return ty == TYPE_F32;

   // these operations are rejected for every type
   const bool rejected = op == OP_SAD ||
                         op == OP_POW ||
                         op == OP_SQRT ||
                         op == OP_DIV ||
                         op == OP_MOD;

   return !rejected;
}
 
// Runs the legalization pass that belongs to the given code generation
// stage and returns its result; returns false for any other stage.
bool
TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
{
   switch (stage) {
   case CG_STAGE_PRE_SSA: {
      GM107LoweringPass lowering(prog);
      return lowering.run(prog, false, true);
   }
   case CG_STAGE_POST_RA: {
      NVC0LegalizePostRA postRA(prog);
      return postRA.run(prog, false, true);
   }
   case CG_STAGE_SSA: {
      NVC0LegalizeSSA ssa;
      return ssa.run(prog, false, true);
   }
   default:
      break;
   }
   return false;
}
 
// Returns the GM107 code emitter for the given program type.
CodeEmitter *
TargetGM107::getCodeEmitter(Program::Type type)
{
   CodeEmitter *const emitter = createCodeEmitterGM107(type);
   return emitter;
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.h
0,0 → 1,21
#include "codegen/nv50_ir_target_nvc0.h"
 
namespace nv50_ir {
 
// GM107 target: reuses the NVC0 target and overrides the code emitter,
// the legalization passes, the builtin library and operation support.
class TargetGM107 : public TargetNVC0
{
public:
TargetGM107(unsigned int chipset) : TargetNVC0(chipset) {}

// getCodeEmitter() forwards to createCodeEmitterGM107()
virtual CodeEmitter *getCodeEmitter(Program::Type);
CodeEmitter *createCodeEmitterGM107(Program::Type);

virtual bool runLegalizePass(Program *, CGStage) const;

// builtin library code and the offset of each builtin within it
virtual void getBuiltinCode(const uint32_t **, uint32_t *) const;
virtual uint32_t getBuiltinOffset(int) const;

virtual bool isOpSupported(operation, DataType) const;
};
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
0,0 → 1,570
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir_target_nv50.h"
 
namespace nv50_ir {
 
// Creates the target description for an NV50 family chipset; the caller
// owns the returned object.
Target *getTargetNV50(unsigned int chipset)
{
   Target *const target = new TargetNV50(chipset);
   return target;
}
 
// Sets up an NV50 target: join modifiers present and anterior, no software
// scheduling data. All system value locations start out as ~0 and the
// position mask as 0 until parseDriverInfo() fills them in.
TargetNV50::TargetNV50(unsigned int card) : Target(true, true, false)
{
   chipset = card;

   wposMask = 0;
   unsigned int sv = 0;
   while (sv <= SV_LAST) {
      sysvalLocation[sv] = ~0;
      ++sv;
   }

   initOpInfo();
}
 
#if 0
// BUILTINS / LIBRARY FUNCTIONS:
 
// TODO
static const uint32_t nvc0_builtin_code[] =
{
};
 
static const uint16_t nvc0_builtin_offsets[NV50_BUILTIN_COUNT] =
{
};
#endif
 
// This target has no builtin library: reports a NULL pointer and size 0.
void
TargetNV50::getBuiltinCode(const uint32_t **code, uint32_t *size) const
{
   const uint32_t *const noCode = NULL;

   *code = noCode;
   *size = 0;
}
 
// Every builtin index maps to offset 0 on this target; the argument is
// not used.
uint32_t
TargetNV50::getBuiltinOffset(int builtin) const
{
   const uint32_t offset = 0;
   return offset;
}
 
// Per-operation overrides applied by TargetNV50::initOpInfo(). In every
// mask except mSat, bit s refers to source operand s.
struct opProperties
{
operation op;
// sources that accept the NEG / ABS / NOT modifier
unsigned int mNeg : 4;
unsigned int mAbs : 4;
unsigned int mNot : 4;
// bit 3 (0x8) set: the destination accepts the SAT modifier
unsigned int mSat : 4;
// sources that may come from c[], s[], a[] or an immediate
unsigned int fConst : 3;
unsigned int fShared : 3;
unsigned int fAttrib : 3;
unsigned int fImm : 3;
};
 
// Modifier and operand-file capabilities of the operations that differ from
// the defaults set in initOpInfo(). Columns follow struct opProperties;
// bit s of a mask refers to source s, and 0x8 in the sat column enables
// saturation of the destination.
static const struct opProperties _initProps[] =
{
// neg abs not sat c[] s[], a[], imm
{ OP_ADD, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
{ OP_SUB, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
{ OP_MUL, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
{ OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
{ OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
{ OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x1, 0x1, 0x0 }, // special constraint
{ OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0 },
{ OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0 },
{ OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x0, 0x1, 0x1, 0x0 },
{ OP_AND, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 },
{ OP_OR, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 },
{ OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 },
{ OP_SHL, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2 },
{ OP_SHR, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2 },
{ OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
{ OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_LG2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_RCP, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_RSQ, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
};
 
// Fills nativeFileMap and the opInfo table: every operation below OP_LAST
// first gets generic defaults, then the exceptions from the local lists and
// from _initProps are applied on top.
void TargetNV50::initOpInfo()
{
unsigned int i, j;

// bit sets indexed by operation value, 32 operations per word
static const uint32_t commutative[(OP_LAST + 31) / 32] =
{
// ADD,MAD,MUL,AND,OR,XOR,MAX,MIN
0x0670ca00, 0x0000003f, 0x00000000, 0x00000000
};
// operations that have a 4-byte encoding
static const uint32_t shortForm[(OP_LAST + 31) / 32] =
{
// MOV,ADD,SUB,MUL,MAD,SAD,L/PINTERP,RCP,TEX,TXF
0x00014e40, 0x00000040, 0x00000498, 0x00000000
};
// operations without a destination
static const operation noDestList[] =
{
OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
OP_QUADON, OP_QUADPOP, OP_TEXBAR, OP_SUSTB, OP_SUSTP, OP_SUREDP,
OP_SUREDB, OP_BAR
};
// operations that cannot be predicated
static const operation noPredList[] =
{
OP_CALL, OP_PREBREAK, OP_PRERET, OP_QUADON, OP_QUADPOP, OP_JOINAT,
OP_EMIT, OP_RESTART
};

// every file maps to itself, except that predicates live in the flags file
for (i = 0; i < DATA_FILE_COUNT; ++i)
nativeFileMap[i] = (DataFile)i;
nativeFileMap[FILE_PREDICATE] = FILE_FLAGS;

// generic defaults: F32 operands in GPRs, no modifiers, has a destination
// (the entry at index OP_LAST itself is not written by this loop)
for (i = 0; i < OP_LAST; ++i) {
opInfo[i].variants = NULL;
opInfo[i].op = (operation)i;
opInfo[i].srcTypes = 1 << (int)TYPE_F32;
opInfo[i].dstTypes = 1 << (int)TYPE_F32;
opInfo[i].immdBits = 0xffffffff;
opInfo[i].srcNr = operationSrcNr[i];

// only the sources the operation actually has are initialized
for (j = 0; j < opInfo[i].srcNr; ++j) {
opInfo[i].srcMods[j] = 0;
opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
}
opInfo[i].dstMods = 0;
opInfo[i].dstFiles = 1 << (int)FILE_GPR;

opInfo[i].hasDest = 1;
opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
// operations ordered before OP_MOV are pseudo operations
opInfo[i].pseudo = (i < OP_MOV);
opInfo[i].predicate = !opInfo[i].pseudo;
opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
}
for (i = 0; i < sizeof(noDestList) / sizeof(noDestList[0]); ++i)
opInfo[noDestList[i]].hasDest = 0;
for (i = 0; i < sizeof(noPredList) / sizeof(noPredList[0]); ++i)
opInfo[noPredList[i]].predicate = 0;

// apply the per-operation modifier and operand-file capabilities
for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) {
const struct opProperties *prop = &_initProps[i];

// bit s of each mask refers to source s
for (int s = 0; s < 3; ++s) {
if (prop->mNeg & (1 << s))
opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG;
if (prop->mAbs & (1 << s))
opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS;
if (prop->mNot & (1 << s))
opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT;
if (prop->fConst & (1 << s))
opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST;
if (prop->fShared & (1 << s))
opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_SHARED;
if (prop->fAttrib & (1 << s))
opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_SHADER_INPUT;
if (prop->fImm & (1 << s))
opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE;
}
// 0x8 in the sat column: the destination may be saturated
if (prop->mSat & 8)
opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
}

// chipsets from 0xa0 on can additionally saturate MUL
if (chipset >= 0xa0)
opInfo[OP_MUL].dstMods = NV50_IR_MOD_SAT;
}
 
// Size of each data file. The GPR size is given in 16-bit units, of which
// only the first 128 are encodable for 16-bit registers. Unknown files
// assert and yield 0.
unsigned int
TargetNV50::getFileSize(DataFile file) const
{
   unsigned int size = 0;

   switch (file) {
   case FILE_NULL:
   case FILE_PREDICATE:
   case FILE_IMMEDIATE:
      break;
   case FILE_GPR:
      size = 256;
      break;
   case FILE_FLAGS:
   case FILE_ADDRESS:
      size = 4;
      break;
   case FILE_MEMORY_CONST:
      size = 65536;
      break;
   case FILE_SHADER_INPUT:
   case FILE_SHADER_OUTPUT:
      size = 0x200;
      break;
   case FILE_MEMORY_GLOBAL:
      size = 0xffffffff;
      break;
   case FILE_MEMORY_SHARED:
      size = 16 << 10;
      break;
   case FILE_MEMORY_LOCAL:
      size = 48 << 10;
      break;
   case FILE_SYSTEM_VALUE:
      size = 16;
      break;
   default:
      assert(!"invalid file");
      break;
   }
   return size;
}
 
// Unit value of a data file: 1 for GPRs and address registers, 2 for
// system values, 0 for everything else.
unsigned int
TargetNV50::getFileUnit(DataFile file) const
{
   switch (file) {
   case FILE_GPR:
   case FILE_ADDRESS:
      return 1;
   case FILE_SYSTEM_VALUE:
      return 2;
   default:
      return 0;
   }
}
 
// Returns the address of the system value described by sym. A few system
// values have fixed addresses or addresses computed from the component
// index; all others come from the sysvalLocation table.
uint32_t
TargetNV50::getSVAddress(DataFile shaderFile, const Symbol *sym) const
{
   switch (sym->reg.data.sv.sv) {
   case SV_TID:
   case SV_SAMPLE_POS: // sample position is handled differently
      return 0;
   case SV_NTID:
      return 0x2 + 2 * sym->reg.data.sv.index;
   case SV_NCTAID:
      return 0x8 + 2 * sym->reg.data.sv.index;
   case SV_CTAID:
      return 0xc + 2 * sym->reg.data.sv.index;
   case SV_FACE:
      return 0x3fc;
   case SV_PRIMITIVE_ID:
      // fixed address when read as a shader input
      if (shaderFile == FILE_SHADER_INPUT)
         return 0x18;
      return sysvalLocation[sym->reg.data.sv.sv];
   case SV_POSITION:
   {
      // skip 4 bytes for every lower component enabled in wposMask
      uint32_t address = sysvalLocation[sym->reg.data.sv.sv];
      int comp = 0;
      while (comp < sym->reg.data.sv.index) {
         if (wposMask & (1 << comp))
            address += 4;
         ++comp;
      }
      return address;
   }
   default:
      break;
   }
   return sysvalLocation[sym->reg.data.sv.sv];
}
 
// Decides whether source s of instruction i can read its operand directly
// from the location that ld loads (ld's source 0), instead of from a GPR.
//
// Operand combinations accepted below, one letter per operand
// (NOTE(review): letter meanings are not spelled out here — confirm):
// long: rrr, arr, rcr, acr, rrc, arc, gcr, grr
// short: rr, ar, rc, gr
// immd: ri, gi
bool
TargetNV50::insnCanLoad(const Instruction *i, int s,
const Instruction *ld) const
{
// file the operand would be read from after folding the load
DataFile sf = ld->src(0).getFile();

// immediates cannot be combined with a predicate or a flags definition
if (sf == FILE_IMMEDIATE && (i->predSrc >= 0 || i->flagsDef >= 0))
return false;
if (s >= opInfo[i->op].srcNr)
return false;
// the operation must accept this file for source s
if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf)))
return false;
// the third source may only be loaded if the second one is a GPR
if (s == 2 && i->src(1).getFile() != FILE_GPR)
return false;

// NOTE: don't rely on flagsDef
if (sf == FILE_IMMEDIATE)
for (int d = 0; i->defExists(d); ++d)
if (i->def(d).getFile() == FILE_FLAGS)
return false;

// mode holds 2 bits per source: 0 = GPR, 1 = s[] / a[], 2 = c[],
// 3 = immediate, with source s taken as if the load were already folded
unsigned mode = 0;

for (int z = 0; z < Target::operationSrcNr[i->op]; ++z) {
DataFile zf = (z == s) ? sf : i->src(z).getFile();
switch (zf) {
case FILE_GPR:
break;
case FILE_MEMORY_SHARED:
case FILE_SHADER_INPUT:
mode |= 1 << (z * 2);
break;
case FILE_MEMORY_CONST:
mode |= 2 << (z * 2);
break;
case FILE_IMMEDIATE:
mode |= 3 << (z * 2);
default:
break;
}
}

// only the listed operand combinations are accepted
// NOTE(review): cases 0x09 and 0x0d dereference ld->bb, while the indirect
// path further down treats ld->bb as possibly NULL — confirm
switch (mode) {
case 0x00:
case 0x01:
case 0x03:
case 0x08:
case 0x0c:
case 0x20:
case 0x21:
break;
case 0x09:
// Shader inputs get transformed to p[] in geometry shaders, and those
// aren't allowed to be used at the same time as c[].
if (ld->bb->getProgram()->getType() == Program::TYPE_GEOMETRY)
return false;
break;
case 0x0d:
if (ld->bb->getProgram()->getType() != Program::TYPE_GEOMETRY)
return false;
break;
default:
return false;
}

// access size in bytes used for the offset check below
uint8_t ldSize;

if ((i->op == OP_MUL || i->op == OP_MAD) && !isFloatType(i->dType)) {
// 32-bit MUL will be split into 16-bit MULs
if (ld->src(0).isIndirect(0))
return false;
if (sf == FILE_IMMEDIATE)
return false;
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH && sf == FILE_MEMORY_CONST)
return false;
ldSize = 2;
} else {
ldSize = typeSizeof(ld->dType);
}

// immediates need no address checks
if (sf == FILE_IMMEDIATE)
return true;


// Check if memory access is encodable:

if (ldSize < 4 && sf == FILE_SHADER_INPUT) // no < 4-byte aligned a[] access
return false;
// the offset must not exceed 127 units of the access size
if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * ldSize))
return false;

if (ld->src(0).isIndirect(0)) {
// no instruction source may already be indirect when an indirect load
// is folded in
for (int z = 0; i->srcExists(z); ++z)
if (i->src(z).isIndirect(0))
return false;

// s[] access only possible in CP, $aX always applies
if (sf == FILE_MEMORY_SHARED)
return true;
if (!ld->bb) // can't check type ...
return false;
Program::Type pt = ld->bb->getProgram()->getType();

// $aX applies to c[] only in VP, FP, GP if p[] is not accessed
if (pt == Program::TYPE_COMPUTE)
return false;
if (pt == Program::TYPE_GEOMETRY) {
if (sf == FILE_MEMORY_CONST)
return i->src(s).getFile() != FILE_SHADER_INPUT;
return sf == FILE_SHADER_INPUT;
}
return sf == FILE_MEMORY_CONST;
}
return true;
}
 
// Tells whether a value of type ty can be accessed in the given file.
// B96 and NONE are never accessible; types wider than 4 bytes only in
// local and global memory.
bool
TargetNV50::isAccessSupported(DataFile file, DataType ty) const
{
   const bool unsupportedType = (ty == TYPE_NONE) || (ty == TYPE_B96);
   if (unsupportedType)
      return false;

   if (typeSizeof(ty) <= 4)
      return true;

   switch (file) {
   case FILE_MEMORY_LOCAL:
   case FILE_MEMORY_GLOBAL:
      return true;
   default:
      return false;
   }
}
 
// Tells whether the target supports operation op on data type ty, taking
// the chipset revision into account.
bool
TargetNV50::isOpSupported(operation op, DataType ty) const
{
   const bool beforeNVA0 = chipset < 0xa0;

   // 64-bit floats are rejected on chipsets below 0xa0
   if (beforeNVA0 && ty == TYPE_F64)
      return false;

   switch (op) {
   case OP_SAD:
      return ty == TYPE_S32;
   case OP_PRERET:
      return !beforeNVA0;
   case OP_TXG:
      if (chipset < 0xa3)
         return false;
      return chipset != 0xaa && chipset != 0xac;
   case OP_EXIT: // want exit modifier instead (on NOP if required)
   case OP_MEMBAR:
   case OP_POW:
   case OP_SQRT:
   case OP_DIV:
   case OP_MOD:
   case OP_SET_AND:
   case OP_SET_OR:
   case OP_SET_XOR:
   case OP_SLCT:
   case OP_SELP:
   case OP_POPCNT:
   case OP_INSBF:
   case OP_EXTBF:
      return false;
   default:
      break;
   }
   return true;
}
 
// Tells whether source s of insn may carry the modifier mod. For
// non-float destination types only a fixed set of operations is
// considered at all; the final answer comes from the srcMods mask in
// opInfo.
bool
TargetNV50::isModSupported(const Instruction *insn, int s, Modifier mod) const
{
if (!isFloatType(insn->dType)) {
switch (insn->op) {
// these fall through to the srcMods check below
case OP_ABS:
case OP_NEG:
case OP_CVT:
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
case OP_AND:
case OP_OR:
case OP_XOR:
break;
case OP_ADD:
// rejected if the other source is already negated
if (insn->src(s ? 0 : 1).mod.neg())
return false;
break;
case OP_SUB:
// for source 0 the answer depends only on whether source 1 is negated;
// the srcMods mask is not consulted on this path
if (s == 0)
return insn->src(1).mod.neg() ? false : true;
break;
case OP_SET:
if (insn->sType != TYPE_F32)
return false;
break;
default:
return false;
}
}
// srcMods only has three entries
if (s >= 3)
return false;
// every requested modifier bit must be allowed for this source
return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
}
 
// Tells whether insn may be given a predicate: it must not have one yet,
// must not read flags, must have no immediate source, and its operation
// must allow predication.
bool
TargetNV50::mayPredicate(const Instruction *insn, const Value *pred) const
{
   if (insn->getPredicate() || insn->flagsSrc >= 0)
      return false;

   int s = 0;
   while (insn->srcExists(s)) {
      if (insn->src(s).getFile() == FILE_IMMEDIATE)
         return false;
      ++s;
   }

   return opInfo[insn->op].predicate;
}
 
// Tells whether the result of insn can be saturated: always for CVT,
// otherwise only for F32 results of operations whose dstMods allow SAT.
bool
TargetNV50::isSatSupported(const Instruction *insn) const
{
   if (insn->op != OP_CVT) {
      if (insn->dType != TYPE_F32)
         return false;
      return opInfo[insn->op].dstMods & NV50_IR_MOD_SAT;
   }
   return true;
}
 
// Rough latency estimate for instruction i: 100 for loads from local or
// global memory, 22 for everything else.
int TargetNV50::getLatency(const Instruction *i) const
{
   // TODO: tune these values
   const int defaultLatency = 22;

   if (i->op != OP_LOAD)
      return defaultLatency;

   switch (i->src(0).getFile()) {
   case FILE_MEMORY_LOCAL:
   case FILE_MEMORY_GLOBAL:
      return 100; // really 400 to 800
   default:
      break;
   }
   return defaultLatency;
}
 
// These are "inverse" throughput values, i.e. the number of cycles required
// to issue a specific instruction for a full warp (32 threads).
//
// Assuming we have more than 1 warp in flight, a higher issue latency results
// in a lower result latency since the MP will have spent more time with other
// warps.
// This also helps to determine the number of cycles between instructions in
// a single warp.
//
// Inverse throughput estimate for instruction i, selected by its
// destination type and, for F32, by its operation.
int TargetNV50::getThroughput(const Instruction *i) const
{
   // TODO: tune these values
   switch (i->dType) {
   case TYPE_F32:
      switch (i->op) {
      case OP_RCP:
      case OP_RSQ:
      case OP_LG2:
      case OP_SIN:
      case OP_COS:
      case OP_PRESIN:
      case OP_PREEX2:
         return 16;
      default:
         return 4;
      }
   case TYPE_U32:
   case TYPE_S32:
      return 4;
   case TYPE_F64:
      return 32;
   default:
      return 1;
   }
}
 
static void
recordLocation(uint16_t *locs, uint8_t *masks,
const struct nv50_ir_varying *var)
{
uint16_t addr = var->slot[0] * 4;
 
switch (var->sn) {
case TGSI_SEMANTIC_POSITION: locs[SV_POSITION] = addr; break;
case TGSI_SEMANTIC_INSTANCEID: locs[SV_INSTANCE_ID] = addr; break;
case TGSI_SEMANTIC_VERTEXID: locs[SV_VERTEX_ID] = addr; break;
case TGSI_SEMANTIC_PRIMID: locs[SV_PRIMITIVE_ID] = addr; break;
case TGSI_SEMANTIC_LAYER: locs[SV_LAYER] = addr; break;
case TGSI_SEMANTIC_VIEWPORT_INDEX: locs[SV_VIEWPORT_INDEX] = addr; break;
default:
break;
}
if (var->sn == TGSI_SEMANTIC_POSITION && masks)
masks[0] = var->mask;
}
 
void
TargetNV50::parseDriverInfo(const struct nv50_ir_prog_info *info)
{
unsigned int i;
for (i = 0; i < info->numOutputs; ++i)
recordLocation(sysvalLocation, NULL, &info->out[i]);
for (i = 0; i < info->numInputs; ++i)
recordLocation(sysvalLocation, &wposMask, &info->in[i]);
for (i = 0; i < info->numSysVals; ++i)
recordLocation(sysvalLocation, NULL, &info->sv[i]);
 
if (sysvalLocation[SV_POSITION] >= 0x200) {
// not assigned by driver, but we need it internally
wposMask = 0x8;
sysvalLocation[SV_POSITION] = 0;
}
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.h
0,0 → 1,72
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir_target.h"
 
namespace nv50_ir {
 
#define NVC0_BUILTIN_DIV_U32 0
#define NVC0_BUILTIN_DIV_S32 1
#define NVC0_BUILTIN_RCP_F64 2
#define NVC0_BUILTIN_RSQ_F64 3
 
#define NVC0_BUILTIN_COUNT 4
 
// Target description for the NV50 family: implements the Target queries
// and keeps track of where the driver placed the system values.
class TargetNV50 : public Target
{
public:
TargetNV50(unsigned int chipset);

virtual CodeEmitter *getCodeEmitter(Program::Type);

virtual bool runLegalizePass(Program *, CGStage stage) const;

// this target has no builtin library: reports NULL / 0
virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const;

// records system value locations from the driver's inputs and outputs
virtual void parseDriverInfo(const struct nv50_ir_prog_info *);

virtual bool insnCanLoad(const Instruction *insn, int s,
const Instruction *ld) const;
virtual bool isOpSupported(operation, DataType) const;
virtual bool isAccessSupported(DataFile, DataType) const;
virtual bool isModSupported(const Instruction *, int s, Modifier) const;
virtual bool isSatSupported(const Instruction *) const;
virtual bool mayPredicate(const Instruction *, const Value *) const;

virtual int getLatency(const Instruction *) const;
virtual int getThroughput(const Instruction *) const;

virtual unsigned int getFileSize(DataFile) const;
virtual unsigned int getFileUnit(DataFile) const;

virtual uint32_t getSVAddress(DataFile shaderFile, const Symbol *sv) const;

// always 0 on this target
uint32_t getBuiltinOffset(int builtin) const;

private:
// fills nativeFileMap and the opInfo table
void initOpInfo();

// address of each system value, indexed by system value; ~0 until assigned
uint16_t sysvalLocation[SV_LAST + 1];
// component mask of the position input
uint8_t wposMask;
};
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
0,0 → 1,617
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir_target_nvc0.h"
 
namespace nv50_ir {
 
// Factory for the NVC0-family target description: heap-allocates a
// TargetNVC0 configured for @chipset and hands the raw pointer back.
Target *getTargetNVC0(unsigned int chipset)
{
   TargetNVC0 *const target = new TargetNVC0(chipset);
   return target;
}
 
// Builds the target description for chipset id @card.
// The three flags forwarded to the Target base class are derived from the
// chipset id (card < 0x110, constant false, card >= 0xe4).
// NOTE(review): their exact meaning is defined by Target, not visible here.
TargetNVC0::TargetNVC0(unsigned int card)
   : Target(card < 0x110, false, card >= 0xe4)
{
   this->chipset = card;

   // Fill the per-opcode capability tables.
   this->initOpInfo();
}
 
// BUILTINS / LIBRARY FUNCTIONS:

// laziness -> will just hardcode everything for the time being
 
#include "lib/gf100.asm.h"
#include "lib/gk104.asm.h"
#include "lib/gk110.asm.h"
 
// Returns, through @code / @size, the pre-assembled builtin library blob
// (and its size in bytes) that matches the current chipset:
//  - 0xe0 family below GK20A  -> gk104 blob
//  - GK20A, 0xf0, 0x100       -> gk110 blob
//  - everything else          -> gf100 blob
void
TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const
{
   const unsigned int family = chipset & ~0xf;

   if (family == 0xe0 && chipset < NVISA_GK20A_CHIPSET) {
      *code = (const uint32_t *)&gk104_builtin_code[0];
      *size = sizeof(gk104_builtin_code);
      return;
   }

   // GK20A lives in the 0xe0 family but shares the GK110 library.
   if (family == 0xe0 || family == 0xf0 || family == 0x100) {
      *code = (const uint32_t *)&gk110_builtin_code[0];
      *size = sizeof(gk110_builtin_code);
      return;
   }

   *code = (const uint32_t *)&gf100_builtin_code[0];
   *size = sizeof(gf100_builtin_code);
}
 
uint32_t
TargetNVC0::getBuiltinOffset(int builtin) const
{
assert(builtin < NVC0_BUILTIN_COUNT);
 
switch (chipset & ~0xf) {
case 0xe0:
if (chipset < NVISA_GK20A_CHIPSET)
return gk104_builtin_offsets[builtin];
/* fall-through for GK20A */
case 0xf0:
case 0x100:
return gk110_builtin_offsets[builtin];
default:
return gf100_builtin_offsets[builtin];
}
}
 
// One row of the static capability table used by TargetNVC0::initOpInfo().
// For mNeg/mAbs/mNot/fConst/fImmd, bit s (s = 0..2) refers to source operand s.
struct opProperties
{
operation op;
unsigned int mNeg : 4; // sources that accept the NEG modifier
unsigned int mAbs : 4; // sources that accept the ABS modifier
unsigned int mNot : 4; // sources that accept the NOT modifier
unsigned int mSat : 4; // bit 3 (0x8): destination may saturate
unsigned int fConst : 3; // sources that may come from constant memory (c[])
unsigned int fImmd : 4; // last bit indicates if full immediate is supported
};
 
// Capability table consumed by TargetNVC0::initOpInfo().
// Columns neg/abs/not/c[]/imm are bitmasks over source slots (bit s = source
// s). In the sat column 0x8 enables saturation on the destination; in the imm
// column 0x8 additionally marks the opcode as accepting full 32-bit
// immediates (immdBits = 0xffffffff).
static const struct opProperties _initProps[] =
{
// neg abs not sat c[] imm
{ OP_ADD, 0x3, 0x3, 0x0, 0x8, 0x2, 0x2 | 0x8 },
{ OP_SUB, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 | 0x8 },
{ OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x2 | 0x8 },
{ OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
{ OP_MADSP, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
{ OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
{ OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
{ OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
{ OP_CEIL, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
{ OP_FLOOR, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
{ OP_TRUNC, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
{ OP_AND, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
{ OP_OR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
{ OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
{ OP_SHL, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
{ OP_SHR, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
{ OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_SLCT, 0x4, 0x0, 0x0, 0x0, 0x6, 0x2 }, // special c[] constraint
{ OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
{ OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
{ OP_COS, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_SIN, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_EX2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_LG2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_RCP, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_RSQ, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
{ OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
{ OP_POPCNT, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 },
{ OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
{ OP_EXTBF, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
{ OP_BFIND, 0x0, 0x0, 0x1, 0x0, 0x1, 0x1 },
{ OP_PERMT, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
{ OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_SET_XOR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
// saturate only:
{ OP_LINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
{ OP_PINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
// nve4 ops:
{ OP_SULDB, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0 },
{ OP_SUSTB, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0 },
{ OP_SUSTP, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0 },
{ OP_SUCLAMP, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
{ OP_SUBFM, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
{ OP_SUEAU, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }
};
 
// Populates nativeFileMap and the opInfo[] table for this target.
// Works in three passes: (1) conservative defaults for every opcode,
// (2) clear hasDest / predicate for the opcodes listed in noDest / noPred,
// (3) apply the per-opcode modifier and operand-file capabilities from
// _initProps.
void TargetNVC0::initOpInfo()
{
unsigned int i, j;

// One bit per opcode (bit i % 32 of word i / 32): set if commutative.
static const uint32_t commutative[(OP_LAST + 31) / 32] =
{
// ADD, MAD, MUL, AND, OR, XOR, MAX, MIN
0x0670ca00, 0x0000003f, 0x00000000, 0x00000000
};

// One bit per opcode: set if the opcode gets minEncSize 4 instead of 8.
static const uint32_t shortForm[(OP_LAST + 31) / 32] =
{
// ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV
0x0670ca00, 0x00000000, 0x00000000, 0x00000000
};

// Opcodes whose hasDest flag is cleared below.
static const operation noDest[] =
{
OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
OP_QUADON, OP_QUADPOP, OP_TEXBAR, OP_SUSTB, OP_SUSTP, OP_SUREDP,
OP_SUREDB, OP_BAR
};

// Opcodes whose predicate flag is cleared below.
static const operation noPred[] =
{
OP_CALL, OP_PRERET, OP_QUADON, OP_QUADPOP,
OP_JOINAT, OP_PREBREAK, OP_PRECONT, OP_BRKPT
};

// Every data file maps to itself, except FILE_ADDRESS which maps to GPRs.
for (i = 0; i < DATA_FILE_COUNT; ++i)
nativeFileMap[i] = (DataFile)i;
nativeFileMap[FILE_ADDRESS] = FILE_GPR;

// Pass 1: defaults - F32 types, GPR-only operands, no modifiers.
for (i = 0; i < OP_LAST; ++i) {
opInfo[i].variants = NULL;
opInfo[i].op = (operation)i;
opInfo[i].srcTypes = 1 << (int)TYPE_F32;
opInfo[i].dstTypes = 1 << (int)TYPE_F32;
opInfo[i].immdBits = 0;
opInfo[i].srcNr = operationSrcNr[i];

for (j = 0; j < opInfo[i].srcNr; ++j) {
opInfo[i].srcMods[j] = 0;
opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
}
opInfo[i].dstMods = 0;
opInfo[i].dstFiles = 1 << (int)FILE_GPR;

opInfo[i].hasDest = 1;
opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
// opcodes numbered below OP_MOV are flagged pseudo and never predicated
opInfo[i].pseudo = (i < OP_MOV);
opInfo[i].predicate = !opInfo[i].pseudo;
opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
}
// Pass 2: exceptions to the hasDest / predicate defaults.
for (i = 0; i < sizeof(noDest) / sizeof(noDest[0]); ++i)
opInfo[noDest[i]].hasDest = 0;
for (i = 0; i < sizeof(noPred) / sizeof(noPred[0]); ++i)
opInfo[noPred[i]].predicate = 0;

// Pass 3: per-opcode capabilities; bit s of each mask applies to source s.
for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) {
const struct opProperties *prop = &_initProps[i];

for (int s = 0; s < 3; ++s) {
if (prop->mNeg & (1 << s))
opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG;
if (prop->mAbs & (1 << s))
opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS;
if (prop->mNot & (1 << s))
opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT;
if (prop->fConst & (1 << s))
opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST;
if (prop->fImmd & (1 << s))
opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE;
// bit 3 of fImmd is independent of s: full 32-bit immediates allowed
if (prop->fImmd & 8)
opInfo[prop->op].immdBits = 0xffffffff;
}
// bit 3 of mSat: the destination may carry the saturate modifier
if (prop->mSat & 8)
opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
}
}
 
// Returns the size limit this target reports for data file @file.
// Unknown files trip an assertion and report 0.
unsigned int
TargetNVC0::getFileSize(DataFile file) const
{
   unsigned int limit = 0;

   switch (file) {
   case FILE_NULL:
   case FILE_ADDRESS:
   case FILE_IMMEDIATE:
      // nothing allocatable in these files
      break;
   case FILE_GPR:
      // GK20A and newer expose the larger register file
      limit = (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
      break;
   case FILE_PREDICATE:
      limit = 7;
      break;
   case FILE_FLAGS:
      limit = 1;
      break;
   case FILE_MEMORY_CONST:
      limit = 65536;
      break;
   case FILE_SHADER_INPUT:
   case FILE_SHADER_OUTPUT:
      limit = 0x400;
      break;
   case FILE_MEMORY_GLOBAL:
      limit = 0xffffffff;
      break;
   case FILE_MEMORY_SHARED:
      limit = 16 << 10;
      break;
   case FILE_MEMORY_LOCAL:
      limit = 48 << 10;
      break;
   case FILE_SYSTEM_VALUE:
      limit = 32;
      break;
   default:
      assert(!"invalid file");
      break;
   }
   return limit;
}
 
// Returns the unit value for @file: 2 for GPR, ADDRESS and SYSTEM_VALUE,
// 0 for every other file.
// NOTE(review): presumably a log2 of the allocation granularity - confirm
// against Target::getFileUnit users.
unsigned int
TargetNVC0::getFileUnit(DataFile file) const
{
   switch (file) {
   case FILE_GPR:
   case FILE_ADDRESS:
   case FILE_SYSTEM_VALUE:
      return 2;
   default:
      return 0;
   }
}
 
// Maps the system value described by @sym to its address for this target.
// @shaderFile only matters for SV_PRIMITIVE_ID, which has different input
// and output addresses. SV_NTID / SV_NCTAID / SV_GRIDID only have an address
// on GK104 and newer; elsewhere, and for unknown semantics, 0xffffffff is
// returned.
uint32_t
TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
{
   const int index = sym->reg.data.sv.index;
   const SVSemantic semantic = sym->reg.data.sv.sv;

   const bool readsInput = (shaderFile == FILE_SHADER_INPUT);
   const bool isKepler = (getChipset() >= NVISA_GK104_CHIPSET);

   // Indexed system values are laid out 4 apart.
   const int scaled = index * 4;

   switch (semantic) {
   case SV_POSITION:
      return 0x070 + scaled;
   case SV_INSTANCE_ID:
      return 0x2f8;
   case SV_VERTEX_ID:
      return 0x2fc;
   case SV_PRIMITIVE_ID:
      if (readsInput)
         return 0x060;
      return 0x040;
   case SV_LAYER:
      return 0x064;
   case SV_VIEWPORT_INDEX:
      return 0x068;
   case SV_POINT_SIZE:
      return 0x06c;
   case SV_CLIP_DISTANCE:
      return 0x2c0 + scaled;
   case SV_POINT_COORD:
      return 0x2e0 + scaled;
   case SV_FACE:
      return 0x3fc;
   case SV_TESS_FACTOR:
      return 0x000 + scaled;
   case SV_TESS_COORD:
      return 0x2f0 + scaled;
   case SV_NTID:
      if (!isKepler)
         return ~0;
      return 0x00 + scaled;
   case SV_NCTAID:
      if (!isKepler)
         return ~0;
      return 0x0c + scaled;
   case SV_GRIDID:
      if (!isKepler)
         return ~0;
      return 0x18;
   case SV_SAMPLE_INDEX:
   case SV_SAMPLE_POS:
   case SV_SAMPLE_MASK:
      return 0;
   default:
      return 0xffffffff;
   }
}
 
// Decides whether the value produced by load/mov @ld can be used directly as
// source @s of instruction @i (instead of going through a register).
// Returns true if folding the operand is allowed by the checks below.
bool
TargetNVC0::insnCanLoad(const Instruction *i, int s,
const Instruction *ld) const
{
DataFile sf = ld->src(0).getFile();

// immediate 0 can be represented by GPR $r63/$r255
if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
return (!i->isPseudo() &&
!i->asTex() &&
i->op != OP_EXPORT && i->op != OP_STORE);

// the slot must exist and must accept the loaded value's file (see opInfo)
if (s >= opInfo[i->op].srcNr)
return false;
if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf)))
return false;

// indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0
if (ld->src(0).isIndirect(0))
return false;

// Reject if @i already has a source that is neither a GPR nor a predicate:
// only zero immediates (and source 2 of OP_SUCLAMP) are tolerated.
for (int k = 0; i->srcExists(k); ++k) {
if (i->src(k).getFile() == FILE_IMMEDIATE) {
if (k == 2 && i->op == OP_SUCLAMP) // special case
continue;
if (i->getSrc(k)->reg.data.u64 != 0)
return false;
} else
if (i->src(k).getFile() != FILE_GPR &&
i->src(k).getFile() != FILE_PREDICATE) {
return false;
}
}

// not all instructions support full 32 bit immediates
if (sf == FILE_IMMEDIATE) {
Storage &reg = ld->getSrc(0)->asImm()->reg;

// immediates are not accepted for source types wider than 4 bytes
if (typeSizeof(i->sType) > 4)
return false;
if (opInfo[i->op].immdBits != 0xffffffff) {
// short immediate: F32 needs the low 12 bits clear, integers must fit
// the signed range checked below
if (i->sType == TYPE_F32) {
if (reg.data.u32 & 0xfff)
return false;
} else
if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
// with u32, 0xfffff counts as 0xffffffff as well
if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
return false;
}
} else
if (i->op == OP_MAD || i->op == OP_FMA) {
// requires src == dst, cannot decide before RA
// (except if we implement more constraints)
if (ld->getSrc(0)->asImm()->reg.data.u32 & 0xfff)
return false;
} else
if (i->op == OP_ADD && i->sType == TYPE_F32) {
// add f32 LIMM cannot saturate
if (i->saturate && (reg.data.u32 & 0xfff))
return false;
}
}

return true;
}
 
// Tells whether a memory access of type @ty to data file @file is supported.
// TYPE_NONE is never supported; constant memory on chipsets >= 0xe0 accepts
// types up to 8 bytes; everywhere else only TYPE_B96 is rejected.
bool
TargetNVC0::isAccessSupported(DataFile file, DataType ty) const
{
   if (ty == TYPE_NONE)
      return false;

   const bool keplerConst =
      (file == FILE_MEMORY_CONST) && (getChipset() >= 0xe0);
   if (keplerConst) // wrong encoding ?
      return typeSizeof(ty) <= 8;

   return ty != TYPE_B96;
}
 
// Tells whether operation @op is supported for data type @ty.
//  - MAD / FMA: F32 only
//  - SAD: S32 or U32 only
//  - POW, SQRT, DIV, MOD: never supported
// Everything else is reported as supported.
bool
TargetNVC0::isOpSupported(operation op, DataType ty) const
{
   switch (op) {
   case OP_MAD:
   case OP_FMA:
      return ty == TYPE_F32;
   case OP_SAD:
      return ty == TYPE_S32 || ty == TYPE_U32;
   case OP_POW:
   case OP_SQRT:
   case OP_DIV:
   case OP_MOD:
      return false;
   default:
      return true;
   }
}
 
bool
TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
{
if (!isFloatType(insn->dType)) {
switch (insn->op) {
case OP_ABS:
case OP_NEG:
case OP_CVT:
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
case OP_AND:
case OP_OR:
case OP_XOR:
case OP_POPCNT:
case OP_BFIND:
break;
case OP_SET:
if (insn->sType != TYPE_F32)
return false;
break;
case OP_ADD:
if (mod.abs())
return false;
if (insn->src(s ? 0 : 1).mod.neg())
return false;
break;
case OP_SUB:
if (s == 0)
return insn->src(1).mod.neg() ? false : true;
break;
default:
return false;
}
}
if (s >= 3)
return false;
return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
}
 
// An instruction may receive a predicate only if it does not have one yet
// and its opcode is flagged as predicable in opInfo. @pred is not inspected.
bool
TargetNVC0::mayPredicate(const Instruction *insn, const Value *pred) const
{
   const bool alreadyPredicated = insn->getPredicate();

   return !alreadyPredicated && opInfo[insn->op].predicate;
}
 
bool
TargetNVC0::isSatSupported(const Instruction *insn) const
{
if (insn->op == OP_CVT)
return true;
if (!(opInfo[insn->op].dstMods & NV50_IR_MOD_SAT))
return false;
 
if (insn->dType == TYPE_U32)
return (insn->op == OP_ADD) || (insn->op == OP_MAD);
 
// add f32 LIMM cannot saturate
if (insn->op == OP_ADD && insn->sType == TYPE_F32) {
if (insn->getSrc(1)->asImm() &&
insn->getSrc(1)->reg.data.u32 & 0xfff)
return false;
}
 
return insn->dType == TYPE_F32;
}
 
// Tells whether a multiplication of the result of @op by the constant @f can
// be expressed as a post-multiply, i.e. whether |f| is an exact power of two
// 2^e with -3 <= e <= 3. Only OP_MUL qualifies.
//
// @e receives the exponent when true is returned. When @op is OP_MUL but @f
// is rejected up front, @e is set to 0; callers should only rely on @e after
// a true result.
bool
TargetNVC0::isPostMultiplySupported(operation op, float f, int& e) const
{
   if (op != OP_MUL)
      return false;

   f = fabsf(f);

   // Range-check before taking the logarithm: for 0, infinities and NaN the
   // result of log2f() is not representable as int, and converting it is
   // undefined behaviour. The negated comparison also rejects NaN.
   if (!(f >= 0.125f && f <= 8.0f)) {
      e = 0;
      return false;
   }

   // f is in [2^-3, 2^3], so the truncated exponent is in [-3, 3].
   e = static_cast<int>(log2f(f));
   return f == exp2f(static_cast<float>(e));
}
 
// TODO: better values
// this could be more precise, e.g. depending on the issue-to-read/write delay
// of the depending instruction, but it's good enough
// Returns the estimated result latency (in cycles) of instruction @i.
// Chipsets below 0xe4 only distinguish loads; 0xe4 and newer use a
// per-class estimate.
int TargetNVC0::getLatency(const Instruction *i) const
{
   if (chipset < 0xe4) {
      if (i->op != OP_LOAD)
         return 24;
      return (i->cache == CACHE_CV) ? 700 : 48;
   }

   // anything touching doubles
   if (i->dType == TYPE_F64 || i->sType == TYPE_F64)
      return 20;

   switch (i->op) {
   case OP_LINTERP:
   case OP_PINTERP:
      return 15;
   case OP_LOAD:
      // constant-memory loads are cheap, all other loads cost like VFETCH
      if (i->src(0).getFile() == FILE_MEMORY_CONST)
         return 9;
      return 24;
   case OP_VFETCH:
      return 24;
   default:
      break;
   }

   if (Target::getOpClass(i->op) == OPCLASS_TEXTURE)
      return 17;
   if (i->op == OP_MUL && i->dType != TYPE_F32)
      return 15;
   return 9;
}
 
// These are "inverse" throughput values, i.e. the number of cycles required
// to issue a specific instruction for a full warp (32 threads).
//
// Assuming we have more than 1 warp in flight, a higher issue latency results
// in a lower result latency since the MP will have spent more time with other
// warps.
// This also helps to determine the number of cycles between instructions in
// a single warp.
//
// Returns the inverse throughput estimate for instruction @i, keyed on its
// destination type and opcode.
int TargetNVC0::getThroughput(const Instruction *i) const
{
   // TODO: better values
   switch (i->dType) {
   case TYPE_F32:
      switch (i->op) {
      case OP_ADD:
      case OP_MUL:
      case OP_MAD:
      case OP_FMA:
         return 1;
      case OP_CVT:
      case OP_CEIL:
      case OP_FLOOR:
      case OP_TRUNC:
      case OP_SET:
      case OP_SLCT:
      case OP_MIN:
      case OP_MAX:
         return 2;
      default:
         // RCP, RSQ, LG2, SIN, COS, PRESIN, PREEX2 and everything else
         return 8;
      }
   case TYPE_U32:
   case TYPE_S32:
      switch (i->op) {
      case OP_ADD:
      case OP_AND:
      case OP_OR:
      case OP_XOR:
      case OP_NOT:
         return 1;
      default:
         // MUL, MAD, CVT, SET, SLCT, SHL, SHR, NEG, ABS, MIN, MAX, ...
         return 2;
      }
   case TYPE_F64:
      return 2;
   default:
      return 1;
   }
}
 
bool TargetNVC0::canDualIssue(const Instruction *a, const Instruction *b) const
{
const OpClass clA = operationClass[a->op];
const OpClass clB = operationClass[b->op];
 
if (getChipset() >= 0xe4) {
// not texturing
// not if the 2nd instruction isn't necessarily executed
if (clA == OPCLASS_TEXTURE || clA == OPCLASS_FLOW)
return false;
// anything with MOV
if (a->op == OP_MOV || b->op == OP_MOV)
return true;
if (clA == clB) {
// only F32 arith or integer additions
if (clA != OPCLASS_ARITH)
return false;
return (a->dType == TYPE_F32 || a->op == OP_ADD ||
b->dType == TYPE_F32 || b->op == OP_ADD);
}
// nothing with TEXBAR
if (a->op == OP_TEXBAR || b->op == OP_TEXBAR)
return false;
// no loads and stores accessing the the same space
if ((clA == OPCLASS_LOAD && clB == OPCLASS_STORE) ||
(clB == OPCLASS_LOAD && clA == OPCLASS_STORE))
if (a->src(0).getFile() == b->src(0).getFile())
return false;
// no > 32-bit ops
if (typeSizeof(a->dType) > 4 || typeSizeof(b->dType) > 4 ||
typeSizeof(a->sType) > 4 || typeSizeof(b->sType) > 4)
return false;
return true;
} else {
return false; // info not needed (yet)
}
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.h
0,0 → 1,73
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir_target.h"
 
namespace nv50_ir {
 
// Indices of the library routines; used as the argument of
// TargetNVC0::getBuiltinOffset() to index the per-chipset offset tables.
#define NVC0_BUILTIN_DIV_U32 0
#define NVC0_BUILTIN_DIV_S32 1
#define NVC0_BUILTIN_RCP_F64 2
#define NVC0_BUILTIN_RSQ_F64 3

// Number of builtins; getBuiltinOffset() asserts its argument is below this.
#define NVC0_BUILTIN_COUNT 4
 
// Target description for the NVC0 family of chipsets: capability queries,
// latency/throughput estimates and access to the builtin library code.
class TargetNVC0 : public Target
{
public:
// Builds the description for the given chipset id and fills the opInfo table.
TargetNVC0(unsigned int chipset);

virtual CodeEmitter *getCodeEmitter(Program::Type);

CodeEmitter *createCodeEmitterNVC0(Program::Type);
CodeEmitter *createCodeEmitterGK110(Program::Type);

virtual bool runLegalizePass(Program *, CGStage stage) const;

// Builtin library blob for the current chipset, and offsets of its routines
// (indexed by NVC0_BUILTIN_*).
virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const;
virtual uint32_t getBuiltinOffset(int builtin) const;

// Capability queries used when folding operands and modifiers.
virtual bool insnCanLoad(const Instruction *insn, int s,
const Instruction *ld) const;
virtual bool isOpSupported(operation, DataType) const;
virtual bool isAccessSupported(DataFile, DataType) const;
virtual bool isModSupported(const Instruction *, int s, Modifier) const;
virtual bool isSatSupported(const Instruction *) const;
// e receives the power-of-two exponent when true is returned
virtual bool isPostMultiplySupported(operation, float, int& e) const;
virtual bool mayPredicate(const Instruction *, const Value *) const;

// Scheduling estimates.
virtual bool canDualIssue(const Instruction *, const Instruction *) const;
virtual int getLatency(const Instruction *) const;
virtual int getThroughput(const Instruction *) const;

virtual unsigned int getFileSize(DataFile) const;
virtual unsigned int getFileUnit(DataFile) const;

virtual uint32_t getSVAddress(DataFile shaderFile, const Symbol *sv) const;

private:
// Fills nativeFileMap and opInfo[]; called from the constructor.
void initOpInfo();
};
 
// Defined outside this header.
// NOTE(review): presumably computes scheduling data for the given function
// on the given target - confirm against the definition.
bool calculateSchedDataNVC0(const Target *, Function *);
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp
0,0 → 1,392
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "codegen/nv50_ir_util.h"
 
namespace nv50_ir {
 
void DLList::clear()
{
for (Item *next, *item = head.next; item != &head; item = next) {
next = item->next;
delete item;
}
head.next = head.prev = &head;
}
 
void
DLList::Iterator::erase()
{
Item *rem = pos;
 
if (rem == term)
return;
pos = pos->next;
 
DLLIST_DEL(rem);
delete rem;
}
 
// Moves the current node to the head of @dest (which must be a different
// list) and advances this iterator to the node's former 'next' neighbour.
// Must not be called at the end marker.
void DLList::Iterator::moveToList(DLList& dest)
{
   assert(term != &dest.head);
   assert(pos != term);

   Item *const moved = pos;
   pos = moved->next;

   DLLIST_DEL(moved);
   DLLIST_ADDHEAD(&dest.head, moved);
}
 
bool
DLList::Iterator::insert(void *data)
{
Item *ins = new Item(data);
 
ins->next = pos->next;
ins->prev = pos;
pos->next->prev = ins;
pos->next = ins;
 
if (pos == term)
term = ins;
 
return true;
}
 
// Appends all items of this stack, in their current order, on top of @that
// (growing it as needed) and leaves this stack empty.
void
Stack::moveTo(Stack& that)
{
   const unsigned int total = this->size + that.size;

   // grow the destination until everything fits
   while (that.limit < total)
      that.resize();

   memcpy(&that.array[that.size], &array[0], this->size * sizeof(Item));

   that.size = total;
   this->size = 0;
}
 
// Copy constructor: starts empty and re-adds every range of @that.
Interval::Interval(const Interval& that) : head(NULL), tail(NULL)
{
   for (Range *src = that.head; src; src = src->next)
      extend(src->bgn, src->end);
}
 
// Releases every range owned by the interval.
Interval::~Interval()
{
   this->clear();
}
 
void
Interval::clear()
{
for (Range *next, *r = head; r; r = next) {
next = r->next;
delete r;
}
head = tail = NULL;
}
 
bool
Interval::extend(int a, int b)
{
Range *r, **nextp = &head;
 
// NOTE: we need empty intervals for fixed registers
// if (a == b)
// return false;
assert(a <= b);
 
for (r = head; r; r = r->next) {
if (b < r->bgn)
break; // insert before
if (a > r->end) {
// insert after
nextp = &r->next;
continue;
}
 
// overlap
if (a < r->bgn) {
r->bgn = a;
if (b > r->end)
r->end = b;
r->coalesce(&tail);
return true;
}
if (b > r->end) {
r->end = b;
r->coalesce(&tail);
return true;
}
assert(a >= r->bgn);
assert(b <= r->end);
return true;
}
 
(*nextp) = new Range(a, b);
(*nextp)->next = r;
 
for (r = (*nextp); r->next; r = r->next);
tail = r;
return true;
}
 
bool Interval::contains(int pos) const
{
for (Range *r = head; r && r->bgn <= pos; r = r->next)
if (r->end > pos)
return true;
return false;
}
 
bool Interval::overlaps(const Interval &that) const
{
#if 1
Range *a = this->head;
Range *b = that.head;
 
while (a && b) {
if (b->bgn < a->end &&
b->end > a->bgn)
return true;
if (a->end <= b->bgn)
a = a->next;
else
b = b->next;
}
#else
for (Range *rA = this->head; rA; rA = rA->next)
for (Range *rB = iv.head; rB; rB = rB->next)
if (rB->bgn < rA->end &&
rB->end > rA->bgn)
return true;
#endif
return false;
}
 
void Interval::insert(const Interval &that)
{
for (Range *r = that.head; r; r = r->next)
this->extend(r->bgn, r->end);
}
 
// Merges all ranges of @that into this interval and empties @that
// (its ranges are deleted). @that must be a different object.
void Interval::unify(Interval &that)
{
   assert(this != &that);
   for (Range *next, *r = that.head; r; r = next) {
      next = r->next;
      this->extend(r->bgn, r->end);
      delete r;
   }
   // Reset both ends, like clear() does: leaving tail set would keep a
   // pointer to a Range that was just deleted.
   that.head = that.tail = NULL;
}
 
// Returns the summed length of all half-open ranges [bgn, end).
int Interval::length() const
{
   int len = 0;
   for (Range *r = head; r; r = r->next)
      len += r->end - r->bgn; // end >= bgn, see the assert in extend()
   return len;
}
 
void Interval::print() const
{
if (!head)
return;
INFO("[%i %i)", head->bgn, head->end);
for (const Range *r = head->next; r; r = r->next)
INFO(" [%i %i)", r->bgn, r->end);
INFO("\n");
}
 
// Clears in this set every bit that is set in @set (this &= ~set).
// @set must not be larger than this set.
void
BitSet::andNot(const BitSet &set)
{
   assert(data && set.data);
   assert(size >= set.size);

   const unsigned int words = (set.size + 31) / 32;
   for (unsigned int w = 0; w < words; ++w)
      data[w] &= ~set.data[w];
}
 
// Sets in this set every bit that is set in @set.
// @set must not be larger than this set.
BitSet& BitSet::operator|=(const BitSet &set)
{
   assert(data && set.data);
   assert(size >= set.size);

   const unsigned int words = (set.size + 31) / 32;
   for (unsigned int w = 0; w < words; ++w)
      data[w] |= set.data[w];

   return *this;
}
 
bool BitSet::resize(unsigned int nBits)
{
if (!data || !nBits)
return allocate(nBits, true);
const unsigned int p = (size + 31) / 32;
const unsigned int n = (nBits + 31) / 32;
if (n == p)
return true;
 
data = (uint32_t *)REALLOC(data, 4 * p, 4 * n);
if (!data) {
size = 0;
return false;
}
if (n > p)
memset(&data[p], 0, (n - p) * 4);
if (nBits < size && (nBits % 32))
data[(nBits + 31) / 32 - 1] &= (1 << (nBits % 32)) - 1;
 
size = nBits;
return true;
}
 
bool BitSet::allocate(unsigned int nBits, bool zero)
{
if (data && size < nBits) {
FREE(data);
data = NULL;
}
size = nBits;
 
if (!data)
data = reinterpret_cast<uint32_t *>(CALLOC((size + 31) / 32, 4));
 
if (zero)
memset(data, 0, (size + 7) / 8);
else
if (size % 32) // clear unused bits (e.g. for popCount)
data[(size + 31) / 32 - 1] &= (1 << (size % 32)) - 1;
 
return data;
}
 
unsigned int BitSet::popCount() const
{
unsigned int count = 0;
 
for (unsigned int i = 0; i < (size + 31) / 32; ++i)
if (data[i])
count += util_bitcount(data[i]);
return count;
}
 
// Sets every storage word to @val, then clears the bits of the last word
// that lie beyond 'size' so they cannot be observed (e.g. by popCount).
void BitSet::fill(uint32_t val)
{
   const unsigned int words = (size + 31) / 32;

   for (unsigned int i = 0; i < words; ++i)
      data[i] = val;

   // Only a partially used last word needs masking. It is word (words - 1);
   // indexing with the loop counter after the loop would write one word past
   // the end of the storage.
   const unsigned int used = size % 32;
   if (val && used)
      data[words - 1] &= ~(0xffffffff << used); // BE ?
}
 
// Makes this set the union of @pA and @pB. Without @pB this set simply
// becomes a copy of @pA (via the assignment operator).
void BitSet::setOr(BitSet *pA, BitSet *pB)
{
   if (pB) {
      const unsigned int words = (size + 31) / 32;
      for (unsigned int w = 0; w < words; ++w)
         data[w] = pA->data[w] | pB->data[w];
   } else {
      *this = *pA;
   }
}
 
// Searches for @count consecutive clear bits at an aligned position
// (alignment 1, 2, 4 for counts 1, 2, 3..4; counts above 4 are aligned to 8,
// 16 or 32). Returns the bit index of the range, or -1 if none fits within
// 'size'.
int BitSet::findFreeRange(unsigned int count) const
{
   // mask of the requested bits; 1 << 32 would be undefined, so the
   // full-word case is spelled out
   const uint32_t m = (count >= 32) ? 0xffffffff : ((1u << count) - 1);
   int pos = size;
   unsigned int i;
   const unsigned int end = (size + 31) / 32;

   if (count == 1) {
      for (i = 0; i < end; ++i) {
         pos = ffs(~data[i]) - 1;
         if (pos >= 0)
            break;
      }
   } else
   if (count == 2) {
      for (i = 0; i < end; ++i) {
         if (data[i] != 0xffffffff) {
            // a zero at an even position of b marks a free aligned pair
            uint32_t b = data[i] | (data[i] >> 1) | 0xaaaaaaaa;
            pos = ffs(~b) - 1;
            if (pos >= 0)
               break;
         }
      }
   } else
   if (count == 4 || count == 3) {
      for (i = 0; i < end; ++i) {
         if (data[i] != 0xffffffff) {
            // a zero at a multiple-of-4 position marks a free aligned quad
            uint32_t b =
               (data[i] >> 0) | (data[i] >> 1) |
               (data[i] >> 2) | (data[i] >> 3) | 0xeeeeeeee;
            pos = ffs(~b) - 1;
            if (pos >= 0)
               break;
         }
      }
   } else {
      // round the step up to the next supported alignment
      if (count <= 8)
         count = 8;
      else
      if (count <= 16)
         count = 16;
      else
         count = 32;

      for (i = 0; i < end; ++i) {
         if (data[i] != 0xffffffff) {
            for (pos = 0; pos < 32; pos += count)
               if (!(data[i] & (m << pos)))
                  break;
            if (pos < 32)
               break;
         }
      }
   }

   // The ffs() based searches leave -1 in pos when the last inspected word
   // had no candidate. Without this check, adding i * 32 below would turn
   // that into a seemingly valid position.
   if (pos < 0)
      return -1;

   pos += i * 32;

   return ((pos + count) <= size) ? pos : -1;
}
 
// Debug helper: prints the size followed by the indices of all set bits,
// 16 per line.
void BitSet::print() const
{
   unsigned int printed = 0;
   const unsigned int words = (size + 31) / 32;

   INFO("BitSet of size %u:\n", size);

   for (unsigned int i = 0; i < words; ++i) {
      uint32_t rest = data[i];

      // peel off the lowest set bit until the word is exhausted
      while (rest) {
         const int pos = ffs(rest) - 1;
         rest &= ~(1 << pos);
         INFO(" %i", i * 32 + pos);
         ++printed;
         if ((printed % 16) == 0)
            INFO("\n");
      }
   }
   // terminate a partially filled last line
   if (printed % 16)
      INFO("\n");
}
 
} // namespace nv50_ir
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h
0,0 → 1,789
/*
* Copyright 2011 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#ifndef __NV50_IR_UTIL_H__
#define __NV50_IR_UTIL_H__
 
#include <new>
#include <assert.h>
#include <stdio.h>
#include <memory>
#include <map>
 
#ifndef NDEBUG
# include <typeinfo>
#endif
 
#include "util/u_inlines.h"
#include "util/u_memory.h"
 
// Logging helpers. All of them forward printf-style arguments; ERROR and
// WARN prepend a fixed prefix to the format string by literal concatenation,
// so the first argument must be a string literal.
// ("args..." is the GNU named-variadic macro syntax.)
#define ERROR(args...) debug_printf("ERROR: " args)
#define WARN(args...) debug_printf("WARNING: " args)
#define INFO(args...) debug_printf(args)

// Prints only if bit NV50_IR_DEBUG_<f> is set in the mask m.
#define INFO_DBG(m, f, args...) \
do { \
if (m & NV50_IR_DEBUG_##f) \
debug_printf(args); \
} while(0)

// Prints to stderr and aborts the process.
#define FATAL(args...) \
do { \
fprintf(stderr, args); \
abort(); \
} while(0)
 
 
// Placement-new an object of type obj into storage obtained from the
// mem_<obj> member of the Program that owns function f; f is also passed as
// the first constructor argument.
#define NV50_IR_FUNC_ALLOC_OBJ_DEF(obj, f, args...) \
new ((f)->getProgram()->mem_##obj.allocate()) obj(f, args)

// Convenience wrappers for the instruction classes (f is the function).
#define new_Instruction(f, args...) \
NV50_IR_FUNC_ALLOC_OBJ_DEF(Instruction, f, args)
#define new_CmpInstruction(f, args...) \
NV50_IR_FUNC_ALLOC_OBJ_DEF(CmpInstruction, f, args)
#define new_TexInstruction(f, args...) \
NV50_IR_FUNC_ALLOC_OBJ_DEF(TexInstruction, f, args)
#define new_FlowInstruction(f, args...) \
NV50_IR_FUNC_ALLOC_OBJ_DEF(FlowInstruction, f, args)

#define new_LValue(f, args...) \
NV50_IR_FUNC_ALLOC_OBJ_DEF(LValue, f, args)


// Same as above, but the storage comes directly from program p, which is
// also passed as the first constructor argument.
#define NV50_IR_PROG_ALLOC_OBJ_DEF(obj, p, args...) \
new ((p)->mem_##obj.allocate()) obj(p, args)

#define new_Symbol(p, args...) \
NV50_IR_PROG_ALLOC_OBJ_DEF(Symbol, p, args)
#define new_ImmediateValue(p, args...) \
NV50_IR_PROG_ALLOC_OBJ_DEF(ImmediateValue, p, args)


// Counterparts of the new_* macros: hand the object back to program p.
#define delete_Instruction(p, insn) (p)->releaseInstruction(insn)
#define delete_Value(p, val) (p)->releaseValue(val)
 
 
namespace nv50_ir {
 
// Abstract forward iterator over opaque (void *) items.
class Iterator
{
public:
   virtual ~Iterator() { }

   // advance to the following item
   virtual void next() = 0;
   // item at the current position
   virtual void *get() const = 0;
   // if true, get will return 0
   virtual bool end() const = 0;
   // only for graph iterators; the default implementation asserts
   virtual void reset()
   {
      assert(0);
   }
};
 
// Owning handle for a heap-allocated Iterator.
// NOTE: std::auto_ptr was deprecated in C++11 and removed in C++17.
typedef std::auto_ptr<Iterator> IteratorRef;
 
// Iterator that can also modify the sequence it walks.
class ManipIterator : public Iterator
{
public:
virtual bool insert(void *) = 0; // insert after current position
virtual void erase() = 0; // remove the item at the current position
};
 
// WARNING: do not use a->prev/next for __item or __list
 
// Unlinks __item from its list and makes it point at itself.
#define DLLIST_DEL(__item) \
do { \
(__item)->prev->next = (__item)->next; \
(__item)->next->prev = (__item)->prev; \
(__item)->next = (__item); \
(__item)->prev = (__item); \
} while(0)

// Links __item in just before the list head __list, i.e. at the tail.
#define DLLIST_ADDTAIL(__list, __item) \
do { \
(__item)->next = (__list); \
(__item)->prev = (__list)->prev; \
(__list)->prev->next = (__item); \
(__list)->prev = (__item); \
} while(0)

// Links __item in right after the list head __list, i.e. at the front.
#define DLLIST_ADDHEAD(__list, __item) \
do { \
(__item)->prev = (__list); \
(__item)->next = (__list)->next; \
(__list)->next->prev = (__item); \
(__list)->next = (__item); \
} while(0)

// Splices the two circular lists __listA and __listB together;
// ty is the pointer type of the list nodes.
#define DLLIST_MERGE(__listA, __listB, ty) \
do { \
ty prevB = (__listB)->prev; \
(__listA)->prev->next = (__listB); \
(__listB)->prev->next = (__listA); \
(__listB)->prev = (__listA)->prev; \
(__listA)->prev = prevB; \
} while(0)

// True if the list head points at itself.
#define DLLIST_EMPTY(__list) ((__list)->next == (__list))

// Declares a forward DLList::Iterator named it and walks list with it.
#define DLLIST_FOR_EACH(list, it) \
for (DLList::Iterator (it) = (list)->iterator(); !(it).end(); (it).next())
 
class DLList
{
public:
class Item
{
public:
Item(void *priv) : next(this), prev(this), data(priv) { }
 
public:
Item *next;
Item *prev;
void *data;
};
 
DLList() : head(0) { }
~DLList() { clear(); }
 
inline void insertHead(void *data)
{
Item *item = new Item(data);
 
assert(data);
 
item->prev = &head;
item->next = head.next;
head.next->prev = item;
head.next = item;
}
 
inline void insertTail(void *data)
{
Item *item = new Item(data);
 
assert(data);
 
DLLIST_ADDTAIL(&head, item);
}
 
inline void insert(void *data) { insertTail(data); }
 
void clear();
 
class Iterator : public ManipIterator
{
public:
Iterator(Item *head, bool r) : rev(r), pos(r ? head->prev : head->next),
term(head) { }
 
virtual void next() { if (!end()) pos = rev ? pos->prev : pos->next; }
virtual void *get() const { return pos->data; }
virtual bool end() const { return pos == term; }
 
// caution: if you're at end-2 and erase it, then do next, you're at end
virtual void erase();
virtual bool insert(void *data);
 
// move item to a another list, no consistency with its iterators though
void moveToList(DLList&);
 
private:
const bool rev;
Item *pos;
Item *term;
 
friend class DLList;
};
 
inline void erase(Iterator& pos)
{
pos.erase();
}
 
Iterator iterator()
{
return Iterator(&head, false);
}
 
Iterator revIterator()
{
return Iterator(&head, true);
}
 
private:
Item head;
};
 
class Stack
{
public:
class Item {
public:
union {
void *p;
int i;
unsigned int u;
float f;
double d;
} u;
 
Item() { memset(&u, 0, sizeof(u)); }
};
 
Stack() : size(0), limit(0), array(0) { }
~Stack() { if (array) FREE(array); }
 
inline void push(int i) { Item data; data.u.i = i; push(data); }
inline void push(unsigned int u) { Item data; data.u.u = u; push(data); }
inline void push(void *p) { Item data; data.u.p = p; push(data); }
inline void push(float f) { Item data; data.u.f = f; push(data); }
 
inline void push(Item data)
{
if (size == limit)
resize();
array[size++] = data;
}
 
inline Item pop()
{
if (!size) {
Item data;
assert(0);
return data;
}
return array[--size];
}
 
inline unsigned int getSize() { return size; }
 
inline Item& peek() { assert(size); return array[size - 1]; }
 
void clear(bool releaseStorage = false)
{
if (releaseStorage && array)
FREE(array);
size = limit = 0;
}
 
void moveTo(Stack&); // move all items to target (not like push(pop()))
 
private:
void resize()
{
unsigned int sizeOld, sizeNew;
 
sizeOld = limit * sizeof(Item);
limit = MAX2(4, limit + limit);
sizeNew = limit * sizeof(Item);
 
array = (Item *)REALLOC(array, sizeOld, sizeNew);
}
 
unsigned int size;
unsigned int limit;
Item *array;
};
 
class DynArray
{
public:
class Item
{
public:
union {
uint32_t u32;
void *p;
};
};
 
DynArray() : data(NULL), size(0) { }
 
~DynArray() { if (data) FREE(data); }
 
inline Item& operator[](unsigned int i)
{
if (i >= size)
resize(i);
return data[i];
}
 
inline const Item operator[](unsigned int i) const
{
return data[i];
}
 
void resize(unsigned int index)
{
const unsigned int oldSize = size * sizeof(Item);
 
if (!size)
size = 8;
while (size <= index)
size <<= 1;
 
data = (Item *)REALLOC(data, oldSize, size * sizeof(Item));
}
 
void clear()
{
FREE(data);
data = NULL;
size = 0;
}
 
private:
Item *data;
unsigned int size;
};
 
class ArrayList
{
public:
ArrayList() : size(0) { }
 
void insert(void *item, int& id)
{
id = ids.getSize() ? ids.pop().u.i : size++;
data[id].p = item;
}
 
void remove(int& id)
{
const unsigned int uid = id;
assert(uid < size && data[id].p);
ids.push(uid);
data[uid].p = NULL;
id = -1;
}
 
inline int getSize() const { return size; }
 
inline void *get(unsigned int id) { assert(id < size); return data[id].p; }
 
class Iterator : public nv50_ir::Iterator
{
public:
Iterator(const ArrayList *array) : pos(0), data(array->data)
{
size = array->getSize();
if (size)
nextValid();
}
 
void nextValid() { while ((pos < size) && !data[pos].p) ++pos; }
 
void next() { if (pos < size) { ++pos; nextValid(); } }
void *get() const { assert(pos < size); return data[pos].p; }
bool end() const { return pos >= size; }
 
private:
unsigned int pos;
unsigned int size;
const DynArray& data;
 
friend class ArrayList;
};
 
Iterator iterator() const { return Iterator(this); }
 
void clear()
{
data.clear();
ids.clear(true);
size = 0;
}
 
private:
DynArray data;
Stack ids;
unsigned int size;
};
 
class Interval
{
public:
Interval() : head(0), tail(0) { }
Interval(const Interval&);
~Interval();
 
bool extend(int, int);
void insert(const Interval&);
void unify(Interval&); // clears source interval
void clear();
 
inline int begin() const { return head ? head->bgn : -1; }
inline int end() const { checkTail(); return tail ? tail->end : -1; }
inline bool isEmpty() const { return !head; }
bool overlaps(const Interval&) const;
bool contains(int pos) const;
 
inline int extent() const { return end() - begin(); }
int length() const;
 
void print() const;
 
inline void checkTail() const;
 
private:
class Range
{
public:
Range(int a, int b) : next(0), bgn(a), end(b) { }
 
Range *next;
int bgn;
int end;
 
void coalesce(Range **ptail)
{
Range *rnn;
 
while (next && end >= next->bgn) {
assert(bgn <= next->bgn);
rnn = next->next;
end = MAX2(end, next->end);
delete next;
next = rnn;
}
if (!next)
*ptail = this;
}
};
 
Range *head;
Range *tail;
};
 
/**
 * Set of bits stored in an array of 32-bit words.
 *
 * 'size' is the number of bits. Bit i lives in word i / 32 at bit
 * position i % 32.
 */
class BitSet
{
public:
   BitSet() : marker(false), data(0), size(0) { }
   BitSet(unsigned int nBits, bool zero) : marker(false), data(0), size(0)
   {
      allocate(nBits, zero);
   }
   ~BitSet()
   {
      if (data)
         FREE(data);
   }

   // allocate will keep old data iff size is unchanged
   bool allocate(unsigned int nBits, bool zero);
   bool resize(unsigned int nBits); // keep old data, zero additional bits

   inline unsigned int getSize() const { return size; }

   void fill(uint32_t val);

   void setOr(BitSet *, BitSet *); // second BitSet may be NULL

   // Set bit i.
   inline void set(unsigned int i)
   {
      assert(i < size);
      data[i / 32] |= bitMask(i);
   }
   // Set n bits starting at bit i.
   // NOTE: range may not cross 32 bit boundary (implies n <= 32)
   inline void setRange(unsigned int i, unsigned int n)
   {
      assert((i + n) <= size && (((i % 32) + n) <= 32));
      data[i / 32] |= rangeMask(i, n);
   }
   // OR the mask m, unshifted, into the word that contains bit i.
   inline void setMask(unsigned int i, uint32_t m)
   {
      assert(i < size);
      data[i / 32] |= m;
   }

   // Clear bit i.
   inline void clr(unsigned int i)
   {
      assert(i < size);
      data[i / 32] &= ~bitMask(i);
   }
   // Clear n bits starting at bit i.
   // NOTE: range may not cross 32 bit boundary (implies n <= 32)
   inline void clrRange(unsigned int i, unsigned int n)
   {
      assert((i + n) <= size && (((i % 32) + n) <= 32));
      data[i / 32] &= ~rangeMask(i, n);
   }

   // True if bit i is set.
   inline bool test(unsigned int i) const
   {
      assert(i < size);
      return (data[i / 32] & bitMask(i)) != 0;
   }
   // True if any of the n bits starting at bit i is set.
   // NOTE: range may not cross 32 bit boundary (implies n <= 32)
   inline bool testRange(unsigned int i, unsigned int n) const
   {
      assert((i + n) <= size && (((i % 32) + n) <= 32));
      return (data[i / 32] & rangeMask(i, n)) != 0;
   }

   // Find a range of size (<= 32) clear bits aligned to roundup_pow2(size).
   int findFreeRange(unsigned int size) const;

   BitSet& operator|=(const BitSet&);

   // Copy the bits of 'set'. Both sets must already have storage and the
   // same size (asserted); no allocation happens here.
   BitSet& operator=(const BitSet& set)
   {
      assert(data && set.data);
      assert(size == set.size);
      memcpy(data, set.data, (set.size + 7) / 8);
      return *this;
   }

   void andNot(const BitSet&);

   // bits = (bits | setMask) & ~clrMask
   // The same 32-bit masks are applied to every word of the set.
   inline void periodicMask32(uint32_t setMask, uint32_t clrMask)
   {
      for (unsigned int i = 0; i < (size + 31) / 32; ++i)
         data[i] = (data[i] | setMask) & ~clrMask;
   }

   unsigned int popCount() const;

   void print() const;

public:
   bool marker; // for user

private:
   // Mask selecting bit i inside its word. Built from an unsigned 1 so
   // that bit 31 does not shift into the sign bit of an int.
   static inline uint32_t bitMask(unsigned int i)
   {
      return uint32_t(1) << (i % 32);
   }

   // Mask selecting n bits starting at bit i inside its word. n == 32 is
   // handled separately because shifting a 32-bit value by 32 is undefined;
   // the range asserts guarantee i % 32 == 0 in that case.
   static inline uint32_t rangeMask(unsigned int i, unsigned int n)
   {
      const uint32_t bits =
         (n >= 32) ? ~uint32_t(0) : ((uint32_t(1) << n) - 1);
      return bits << (i % 32);
   }

   uint32_t *data;
   unsigned int size; // number of bits
};
 
// Debug-only consistency check (compiled in when NV50_DEBUG has
// NV50_DEBUG_PROG_RA set): walks the range list and asserts that 'tail'
// points at its last element. Does nothing in other builds.
void Interval::checkTail() const
{
#if NV50_DEBUG & NV50_DEBUG_PROG_RA
   Range *r = head;
   // end() calls this on intervals that may have no ranges at all;
   // there is no list to walk then, and head must not be dereferenced.
   if (!r)
      return;
   while (r->next)
      r = r->next;
   assert(tail == r);
#endif
}
 
class MemoryPool
{
private:
inline bool enlargeAllocationsArray(const unsigned int id, unsigned int nr)
{
const unsigned int size = sizeof(uint8_t *) * id;
const unsigned int incr = sizeof(uint8_t *) * nr;
 
uint8_t **alloc = (uint8_t **)REALLOC(allocArray, size, size + incr);
if (!alloc)
return false;
allocArray = alloc;
return true;
}
 
inline bool enlargeCapacity()
{
const unsigned int id = count >> objStepLog2;
 
uint8_t *const mem = (uint8_t *)MALLOC(objSize << objStepLog2);
if (!mem)
return false;
 
if (!(id % 32)) {
if (!enlargeAllocationsArray(id, 32)) {
FREE(mem);
return false;
}
}
allocArray[id] = mem;
return true;
}
 
public:
MemoryPool(unsigned int size, unsigned int incr) : objSize(size),
objStepLog2(incr)
{
allocArray = NULL;
released = NULL;
count = 0;
}
 
~MemoryPool()
{
unsigned int allocCount = (count + (1 << objStepLog2) - 1) >> objStepLog2;
for (unsigned int i = 0; i < allocCount && allocArray[i]; ++i)
FREE(allocArray[i]);
if (allocArray)
FREE(allocArray);
}
 
void *allocate()
{
void *ret;
const unsigned int mask = (1 << objStepLog2) - 1;
 
if (released) {
ret = released;
released = *(void **)released;
return ret;
}
 
if (!(count & mask))
if (!enlargeCapacity())
return NULL;
 
ret = allocArray[count >> objStepLog2] + (count & mask) * objSize;
++count;
return ret;
}
 
void release(void *ptr)
{
*(void **)ptr = released;
released = ptr;
}
 
private:
uint8_t **allocArray; // array (list) of MALLOC allocations
 
void *released; // list of released objects
 
unsigned int count; // highest allocated object
 
const unsigned int objSize;
const unsigned int objStepLog2;
};
 
/**
* Composite object cloning policy.
*
* Encapsulates how sub-objects are to be handled (if at all) when a
* composite object is being cloned.
*/
template<typename C>
class ClonePolicy
{
protected:
   C *c; // context handed to the constructor, see context()

public:
   ClonePolicy(C *c) : c(c) {}

   // Polymorphic base class: a virtual destructor makes destroying a
   // derived policy through a ClonePolicy pointer well defined.
   virtual ~ClonePolicy() {}

   C *context() { return c; }

   // Return the clone registered for 'obj'. If lookup() finds none,
   // obj->clone(*this) is called and its result is returned instead.
   template<typename T> T *get(T *obj)
   {
      void *clone = lookup(obj);
      if (!clone)
         clone = obj->clone(*this);
      return reinterpret_cast<T *>(clone);
   }

   // Register 'clone' as the copy of 'obj' by forwarding to insert().
   template<typename T> void set(const T *obj, T *clone)
   {
      insert(obj, clone);
   }

protected:
   // Policy hooks: lookup() returns the known clone of 'obj' or NULL,
   // insert() records a new obj -> clone association.
   virtual void *lookup(void *obj) = 0;
   virtual void insert(const void *obj, void *clone) = 0;
};
 
/**
* Shallow non-recursive cloning policy.
*
* Objects cloned with the "shallow" policy don't clone their
* children recursively, instead, the new copy shares its children
* with the original object.
*/
template<typename C>
class ShallowClonePolicy : public ClonePolicy<C>
{
public:
   ShallowClonePolicy(C *c) : ClonePolicy<C>(c) {}

protected:
   // Each object is reported as its own clone, so ClonePolicy::get() hands
   // back a non-NULL object unchanged instead of cloning it.
   virtual void *lookup(void *obj) { return obj; }

   // Nothing is recorded: lookup() never consults a table.
   virtual void insert(const void *, void *) { }
};
 
template<typename C, typename T>
inline T *cloneShallow(C *c, T *obj)
{
ShallowClonePolicy<C> pol(c);
return obj->clone(pol);
}
 
/**
* Recursive cloning policy.
*
* Objects cloned with the "deep" policy clone their children
* recursively, keeping track of what has already been cloned to
* avoid making several new copies of the same object.
*/
template<typename C>
class DeepClonePolicy : public ClonePolicy<C>
{
public:
   DeepClonePolicy(C *c) : ClonePolicy<C>(c) {}

private:
   // original object -> clone already made for it
   std::map<const void *, void *> map;

protected:
   // Return the recorded clone of 'obj', or NULL if none has been recorded.
   // find() is used so that a miss does not add an empty entry to the map,
   // which operator[] would do.
   virtual void *lookup(void *obj)
   {
      std::map<const void *, void *>::const_iterator it = map.find(obj);
      if (it == map.end())
         return NULL;
      return it->second;
   }

   // Record (or replace) the clone belonging to 'obj'.
   virtual void insert(const void *obj, void *clone)
   {
      map[obj] = clone;
   }
};
 
// Pair of maps kept side by side: 'forth' maps S -> T and 'back' maps
// T -> S. The read-only views 'r' and 'l' refer to forth and back
// respectively.
template<typename S, typename T>
struct bimap
{
   std::map<S, T> forth;
   std::map<T, S> back;

public:
   bimap() : l(back), r(forth) { }

   // Copies the maps; the views are bound to this object's own maps.
   bimap(const bimap<S, T> &m)
      : forth(m.forth), back(m.back), l(back), r(forth) { }

   // Adds s -> t to 'forth' and t -> s to 'back'. Each map keeps its
   // existing entry when the key is already present.
   void insert(const S &s, const T &t)
   {
      forth.insert(typename std::map<S, T>::value_type(s, t));
      back.insert(typename std::map<T, S>::value_type(t, s));
   }

   typedef typename std::map<T, S>::const_iterator l_iterator;
   const std::map<T, S> &l; // view of 'back'
   typedef typename std::map<S, T>::const_iterator r_iterator;
   const std::map<S, T> &r; // view of 'forth'
};
 
} // namespace nv50_ir
 
#endif // __NV50_IR_UTIL_H__