Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
//
2
// DIV U32
3
//
4
// UNR recurrence (q = a / b):
5
// look for z such that 2^32 - b <= b * z < 2^32
6
// then q - 1 <= (a * z) / 2^32 <= q
7
//
// Outline of the code below:
//   1. build a power-of-two starting value for z from bfind(b)
//   2. refine z with five identical mul / mul-high+add steps
//   3. q = high 32 bits of a * z, r = a - b * q
//   4. up to two predicated fix-ups: while r >= b { r -= b; q += 1 }
//
8
// INPUT:   $r0: dividend, $r1: divisor
9
// OUTPUT:  $r0: result, $r1: modulus
//          (i.e. $r0 = quotient, $r1 = remainder)
10
// CLOBBER: $r2 - $r3, $p0 - $p1
// NOTE(review): no instruction in this routine writes $p1; only $p0 is
// used as a predicate. The wider clobber list is harmless but may be stale.
11
// SIZE:    22 / 14 * 8 bytes
// NOTE(review): the listing below contains 26 instructions including ret;
// confirm whether this SIZE figure is still current.
12
//
13
// $r2 = bfind(b); presumably the index of the highest set bit of the
// divisor (confirm against the ISA reference).
bfind u32 $r2 $r1
14
// For an index in 0..31, xor with 0x1f yields 31 - index.
xor b32 $r2 $r2 0x1f
15
mov b32 $r3 0x1
16
// $r2 = 1 << $r2: the starting value of z (a power of two).
shl b32 $r2 $r3 clamp $r2
17
// From here on $r1 holds -b (two's complement), not b.
cvt u32 $r1 neg u32 $r1
18
// Refinement step 1 of 5:
//   $r3 = low 32 bits of (-b) * z
//   z  += high 32 bits of z * $r3
mul $r3 u32 $r1 u32 $r2
19
add $r2 (mul high u32 $r2 u32 $r3) $r2
20
// Refinement step 2 of 5 (same two instructions).
mul $r3 u32 $r1 u32 $r2
21
add $r2 (mul high u32 $r2 u32 $r3) $r2
22
// Refinement step 3 of 5.
mul $r3 u32 $r1 u32 $r2
23
add $r2 (mul high u32 $r2 u32 $r3) $r2
24
// Refinement step 4 of 5.
mul $r3 u32 $r1 u32 $r2
25
add $r2 (mul high u32 $r2 u32 $r3) $r2
26
// Refinement step 5 of 5.
mul $r3 u32 $r1 u32 $r2
27
add $r2 (mul high u32 $r2 u32 $r3) $r2
28
// Keep the dividend a in $r3; $r0 is about to be overwritten with q.
mov b32 $r3 $r0
29
// q = high 32 bits of a * z
mul high $r0 u32 $r0 u32 $r2
30
// z is no longer needed: recover b = -(-b) into $r2 for the compares below.
cvt u32 $r2 neg u32 $r1
31
// r = (-b) * q + a, i.e. a - b * q (32-bit wrap-around arithmetic).
add $r1 (mul u32 $r1 u32 $r0) $r3
32
// First fix-up: if r >= b then r -= b, q += 1.
set $p0 0x1 ge u32 $r1 $r2
33
$p0 sub b32 $r1 $r1 $r2
34
$p0 add b32 $r0 $r0 0x1
35
// Second fix-up, only evaluated when the first one fired: the compare
// itself is predicated on $p0 and rewrites $p0.
$p0 set $p0 0x1 ge u32 $r1 $r2
36
$p0 sub b32 $r1 $r1 $r2
37
$p0 add b32 $r0 $r0 0x1
38
ret
39
//
40
// DIV S32, like DIV U32 after taking ABS(inputs)
41
//
// The signs are recorded in predicates first, the unsigned divide runs on
// the absolute values, and the signs are re-applied just before ret:
//   $p2 = (dividend < 0)           -> negate the modulus at the end
//   $p3 = (divisor < 0) xor $p2    -> negate the result at the end
//
42
// INPUT:   $r0: dividend, $r1: divisor
43
// OUTPUT:  $r0: result, $r1: modulus
//          (modulus takes the sign of the dividend, see $p2 below)
44
// CLOBBER: $r2 - $r3, $p0 - $p3
// NOTE(review): $p1 is never written in this routine; $p0, $p2 and $p3 are.
45
//
46
// $p2 = dividend is negative.
set $p2 0x1 lt s32 $r0 0x0
47
// $p3 = (divisor is negative) xor $p2, i.e. the operand signs differ.
set $p3 0x1 lt s32 $r1 0x0 xor $p2
48
// Replace both operands with their absolute values: a = |$r0|, b = |$r1|.
cvt s32 $r0 abs s32 $r0
49
cvt s32 $r1 abs s32 $r1
50
// Unsigned divide of a by b starts here.
// $r2 = bfind(b); presumably the index of the highest set bit
// (confirm against the ISA reference).
bfind u32 $r2 $r1
51
// For an index in 0..31, xor with 0x1f yields 31 - index.
xor b32 $r2 $r2 0x1f
52
mov b32 $r3 0x1
53
// $r2 = 1 << $r2: power-of-two starting value for the reciprocal estimate z.
shl b32 $r2 $r3 clamp $r2
54
// From here on $r1 holds -b.
cvt u32 $r1 neg u32 $r1
55
// Refinement step 1 of 5:
//   $r3 = low 32 bits of (-b) * z
//   z  += high 32 bits of z * $r3
mul $r3 u32 $r1 u32 $r2
56
add $r2 (mul high u32 $r2 u32 $r3) $r2
57
// Refinement step 2 of 5.
mul $r3 u32 $r1 u32 $r2
58
add $r2 (mul high u32 $r2 u32 $r3) $r2
59
// Refinement step 3 of 5.
mul $r3 u32 $r1 u32 $r2
60
add $r2 (mul high u32 $r2 u32 $r3) $r2
61
// Refinement step 4 of 5.
mul $r3 u32 $r1 u32 $r2
62
add $r2 (mul high u32 $r2 u32 $r3) $r2
63
// Refinement step 5 of 5.
mul $r3 u32 $r1 u32 $r2
64
add $r2 (mul high u32 $r2 u32 $r3) $r2
65
// Keep a in $r3; $r0 is about to be overwritten with q.
mov b32 $r3 $r0
66
// q = high 32 bits of a * z
mul high $r0 u32 $r0 u32 $r2
67
// Recover b = -(-b) into $r2 for the compares below.
cvt u32 $r2 neg u32 $r1
68
// r = (-b) * q + a, i.e. a - b * q.
add $r1 (mul u32 $r1 u32 $r0) $r3
69
// First fix-up: if r >= b then r -= b, q += 1.
set $p0 0x1 ge u32 $r1 $r2
70
$p0 sub b32 $r1 $r1 $r2
71
$p0 add b32 $r0 $r0 0x1
72
// Second fix-up, only evaluated when the first one fired.
$p0 set $p0 0x1 ge u32 $r1 $r2
73
$p0 sub b32 $r1 $r1 $r2
74
$p0 add b32 $r0 $r0 0x1
75
// Re-apply signs: negate the result when the operand signs differed ...
$p3 cvt s32 $r0 neg s32 $r0
76
// ... and negate the modulus when the dividend was negative.
$p2 cvt s32 $r1 neg s32 $r1
77
ret
78
//
79
// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
80
//
// NOTE(review): the header describes the intended algorithm, but the body
// below is only `nop` followed by `ret`. No instruction here reads or writes
// $r0d or $r2 - $r7, so this entry point appears to be an unimplemented
// placeholder. Confirm before relying on it to produce rcp(x).
//
81
// INPUT:   $r0d (x)
82
// OUTPUT:  $r0d (rcp(x))
83
// CLOBBER: $r2 - $r7
84
// SIZE:    9 * 8 bytes
// NOTE(review): only two instructions are present below, not nine.
85
//
86
// Placeholder body: does nothing and returns.
nop
87
ret
88
// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
89
//
// NOTE(review): the header describes the intended algorithm, but the body
// below is only `nop` followed by `ret`. No instruction here reads or writes
// $r0d or $r2 - $r7, so this entry point appears to be an unimplemented
// placeholder. Confirm before relying on it to produce rsqrt(x).
//
90
// INPUT:   $r0d (x)
91
// OUTPUT:  $r0d (rsqrt(x))
92
// CLOBBER: $r2 - $r7
93
// SIZE:    14 * 8 bytes
// NOTE(review): only two instructions are present below, not fourteen.
94
//
95
// Placeholder body: does nothing and returns.
nop
96
ret