-
Notifications
You must be signed in to change notification settings - Fork 17.9k
/
Copy pathsqrt.go
153 lines (147 loc) · 5.18 KB
/
sqrt.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Copy of math/sqrt.go, here for use by ARM softfloat.
// Modified to not use any floating point arithmetic so
// that we don't clobber any floating-point registers
// while emulating the sqrt instruction.
package runtime
// The original C code and the long comment below are
// from FreeBSD's /usr/src/lib/msun/src/e_sqrt.c and
// came with this notice. The go code is a simplified
// version of the original C.
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunPro, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
//
// __ieee754_sqrt(x)
// Return correctly rounded sqrt.
// -----------------------------------------
// | Use the hardware sqrt if you have one |
// -----------------------------------------
// Method:
// Bit by bit method using integer arithmetic. (Slow, but portable)
// 1. Normalization
// Scale x to y in [1,4) with even powers of 2:
// find an integer k such that 1 <= (y=x*2**(2k)) < 4, then
// sqrt(x) = 2**k * sqrt(y)
// 2. Bit by bit computation
// Let q = sqrt(y) truncated to i bit after binary point (q = 1),
// i 0
// i+1 2
// s = 2*q , and y = 2 * ( y - q ). (1)
// i i i i
//
// To compute q from q , one checks whether
// i+1 i
//
// -(i+1) 2
// (q + 2 ) <= y. (2)
// i
// -(i+1)
// If (2) is false, then q = q ; otherwise q = q + 2 .
// i+1 i i+1 i
//
// With some algebraic manipulation, it is not difficult to see
// that (2) is equivalent to
// -(i+1)
// s + 2 <= y (3)
// i i
//
// The advantage of (3) is that s and y can be computed by
// i i
// the following recurrence formula:
// if (3) is false
//
// s = s , y = y ; (4)
// i+1 i i+1 i
//
// otherwise,
// -i -(i+1)
// s = s + 2 , y = y - s - 2 (5)
// i+1 i i+1 i i
//
// One may easily use induction to prove (4) and (5).
// Note. Since the left hand side of (3) contain only i+2 bits,
// it does not necessary to do a full (53-bit) comparison
// in (3).
// 3. Final rounding
// After generating the 53 bits result, we compute one more bit.
// Together with the remainder, we can decide whether the
// result is exact, bigger than 1/2ulp, or less than 1/2ulp
// (it will never equal to 1/2ulp).
// The rounding mode can be detected by checking whether
// huge + tiny is equal to huge, and whether huge - tiny is
// equal to huge for some floating point number "huge" and "tiny".
//
//
// Notes: Rounding mode detection omitted.
const (
float64Mask = 0x7FF
float64Shift = 64 - 11 - 1
float64Bias = 1023
float64NaN = 0x7FF8000000000001
float64Inf = 0x7FF0000000000000
maxFloat64 = 1.797693134862315708145274237317043567981e+308 // 2**1023 * (2**53 - 1) / 2**52
)
// isnanu returns whether ix represents a NaN floating point number.
func isnanu(ix uint64) bool {
exp := (ix >> float64Shift) & float64Mask
sig := ix << (64 - float64Shift) >> (64 - float64Shift)
return exp == float64Mask && sig != 0
}
func sqrt(ix uint64) uint64 {
// special cases
switch {
case ix == 0 || ix == 1<<63: // x == 0
return ix
case isnanu(ix): // x != x
return ix
case ix&(1<<63) != 0: // x < 0
return float64NaN
case ix == float64Inf: // x > MaxFloat
return ix
}
// normalize x
exp := int((ix >> float64Shift) & float64Mask)
if exp == 0 { // subnormal x
for ix&1<<float64Shift == 0 {
ix <<= 1
exp--
}
exp++
}
exp -= float64Bias // unbias exponent
ix &^= float64Mask << float64Shift
ix |= 1 << float64Shift
if exp&1 == 1 { // odd exp, double x to make it even
ix <<= 1
}
exp >>= 1 // exp = exp/2, exponent of square root
// generate sqrt(x) bit by bit
ix <<= 1
var q, s uint64 // q = sqrt(x)
r := uint64(1 << (float64Shift + 1)) // r = moving bit from MSB to LSB
for r != 0 {
t := s + r
if t <= ix {
s = t + r
ix -= t
q += r
}
ix <<= 1
r >>= 1
}
// final rounding
if ix != 0 { // remainder, result not exact
q += q & 1 // round according to extra bit
}
ix = q>>1 + uint64(exp-1+float64Bias)<<float64Shift // significand + biased exponent
return ix
}