-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for new float64 representation
Numbits can be constructed from float64 and can be losslessly converted back to float64. The current fast branchless conversion is possible due to a nerd-snipe of @Merovius. He also threw it at Godbolt and gave it some scrutiny. Thanks, Axel!
- Loading branch information
1 parent
6fb7cd4
commit 6a81b27
Showing
4 changed files
with
275 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
//go:build !go1.20 | ||
|
||
package quamina | ||
|
||
// code below is copied and slightly adapted from Go 1.20+ | ||
|
||
// compare returns | ||
// | ||
// -1 if x is less than y, | ||
// 0 if x equals y, | ||
// +1 if x is greater than y. | ||
// | ||
// For floating-point types, a NaN is considered less than any non-NaN, | ||
// a NaN is considered equal to a NaN, and -0.0 is equal to 0.0. | ||
func compare[T go_1_19_Ordered](x, y T) int { | ||
xNaN := go_1_19_isNaN(x) | ||
yNaN := go_1_19_isNaN(y) | ||
if xNaN && yNaN { | ||
return 0 | ||
} | ||
if xNaN || x < y { | ||
return -1 | ||
} | ||
if yNaN || x > y { | ||
return +1 | ||
} | ||
return 0 | ||
} | ||
|
||
// go_1_19_Ordered is the constraint satisfied by every type whose values can
// be ordered with the operators < <= >= >: strings, the floating-point types,
// the unsigned and signed integers, and any type whose underlying type is one
// of those.
//
// Floating-point members may hold NaN ("not-a-number") values. Operators such
// as == or < always report false when either operand is a NaN, so use the
// compare function when NaN needs a consistent position in the ordering.
type go_1_19_Ordered interface {
	~string |
		~float32 | ~float64 |
		~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr |
		~int | ~int8 | ~int16 | ~int32 | ~int64
}
|
||
// isNaN reports whether x is a NaN without requiring the math package. | ||
// This will always return false if T is not floating-point. | ||
func go_1_19_isNaN[T go_1_19_Ordered](x T) bool { | ||
return x != x | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
//go:build go1.20 | ||
|
||
package quamina | ||
|
||
import "cmp" | ||
|
||
// TODO: when Go 1.19 support is dropped, replace invocations with cmp.Compare directly. | ||
|
||
// compare returns -1, 0 or +1 depending on whether x is less than, equal to
// or greater than y. It simply delegates to cmp.Compare, so for floating-point
// types a NaN is less than any non-NaN, equal to another NaN, and -0.0 is
// equal to 0.0.
//
// NOTE(review): package cmp was introduced in Go 1.21, so the go1.20 build
// tag guarding this file (and the matching !go1.20 tag on the fallback
// implementation) looks one release too early — confirm.
func compare[T cmp.Ordered](x, y T) int {
	return cmp.Compare(x, y)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
package quamina | ||
|
||
import ( | ||
"encoding/binary" | ||
"math" | ||
) | ||
|
||
// A float64 is laid out as (sign | exponent | mantissa),
// using 1 sign bit, 11 exponent bits and 52 mantissa bits.
const (
	maskSign     uint64 = 1 << 63
	maskExponent uint64 = 0x7FF << 52
	maskMantissa uint64 = 1<<52 - 1
)

// Numbits encodings of some boundary values, listed in ascending order.
const (
	numbitsNormalizedNaN = numbitsNegInf - 1
	numbitsNegInf        = Numbits(maskMantissa)
	numbitsNegZero       = numbitsZero - 1
	numbitsZero          = Numbits(maskSign)
	numbitsPosInf        = Numbits(maskSign | maskExponent)
)

// Numbits is an alternative binary encoding of float64 numbers.
// A value can be turned into an [8]byte or a string and rebuilt from either
// of them. Every possible float64 value has a Numbits encoding.
//
// Comparing Numbits differs from cmp.Compare on float64 in a few ways:
//   - 0.0 and -0.0 are not equal.
//   - NaNs are equal only if their bit patterns are equal.
//   - NaNs sort either above Infinity or below -Infinity
//     (depending on the sign bit).
//   - Normalize() removes these differences.
type Numbits uint64

// NumbitsFromFloat64 converts a float64 value to its Numbits representation.
func NumbitsFromFloat64(f float64) Numbits {
	bits := math.Float64bits(f)
	// Branch-free inverse of Numbits.Float64. The arithmetic shift smears the
	// sign bit over the whole word, so the mask is all ones for negative
	// inputs (every bit gets inverted) and only the sign bit for the rest
	// (just the sign bit gets flipped).
	flip := uint64(int64(bits)>>63) | maskSign
	return Numbits(bits ^ flip)
}

// NumbitsFromBytes converts a [8]byte value, read in big-endian order, to its
// Numbits representation.
func NumbitsFromBytes(b [8]byte) Numbits {
	return Numbits(binary.BigEndian.Uint64(b[:]))
}

// NumbitsFromBinaryString converts a string value created by BinaryString to
// its Numbits representation.
// Only the first 8 bytes of s are used; it panics if s is shorter than that.
func NumbitsFromBinaryString(s string) Numbits {
	// Slicing to [:8] is what enforces the minimum length. The bytes are
	// copied into an array by hand rather than with a slice-to-array
	// conversion, which is a Go 1.20 feature and quamina still supported
	// Go 1.19 when this was written.
	var buf [8]byte
	copy(buf[:], s[:8])
	return NumbitsFromBytes(buf)
}

// Float64 converts Numbits back to its float64 representation.
func (n Numbits) Float64() float64 {
	raw := uint64(n)
	// Branch-free inverse of NumbitsFromFloat64. A set high bit marks an
	// originally non-negative number, so only the sign bit is flipped back;
	// a clear high bit marks a negative number, so every bit is inverted.
	flip := ^uint64(int64(raw)>>63) | maskSign
	return math.Float64frombits(raw ^ flip)
}

// Normalize aligns the comparability of n with cmp.Compare.
//
// Only -0.0 (which becomes 0.0) and NaNs (which all collapse into one shared
// representation) are changed; every other value is returned as is.
func (n Numbits) Normalize() Numbits {
	switch {
	case n == numbitsNegZero:
		return numbitsZero
	case n < numbitsNegInf, n > numbitsPosInf:
		// Everything outside [-Inf, +Inf] is a NaN.
		return numbitsNormalizedNaN
	default:
		return n
	}
}

// IsFinite returns true iff n is neither infinite nor NaN.
func (n Numbits) IsFinite() bool {
	return n > numbitsNegInf && n < numbitsPosInf
}

// Bytes returns n as an [8]byte in big-endian order.
func (n Numbits) Bytes() [8]byte {
	u := uint64(n)
	return [8]byte{
		byte(u >> 56), byte(u >> 48), byte(u >> 40), byte(u >> 32),
		byte(u >> 24), byte(u >> 16), byte(u >> 8), byte(u),
	}
}

// BinaryString returns the big-endian bytes of n as a string, so that the
// lexical order of the strings follows the order of the Numbits values.
func (n Numbits) BinaryString() string {
	octets := n.Bytes()
	return string(octets[:])
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
package quamina | ||
|
||
import ( | ||
"cmp" | ||
"math" | ||
"testing" | ||
) | ||
|
||
var (
	// -0.0 has to be built from its bit pattern: the compiler does not
	// produce a negative zero when -0.0 is written as a literal.
	f64_negZero = math.Float64frombits(0x8000_0000_0000_0000)

	// Boundaries of the floating point value ranges, given as raw bit
	// patterns (1 sign bit, 11 exponent bits, 52 mantissa bits).
	f64_zero      = math.Float64frombits(0x0000_0000_0000_0000) // exponent 0x000, mantissa all zeros
	f64_subnormLo = math.Float64frombits(0x0000_0000_0000_0001) // exponent 0x000, mantissa 1
	f64_subnormHi = math.Float64frombits(0x000F_FFFF_FFFF_FFFF) // exponent 0x000, mantissa all ones
	f64_normLoLo  = math.Float64frombits(0x0010_0000_0000_0000) // exponent 0x001, mantissa all zeros
	f64_normLoHi  = math.Float64frombits(0x001F_FFFF_FFFF_FFFF) // exponent 0x001, mantissa all ones
	f64_normHiLo  = math.Float64frombits(0x7FE0_0000_0000_0000) // exponent 0x7FE, mantissa all zeros
	f64_normHiHi  = math.Float64frombits(0x7FEF_FFFF_FFFF_FFFF) // exponent 0x7FE, mantissa all ones
	f64_inf       = math.Float64frombits(0x7FF0_0000_0000_0000) // exponent 0x7FF, mantissa all zeros
	f64_nanLo     = math.Float64frombits(0x7FF0_0000_0000_0001) // exponent 0x7FF, mantissa 1
	f64_nanHi     = math.Float64frombits(0x7FFF_FFFF_FFFF_FFFF) // exponent 0x7FF, mantissa all ones

	// values holds named test inputs, boundaries included. Every entry of
	// the positive table below is also stored under "negative <name>" with
	// its sign bit set.
	values = func() map[string]float64 {
		const signBit uint64 = 1 << 63
		positive := map[string]float64{
			"zero":                  f64_zero,
			"subnormal;lo":          f64_subnormLo,
			"subnormal;hi":          f64_subnormHi,
			"normal;lo-exp,lo-mant": f64_normLoLo,
			"normal;lo-exp,hi-mant": f64_normLoHi,
			"normal;hi-exp,lo-mant": f64_normHiLo,
			"normal;hi-exp,hi-mant": f64_normHiHi,
			"infinity":              f64_inf,
			"NaN;lo":                f64_nanLo,
			"NaN;hi":                f64_nanHi,
			"0.1":                   0.1,
			"1.0":                   1.0,
			"pi":                    math.Pi,
		}
		all := make(map[string]float64, 2*len(positive))
		for name, f := range positive {
			all[name] = f
			all["negative "+name] = math.Float64frombits(math.Float64bits(f) | signBit)
		}
		return all
	}()
)
|
||
func TestNumbits(t *testing.T) { | ||
// roundtrips for various values - creation and conversion | ||
normalNaN := NumbitsFromFloat64(math.NaN()).Normalize() | ||
for n, f := range values { | ||
t.Run(n, func(t *testing.T) { | ||
got := NumbitsFromFloat64(f) | ||
if f2 := got.Float64(); cmp.Compare(f2, f) != 0 { | ||
t.Errorf("NumbitsFromFloat64().Float64() = %v, want %v", f2, f) | ||
} | ||
if bin := got.Bytes(); NumbitsFromBytes(bin) != got { | ||
t.Errorf("NumbitsFromBytes().Bytes() = %x, want %x", got, bin) | ||
} | ||
if str := got.BinaryString(); NumbitsFromBinaryString(str) != got { | ||
t.Errorf("NumbitsFromBytes().BinaryString() = %x, want %x", got, str) | ||
} | ||
if math.IsNaN(f) && got.Normalize() != normalNaN { | ||
t.Errorf("Normalize for NaN failed") | ||
} | ||
if got.IsFinite() == (math.IsNaN(f) || math.IsInf(f, 0)) { | ||
t.Errorf("IsFinite failed for %v", f) | ||
} | ||
}) | ||
} | ||
t.Run("neg-zero_to_zero", func(t *testing.T) { | ||
negZero := NumbitsFromFloat64(f64_negZero) | ||
normZero := negZero.Normalize() | ||
if normZero == negZero { | ||
t.Errorf("Normalize for -0.0 failed") | ||
} | ||
if negZero.Float64() != 0 || normZero.Float64() != 0 { | ||
t.Errorf("0.0 representation error") | ||
} | ||
}) | ||
} | ||
|
||
func TestNumbits_Compare(t *testing.T) { | ||
for n1, f1 := range values { | ||
v1 := NumbitsFromFloat64(f1) | ||
nanf1 := math.IsNaN(f1) | ||
for n2, f2 := range values { | ||
// redefine in scope so v1 can be changed without changing the outer one | ||
v1 := v1 | ||
v2 := NumbitsFromFloat64(f2) | ||
nanf2 := math.IsNaN(f2) | ||
order := compare(f1, f2) | ||
if o := compare(v1.Float64(), v2.Float64()); order != o { | ||
t.Errorf("%v->%v: comparison after Float64() failed: want %v, got %v", n1, n2, order, o) | ||
} | ||
if nanf1 || (f1 == 0 && f2 == 0) { | ||
v1 = v1.Normalize() | ||
} | ||
if nanf2 || (f1 == 0 && f2 == 0) { | ||
v2 = v2.Normalize() | ||
} | ||
b1, b2 := v1.BinaryString(), v2.BinaryString() | ||
if o := compare(v1, v2); order != o { | ||
t.Errorf("%v->%v: direct comparison of Numbits failed: want %v, got %v for %x -> %x", n1, n2, order, o, b1, b2) | ||
} | ||
if o := compare(v1.BinaryString(), v2.BinaryString()); order != o { | ||
t.Errorf("%v->%v: comparison after BinaryString() failed: want %v, got %v for %x -> %x", n1, n2, order, o, b1, b2) | ||
} | ||
} | ||
} | ||
} |