From 6a81b27db8ca81ee3f338a0050aa2511181a8221 Mon Sep 17 00:00:00 2001
From: Arne Hormann
Date: Sat, 20 Jul 2024 07:51:37 +0200
Subject: [PATCH] Add support for new float64 representation

Numbits can be constructed from float64 and can be losslessly converted
back to float64.

The current fast branchless conversion is possible due to a nerd-snipe
of @Merovius. He also threw it a godbolt and gave it some scrutiny.
Thanks, Axel!
---
 numbits-cmp1_19_test.go |  50 +++++++++++++++++
 numbits-cmp1_20_test.go |  11 ++++
 numbits.go              |  99 ++++++++++++++++++++++++++++++++++
 numbits_test.go         | 115 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 275 insertions(+)
 create mode 100644 numbits-cmp1_19_test.go
 create mode 100644 numbits-cmp1_20_test.go
 create mode 100644 numbits.go
 create mode 100644 numbits_test.go

diff --git a/numbits-cmp1_19_test.go b/numbits-cmp1_19_test.go
new file mode 100644
index 0000000..6c2c073
--- /dev/null
+++ b/numbits-cmp1_19_test.go
@@ -0,0 +1,50 @@
+//go:build !go1.21
+
+package quamina
+
+// code below is copied and slightly adapted from package cmp (available since Go 1.21)
+
+// compare returns
+//
+//	-1 if x is less than y,
+//	 0 if x equals y,
+//	+1 if x is greater than y.
+//
+// For floating-point types, a NaN is considered less than any non-NaN,
+// a NaN is considered equal to a NaN, and -0.0 is equal to 0.0.
+func compare[T go_1_19_Ordered](x, y T) int {
+	xNaN := go_1_19_isNaN(x)
+	yNaN := go_1_19_isNaN(y)
+	if xNaN && yNaN {
+		return 0
+	}
+	if xNaN || x < y {
+		return -1
+	}
+	if yNaN || x > y {
+		return +1
+	}
+	return 0
+}
+
+// go_1_19_Ordered is a constraint that permits any ordered type: any type
+// that supports the operators < <= >= >.
+// If future releases of Go add new ordered types,
+// this constraint will be modified to include them.
+//
+// Note that floating-point types may contain NaN ("not-a-number") values.
+// An operator such as == or < will always report false when
+// comparing a NaN value with any other value, NaN or not.
+// See the [compare] function for a consistent way to compare NaN values.
+type go_1_19_Ordered interface {
+	~int | ~int8 | ~int16 | ~int32 | ~int64 |
+		~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr |
+		~float32 | ~float64 |
+		~string
+}
+
+// go_1_19_isNaN reports whether x is a NaN without requiring the math package.
+// This will always return false if T is not floating-point.
+func go_1_19_isNaN[T go_1_19_Ordered](x T) bool {
+	return x != x
+}
diff --git a/numbits-cmp1_20_test.go b/numbits-cmp1_20_test.go
new file mode 100644
index 0000000..b3bd73d
--- /dev/null
+++ b/numbits-cmp1_20_test.go
@@ -0,0 +1,11 @@
+//go:build go1.21
+
+package quamina
+
+import "cmp"
+
+// TODO: when support for Go versions before 1.21 is dropped, replace invocations with cmp.Compare directly.
+
+func compare[T cmp.Ordered](x, y T) int {
+	return cmp.Compare[T](x, y)
+}
diff --git a/numbits.go b/numbits.go
new file mode 100644
index 0000000..e28d8a9
--- /dev/null
+++ b/numbits.go
@@ -0,0 +1,99 @@
+package quamina
+
+import (
+	"encoding/binary"
+	"math"
+)
+
+// float64 are stored as (sign | exponent | mantissa)
+// with 1 bit sign, 11 bits exponent, 52 bits mantissa
+const (
+	maskSign     uint64 = 1 << 63
+	maskExponent uint64 = 0b11111111111 << 52
+	maskMantissa uint64 = ^uint64(0) >> 12
+)
+
+// Numbits representation of some boundary values.
+const (
+	numbitsZero          = Numbits(maskSign)
+	numbitsNegZero       = numbitsZero - 1
+	numbitsNegInf        = Numbits(maskMantissa)
+	numbitsPosInf        = Numbits(maskSign | maskExponent)
+	numbitsNormalizedNaN = numbitsNegInf - 1
+)
+
+// Numbits is an alternative binary representation of float64 numbers.
+// They can be represented as [8]byte or as string and can be created from
+// these representations.
+// All possible float64 values are representable as Numbits.
+//
+// The comparability differs from cmp.Compare for float64, though:
+//   - 0.0 and -0.0 are not equal.
+//   - NaNs are equal if their representation as bits is equal.
+//   - NaNs can be either larger than Infinity
+//     or smaller than -Infinity (depending on the sign bit).
+//   - use Normalize() to align the comparability.
+type Numbits uint64
+
+// NumbitsFromFloat64 converts a float64 value to its Numbits representation.
+func NumbitsFromFloat64(f float64) Numbits {
+	u := math.Float64bits(f)
+	// transform without branching (inverse of Numbits.Float64):
+	// if high bit is 0, xor with sign bit 1 << 63, else negate (xor with ^0)
+	mask := (u>>63)*^uint64(0) | (1 << 63)
+	return Numbits(u ^ mask)
+}
+
+// NumbitsFromBytes converts a [8]byte value to its Numbits representation.
+func NumbitsFromBytes(b [8]byte) Numbits {
+	return Numbits(binary.BigEndian.Uint64(b[:]))
+}
+
+// NumbitsFromBinaryString converts a string value created by BinaryString to its Numbits representation.
+// It uses the first 8 bytes from the string and panics if it is shorter.
+func NumbitsFromBinaryString(s string) Numbits {
+	// This code could use slice to array conversion, but at implementation time,
+	// quamina still supported Go 1.19. The feature was introduced in 1.20.
+	return Numbits(binary.BigEndian.Uint64([]byte(s[:8])))
+}
+
+// Float64 converts Numbits back to its float64 representation.
+func (n Numbits) Float64() float64 {
+	u := uint64(n)
+	// transform without branching (inverse of NumbitsFromFloat64):
+	// if high bit is 1, xor with sign bit 1 << 63, else negate (xor with ^0)
+	mask := (1-(u>>63))*^uint64(0) | (1 << 63)
+	return math.Float64frombits(u ^ mask)
+}
+
+// Normalize the value to align the comparability to cmp.Compare.
+//
+// Normalization only affects -0.0 (converted to 0.0) and NaN (all converted to one representation below -Infinity).
+func (n Numbits) Normalize() Numbits {
+	if n == numbitsNegZero {
+		return numbitsZero
+	}
+	if n < numbitsNegInf || numbitsPosInf < n {
+		return numbitsNormalizedNaN
+	}
+	return n
+}
+
+// IsFinite returns true iff n is not infinite or NaN.
+func (n Numbits) IsFinite() bool {
+	return numbitsNegInf < n && n < numbitsPosInf
+}
+
+// Bytes retrieves a representation as [8]byte.
+// The returned bytes are in big-endian order.
+func (n Numbits) Bytes() [8]byte {
+	var b [8]byte
+	binary.BigEndian.PutUint64(b[:], uint64(n))
+	return b
+}
+
+// BinaryString retrieves a lexically ordered string representation.
+func (n Numbits) BinaryString() string {
+	b := n.Bytes()
+	return string(b[:])
+}
diff --git a/numbits_test.go b/numbits_test.go
new file mode 100644
index 0000000..3da3ed3
--- /dev/null
+++ b/numbits_test.go
@@ -0,0 +1,115 @@
+package quamina
+
+import (
+	"math"
+	"testing"
+)
+
+var (
+	// special case, compiler does not create it when writing -0.0
+	f64_negZero = math.Float64frombits(0b1_00000000000_0000_00000000_00000000_00000000_00000000_00000000_00000000)
+
+	// boundaries of floating point value ranges
+	f64_zero      = math.Float64frombits(0b0_00000000000_0000_00000000_00000000_00000000_00000000_00000000_00000000)
+	f64_subnormLo = math.Float64frombits(0b0_00000000000_0000_00000000_00000000_00000000_00000000_00000000_00000001)
+	f64_subnormHi = math.Float64frombits(0b0_00000000000_1111_11111111_11111111_11111111_11111111_11111111_11111111)
+	f64_normLoLo  = math.Float64frombits(0b0_00000000001_0000_00000000_00000000_00000000_00000000_00000000_00000000)
+	f64_normLoHi  = math.Float64frombits(0b0_00000000001_1111_11111111_11111111_11111111_11111111_11111111_11111111)
+	f64_normHiLo  = math.Float64frombits(0b0_11111111110_0000_00000000_00000000_00000000_00000000_00000000_00000000)
+	f64_normHiHi  = math.Float64frombits(0b0_11111111110_1111_11111111_11111111_11111111_11111111_11111111_11111111)
+	f64_inf       = math.Float64frombits(0b0_11111111111_0000_00000000_00000000_00000000_00000000_00000000_00000000)
+	f64_nanLo     = math.Float64frombits(0b0_11111111111_0000_00000000_00000000_00000000_00000000_00000000_00000001)
+	f64_nanHi     = math.Float64frombits(0b0_11111111111_1111_11111111_11111111_11111111_11111111_11111111_11111111)
+
+	// named values including boundaries
+	values = func(positive map[string]float64) map[string]float64 {
+		// this function mirrors the values to negative
+		const sign uint64 = 1 << 63
+		m2 := make(map[string]float64, len(positive)*2)
+		for n, v := range positive {
+			m2[n] = v
+			m2["negative "+n] = math.Float64frombits(math.Float64bits(v) | sign)
+		}
+		return m2
+	}(map[string]float64{
+		"zero":                  f64_zero,
+		"subnormal;lo":          f64_subnormLo,
+		"subnormal;hi":          f64_subnormHi,
+		"normal;lo-exp,lo-mant": f64_normLoLo,
+		"normal;lo-exp,hi-mant": f64_normLoHi,
+		"normal;hi-exp,lo-mant": f64_normHiLo,
+		"normal;hi-exp,hi-mant": f64_normHiHi,
+		"infinity":              f64_inf,
+		"NaN;lo":                f64_nanLo,
+		"NaN;hi":                f64_nanHi,
+		"0.1":                   0.1,
+		"1.0":                   1.0,
+		"pi":                    math.Pi,
+	})
+)
+
+func TestNumbits(t *testing.T) {
+	// roundtrips for various values - creation and conversion
+	normalNaN := NumbitsFromFloat64(math.NaN()).Normalize()
+	for n, f := range values {
+		t.Run(n, func(t *testing.T) {
+			got := NumbitsFromFloat64(f)
+			// compare bit patterns: an ordering comparison would hide a lost sign of zero or a changed NaN payload
+			if f2 := got.Float64(); math.Float64bits(f2) != math.Float64bits(f) {
+				t.Errorf("NumbitsFromFloat64().Float64() = %v (bits %#x), want %v (bits %#x)", f2, math.Float64bits(f2), f, math.Float64bits(f))
+			}
+			if bin := got.Bytes(); NumbitsFromBytes(bin) != got {
+				t.Errorf("NumbitsFromBytes(Bytes()) = %x, want %x", NumbitsFromBytes(bin), got)
+			}
+			if str := got.BinaryString(); NumbitsFromBinaryString(str) != got {
+				t.Errorf("NumbitsFromBinaryString(BinaryString()) = %x, want %x", NumbitsFromBinaryString(str), got)
+			}
+			if math.IsNaN(f) && got.Normalize() != normalNaN {
+				t.Errorf("Normalize for NaN failed")
+			}
+			if got.IsFinite() == (math.IsNaN(f) || math.IsInf(f, 0)) {
+				t.Errorf("IsFinite failed for %v", f)
+			}
+		})
+	}
+	t.Run("neg-zero_to_zero", func(t *testing.T) {
+		negZero := NumbitsFromFloat64(f64_negZero)
+		normZero := negZero.Normalize()
+		if normZero == negZero {
+			t.Errorf("Normalize for -0.0 failed")
+		}
+		if negZero.Float64() != 0 || normZero.Float64() != 0 {
+			t.Errorf("0.0 representation error")
+		}
+	})
+}
+
+func TestNumbits_Compare(t *testing.T) {
+	for n1, f1 := range values {
+		v1 := NumbitsFromFloat64(f1)
+		nanf1 := math.IsNaN(f1)
+		for n2, f2 := range values {
+			// redefine in scope so v1 can be changed without changing the outer one
+			v1 := v1
+			v2 := NumbitsFromFloat64(f2)
+			nanf2 := math.IsNaN(f2)
+			order := compare(f1, f2)
+			if o := compare(v1.Float64(), v2.Float64()); order != o {
+				t.Errorf("%v->%v: comparison after Float64() failed: want %v, got %v", n1, n2, order, o)
+			}
+			if nanf1 || (f1 == 0 && f2 == 0) {
+				v1 = v1.Normalize()
+			}
+			if nanf2 || (f1 == 0 && f2 == 0) {
+				v2 = v2.Normalize()
+			}
+			b1, b2 := v1.BinaryString(), v2.BinaryString()
+			if o := compare(v1, v2); order != o {
+				t.Errorf("%v->%v: direct comparison of Numbits failed: want %v, got %v for %x -> %x", n1, n2, order, o, b1, b2)
+			}
+			if o := compare(b1, b2); order != o {
+				t.Errorf("%v->%v: comparison after BinaryString() failed: want %v, got %v for %x -> %x", n1, n2, order, o, b1, b2)
+			}
+		}
+	}
+}