Skip to content

Commit

Permalink
Add support for new float64 representation
Browse files Browse the repository at this point in the history
Numbits can be constructed from float64 and can be
losslessly converted back to float64.
The current fast branchless conversion is possible due to
a nerd-snipe of @Merovius. He also threw it at godbolt and
gave it some scrutiny. Thanks, Axel!
  • Loading branch information
arnehormann committed Jul 20, 2024
1 parent 6fb7cd4 commit 6a81b27
Show file tree
Hide file tree
Showing 4 changed files with 275 additions and 0 deletions.
50 changes: 50 additions & 0 deletions numbits-cmp1_19_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//go:build !go1.20

package quamina

// code below is copied and slightly adapted from package cmp (part of the Go standard library since Go 1.21)

// compare reports how x is ordered relative to y:
//
//	-1 if x is less than y,
//	 0 if x equals y,
//	+1 if x is greater than y.
//
// For floating-point types, a NaN sorts before every non-NaN value,
// two NaNs compare as equal, and -0.0 compares as equal to 0.0.
func compare[T go_1_19_Ordered](x, y T) int {
	xIsNaN, yIsNaN := go_1_19_isNaN(x), go_1_19_isNaN(y)
	switch {
	case xIsNaN && yIsNaN:
		// NaN never equals itself under ==, so equality is decided up front.
		return 0
	case xIsNaN || x < y:
		return -1
	case yIsNaN || x > y:
		return +1
	default:
		return 0
	}
}

// go_1_19_Ordered is a constraint that is satisfied by every ordered type,
// i.e. every type that supports the operators < <= >= >.
// Should a future Go release add new ordered types, this constraint has to be
// extended to include them.
//
// Floating-point types may hold NaN ("not-a-number") values. Operators such
// as == or < always report false when one operand is NaN, no matter what the
// other operand is. Use the compare function for a consistent ordering in
// the presence of NaN values.
type go_1_19_Ordered interface {
	~string |
		~float32 | ~float64 |
		~int | ~int8 | ~int16 | ~int32 | ~int64 |
		~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr
}

// go_1_19_isNaN reports whether x is a NaN, without depending on the math
// package. For any T that is not a floating-point type the result is
// always false.
func go_1_19_isNaN[T go_1_19_Ordered](x T) bool {
	// NaN is the only value that does not compare equal to itself.
	return !(x == x)
}
11 changes: 11 additions & 0 deletions numbits-cmp1_20_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//go:build go1.20

package quamina

import "cmp"

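// TODO: when Go 1.19 support is dropped, replace invocations with cmp.Compare directly.
// NOTE(review): package cmp was added to the standard library in Go 1.21, so the
// import above does not resolve on Go 1.20 even though this file is selected by the
// go1.20 build constraint. Confirm the intended minimum Go version.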

// compare is a thin wrapper around cmp.Compare so that callers can use the
// same helper name regardless of the Go version they are built with.
// It returns -1, 0 or +1 as documented for cmp.Compare.
func compare[T cmp.Ordered](x, y T) int {
	// T is inferred from the arguments; no explicit instantiation is needed.
	return cmp.Compare(x, y)
}
99 changes: 99 additions & 0 deletions numbits.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package quamina

import (
"encoding/binary"
"math"
)

// Bit masks for the three fields of a float64 as returned by math.Float64bits.
// From the most to the least significant bit the layout is
// (sign | exponent | mantissa): 1 sign bit, 11 exponent bits, 52 mantissa bits.
const (
	maskSign     uint64 = 0x8000_0000_0000_0000 // bit 63
	maskExponent uint64 = 0x7FF << 52           // bits 62..52
	maskMantissa uint64 = 1<<52 - 1             // bits 51..0
)

// Numbits representation of some boundary values, listed from the smallest
// to the largest Numbits value.
const (
	// the single NaN value produced by Normalize; it sorts directly below -Infinity
	numbitsNormalizedNaN = numbitsNegInf - 1
	// -Infinity: every bit of its float64 pattern (sign and exponent set) is inverted
	numbitsNegInf = Numbits(maskMantissa)
	// -0.0: the direct predecessor of 0.0
	numbitsNegZero = numbitsZero - 1
	// 0.0: the all-zero float64 pattern with the high bit set
	numbitsZero = Numbits(maskSign)
	// +Infinity: the float64 pattern (exponent set) with the high bit set
	numbitsPosInf = Numbits(maskSign | maskExponent)
)

// Numbits is an alternative binary representation of float64 numbers.
// A Numbits value can be converted to [8]byte (Bytes) or to a string
// (BinaryString) and can be created from a float64 (NumbitsFromFloat64) or
// from these representations (NumbitsFromBytes, NumbitsFromBinaryString).
// All possible float64 values are representable as Numbits, and Float64
// converts a Numbits value back to the float64 it was created from.
//
// The comparability differs from cmp.Compare for float64, though:
// - 0.0 and -0.0 are not equal.
// - NaNs are equal if their representation as bits is equal.
// - NaNs can be either larger than Infinity
// or smaller than -Infinity (depending on the sign bit).
// - use Normalize() to align the comparability.
type Numbits uint64

// NumbitsFromFloat64 returns the Numbits representation of the float64 value f.
func NumbitsFromFloat64(f float64) Numbits {
	bits := math.Float64bits(f)
	// Branch-free transform (undone by Numbits.Float64): the arithmetic right
	// shift copies the sign bit into every bit position, so the flip mask is
	// all ones when the sign bit is set (every bit gets inverted) and just
	// 1 << 63 when it is clear (only the high bit gets set).
	flip := uint64(int64(bits)>>63) | (1 << 63)
	return Numbits(bits ^ flip)
}

// NumbitsFromBytes returns the Numbits value encoded in b.
// The bytes are read as one unsigned 64 bit integer in big-endian order.
func NumbitsFromBytes(b [8]byte) Numbits {
	raw := binary.BigEndian.Uint64(b[:])
	return Numbits(raw)
}

// NumbitsFromBinaryString returns the Numbits value encoded in a string that
// was created by BinaryString.
// Only the first 8 bytes of s are used; it panics if s is shorter than that.
func NumbitsFromBinaryString(s string) Numbits {
	// A direct slice to array conversion would do as well, but it requires
	// Go 1.20 and quamina still supported Go 1.19 when this was written.
	var raw [8]byte
	copy(raw[:], s[:8])
	return Numbits(binary.BigEndian.Uint64(raw[:]))
}

// Float64 returns the float64 value that n represents.
func (n Numbits) Float64() float64 {
	bits := uint64(n)
	// Branch-free transform (undone by NumbitsFromFloat64): (bits>>63)-1 is
	// zero when the high bit is set and wraps around to all ones when it is
	// clear. The flip mask is therefore just 1 << 63 for a set high bit
	// (only that bit gets cleared) and all ones otherwise (every bit gets
	// inverted).
	flip := ((bits >> 63) - 1) | (1 << 63)
	return math.Float64frombits(bits ^ flip)
}

// Normalize returns n adjusted so that comparing Numbits values agrees with
// cmp.Compare on the float64 values they represent.
//
// Only two kinds of values are changed: -0.0 becomes 0.0, and every NaN
// becomes the same single NaN representation. All other values are returned
// as they are.
func (n Numbits) Normalize() Numbits {
	switch {
	case n == numbitsNegZero:
		return numbitsZero
	case n < numbitsNegInf, n > numbitsPosInf:
		// everything outside of [-Infinity, +Infinity] is a NaN
		return numbitsNormalizedNaN
	default:
		return n
	}
}

// IsFinite reports whether n is a finite number, that is, whether it is
// neither an infinity nor a NaN.
func (n Numbits) IsFinite() bool {
	if n <= numbitsNegInf {
		// -Infinity, or a NaN that sorts below it
		return false
	}
	// +Infinity and the NaNs that sort above it are excluded as well
	return n < numbitsPosInf
}

// Bytes returns n as an array of 8 bytes.
// The most significant byte comes first (big-endian order).
func (n Numbits) Bytes() [8]byte {
	var buf [8]byte
	dst := buf[:]
	binary.BigEndian.PutUint64(dst, uint64(n))
	return buf
}

// BinaryString returns the 8 bytes of n (see Bytes) as a string.
// Because the bytes are in big-endian order, comparing two such strings
// gives the same result as comparing the Numbits values they were made from.
func (n Numbits) BinaryString() string {
	encoded := n.Bytes()
	return string(encoded[:])
}
115 changes: 115 additions & 0 deletions numbits_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package quamina

import (
"cmp"
"math"
"testing"
)

// Test fixtures: float64 values built from explicit bit patterns.
// The binary literals are grouped as sign_exponent_mantissa
// (1 bit, 11 bits, 52 bits).
var (
// special case, compiler does not create it when writing -0.0
// (only the sign bit is set)
f64_negZero = math.Float64frombits(0b1_00000000000_0000_00000000_00000000_00000000_00000000_00000000_00000000)

// boundaries of floating point value ranges
// (all with a cleared sign bit; lo/hi mark the smallest/largest bit pattern of a range)
f64_zero = math.Float64frombits(0b0_00000000000_0000_00000000_00000000_00000000_00000000_00000000_00000000)
f64_subnormLo = math.Float64frombits(0b0_00000000000_0000_00000000_00000000_00000000_00000000_00000000_00000001)
f64_subnormHi = math.Float64frombits(0b0_00000000000_1111_11111111_11111111_11111111_11111111_11111111_11111111)
f64_normLoLo = math.Float64frombits(0b0_00000000001_0000_00000000_00000000_00000000_00000000_00000000_00000000)
f64_normLoHi = math.Float64frombits(0b0_00000000001_1111_11111111_11111111_11111111_11111111_11111111_11111111)
f64_normHiLo = math.Float64frombits(0b0_11111111110_0000_00000000_00000000_00000000_00000000_00000000_00000000)
f64_normHiHi = math.Float64frombits(0b0_11111111110_1111_11111111_11111111_11111111_11111111_11111111_11111111)
f64_inf = math.Float64frombits(0b0_11111111111_0000_00000000_00000000_00000000_00000000_00000000_00000000)
f64_nanLo = math.Float64frombits(0b0_11111111111_0000_00000000_00000000_00000000_00000000_00000000_00000001)
f64_nanHi = math.Float64frombits(0b0_11111111111_1111_11111111_11111111_11111111_11111111_11111111_11111111)

// named values including boundaries
// (keyed by a descriptive name; the tests iterate over this map)
values = func(positive map[string]float64) map[string]float64 {
// this function mirrors the values to negative:
// every entry is kept as it is and added a second time under the name
// "negative <name>" with the sign bit set
const sign uint64 = 1 << 63
m2 := make(map[string]float64, len(positive)*2)
for n, v := range positive {
m2[n] = v
// set the sign bit on the raw bits so that zero and the NaN patterns are mirrored, too
m2["negative "+n] = math.Float64frombits(math.Float64bits(v) | sign)
}
return m2
}(map[string]float64{
"zero": f64_zero,
"subnormal;lo": f64_subnormLo,
"subnormal;hi": f64_subnormHi,
"normal;lo-exp,lo-mant": f64_normLoLo,
"normal;lo-exp,hi-mant": f64_normLoHi,
"normal;hi-exp,lo-mant": f64_normHiLo,
"normal;hi-exp,hi-mant": f64_normHiHi,
"infinity": f64_inf,
"NaN;lo": f64_nanLo,
"NaN;hi": f64_nanHi,
"0.1": 0.1,
"1.0": 1.0,
"pi": math.Pi,
})
)

// TestNumbits checks for every fixture in values that a float64 survives the
// round trips through Numbits, [8]byte and the binary string form, and that
// Normalize and IsFinite treat the value as expected. A separate subtest
// covers the normalization of -0.0.
func TestNumbits(t *testing.T) {
	// roundtrips for various values - creation and conversion
	normalNaN := NumbitsFromFloat64(math.NaN()).Normalize()
	for n, f := range values {
		t.Run(n, func(t *testing.T) {
			got := NumbitsFromFloat64(f)
			f2 := got.Float64()
			if cmp.Compare(f2, f) != 0 {
				t.Errorf("NumbitsFromFloat64().Float64() = %v, want %v", f2, f)
			}
			// cmp.Compare treats -0.0 as equal to 0.0 and all NaNs as equal, so it
			// cannot detect a lost sign or NaN payload. Compare the raw bits as
			// well to verify that the conversion is lossless.
			if b2, b := math.Float64bits(f2), math.Float64bits(f); b2 != b {
				t.Errorf("NumbitsFromFloat64().Float64() bits = %016x, want %016x", b2, b)
			}
			bin := got.Bytes()
			if back := NumbitsFromBytes(bin); back != got {
				t.Errorf("NumbitsFromBytes(%x) = %x, want %x", bin, uint64(back), uint64(got))
			}
			str := got.BinaryString()
			if back := NumbitsFromBinaryString(str); back != got {
				t.Errorf("NumbitsFromBinaryString(%x) = %x, want %x", str, uint64(back), uint64(got))
			}
			if math.IsNaN(f) && got.Normalize() != normalNaN {
				t.Errorf("Normalize for NaN failed")
			}
			// IsFinite has to be the exact opposite of "NaN or infinite"
			if got.IsFinite() == (math.IsNaN(f) || math.IsInf(f, 0)) {
				t.Errorf("IsFinite failed for %v", f)
			}
		})
	}
	t.Run("neg-zero_to_zero", func(t *testing.T) {
		negZero := NumbitsFromFloat64(f64_negZero)
		normZero := negZero.Normalize()
		if normZero == negZero {
			t.Errorf("Normalize for -0.0 failed")
		}
		if want := NumbitsFromFloat64(f64_zero); normZero != want {
			t.Errorf("Normalize() of -0.0 = %x, want %x", uint64(normZero), uint64(want))
		}
		if negZero.Float64() != 0 || normZero.Float64() != 0 {
			t.Errorf("0.0 representation error")
		}
		// -0.0 == 0.0 holds for float64, so the sign has to be checked explicitly
		if !math.Signbit(negZero.Float64()) || math.Signbit(normZero.Float64()) {
			t.Errorf("sign of zero is wrong: -0.0 -> %v, normalized -> %v", negZero.Float64(), normZero.Float64())
		}
	})
}

// TestNumbits_Compare checks for every pair of fixtures in values that the
// order of the two float64 values (as reported by compare) is preserved
//   - after a round trip through Numbits and back to float64,
//   - when comparing the Numbits values directly,
//   - when comparing their binary string representations.
//
// Numbits only orders NaNs and zeros like compare does after normalization,
// so NaN operands and pairs of zeros are normalized before the last two checks.
func TestNumbits_Compare(t *testing.T) {
	for n1, f1 := range values {
		first := NumbitsFromFloat64(f1)
		nanf1 := math.IsNaN(f1)
		for n2, f2 := range values {
			// start from fresh copies so a normalization never leaks into the next pair
			v1, v2 := first, NumbitsFromFloat64(f2)
			order := compare(f1, f2)
			if o := compare(v1.Float64(), v2.Float64()); order != o {
				t.Errorf("%v->%v: comparison after Float64() failed: want %v, got %v", n1, n2, order, o)
			}
			bothZero := f1 == 0 && f2 == 0
			if nanf1 || bothZero {
				v1 = v1.Normalize()
			}
			if math.IsNaN(f2) || bothZero {
				v2 = v2.Normalize()
			}
			b1, b2 := v1.BinaryString(), v2.BinaryString()
			if o := compare(v1, v2); order != o {
				t.Errorf("%v->%v: direct comparison of Numbits failed: want %v, got %v for %x -> %x", n1, n2, order, o, b1, b2)
			}
			if o := compare(b1, b2); order != o {
				t.Errorf("%v->%v: comparison after BinaryString() failed: want %v, got %v for %x -> %x", n1, n2, order, o, b1, b2)
			}
		}
	}
}

0 comments on commit 6a81b27

Please sign in to comment.