Skip to content

Commit

Permalink
faster hashing by avoiding UB
Browse files Browse the repository at this point in the history
In LLVM (inherited from C), fptosi has undefined behavior if the result
does not fit the integer size after rounding down. But by using the same
strategy as generic hashing of Real values, we actually can end up with
a situation that is faster for the CPU to deal with and avoids the UB.

Refs #6624 (3696968)
Fixes #37800
  • Loading branch information
vtjnash committed Oct 26, 2020
1 parent bac321d commit 24dde5d
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 34 deletions.
4 changes: 2 additions & 2 deletions base/abstractset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ julia> union!(a, 1:2:8);
julia> a
Set{Int64} with 5 elements:
7
5
4
7
3
5
1
```
"""
Expand Down
30 changes: 21 additions & 9 deletions base/float.jl
Original file line number Diff line number Diff line change
Expand Up @@ -460,17 +460,29 @@ Test whether a number is infinite.
"""
isinf(x::Real) = !isnan(x) & !isfinite(x)

## hashing small, built-in numeric types ##

hx(a::UInt64, b::Float64, h::UInt) = hash_uint64((3a + reinterpret(UInt64,b)) - h)
const hx_NaN = hx(UInt64(0), NaN, UInt(0 ))

hash(x::UInt64, h::UInt) = hx(x, Float64(x), h)
hash(x::Int64, h::UInt) = hx(reinterpret(UInt64, abs(x)), Float64(x), h)
hash(x::Float64, h::UInt) = isnan(x) ? (hx_NaN ⊻ h) : hx(fptoui(UInt64, abs(x)), x, h)
const hx_NaN = hash_uint64(reinterpret(UInt64, NaN))
# Value-based hashing of Float64.
#
# The float-to-integer intrinsics are only invoked after a range check has shown
# that `x` lies inside the target integer type's range; converting an
# out-of-range value would be undefined behavior in LLVM. When the converted
# integer is `isequal` to `x`, the integer's hash is returned so that the float
# and the equal integer hash identically. NaN is hashed through the constant
# `hx_NaN`, and every remaining value is hashed from its raw bit pattern.
let Tf = Float64, Tu = UInt64, Ti = Int64
    @eval function hash(x::$Tf, h::UInt)
        # see comments on trunc and hash(Real, UInt)
        if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))
            # x is within the signed range, so fptosi is well-defined here
            xi = fptosi($Ti, x)
            if isequal(xi, x)
                return hash(xi, h)
            end
        elseif $(Tf(typemin(Tu))) <= x < $(Tf(typemax(Tu)))
            # x did not fit the signed range but fits the unsigned one
            xu = fptoui($Tu, x)
            if isequal(xu, x)
                return hash(xu, h)
            end
        elseif isnan(x)
            return hx_NaN ⊻ h # NaN does not have a stable bit pattern
        end
        # not exactly representable as an integer of either type: hash the bits
        return hash_uint64(bitcast(UInt64, x)) - 3h
    end
end

hash(x::Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32}, h::UInt) = hash(Int64(x), h)
hash(x::Float32, h::UInt) = hash(Float64(x), h)
hash(x::Float16, h::UInt) = hash(Float64(x), h)

"""
precision(num::AbstractFloat)
Expand Down
8 changes: 4 additions & 4 deletions base/hashing2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## efficient value-based hashing of integers ##

hash(x::Int64, h::UInt) = hash_uint64(bitcast(UInt64, x)) - 3h
hash(x::UInt64, h::UInt) = hash_uint64(x) - 3h
hash(x::Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32}, h::UInt) = hash(Int64(x), h)

function hash_integer(n::Integer, h::UInt)
h ⊻= hash_uint((n % UInt) ⊻ h)
n = abs(n)
Expand Down Expand Up @@ -226,7 +230,3 @@ function hash(x::Rational{<:BitInteger64}, h::UInt)
h = hash_integer(num, h)
return h
end

## hashing Float16s ##

hash(x::Float16, h::UInt) = hash(Float64(x), h)
37 changes: 19 additions & 18 deletions test/hashing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,29 @@ function coerce(T::Type, x)
end
end

for T = types[2:end],
x = vals,
for T = types[2:end], x = vals
a = coerce(T, x)
@test hash(a,zero(UInt)) == invoke(hash, Tuple{Real, UInt}, a, zero(UInt))
@test hash(a,one(UInt)) == invoke(hash, Tuple{Real, UInt}, a, one(UInt))
@test hash(a, zero(UInt)) == invoke(hash, Tuple{Real, UInt}, a, zero(UInt))
@test hash(a, one(UInt)) == invoke(hash, Tuple{Real, UInt}, a, one(UInt))
end

for T = types,
S = types,
x = vals,
a = coerce(T, x),
b = coerce(S, x)
#println("$(typeof(a)) $a")
#println("$(typeof(b)) $b")
@test isequal(a,b) == (hash(a)==hash(b))
# for y=vals
# println("T=$T; S=$S; x=$x; y=$y")
# c = convert(T,x//y)
# d = convert(S,x//y)
# @test !isequal(a,b) || hash(a)==hash(b)
# end
let collides = 0
for T = types, S = types, x = vals
a = coerce(T, x)
b = coerce(S, x)
eq = hash(a) == hash(b)
#println("$(typeof(a)) $a")
#println("$(typeof(b)) $b")
if isequal(a, b)
@test eq
else
collides += eq
end
end
# each pair of types has one collision for these values
@test collides <= (length(types) - 1)^2
end
@test hash(0.0) != hash(-0.0)

# issue #8619
@test hash(nextfloat(2.0^63)) == hash(UInt64(nextfloat(2.0^63)))
Expand Down
2 changes: 1 addition & 1 deletion test/show.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1608,7 +1608,7 @@ end

# issue #27680
@test showstr(Set([(1.0,1.0), (2.0,2.0), (3.0, 3.0)])) == (sizeof(Int) == 8 ?
"Set([(3.0, 3.0), (2.0, 2.0), (1.0, 1.0)])" :
"Set([(1.0, 1.0), (3.0, 3.0), (2.0, 2.0)])" :
"Set([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)])")

# issue #27747
Expand Down

0 comments on commit 24dde5d

Please sign in to comment.