Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Random: allow string seeds #51527

Merged
merged 5 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@ Standard library changes

#### Random
* `rand` now supports sampling over `Tuple` types ([#35856], [#50251]).
* When seeding RNGs provided by `Random`, negative integer seeds can now be used ([#51416]).

* `rand` now supports sampling over `Pair` types ([#28705]).
* When seeding RNGs provided by `Random`, negative integer seeds can now be used ([#51416]).
* Seedable random number generators from `Random` can now be seeded by a string, e.g.
`seed!(rng, "a random seed")` ([#51527]).

#### REPL

Expand Down
29 changes: 24 additions & 5 deletions stdlib/Random/src/RNGs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ MersenneTwister(seed, state::DSFMT_state) =
Create a `MersenneTwister` RNG object. Different RNG objects can have
their own seeds, which may be useful for generating different streams
of random numbers.
The `seed` may be an integer or a vector of `UInt32` integers.
The `seed` may be an integer, a string, or a vector of `UInt32` integers.
If no seed is provided, a randomly generated one is created (using entropy from the system).
See the [`seed!`](@ref) function for reseeding an already existing `MersenneTwister` object.

Expand Down Expand Up @@ -316,12 +316,31 @@ function hash_seed(seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}})
SHA.digest!(ctx)
end

# Hash a string seed into a 256-bit (32-byte) SHA-256 digest.
#
# The hashed byte stream is: the code units of `String(str)`, then 1 to 4 padding
# bytes, then the one-byte string signature `0x05`. The digest is returned as a
# `Vector{UInt8}`, like the other `hash_seed` methods.
function hash_seed(str::AbstractString)
    ctx = SHA.SHA2_256_CTX()
    # convert to String such that `codeunits(str)` below is consistent between equal
    # strings of different types
    str = String(str)
    SHA.update!(ctx, codeunits(str))
    # signature for strings: so far, all hash_seed functions end up hashing a multiple
    # of 4 bytes of data, and add the signature (1 byte) at the end; so hash as many
    # bytes as necessary to have a total number of hashed bytes equal to 0 mod 4 (padding),
    # and then hash the signature 0x05; in order for strings of different lengths to have
    # different hashes, padding bytes are set equal to the number of padding bytes
    # (`pad` is always in 1:4, so a string whose length is already a multiple of 4
    # still gets 4 padding bytes)
    pad = 4 - mod(ncodeunits(str), 4)
    for _ in 1:pad
        SHA.update!(ctx, (pad % UInt8,))
    end
    SHA.update!(ctx, (0x05,))
    # finalize: without this the function would return the result of `update!`
    # instead of the 32-byte hash
    SHA.digest!(ctx)
end


"""
hash_seed(seed) -> AbstractVector{UInt8}

Return a cryptographic hash of `seed` of size 256 bits (32 bytes).
`seed` can currently be of type `Union{Integer, DenseArray{UInt32}, DenseArray{UInt64}}`,
`seed` can currently be of type
`Union{Integer, AbstractString, AbstractArray{UInt32}, AbstractArray{UInt64}}`,
but modules can extend this function for types they own.

`hash_seed` is "injective": if `n != m`, then `hash_seed(n) != hash_seed(m)`.
Expand Down Expand Up @@ -750,13 +769,13 @@ jump!(r::MersenneTwister, steps::Integer) = copy!(r, jump(r, steps))
# 3, 4: .adv_vals, .idxF (counters to reconstruct the float cache, optional if 5-6 not shown)
# 5, 6: .adv_ints, .idxI (counters to reconstruct the integer cache, optional)

Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{6,Integer}) =
Random.MersenneTwister(seed, advance::NTuple{6,Integer}) =
advance!(MersenneTwister(seed), advance...)

Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{4,Integer}) =
Random.MersenneTwister(seed, advance::NTuple{4,Integer}) =
MersenneTwister(seed, (advance..., 0, 0))

Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{2,Integer}) =
Random.MersenneTwister(seed, advance::NTuple{2,Integer}) =
MersenneTwister(seed, (advance..., 0, 0, 0, 0))

# advances raw state (per fill_array!) of r by n steps (Float64 values)
Expand Down
2 changes: 1 addition & 1 deletion stdlib/Random/src/Xoshiro.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Lots of implementation is shared with TaskLocalRNG

"""
Xoshiro(seed::Integer)
Xoshiro(seed::Union{Integer, AbstractString})
Xoshiro()

Xoshiro256++ is a fast pseudorandom number generator described by David Blackman and
Expand Down
25 changes: 23 additions & 2 deletions stdlib/Random/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,7 @@ end
# test that the following is not an error (#16925)
@test Random.seed!(m..., typemax(UInt)) === m2
@test Random.seed!(m..., typemax(UInt128)) === m2
@test Random.seed!(m..., "a random seed") === m2
end
end

Expand Down Expand Up @@ -702,7 +703,7 @@ end
end

@testset "$RNG(seed) & Random.seed!(m::$RNG, seed) produce the same stream" for RNG=(MersenneTwister,Xoshiro)
seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), rand(UInt128, 3)...]
seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), randstring(), randstring(), rand(UInt128, 3)...]
if RNG == Xoshiro
push!(seeds, rand(UInt64, rand(1:4)))
end
Expand All @@ -715,7 +716,7 @@ end
end

@testset "Random.seed!(seed) sets Random.GLOBAL_SEED" begin
seeds = Any[0, rand(UInt128), rand(UInt64, 4)]
seeds = Any[0, rand(UInt128), rand(UInt64, 4), randstring(20)]

for seed=seeds
Random.seed!(seed)
Expand Down Expand Up @@ -932,6 +933,15 @@ end
@test string(m) == "MersenneTwister(-3)"
Random.seed!(m, typemin(Int8))
@test string(m) == "MersenneTwister(-128)"

# string seeds
Random.seed!(m, "seed 1")
@test string(m) == "MersenneTwister(\"seed 1\")"
x = rand(m)
@test x == rand(MersenneTwister("seed 1"))
@test string(m) == """MersenneTwister("seed 1", (0, 1002, 0, 1))"""
# test that MersenneTwister's fancy constructors accept string seeds
@test MersenneTwister("seed 1", (0, 1002, 0, 1)) == m
end

@testset "RandomDevice" begin
Expand Down Expand Up @@ -1188,6 +1198,17 @@ end
hash32 = Random.hash_seed(seed32)
@test Random.hash_seed(map(UInt64, seed32)) == hash32
@test hash32 ∉ keys(vseeds)

seed_str = randstring()
seed_gstr = GenericString(seed_str)
@test Random.hash_seed(seed_str) == Random.hash_seed(seed_gstr)
string_seeds = Set{Vector{UInt8}}()
for ch = 'A':'z'
vseed = Random.hash_seed(string(ch))
@test vseed ∉ keys(vseeds)
@test vseed ∉ string_seeds
push!(string_seeds, vseed)
end
end

@testset "rand(::Type{<:Pair})" begin
Expand Down