From 810ce09ae57abf63504ceadfa0a0077ac0de5722 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Fri, 22 Sep 2023 15:30:23 +0200 Subject: [PATCH 1/5] Random: allow string seeds We used to be able to seed RNGs with a string, but that string was interpreted as the filename containing the actual seed. This was deprecated in #21359, in order to later allow using a string seed directly, which this patch does. --- NEWS.md | 5 +++-- stdlib/Random/src/RNGs.jl | 20 +++++++++++++++----- stdlib/Random/src/Xoshiro.jl | 2 +- stdlib/Random/test/runtests.jl | 25 +++++++++++++++++++++++-- 4 files changed, 42 insertions(+), 10 deletions(-) diff --git a/NEWS.md b/NEWS.md index cd68e8c5882b5..e567987a52a08 100644 --- a/NEWS.md +++ b/NEWS.md @@ -53,9 +53,10 @@ Standard library changes #### Random * `rand` now supports sampling over `Tuple` types ([#35856], [#50251]). -* When seeding RNGs provided by `Random`, negative integer seeds can now be used ([#51416]). - * `rand` now supports sampling over `Pair` types ([#28705]). +* When seeding RNGs provided by `Random`, negative integer seeds can now be used ([#51416]). +* Seedable random number generators from `Random` can now be seeded by a string, e.g. + `seed!(rng, "a random seed")` ([#51527]). #### REPL diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl index 8e766bfb98e53..3eb56d7db31b9 100644 --- a/stdlib/Random/src/RNGs.jl +++ b/stdlib/Random/src/RNGs.jl @@ -83,7 +83,7 @@ MersenneTwister(seed, state::DSFMT_state) = Create a `MersenneTwister` RNG object. Different RNG objects can have their own seeds, which may be useful for generating different streams of random numbers. -The `seed` may be an integer or a vector of `UInt32` integers. +The `seed` may be an integer, a string, or a vector of `UInt32` integers. If no seed is provided, a randomly generated one is created (using entropy from the system). See the [`seed!`](@ref) function for reseeding an already existing `MersenneTwister` object. @@ -316,12 +316,22 @@ function hash_seed(seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}}) SHA.digest!(ctx) end +function hash_seed(str::AbstractString) + ctx = SHA.SHA2_256_CTX() + for chr in str + SHA.update!(ctx, reinterpret(NTuple{4, UInt8}, UInt32(chr))) + end + SHA.update!(ctx, (0x05,)) + SHA.digest!(ctx) +end + """ hash_seed(seed) -> AbstractVector{UInt8} Return a cryptographic hash of `seed` of size 256 bits (32 bytes). -`seed` can currently be of type `Union{Integer, DenseArray{UInt32}, DenseArray{UInt64}}`, +`seed` can currently be of type +`Union{Integer, AbstractString, AbstractArray{UInt32}, AbstractArray{UInt64}}`, but modules can extend this function for types they own. `hash_seed` is "injective" : if `n != m`, then `hash_seed(n) != `hash_seed(m)`. @@ -750,13 +760,13 @@ jump!(r::MersenneTwister, steps::Integer) = copy!(r, jump(r, steps)) # 3, 4: .adv_vals, .idxF (counters to reconstruct the float cache, optional if 5-6 not shown)) # 5, 6: .adv_ints, .idxI (counters to reconstruct the integer cache, optional) -Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{6,Integer}) = +Random.MersenneTwister(seed, advance::NTuple{6,Integer}) = advance!(MersenneTwister(seed), advance...) -Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{4,Integer}) = +Random.MersenneTwister(seed, advance::NTuple{4,Integer}) = MersenneTwister(seed, (advance..., 0, 0)) -Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{2,Integer}) = +Random.MersenneTwister(seed, advance::NTuple{2,Integer}) = MersenneTwister(seed, (advance..., 0, 0, 0, 0)) # advances raw state (per fill_array!) of r by n steps (Float64 values) diff --git a/stdlib/Random/src/Xoshiro.jl b/stdlib/Random/src/Xoshiro.jl index bf48fe8e7e9b4..b16668e99584b 100644 --- a/stdlib/Random/src/Xoshiro.jl +++ b/stdlib/Random/src/Xoshiro.jl @@ -4,7 +4,7 @@ # Lots of implementation is shared with TaskLocalRNG """ - Xoshiro(seed::Integer) + Xoshiro(seed::Union{Integer, AbstractString}) Xoshiro() Xoshiro256++ is a fast pseudorandom number generator described by David Blackman and diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl index 010d04a99778d..3a8d43c35253f 100644 --- a/stdlib/Random/test/runtests.jl +++ b/stdlib/Random/test/runtests.jl @@ -648,6 +648,7 @@ end # test that the following is not an error (#16925) @test Random.seed!(m..., typemax(UInt)) === m2 @test Random.seed!(m..., typemax(UInt128)) === m2 + @test Random.seed!(m..., "a random seed") === m2 end end @@ -702,7 +703,7 @@ end end @testset "$RNG(seed) & Random.seed!(m::$RNG, seed) produce the same stream" for RNG=(MersenneTwister,Xoshiro) - seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), rand(UInt128, 3)...] + seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), randstring(), randstring(), rand(UInt128, 3)...] if RNG == Xoshiro push!(seeds, rand(UInt64, rand(1:4))) end @@ -715,7 +716,7 @@ end end @testset "Random.seed!(seed) sets Random.GLOBAL_SEED" begin - seeds = Any[0, rand(UInt128), rand(UInt64, 4)] + seeds = Any[0, rand(UInt128), rand(UInt64, 4), randstring(20)] for seed=seeds Random.seed!(seed) @@ -932,6 +933,15 @@ end @test string(m) == "MersenneTwister(-3)" Random.seed!(m, typemin(Int8)) @test string(m) == "MersenneTwister(-128)" + + # string seeds + Random.seed!(m, "seed 1") + @test string(m) == "MersenneTwister(\"seed 1\")" + x = rand(m) + @test x == rand(MersenneTwister("seed 1")) + @test string(m) == """MersenneTwister("seed 1", (0, 1002, 0, 1))""" + # test that MersenneTwister's fancy constructors accept string seeds + @test MersenneTwister("seed 1", (0, 1002, 0, 1)) == m end @testset "RandomDevice" begin @@ -1188,6 +1198,17 @@ end hash32 = Random.hash_seed(seed32) @test Random.hash_seed(map(UInt64, seed32)) == hash32 @test hash32 ∉ keys(vseeds) + + seed_str = randstring() + seed_gstr = GenericString(seed_str) + @test Random.hash_seed(seed_str) == Random.hash_seed(seed_gstr) + string_seeds = Set{Vector{UInt8}}() + for ch = 'A':'z' + vseed = Random.hash_seed(string(ch)) + @test vseed ∉ keys(vseeds) + @test vseed ∉ string_seeds + push!(string_seeds, vseed) + end end @testset "rand(::Type{<:Pair})" begin From 2f1d74a9fa568d4481a166aafc16095ef402c582 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Sun, 1 Oct 2023 17:29:36 +0200 Subject: [PATCH 2/5] use `codeunits`: allows invalid bytes, and is faster --- stdlib/Random/src/RNGs.jl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl index 3eb56d7db31b9..48d34ef61231d 100644 --- a/stdlib/Random/src/RNGs.jl +++ b/stdlib/Random/src/RNGs.jl @@ -318,10 +318,14 @@ end function hash_seed(str::AbstractString) ctx = SHA.SHA2_256_CTX() - for chr in str - SHA.update!(ctx, reinterpret(NTuple{4, UInt8}, UInt32(chr))) - end - SHA.update!(ctx, (0x05,)) + # convert to String such that `codeunits(str)` below is consistent between equal + # strings of different types + str = String(str) + SHA.update!(ctx, codeunits(str)) + # signature for strings: so far, all hash_seed functions end-up hashing a multiple + # of 4 bytes of data, and add the signature (1 byte) at the end; so hash as many + # 0x05 bytes as necessary to have a total number of hashed bytes equal to 1 mod 4 + SHA.update!(ctx, ntuple(Returns(0x05), 5 - mod1(ncodeunits(str), 4))) SHA.digest!(ctx) end From 63fcb20a7792384bf3b657f136359da3f65b75f6 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Mon, 2 Oct 2023 11:45:39 +0200 Subject: [PATCH 3/5] Update stdlib/Random/src/RNGs.jl Co-authored-by: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com> --- stdlib/Random/src/RNGs.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl index 48d34ef61231d..444fa3f96d609 100644 --- a/stdlib/Random/src/RNGs.jl +++ b/stdlib/Random/src/RNGs.jl @@ -325,7 +325,8 @@ function hash_seed(str::AbstractString) # signature for strings: so far, all hash_seed functions end-up hashing a multiple # of 4 bytes of data, and add the signature (1 byte) at the end; so hash as many # 0x05 bytes as necessary to have a total number of hashed bytes equal to 1 mod 4 - SHA.update!(ctx, ntuple(Returns(0x05), 5 - mod1(ncodeunits(str), 4))) + a = 4 - mod(ncodeunits(str), 4) + SHA.update!(ctx, (0x05, ntuple(Returns(UInt8(a)), a)...)) SHA.digest!(ctx) end From 187f7d17f60c87b0dcbcbf117683b5186aea96e0 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Mon, 2 Oct 2023 11:47:18 +0200 Subject: [PATCH 4/5] fix collisions --- stdlib/Random/src/RNGs.jl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl index 444fa3f96d609..7db5d570b8887 100644 --- a/stdlib/Random/src/RNGs.jl +++ b/stdlib/Random/src/RNGs.jl @@ -324,10 +324,14 @@ function hash_seed(str::AbstractString) SHA.update!(ctx, codeunits(str)) # signature for strings: so far, all hash_seed functions end-up hashing a multiple # of 4 bytes of data, and add the signature (1 byte) at the end; so hash as many - # 0x05 bytes as necessary to have a total number of hashed bytes equal to 1 mod 4 - a = 4 - mod(ncodeunits(str), 4) - SHA.update!(ctx, (0x05, ntuple(Returns(UInt8(a)), a)...)) - SHA.digest!(ctx) + # bytes as necessary to have a total number of hashed bytes equal to 0 mod 4 (padding), + # and then hash the signature 0x05; in order for strings of different lengths to have + # different hashes, padding bytes are set equal to the number of padding bytes + pad = 4 - mod(ncodeunits(str), 4) + for _=1:pad + SHA.update!(ctx, (pad % UInt8,)) + end + SHA.update!(ctx, (0x05,)) end From e4712ba1fb0cc51c562ba4d3097d2b30356e095e Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Mon, 9 Oct 2023 11:40:58 +0200 Subject: [PATCH 5/5] add digest! Co-authored-by: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com> --- stdlib/Random/src/RNGs.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl index 7db5d570b8887..7782de88ba537 100644 --- a/stdlib/Random/src/RNGs.jl +++ b/stdlib/Random/src/RNGs.jl @@ -332,6 +332,7 @@ function hash_seed(str::AbstractString) SHA.update!(ctx, (pad % UInt8,)) end SHA.update!(ctx, (0x05,)) + SHA.digest!(ctx) end