diff --git a/CHANGELOG.md b/CHANGELOG.md index d071e09391..3b96bc2eb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update. - Add `UniformUsize` and use to make `Uniform` for `usize` portable (#1487) - Remove support for generating `isize` and `usize` values with `Standard`, `Uniform` and `Fill` and usage as a `WeightedAliasIndex` weight (#1487) - Require `Clone` and `AsRef` bound for `SeedableRng::Seed`. (#1491) +- Improve SmallRng initialization performance ## [0.9.0-alpha.1] - 2024-03-18 - Add the `Slice::num_choices` method to the Slice distribution (#1402) diff --git a/benches/benches/generators.rs b/benches/benches/generators.rs index 580d989755..bea4f60a6b 100644 --- a/benches/benches/generators.rs +++ b/benches/benches/generators.rs @@ -19,7 +19,7 @@ use rand_pcg::{Pcg32, Pcg64, Pcg64Dxsm, Pcg64Mcg}; criterion_group!( name = benches; config = Criterion::default(); - targets = gen_bytes, gen_u32, gen_u64, init_gen, reseeding_bytes + targets = gen_bytes, gen_u32, gen_u64, init_gen, init_from_u64, init_from_seed, reseeding_bytes ); criterion_main!(benches); @@ -133,6 +133,62 @@ pub fn init_gen(c: &mut Criterion) { bench::(&mut g, "chacha12"); bench::(&mut g, "chacha20"); bench::(&mut g, "std"); + bench::(&mut g, "small"); + + g.finish() +} + +pub fn init_from_u64(c: &mut Criterion) { + let mut g = c.benchmark_group("init_from_u64"); + g.warm_up_time(Duration::from_millis(500)); + g.measurement_time(Duration::from_millis(1000)); + + fn bench(g: &mut BenchmarkGroup, name: &str) { + g.bench_function(name, |b| { + let mut rng = Pcg32::from_os_rng(); + let seed = rng.random(); + b.iter(|| R::seed_from_u64(black_box(seed))); + }); + } + + bench::(&mut g, "pcg32"); + bench::(&mut g, "pcg64"); + bench::(&mut g, "pcg64mcg"); + bench::(&mut g, "pcg64dxsm"); + bench::(&mut g, "chacha8"); + bench::(&mut g, "chacha12"); + bench::(&mut g, "chacha20"); + bench::(&mut g, "std"); + bench::(&mut g, "small"); + + g.finish() +} + +pub fn init_from_seed(c: &mut Criterion) { + let mut g = c.benchmark_group("init_from_seed"); + g.warm_up_time(Duration::from_millis(500)); + g.measurement_time(Duration::from_millis(1000)); + + fn bench(g: &mut BenchmarkGroup, name: &str) + where + rand::distr::Standard: Distribution<::Seed>, + { + g.bench_function(name, |b| { + let mut rng = Pcg32::from_os_rng(); + let seed = rng.random(); + b.iter(|| R::from_seed(black_box(seed.clone()))); + }); + } + + bench::(&mut g, "pcg32"); + bench::(&mut g, "pcg64"); + bench::(&mut g, "pcg64mcg"); + bench::(&mut g, "pcg64dxsm"); + bench::(&mut g, "chacha8"); + bench::(&mut g, "chacha12"); + bench::(&mut g, "chacha20"); + bench::(&mut g, "std"); + bench::(&mut g, "small"); g.finish() } diff --git a/src/rngs/small.rs b/src/rngs/small.rs index ea7df06284..cfc6b0c988 100644 --- a/src/rngs/small.rs +++ b/src/rngs/small.rs @@ -83,7 +83,8 @@ impl SeedableRng for SmallRng { #[inline(always)] fn from_seed(seed: Self::Seed) -> Self { - // With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap(); + // This is for compatibility with 32-bit platforms where Rng::Seed has a different seed size + // With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap() const LEN: usize = core::mem::size_of::<::Seed>(); let seed = (&seed[..LEN]).try_into().unwrap(); SmallRng(Rng::from_seed(seed)) diff --git a/src/rngs/xoshiro128plusplus.rs b/src/rngs/xoshiro128plusplus.rs index 44e4222c88..df63761cfd 100644 --- a/src/rngs/xoshiro128plusplus.rs +++ b/src/rngs/xoshiro128plusplus.rs @@ -33,29 +33,36 @@ impl SeedableRng for Xoshiro128PlusPlus { /// mapped to a different seed. #[inline] fn from_seed(seed: [u8; 16]) -> Xoshiro128PlusPlus { - if seed.iter().all(|&x| x == 0) { - return Self::seed_from_u64(0); - } let mut state = [0; 4]; read_u32_into(&seed, &mut state); + // Check for zero on aligned integers for better code generation. + // Furtermore, seed_from_u64(0) will expand to a constant when optimized. + if state.iter().all(|&x| x == 0) { + return Self::seed_from_u64(0); + } Xoshiro128PlusPlus { s: state } } /// Create a new `Xoshiro128PlusPlus` from a `u64` seed. /// /// This uses the SplitMix64 generator internally. + #[inline] fn seed_from_u64(mut state: u64) -> Self { const PHI: u64 = 0x9e3779b97f4a7c15; - let mut seed = Self::Seed::default(); - for chunk in seed.as_mut().chunks_mut(8) { + let mut s = [0; 4]; + for i in s.chunks_exact_mut(2) { state = state.wrapping_add(PHI); let mut z = state; z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9); z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb); z = z ^ (z >> 31); - chunk.copy_from_slice(&z.to_le_bytes()); + i[0] = z.to_le() as u32; + i[1] = (z.to_le() >> 32) as u32; } - Self::from_seed(seed) + // By using a non-zero PHI we are guaranteed to generate a non-zero state + // Thus preventing a recursion between from_seed and seed_from_u64. + debug_assert_ne!(s, [0; 4]); + Xoshiro128PlusPlus { s } } } diff --git a/src/rngs/xoshiro256plusplus.rs b/src/rngs/xoshiro256plusplus.rs index b356ff510c..ebb3eac2e3 100644 --- a/src/rngs/xoshiro256plusplus.rs +++ b/src/rngs/xoshiro256plusplus.rs @@ -33,29 +33,35 @@ impl SeedableRng for Xoshiro256PlusPlus { /// mapped to a different seed. #[inline] fn from_seed(seed: [u8; 32]) -> Xoshiro256PlusPlus { - if seed.iter().all(|&x| x == 0) { - return Self::seed_from_u64(0); - } let mut state = [0; 4]; read_u64_into(&seed, &mut state); + // Check for zero on aligned integers for better code generation. + // Furtermore, seed_from_u64(0) will expand to a constant when optimized. + if state.iter().all(|&x| x == 0) { + return Self::seed_from_u64(0); + } Xoshiro256PlusPlus { s: state } } /// Create a new `Xoshiro256PlusPlus` from a `u64` seed. /// /// This uses the SplitMix64 generator internally. + #[inline] fn seed_from_u64(mut state: u64) -> Self { const PHI: u64 = 0x9e3779b97f4a7c15; - let mut seed = Self::Seed::default(); - for chunk in seed.as_mut().chunks_mut(8) { + let mut s = [0; 4]; + for i in s.iter_mut() { state = state.wrapping_add(PHI); let mut z = state; z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9); z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb); z = z ^ (z >> 31); - chunk.copy_from_slice(&z.to_le_bytes()); + *i = z.to_le(); } - Self::from_seed(seed) + // By using a non-zero PHI we are guaranteed to generate a non-zero state + // Thus preventing a recursion between from_seed and seed_from_u64. + debug_assert_ne!(s, [0; 4]); + Xoshiro256PlusPlus { s } } }