From 04888102e9e98a3bd7ab45247445b6471fcb7eed Mon Sep 17 00:00:00 2001 From: Paul Crowley Date: Thu, 29 Dec 2022 08:43:42 -0800 Subject: [PATCH] Simpler and faster implementation of Floyd's F2 The previous implementation used either `Vec::insert` or a second F-Y shuffling phase to achieve fair random order. Instead, use the random numbers already drawn to achieve a fair shuffle. --- CHANGELOG.md | 7 +++++++ src/seq/index.rs | 29 +++++++---------------------- src/seq/mod.rs | 2 +- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0872af6d39..b27a5edb493 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,13 @@ A [separate changelog is kept for rand_core](rand_core/CHANGELOG.md). You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.html) useful. +## [Unreleased API changing release] + +### Other +- Simpler and faster implementation of Floyd's F2 (#1277). This + changes some outputs from `rand::seq::index::sample` and + `rand::seq::SliceRandom::choose_multiple`. + ## [0.8.5] - 2021-08-20 ### Fixes - Fix build on non-32/64-bit architectures (#1144) diff --git a/src/seq/index.rs b/src/seq/index.rs index ecd7d9f7383..b7bc6a9b26a 100644 --- a/src/seq/index.rs +++ b/src/seq/index.rs @@ -380,33 +380,18 @@ where /// This implementation uses `O(amount)` memory and `O(amount^2)` time. fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec where R: Rng + ?Sized { - // For small amount we use Floyd's fully-shuffled variant. For larger - // amounts this is slow due to Vec::insert performance, so we shuffle - // afterwards. Benchmarks show little overhead from extra logic. - let floyd_shuffle = amount < 50; - + // Note that the values returned by `rng.gen_range()` can be + // inferred from the returned vector by working backwards from + // the last entry. This bijection proves the algorithm fair. 
debug_assert!(amount <= length); let mut indices = Vec::with_capacity(amount as usize); for j in length - amount..length { let t = rng.gen_range(0..=j); - if floyd_shuffle { - if let Some(pos) = indices.iter().position(|&x| x == t) { - indices.insert(pos, j); - continue; - } - } else if indices.contains(&t) { - indices.push(j); - continue; + if let Some(pos) = indices.iter().position(|&x| x == t) { + indices[pos] = j; } indices.push(t); } - if !floyd_shuffle { - // Reimplement SliceRandom::shuffle with smaller indices - for i in (1..amount).rev() { - // invariant: elements with index > i have been locked in place. - indices.swap(i as usize, rng.gen_range(0..=i) as usize); - } - } IndexVec::from(indices) } @@ -628,8 +613,8 @@ mod test { ); }; - do_test(10, 6, &[8, 0, 3, 5, 9, 6]); // floyd - do_test(25, 10, &[18, 15, 14, 9, 0, 13, 5, 24]); // floyd + do_test(10, 6, &[8, 3, 5, 9, 0, 6]); // floyd + do_test(25, 10, &[18, 14, 9, 15, 0, 13, 5, 24]); // floyd do_test(300, 8, &[30, 283, 150, 1, 73, 13, 285, 35]); // floyd do_test(300, 80, &[31, 289, 248, 154, 5, 78, 19, 286]); // inplace do_test(300, 180, &[31, 289, 248, 154, 5, 78, 19, 286]); // inplace diff --git a/src/seq/mod.rs b/src/seq/mod.rs index 24c65bc9f08..a61e516924c 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -725,7 +725,7 @@ mod test { .choose_multiple(&mut r, 8) .cloned() .collect::<Vec<char>>(), - &['d', 'm', 'b', 'n', 'c', 'k', 'h', 'e'] + &['d', 'm', 'n', 'k', 'h', 'e', 'b', 'c'] ); #[cfg(feature = "alloc")]