From 2e28810156a556e0d492a2075a7fe06333f05433 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Mon, 18 Nov 2024 16:55:43 +0000
Subject: [PATCH 01/21] Add KS test for WeightedIndex

---
 distr_test/tests/weighted.rs | 37 ++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 distr_test/tests/weighted.rs
diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
new file mode 100644
index 0000000000..0a3078ccd0
--- /dev/null
+++ b/distr_test/tests/weighted.rs
@@ -0,0 +1,37 @@
+// Copyright 2024 Developers of the Rand project.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+mod ks;
+use ks::test_discrete;
+use rand::distr::WeightedIndex;
+
+#[test]
+fn weighted_index() {
+    fn test_weights(num: usize, weight: impl Fn(i64) -> f64) {
+        let distr = WeightedIndex::new((0..num).map(|i| weight(i as i64))).unwrap();
+
+        let mut cdf = Vec::with_capacity(num);
+        let mut ac = 0.0;
+        for i in 0..num {
+            ac += weight(i as i64);
+            cdf.push(ac);
+        }
+        let frac = 1.0 / ac;
+        for x in &mut cdf {
+            *x = *x * frac;
+        }
+
+        test_discrete(0, distr, |i| if i < 0 { 0.0 } else { cdf[i as usize] });
+    }
+
+    test_weights(100, |_| 1.0);
+    test_weights(100, |i| ((i + 1) as f64).ln());
+    test_weights(100, |i| i as f64);
+    test_weights(100, |i| (i as f64).powi(3));
+    test_weights(100, |i| 1.0 / ((i + 1) as f64));
+}

From eb1836bb343627d189371331115b6478f5f18feb Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Mon, 18 Nov 2024 17:05:51 +0000
Subject: [PATCH 02/21] Add KS test for WeightedAliasIndex

---
 distr_test/Cargo.toml        |  2 +-
 distr_test/tests/weighted.rs | 48 +++++++++++++++++++++++++++---------
 2 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/distr_test/Cargo.toml b/distr_test/Cargo.toml
index 7f37853b3c..36314b37cf 100644
--- a/distr_test/Cargo.toml
+++ b/distr_test/Cargo.toml
@@ -5,7 +5,7 @@ edition = "2021"
 publish = false
 
 [dev-dependencies]
-rand_distr = { path = "../rand_distr", version = "=0.5.0-alpha.1", default-features = false }
+rand_distr = { path = "../rand_distr", version = "=0.5.0-alpha.1", default-features = false, features = ["alloc"] }
 rand = { path = "..", version = "=0.9.0-alpha.1", features = ["small_rng"] }
 num-traits = "0.2.19"
 # Special functions for testing distributions
diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index 0a3078ccd0..fe31346bd2 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -9,24 +9,50 @@
 mod ks;
 use ks::test_discrete;
 use rand::distr::WeightedIndex;
+use rand_distr::WeightedAliasIndex;
+
+fn make_cdf(num: usize, f: impl Fn(i64) -> f64) -> impl Fn(i64) -> f64 {
+    let mut cdf = Vec::with_capacity(num);
+    let mut ac = 0.0;
+    for i in 0..num {
+        ac += f(i as i64);
+        cdf.push(ac);
+    }
+
+    let frac = 1.0 / ac;
+    for x in &mut cdf {
+        *x = *x * frac;
+    }
+
+    move |i| {
+        if i < 0 {
+            0.0
+        } else {
+            cdf[i as usize]
+        }
+    }
+}
 
 #[test]
 fn weighted_index() {
     fn test_weights(num: usize, weight: impl Fn(i64) -> f64) {
         let distr = WeightedIndex::new((0..num).map(|i| weight(i as i64))).unwrap();
+        test_discrete(0, distr, make_cdf(num, weight));
+    }
 
-        let mut cdf = Vec::with_capacity(num);
-        let mut ac = 0.0;
-        for i in 0..num {
-            ac += weight(i as i64);
-            cdf.push(ac);
-        }
-        let frac = 1.0 / ac;
-        for x in &mut cdf {
-            *x = *x * frac;
-        }
+    test_weights(100, |_| 1.0);
+    test_weights(100, |i| ((i + 1) as f64).ln());
+    test_weights(100, |i| i as f64);
+    test_weights(100, |i| (i as f64).powi(3));
+    test_weights(100, |i| 1.0 / ((i + 1) as f64));
+}
 
-        test_discrete(0, distr, |i| if i < 0 { 0.0 } else { cdf[i as usize] });
+#[test]
+fn weighted_alias_index() {
+    fn test_weights(num: usize, weight: impl Fn(i64) -> f64) {
+        let weights = (0..num).map(|i| weight(i as i64)).collect();
+        let distr = WeightedAliasIndex::new(weights).unwrap();
+        test_discrete(0, distr, make_cdf(num, weight));
     }
 
     test_weights(100, |_| 1.0);

From a8ce256881a2733ee36162de9e8a3e4417bf3be3 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Mon, 18 Nov 2024 17:06:58 +0000
Subject: [PATCH 03/21] Add KS test for WeightedTreeIndex

---
 distr_test/tests/weighted.rs | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index fe31346bd2..fadbad620e 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -9,7 +9,7 @@
 mod ks;
 use ks::test_discrete;
 use rand::distr::WeightedIndex;
-use rand_distr::WeightedAliasIndex;
+use rand_distr::{WeightedAliasIndex, WeightedTreeIndex};
 
 fn make_cdf(num: usize, f: impl Fn(i64) -> f64) -> impl Fn(i64) -> f64 {
     let mut cdf = Vec::with_capacity(num);
@@ -61,3 +61,17 @@ fn weighted_alias_index() {
     test_weights(100, |i| (i as f64).powi(3));
     test_weights(100, |i| 1.0 / ((i + 1) as f64));
 }
+
+#[test]
+fn weighted_tree_index() {
+    fn test_weights(num: usize, weight: impl Fn(i64) -> f64) {
+        let distr = WeightedTreeIndex::new((0..num).map(|i| weight(i as i64))).unwrap();
+        test_discrete(0, distr, make_cdf(num, weight));
+    }
+
+    test_weights(100, |_| 1.0);
+    test_weights(100, |i| ((i + 1) as f64).ln());
+    test_weights(100, |i| i as f64);
+    test_weights(100, |i| (i as f64).powi(3));
+    test_weights(100, |i| 1.0 / ((i + 1) as f64));
+}

From 9e03a1571064cf5428675169cb95329c833ffd92 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Mon, 18 Nov 2024 17:20:35 +0000
Subject: [PATCH 04/21] Add KS test for IndexedRandom::choose_weighted

---
 distr_test/tests/weighted.rs | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index fadbad620e..d577535f11 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -8,7 +8,8 @@
 
 mod ks;
 use ks::test_discrete;
-use rand::distr::WeightedIndex;
+use rand::distr::{Distribution, WeightedIndex};
+use rand::seq::IndexedRandom;
 use rand_distr::{WeightedAliasIndex, WeightedTreeIndex};
 
 fn make_cdf(num: usize, f: impl Fn(i64) -> f64) -> impl Fn(i64) -> f64 {
@@ -75,3 +76,24 @@ fn weighted_tree_index() {
     test_weights(100, |i| (i as f64).powi(3));
     test_weights(100, |i| 1.0 / ((i + 1) as f64));
 }
+
+#[test]
+fn choose_weighted_indexed() {
+    struct Adapter<F: Fn(i64) -> f64>(Vec<i64>, F);
+    impl<F: Fn(i64) -> f64> Distribution<i64> for Adapter<F> {
+        fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> i64 {
+            *IndexedRandom::choose_weighted(&self.0[..], rng, |i| (self.1)(*i)).unwrap()
+        }
+    }
+
+    fn test_weights(num: usize, weight: impl Fn(i64) -> f64) {
+        let distr = Adapter((0..num).map(|i| i as i64).collect(), &weight);
+        test_discrete(0, distr, make_cdf(num, |i| weight(i)));
+    }
+
+    test_weights(100, |_| 1.0);
+    test_weights(100, |i| ((i + 1) as f64).ln());
+    test_weights(100, |i| i as f64);
+    test_weights(100, |i| (i as f64).powi(3));
+    test_weights(100, |i| 1.0 / ((i + 1) as f64));
+}

From 4b0a2969eba758f851fb95fd998a4c7914b8f245 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Mon, 18 Nov 2024 17:26:14 +0000
Subject: [PATCH 05/21] Add KS test for IndexedRandom::choose_multiple_weighted
 (one element)

---
 distr_test/tests/weighted.rs | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index d577535f11..8097533087 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -97,3 +97,27 @@ fn choose_weighted_indexed() {
     test_weights(100, |i| (i as f64).powi(3));
     test_weights(100, |i| 1.0 / ((i + 1) as f64));
 }
+
+#[test]
+fn choose_one_weighted_indexed() {
+    struct Adapter<F: Fn(i64) -> f64>(Vec<i64>, F);
+    impl<F: Fn(i64) -> f64> Distribution<i64> for Adapter<F> {
+        fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> i64 {
+            *IndexedRandom::choose_multiple_weighted(&self.0[..], rng, 1, |i| (self.1)(*i))
+                .unwrap()
+                .next()
+                .unwrap()
+        }
+    }
+
+    fn test_weights(num: usize, weight: impl Fn(i64) -> f64) {
+        let distr = Adapter((0..num).map(|i| i as i64).collect(), &weight);
+        test_discrete(0, distr, make_cdf(num, |i| weight(i)));
+    }
+
+    test_weights(100, |_| 1.0);
+    test_weights(100, |i| ((i + 1) as f64).ln());
+    test_weights(100, |i| i as f64);
+    test_weights(100, |i| (i as f64).powi(3));
+    test_weights(100, |i| 1.0 / ((i + 1) as f64));
+}

From 25842126ff396d7b1f1a5e90061833eb91561142 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Mon, 18 Nov 2024 17:42:42 +0000
Subject: [PATCH 06/21] Add KS test for IndexedRandom::choose_multiple_weighted
 (two elements)

Some failures
---
 distr_test/tests/weighted.rs | 48 ++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index 8097533087..d0628c37fa 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -121,3 +121,51 @@ fn choose_one_weighted_indexed() {
     test_weights(100, |i| (i as f64).powi(3));
     test_weights(100, |i| 1.0 / ((i + 1) as f64));
 }
+
+#[test]
+fn choose_two_weighted_indexed() {
+    struct Adapter<F: Fn(i64) -> f64>(Vec<i64>, F);
+    impl<F: Fn(i64) -> f64> Distribution<i64> for Adapter<F> {
+        fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> i64 {
+            let mut iter =
+                IndexedRandom::choose_multiple_weighted(&self.0[..], rng, 2, |i| (self.1)(*i))
+                    .unwrap();
+            let a = *iter.next().unwrap();
+            let b = *iter.next().unwrap();
+            assert!(iter.next().is_none());
+            a * self.0.len() as i64 + b
+        }
+    }
+
+    fn test_weights(num: usize, weight: impl Fn(i64) -> f64) {
+        let distr = Adapter((0..num).map(|i| i as i64).collect(), &weight);
+
+        let pmf1 = (0..num).map(|i| weight(i as i64)).collect::<Vec<f64>>();
+        let total: f64 = pmf1.iter().sum();
+        let mut ac = 0.0;
+        let frac = total.powi(-2);
+        let mut cdf = Vec::with_capacity(num * num);
+        for a in 0..num {
+            for b in 0..num {
+                ac += pmf1[a] * pmf1[b];
+                cdf.push(ac * frac);
+            }
+        }
+
+        let cdf = |i| {
+            if i < 0 {
+                0.0
+            } else {
+                cdf[i as usize]
+            }
+        };
+
+        test_discrete(0, distr, cdf);
+    }
+
+    test_weights(100, |_| 1.0);
+    test_weights(100, |i| ((i + 1) as f64).ln());
+    // test_weights(100, |i| i as f64);
+    // test_weights(100, |i| (i as f64).powi(3));
+    // test_weights(100, |i| 1.0 / ((i + 1) as f64));
+}

From 336ddbe8fe5b435a8f8fbbb29a49f1f3d7e519b2 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Mon, 18 Nov 2024 17:48:48 +0000
Subject: [PATCH 07/21] Add KS test for IteratorRandom::choose

---
 distr_test/tests/weighted.rs | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index d0628c37fa..8df9604399 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -9,7 +9,7 @@
 mod ks;
 use ks::test_discrete;
 use rand::distr::{Distribution, WeightedIndex};
-use rand::seq::IndexedRandom;
+use rand::seq::{IndexedRandom, IteratorRandom};
 use rand_distr::{WeightedAliasIndex, WeightedTreeIndex};
 
 fn make_cdf(num: usize, f: impl Fn(i64) -> f64) -> impl Fn(i64) -> f64 {
@@ -169,3 +169,16 @@ fn choose_two_weighted_indexed() {
     // test_weights(100, |i| (i as f64).powi(3));
     // test_weights(100, |i| 1.0 / ((i + 1) as f64));
 }
+
+#[test]
+fn choose_iterator() {
+    struct Adapter<I>(I);
+    impl<I: Clone + Iterator<Item = i64>> Distribution<i64> for Adapter<I> {
+        fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> i64 {
+            IteratorRandom::choose(self.0.clone(), rng).unwrap()
+        }
+    }
+
+    let distr = Adapter((0..100).map(|i| i as i64));
+    test_discrete(0, distr, make_cdf(100, |_| 1.0));
+}

From 2ef212b19653de10d52a3f0dc288123843a0a18f Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Mon, 18 Nov 2024 17:50:10 +0000
Subject: [PATCH 08/21] Add KS test for IteratorRandom::choose_stable

---
 distr_test/tests/weighted.rs | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index 8df9604399..9adefb73e8 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -182,3 +182,16 @@ fn choose_iterator() {
     let distr = Adapter((0..100).map(|i| i as i64));
     test_discrete(0, distr, make_cdf(100, |_| 1.0));
 }
+
+#[test]
+fn choose_stable_iterator() {
+    struct Adapter<I>(I);
+    impl<I: Clone + Iterator<Item = i64>> Distribution<i64> for Adapter<I> {
+        fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> i64 {
+            IteratorRandom::choose_stable(self.0.clone(), rng).unwrap()
+        }
+    }
+
+    let distr = Adapter((0..100).map(|i| i as i64));
+    test_discrete(0, distr, make_cdf(100, |_| 1.0));
+}

From a0908bc44a3220ba1baa56b7a5d502e5ab6063f7 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Mon, 18 Nov 2024 19:10:49 +0000
Subject: [PATCH 09/21] Add KS test for IteratorRandom::choose_multiple_fill
 (two elements)

---
 distr_test/tests/weighted.rs | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index 9adefb73e8..a4c74224cf 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -195,3 +195,26 @@ fn choose_stable_iterator() {
     let distr = Adapter((0..100).map(|i| i as i64));
     test_discrete(0, distr, make_cdf(100, |_| 1.0));
 }
+
+#[test]
+fn choose_two_iterator() {
+    struct Adapter<I>(I);
+    impl<I: Clone + Iterator<Item = i64>> Distribution<i64> for Adapter<I> {
+        fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> i64 {
+            let mut buf = [0; 2];
+            IteratorRandom::choose_multiple_fill(self.0.clone(), rng, &mut buf);
+            buf.sort_unstable();
+            assert!(buf[0] < 99 && buf[1] >= 1);
+            let a = buf[0];
+            4950 - (99 - a) * (100 - a) / 2 + buf[1] - a - 1
+        }
+    }
+
+    let distr = Adapter((0..100).map(|i| i as i64));
+
+    test_discrete(
+        0,
+        distr,
+        |i| if i < 0 { 0.0 } else { (i + 1) as f64 / 4950.0 },
+    );
+}

From 865aba2c4f4fe02a80bbb8d262973691994d23c2 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Mon, 18 Nov 2024 19:28:11 +0000
Subject: [PATCH 10/21] Fix cdf for choose_two_weighted_indexed

---
 distr_test/tests/weighted.rs | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index a4c74224cf..cb89287556 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -130,9 +130,12 @@ fn choose_two_weighted_indexed() {
             let mut iter =
                 IndexedRandom::choose_multiple_weighted(&self.0[..], rng, 2, |i| (self.1)(*i))
                     .unwrap();
-            let a = *iter.next().unwrap();
-            let b = *iter.next().unwrap();
+            let mut a = *iter.next().unwrap();
+            let mut b = *iter.next().unwrap();
             assert!(iter.next().is_none());
+            if b < a {
+                std::mem::swap(&mut a, &mut b);
+            }
             a * self.0.len() as i64 + b
         }
     }
@@ -141,16 +144,21 @@ fn choose_two_weighted_indexed() {
         let distr = Adapter((0..num).map(|i| i as i64).collect(), &weight);
 
         let pmf1 = (0..num).map(|i| weight(i as i64)).collect::<Vec<f64>>();
-        let total: f64 = pmf1.iter().sum();
+        let sum: f64 = pmf1.iter().sum();
+        let sum_sq: f64 = pmf1.iter().map(|x| x * x).sum();
+        let frac = 2.0 / (sum * sum - sum_sq);
+
         let mut ac = 0.0;
-        let frac = total.powi(-2);
         let mut cdf = Vec::with_capacity(num * num);
         for a in 0..num {
             for b in 0..num {
-                ac += pmf1[a] * pmf1[b];
+                if a < b {
+                    ac += pmf1[a] * pmf1[b];
+                }
                 cdf.push(ac * frac);
             }
         }
+        assert!((ac * frac - 1.0).abs() < 1e-9);
 
         let cdf = |i| {
             if i < 0 {

From 16a16c6420c9e1569d7a277da0344b2e17329e46 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Tue, 19 Nov 2024 10:09:34 +0000
Subject: [PATCH 11/21] More complex test for choose_multiple_weighted

---
 src/seq/slice.rs | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/seq/slice.rs b/src/seq/slice.rs
index 4144f91345..1fc10c0985 100644
--- a/src/seq/slice.rs
+++ b/src/seq/slice.rs
@@ -732,17 +732,17 @@ mod test {
         use super::*;
 
         // The theoretical probabilities of the different outcomes are:
-        // AB: 0.5  * 0.5  = 0.250
-        // AC: 0.5  * 0.5  = 0.250
-        // BA: 0.25 * 0.67 = 0.167
-        // BC: 0.25 * 0.33 = 0.082
-        // CA: 0.25 * 0.67 = 0.167
-        // CB: 0.25 * 0.33 = 0.082
-        let choices = [('a', 2), ('b', 1), ('c', 1)];
+        // AB: 0.5   * 0.667 = 0.3333
+        // AC: 0.5   * 0.333 = 0.1667
+        // BA: 0.333 * 0.75  = 0.25
+        // BC: 0.333 * 0.25  = 0.0833
+        // CA: 0.167 * 0.6   = 0.1
+        // CB: 0.167 * 0.4   = 0.0667
+        let choices = [('a', 3), ('b', 2), ('c', 1)];
         let mut rng = crate::test::rng(414);
 
         let mut results = [0i32; 3];
-        let expected_results = [4167, 4167, 1666];
+        let expected_results = [5833, 2667, 1500];
         for _ in 0..10000 {
             let result = choices
                 .choose_multiple_weighted(&mut rng, 2, |item| item.1)

From 554d3311a32ce91305d644d2c4250fdf05f6699d Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Tue, 19 Nov 2024 10:23:49 +0000
Subject: [PATCH 12/21] Fix calculated CDF in choose_two_weighted_indexed

---
 distr_test/tests/weighted.rs | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index cb89287556..0d24c88909 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -145,20 +145,25 @@ fn choose_two_weighted_indexed() {
 
         let pmf1 = (0..num).map(|i| weight(i as i64)).collect::<Vec<f64>>();
         let sum: f64 = pmf1.iter().sum();
-        let sum_sq: f64 = pmf1.iter().map(|x| x * x).sum();
-        let frac = 2.0 / (sum * sum - sum_sq);
+        let frac = 1.0 / sum;
 
         let mut ac = 0.0;
         let mut cdf = Vec::with_capacity(num * num);
         for a in 0..num {
             for b in 0..num {
                 if a < b {
-                    ac += pmf1[a] * pmf1[b];
+                    let pa = pmf1[a] * frac;
+                    let pab = pa * pmf1[b] / (sum - pmf1[a]);
+
+                    let pb = pmf1[b] * frac;
+                    let pba = pb * pmf1[a] / (sum - pmf1[b]);
+
+                    ac += pab + pba;
                 }
-                cdf.push(ac * frac);
+                cdf.push(ac);
             }
         }
-        assert!((ac * frac - 1.0).abs() < 1e-9);
+        assert!((cdf.last().unwrap() - 1.0).abs() < 1e-9);
 
         let cdf = |i| {
             if i < 0 {
@@ -173,9 +178,9 @@ fn choose_two_weighted_indexed() {
 
     test_weights(100, |_| 1.0);
     test_weights(100, |i| ((i + 1) as f64).ln());
-    // test_weights(100, |i| i as f64);
-    // test_weights(100, |i| (i as f64).powi(3));
-    // test_weights(100, |i| 1.0 / ((i + 1) as f64));
+    test_weights(100, |i| i as f64);
+    test_weights(100, |i| (i as f64).powi(3));
+    test_weights(100, |i| 1.0 / ((i + 1) as f64));
 }
 
 #[test]

From d6459528c5f04e93dcbd9c241c1cd9166ff65ac9 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Tue, 19 Nov 2024 10:32:29 +0000
Subject: [PATCH 13/21] Test and fix choose_multiple_weighted with very small
 probabilities: #1476

Also improves choose_two_weighted_indexed time by 23% (excluding new test)
---
 distr_test/tests/weighted.rs | 1 +
 src/seq/index.rs             | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index 0d24c88909..d617de2b77 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -181,6 +181,7 @@ fn choose_two_weighted_indexed() {
     test_weights(100, |i| i as f64);
     test_weights(100, |i| (i as f64).powi(3));
     test_weights(100, |i| 1.0 / ((i + 1) as f64));
+    test_weights(10, |i| ((i + 1) as f64).powi(-8));
 }
 
 #[test]
diff --git a/src/seq/index.rs b/src/seq/index.rs
index 70231dde2c..f457e8a162 100644
--- a/src/seq/index.rs
+++ b/src/seq/index.rs
@@ -392,7 +392,7 @@ where
     while index < length {
         let weight = weight(index.as_usize()).into();
         if weight > 0.0 {
-            let key = rng.random::<f64>().powf(1.0 / weight);
+            let key = rng.random::<f64>().ln() / weight;
             candidates.push(Element { index, key });
         } else if !(weight >= 0.0) {
             return Err(WeightError::InvalidWeight);

From b806b29812c05e8b705358b5fd73041e475c704b Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Tue, 19 Nov 2024 10:44:00 +0000
Subject: [PATCH 14/21] sample_efraimidis_spirakis: keep at most amount
 candidates

Approx 2% improvement to tests sampling 2 of 100 elements
---
 src/seq/index.rs | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/src/seq/index.rs b/src/seq/index.rs
index f457e8a162..24baa47fcb 100644
--- a/src/seq/index.rs
+++ b/src/seq/index.rs
@@ -387,13 +387,18 @@ where
 
     impl<N> Eq for Element<N> {}
 
-    let mut candidates = Vec::with_capacity(length.as_usize());
+    let mut candidates = Vec::with_capacity(amount.as_usize());
     let mut index = N::zero();
     while index < length {
         let weight = weight(index.as_usize()).into();
         if weight > 0.0 {
             let key = rng.random::<f64>().ln() / weight;
-            candidates.push(Element { index, key });
+            if candidates.len() < amount.as_usize() {
+                candidates.push(Element { index, key });
+            } else if key > candidates[0].key {
+                candidates[0] = Element { index, key };
+            }
+            candidates.sort_unstable();
         } else if !(weight >= 0.0) {
             return Err(WeightError::InvalidWeight);
         }
@@ -406,18 +411,7 @@ where
         return Err(WeightError::InsufficientNonZero);
     }
 
-    // Partially sort the array to find the `amount` elements with the greatest
-    // keys. Do this by using `select_nth_unstable` to put the elements with
-    // the *smallest* keys at the beginning of the list in `O(n)` time, which
-    // provides equivalent information about the elements with the *greatest* keys.
-    let (_, mid, greater) = candidates.select_nth_unstable(avail - amount.as_usize());
-
-    let mut result: Vec<N> = Vec::with_capacity(amount.as_usize());
-    result.push(mid.index);
-    for element in greater {
-        result.push(element.index);
-    }
-    Ok(IndexVec::from(result))
+    Ok(IndexVec::from(candidates.iter().map(|elt| elt.index).collect()))
 }
 
 /// Randomly sample exactly `amount` indices from `0..length`, using Floyd's

From 0f662b1d94180d715ad006f2ce2fed9dd22501db Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Tue, 19 Nov 2024 11:00:15 +0000
Subject: [PATCH 15/21] sample_efraimidis_spirakis: use algorithm A-ExpJ

This results in approx 18% faster tests choosing 2-in-100 items
---
 src/seq/index.rs | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/src/seq/index.rs b/src/seq/index.rs
index 24baa47fcb..00b9cac047 100644
--- a/src/seq/index.rs
+++ b/src/seq/index.rs
@@ -389,28 +389,44 @@ where
 
     let mut candidates = Vec::with_capacity(amount.as_usize());
     let mut index = N::zero();
-    while index < length {
+    while index < length && candidates.len() < amount.as_usize() {
         let weight = weight(index.as_usize()).into();
         if weight > 0.0 {
             let key = rng.random::<f64>().ln() / weight;
-            if candidates.len() < amount.as_usize() {
-                candidates.push(Element { index, key });
-            } else if key > candidates[0].key {
-                candidates[0] = Element { index, key };
-            }
-            candidates.sort_unstable();
+            // let key = rng.random::<f64>().powf(1.0 / weight);
+            candidates.push(Element { index, key });
         } else if !(weight >= 0.0) {
             return Err(WeightError::InvalidWeight);
         }
 
         index += N::one();
     }
+    candidates.sort_unstable();
 
-    let avail = candidates.len();
-    if avail < amount.as_usize() {
+    if candidates.len() < amount.as_usize() {
         return Err(WeightError::InsufficientNonZero);
     }
 
+    let mut x = rng.random::<f64>().ln() / candidates[0].key;
+    while index < length {
+        let weight = weight(index.as_usize()).into();
+        if weight > 0.0 {
+            x -= weight;
+            if x <= 0.0 {
+                let t = core::f64::consts::E.powf(candidates[0].key * weight);
+                let key = rng.random_range(t..1.0).ln() / weight;
+                candidates[0] = Element { index, key };
+                candidates.sort_unstable();
+
+                x = rng.random::<f64>().ln() / candidates[0].key;
+            }
+        } else if !(weight >= 0.0) {
+            return Err(WeightError::InvalidWeight);
+        }
+
+        index += N::one();
+    }
+
     Ok(IndexVec::from(candidates.iter().map(|elt| elt.index).collect()))
 }
 

From 51c6fdd3071fc9aaf8788bcfae9ff68a7cad1ded Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Tue, 19 Nov 2024 11:04:28 +0000
Subject: [PATCH 16/21] Rustfmt

---
 src/seq/index.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/seq/index.rs b/src/seq/index.rs
index 00b9cac047..6f3695602a 100644
--- a/src/seq/index.rs
+++ b/src/seq/index.rs
@@ -427,7 +427,9 @@ where
         index += N::one();
     }
 
-    Ok(IndexVec::from(candidates.iter().map(|elt| elt.index).collect()))
+    Ok(IndexVec::from(
+        candidates.iter().map(|elt| elt.index).collect(),
+    ))
 }
 
 /// Randomly sample exactly `amount` indices from `0..length`, using Floyd's

From a1f61ae0e63959927b8deda24d56fe21824abe25 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Tue, 19 Nov 2024 11:06:44 +0000
Subject: [PATCH 17/21] Clippy

---
 distr_test/tests/weighted.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index d617de2b77..c73ab6d632 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -22,7 +22,7 @@ fn make_cdf(num: usize, f: impl Fn(i64) -> f64) -> impl Fn(i64) -> f64 {
 
     let frac = 1.0 / ac;
     for x in &mut cdf {
-        *x = *x * frac;
+        *x *= frac;
     }
 
     move |i| {
@@ -88,7 +88,7 @@ fn choose_weighted_indexed() {
 
     fn test_weights(num: usize, weight: impl Fn(i64) -> f64) {
         let distr = Adapter((0..num).map(|i| i as i64).collect(), &weight);
-        test_discrete(0, distr, make_cdf(num, |i| weight(i)));
+        test_discrete(0, distr, make_cdf(num, &weight));
     }
 
     test_weights(100, |_| 1.0);
@@ -112,7 +112,7 @@ fn choose_one_weighted_indexed() {
 
     fn test_weights(num: usize, weight: impl Fn(i64) -> f64) {
         let distr = Adapter((0..num).map(|i| i as i64).collect(), &weight);
-        test_discrete(0, distr, make_cdf(num, |i| weight(i)));
+        test_discrete(0, distr, make_cdf(num, &weight));
     }
 
     test_weights(100, |_| 1.0);

From 1c147a4bc3a512e5ef01d554c46a01497cb73d63 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Sat, 23 Nov 2024 15:35:45 +0000
Subject: [PATCH 18/21] Temp: extra test

---
 tests/choose_multiple_weighted.rs | 39 +++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 tests/choose_multiple_weighted.rs

diff --git a/tests/choose_multiple_weighted.rs b/tests/choose_multiple_weighted.rs
new file mode 100644
index 0000000000..23082e2cff
--- /dev/null
+++ b/tests/choose_multiple_weighted.rs
@@ -0,0 +1,39 @@
+use rand::{prelude::IndexedRandom, rng};
+
+#[test]
+fn main() {
+    let values: Vec<(i32, f64)> =
+        vec![(1, 0.080), (2, 0.0078), (3, 0.012)]
+            .into_iter()
+            .map(|v| (v.0, f64::powf(v.1, 4.0)))
+            .collect();
+
+    for v in &values {
+        println!("value={} has weight={:.10}", v.0, v.1);
+    }
+    let weight_sum: f64 = values.iter().map(|(_, w)| w).sum();
+    let weights: Vec<f64> = values.iter().map(|(_, w)| w / weight_sum).collect();
+    println!("expected: {weights:?}");
+
+    let largest_weight = values[0].1;
+
+    let mut results = [0u32; 9];
+
+    for _ in 0..100_000 {
+        let mut iter = values.choose_multiple_weighted(&mut rng(), 2, |a| a.1 / largest_weight).unwrap();
+        let a = iter.next().unwrap().0;
+        let b = iter.next().unwrap().0;
+        assert!(iter.next().is_none());
+
+        let i = (a - 1) * 3 + b - 1;
+        results[i as usize] += 1;
+    }
+
+    println!("got following results:");
+    for i in 0..9 {
+        let i0 = i / 3 + 1;
+        let i1 = i % 3 + 1;
+        println!("({}, {}):\t{}", i0, i1, results[i]);
+    }
+    assert!(false);
+}

From ddb8f5f887bcdc082f5274744124f52de9bc09a7 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Mon, 25 Nov 2024 17:05:13 +0000
Subject: [PATCH 19/21] Review feedback

---
 distr_test/tests/weighted.rs |  1 +
 src/seq/index.rs             | 11 +++++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/distr_test/tests/weighted.rs b/distr_test/tests/weighted.rs
index c73ab6d632..cf87b3ee63 100644
--- a/distr_test/tests/weighted.rs
+++ b/distr_test/tests/weighted.rs
@@ -12,6 +12,7 @@ use rand::distr::{Distribution, WeightedIndex};
 use rand::seq::{IndexedRandom, IteratorRandom};
 use rand_distr::{WeightedAliasIndex, WeightedTreeIndex};
 
+/// Takes the unnormalized pdf and creates the cdf of a discrete distribution
 fn make_cdf(num: usize, f: impl Fn(i64) -> f64) -> impl Fn(i64) -> f64 {
     let mut cdf = Vec::with_capacity(num);
     let mut ac = 0.0;
diff --git a/src/seq/index.rs b/src/seq/index.rs
index 6f3695602a..39ac708a67 100644
--- a/src/seq/index.rs
+++ b/src/seq/index.rs
@@ -333,8 +333,8 @@ where
 /// ordering). The weights are to be provided by the input function `weights`,
 /// which will be called once for each index.
 ///
-/// This implementation uses the algorithm described by Efraimidis and Spirakis
-/// in this paper: <https://doi.org/10.1016/j.ipl.2005.11.003>
+/// This implementation is based on the algorithm A-ExpJ as found in
+/// [Efraimidis and Spirakis, 2005](https://doi.org/10.1016/j.ipl.2005.11.003).
 /// It uses `O(length + amount)` space and `O(length)` time.
 ///
 /// Error cases:
@@ -392,8 +392,9 @@ where
     while index < length && candidates.len() < amount.as_usize() {
         let weight = weight(index.as_usize()).into();
         if weight > 0.0 {
+            // We use the log of the key used in A-ExpJ to improve precision
+            // for small weights:
             let key = rng.random::<f64>().ln() / weight;
-            // let key = rng.random::<f64>().powf(1.0 / weight);
             candidates.push(Element { index, key });
         } else if !(weight >= 0.0) {
             return Err(WeightError::InvalidWeight);
@@ -413,9 +414,11 @@ where
         if weight > 0.0 {
             x -= weight;
             if x <= 0.0 {
-                let t = core::f64::consts::E.powf(candidates[0].key * weight);
+                let t = (candidates[0].key * weight).exp();
                 let key = rng.random_range(t..1.0).ln() / weight;
                 candidates[0] = Element { index, key };
+                // TODO: consider using a binary tree instead of sorting at each
+                // step. This should be faster for some THRESHOLD < amount.
                 candidates.sort_unstable();
 
                 x = rng.random::<f64>().ln() / candidates[0].key;

From a039a7fdf9c61b54c9fc0704ebdb7b3c318e8ed8 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Tue, 26 Nov 2024 09:44:03 +0000
Subject: [PATCH 20/21] Remove extra test

---
 tests/choose_multiple_weighted.rs | 39 -------------------------------
 1 file changed, 39 deletions(-)
 delete mode 100644 tests/choose_multiple_weighted.rs

diff --git a/tests/choose_multiple_weighted.rs b/tests/choose_multiple_weighted.rs
deleted file mode 100644
index 23082e2cff..0000000000
--- a/tests/choose_multiple_weighted.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-use rand::{prelude::IndexedRandom, rng};
-
-#[test]
-fn main() {
-    let values: Vec<(i32, f64)> =
-        vec![(1, 0.080), (2, 0.0078), (3, 0.012)]
-            .into_iter()
-            .map(|v| (v.0, f64::powf(v.1, 4.0)))
-            .collect();
-
-    for v in &values {
-        println!("value={} has weight={:.10}", v.0, v.1);
-    }
-    let weight_sum: f64 = values.iter().map(|(_, w)| w).sum();
-    let weights: Vec<f64> = values.iter().map(|(_, w)| w / weight_sum).collect();
-    println!("expected: {weights:?}");
-
-    let largest_weight = values[0].1;
-
-    let mut results = [0u32; 9];
-
-    for _ in 0..100_000 {
-        let mut iter = values.choose_multiple_weighted(&mut rng(), 2, |a| a.1 / largest_weight).unwrap();
-        let a = iter.next().unwrap().0;
-        let b = iter.next().unwrap().0;
-        assert!(iter.next().is_none());
-
-        let i = (a - 1) * 3 + b - 1;
-        results[i as usize] += 1;
-    }
-
-    println!("got following results:");
-    for i in 0..9 {
-        let i0 = i / 3 + 1;
-        let i1 = i % 3 + 1;
-        println!("({}, {}):\t{}", i0, i1, results[i]);
-    }
-    assert!(false);
-}

From 831353ff55bb30dadadd64c383b102e69cbf28e0 Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Tue, 26 Nov 2024 09:44:24 +0000
Subject: [PATCH 21/21] fn sample_efraimidis_spirakis: use BinaryHeap

---
 src/seq/index.rs | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/src/seq/index.rs b/src/seq/index.rs
index 39ac708a67..852bdac76c 100644
--- a/src/seq/index.rs
+++ b/src/seq/index.rs
@@ -354,7 +354,7 @@ where
     N: UInt,
     IndexVec: From<Vec<N>>,
 {
-    use std::cmp::Ordering;
+    use std::{cmp::Ordering, collections::BinaryHeap};
 
     if amount == N::zero() {
         return Ok(IndexVec::U32(Vec::new()));
@@ -373,9 +373,9 @@ where
 
     impl<N> Ord for Element<N> {
         fn cmp(&self, other: &Self) -> Ordering {
-            // partial_cmp will always produce a value,
-            // because we check that the weights are not nan
-            self.key.partial_cmp(&other.key).unwrap()
+            // unwrap() should not panic since weights should not be NaN
+            // We reverse so that BinaryHeap::peek shows the smallest item
+            self.key.partial_cmp(&other.key).unwrap().reverse()
         }
     }
 
@@ -387,7 +387,7 @@ where
 
     impl<N> Eq for Element<N> {}
 
-    let mut candidates = Vec::with_capacity(amount.as_usize());
+    let mut candidates = BinaryHeap::with_capacity(amount.as_usize());
     let mut index = N::zero();
     while index < length && candidates.len() < amount.as_usize() {
         let weight = weight(index.as_usize()).into();
@@ -402,26 +402,23 @@ where
 
         index += N::one();
     }
-    candidates.sort_unstable();
 
     if candidates.len() < amount.as_usize() {
         return Err(WeightError::InsufficientNonZero);
     }
 
-    let mut x = rng.random::<f64>().ln() / candidates[0].key;
+    let mut x = rng.random::<f64>().ln() / candidates.peek().unwrap().key;
     while index < length {
         let weight = weight(index.as_usize()).into();
         if weight > 0.0 {
             x -= weight;
             if x <= 0.0 {
-                let t = (candidates[0].key * weight).exp();
+                let min_candidate = candidates.pop().unwrap();
+                let t = (min_candidate.key * weight).exp();
                 let key = rng.random_range(t..1.0).ln() / weight;
-                candidates[0] = Element { index, key };
-                // TODO: consider using a binary tree instead of sorting at each
-                // step. This should be faster for some THRESHOLD < amount.
-                candidates.sort_unstable();
+                candidates.push(Element { index, key });
 
-                x = rng.random::<f64>().ln() / candidates[0].key;
+                x = rng.random::<f64>().ln() / candidates.peek().unwrap().key;
             }
         } else if !(weight >= 0.0) {
             return Err(WeightError::InvalidWeight);