From 23adc1d3cfa34f387fa4c5973252f54b4ca53a2c Mon Sep 17 00:00:00 2001
From: AvhiMaz <avhimazumder5@outlook.com>
Date: Wed, 12 Nov 2025 16:53:12 +0530
Subject: [PATCH 1/4] fix: re-enable test_scheduler_drop_idle by resolving race
 condition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

  Remove #[ignore] attribute and fix timing to eliminate race condition
  in test_scheduler_drop_idle, which was disabled in PR #8278.

  The test verifies the scheduler pool's cleaner thread correctly removes
  idle schedulers while preserving recently-pooled ones.

  Root cause:
  The original test used a 100ms idle threshold with 1000ms sleep, but
  timing was still unreliable due to system variations. The race occurred
  when old_scheduler and new_scheduler had unclear age differences.

  Fix:
  - Use explicit 300ms idle threshold for this test (instead of 100ms)
  - Sleep 350ms before returning new_scheduler (provides 50ms+ safety margin)
  - This guarantees old_scheduler is idle while new_scheduler is not

  Result:
  - old_scheduler: 350ms old (> 300ms threshold) → definitely idle → removed
  - new_scheduler: ~0ms old (< 300ms threshold) → definitely not idle → kept
  - Test is now deterministic and matches expected checkpoint sequence

  Fixes #8279

Signed-off-by: AvhiMaz <avhimazumder5@outlook.com>
---
 unified-scheduler-pool/src/lib.rs | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/unified-scheduler-pool/src/lib.rs b/unified-scheduler-pool/src/lib.rs
index 49926bdd53b0da..a94d448a2a9f76 100644
--- a/unified-scheduler-pool/src/lib.rs
+++ b/unified-scheduler-pool/src/lib.rs
@@ -2872,7 +2872,6 @@ mod tests {
     const SHORTENED_MAX_POOLING_DURATION: Duration = Duration::from_millis(100);
 
     #[test]
-    #[ignore]
     fn test_scheduler_drop_idle() {
         agave_logger::setup();
 
@@ -2884,6 +2883,9 @@ mod tests {
         ]);
 
         let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64));
+        // Use a longer pooling duration for this test to avoid race conditions.
+        // old_scheduler and new_scheduler need to have a clear age difference.
+        let test_max_pooling_duration = Duration::from_millis(300);
         let pool_raw = DefaultSchedulerPool::do_new(
             None,
             None,
@@ -2891,7 +2893,7 @@ mod tests {
             None,
             ignored_prioritization_fee_cache,
             SHORTENED_POOL_CLEANER_INTERVAL,
-            SHORTENED_MAX_POOLING_DURATION,
+            test_max_pooling_duration,
             DEFAULT_MAX_USAGE_QUEUE_COUNT,
             DEFAULT_TIMEOUT_DURATION,
         );
@@ -2905,8 +2907,10 @@ mod tests {
         let new_scheduler_id = new_scheduler.id();
         Box::new(old_scheduler.into_inner().1).return_to_pool();
 
-        // sleepless_testing can't be used; wait a bit here to see real progress of wall time...
-        sleep(SHORTENED_MAX_POOLING_DURATION * 10);
+        // Wait for old_scheduler to be considered idle by the cleaner (> 300ms old).
+        // This ensures when the cleaner runs, old_scheduler will definitely be idle.
+        sleep(Duration::from_millis(350));
+
         Box::new(new_scheduler.into_inner().1).return_to_pool();
 
         // Block solScCleaner until we see returned schedlers...
@@ -2916,12 +2920,9 @@ mod tests {
         // See the old (= idle) scheduler gone only after solScCleaner did its job...
         sleepless_testing::at(&TestCheckPoint::AfterIdleSchedulerCleaned);
 
-        // The following assertion is racy.
-        //
-        // We need to make sure new_scheduler isn't treated as idle up to now since being returned
-        // to the pool after sleep(SHORTENED_MAX_POOLING_DURATION * 10).
-        // Removing only old_scheduler is the expected behavior. So, make
-        // SHORTENED_MAX_POOLING_DURATION rather long...
+        // After the cleaner finishes, only the new_scheduler should remain.
+        // The old_scheduler is idle (pooled > 100ms ago) so it gets removed.
+        // The new_scheduler was just pooled so it's not idle yet.
         assert_eq!(pool_raw.scheduler_inners.lock().unwrap().len(), 1);
         assert_eq!(
             pool_raw

From 5781563deac55a49bc1867edcf7b7e72b4d7c881 Mon Sep 17 00:00:00 2001
From: AvhiMaz <avhimazumder5@outlook.com>
Date: Mon, 17 Nov 2025 12:51:49 +0530
Subject: [PATCH 2/4] Address all reviewer feedback on test_scheduler_drop_idle

   - Move SHORTENED_MAX_POOLING_DURATION constant to test_scheduler_drop_stale where it's used
   - Convert test_max_pooling_duration from let to const TEST_MAX_POOLING_DURATION following conventions
   - Create TEST_WAIT_FOR_IDLE constant (500ms) with explicit safety margin tied to TEST_MAX_POOLING_DURATION
   - Increase safety margin from 50ms to 200ms for CI reliability
   - Add detailed comments explaining 300ms pooling duration tradeoff (speed vs race condition window)
   - Update assertion comment with explicit timing guarantees: old_scheduler ~500ms old (exceeds 300ms threshold),
   new_scheduler ~50ms old (below threshold)

Signed-off-by: AvhiMaz <avhimazumder5@outlook.com>
---
 unified-scheduler-pool/src/lib.rs | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/unified-scheduler-pool/src/lib.rs b/unified-scheduler-pool/src/lib.rs
index a94d448a2a9f76..69d067d5a04538 100644
--- a/unified-scheduler-pool/src/lib.rs
+++ b/unified-scheduler-pool/src/lib.rs
@@ -2869,7 +2869,6 @@ mod tests {
     }
 
     const SHORTENED_POOL_CLEANER_INTERVAL: Duration = Duration::from_millis(1);
-    const SHORTENED_MAX_POOLING_DURATION: Duration = Duration::from_millis(100);
 
     #[test]
     fn test_scheduler_drop_idle() {
@@ -2882,10 +2881,15 @@ mod tests {
             &TestCheckPoint::AfterIdleSchedulerCleaned,
         ]);
 
+        // Use 300ms pooling duration as a balance between:
+        // - Keeping the test fast (as small as possible)
+        // - Providing a large enough window to avoid the race condition where the second
+        //   scheduler could also be freed before we can assert only one remains
+        const TEST_MAX_POOLING_DURATION: Duration = Duration::from_millis(300);
+        // Add a generous safety margin to avoid flakiness in CI. We need old_scheduler to be
+        // reliably older than TEST_MAX_POOLING_DURATION when the cleaner runs.
+        const TEST_WAIT_FOR_IDLE: Duration = Duration::from_millis(500);
         let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64));
-        // Use a longer pooling duration for this test to avoid race conditions.
-        // old_scheduler and new_scheduler need to have a clear age difference.
-        let test_max_pooling_duration = Duration::from_millis(300);
         let pool_raw = DefaultSchedulerPool::do_new(
             None,
             None,
@@ -2893,7 +2897,7 @@ mod tests {
             None,
             ignored_prioritization_fee_cache,
             SHORTENED_POOL_CLEANER_INTERVAL,
-            test_max_pooling_duration,
+            TEST_MAX_POOLING_DURATION,
             DEFAULT_MAX_USAGE_QUEUE_COUNT,
             DEFAULT_TIMEOUT_DURATION,
         );
@@ -2907,9 +2911,8 @@ mod tests {
         let new_scheduler_id = new_scheduler.id();
         Box::new(old_scheduler.into_inner().1).return_to_pool();
 
-        // Wait for old_scheduler to be considered idle by the cleaner (> 300ms old).
-        // This ensures when the cleaner runs, old_scheduler will definitely be idle.
-        sleep(Duration::from_millis(350));
+        // Wait for old_scheduler to be considered idle by the cleaner.
+        sleep(TEST_WAIT_FOR_IDLE);
 
         Box::new(new_scheduler.into_inner().1).return_to_pool();
 
@@ -2920,9 +2923,8 @@ mod tests {
         // See the old (= idle) scheduler gone only after solScCleaner did its job...
         sleepless_testing::at(&TestCheckPoint::AfterIdleSchedulerCleaned);
 
-        // After the cleaner finishes, only the new_scheduler should remain.
-        // The old_scheduler is idle (pooled > 100ms ago) so it gets removed.
-        // The new_scheduler was just pooled so it's not idle yet.
+        // Only new_scheduler should remain. old_scheduler (~500ms old) exceeds the
+        // 300ms idle threshold, while new_scheduler (~50ms old) does not.
         assert_eq!(pool_raw.scheduler_inners.lock().unwrap().len(), 1);
         assert_eq!(
             pool_raw
@@ -3013,6 +3015,7 @@ mod tests {
 
     #[test]
     fn test_scheduler_drop_stale() {
+        const SHORTENED_MAX_POOLING_DURATION: Duration = Duration::from_millis(100);
         agave_logger::setup();
 
         let _progress = sleepless_testing::setup(&[

From af5aaaa1ba11e448872fe40ad81845a811972053 Mon Sep 17 00:00:00 2001
From: AvhiMaz <avhimazumder5@outlook.com>
Date: Mon, 17 Nov 2025 23:58:09 +0530
Subject: [PATCH 3/4] refactor: tie test duration constants together and
 improve comment accuracy

Signed-off-by: AvhiMaz <avhimazumder5@outlook.com>
---
 unified-scheduler-pool/src/lib.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/unified-scheduler-pool/src/lib.rs b/unified-scheduler-pool/src/lib.rs
index 69d067d5a04538..6fe33857e6475e 100644
--- a/unified-scheduler-pool/src/lib.rs
+++ b/unified-scheduler-pool/src/lib.rs
@@ -2885,10 +2885,10 @@ mod tests {
         // - Keeping the test fast (as small as possible)
         // - Providing a large enough window to avoid the race condition where the second
         //   scheduler could also be freed before we can assert only one remains
-        const TEST_MAX_POOLING_DURATION: Duration = Duration::from_millis(300);
-        // Add a generous safety margin to avoid flakiness in CI. We need old_scheduler to be
-        // reliably older than TEST_MAX_POOLING_DURATION when the cleaner runs.
-        const TEST_WAIT_FOR_IDLE: Duration = Duration::from_millis(500);
+        const TEST_MAX_POOLING_DURATION_MS: u64 = 300;
+        const TEST_MAX_POOLING_DURATION: Duration = Duration::from_millis(TEST_MAX_POOLING_DURATION_MS);
+        const TEST_WAIT_FOR_IDLE_MS: u64 = TEST_MAX_POOLING_DURATION_MS + 200;
+        const TEST_WAIT_FOR_IDLE: Duration = Duration::from_millis(TEST_WAIT_FOR_IDLE_MS);
         let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64));
         let pool_raw = DefaultSchedulerPool::do_new(
             None,
@@ -2923,8 +2923,8 @@ mod tests {
         // See the old (= idle) scheduler gone only after solScCleaner did its job...
         sleepless_testing::at(&TestCheckPoint::AfterIdleSchedulerCleaned);
 
-        // Only new_scheduler should remain. old_scheduler (~500ms old) exceeds the
-        // 300ms idle threshold, while new_scheduler (~50ms old) does not.
+        // Only new_scheduler should remain. old_scheduler exceeds the
+        // TEST_MAX_POOLING_DURATION idle threshold, while new_scheduler does not.
         assert_eq!(pool_raw.scheduler_inners.lock().unwrap().len(), 1);
         assert_eq!(
             pool_raw

From a037011751b2ac2478dfc12268bddcf0bae294a0 Mon Sep 17 00:00:00 2001
From: AvhiMaz <avhimazumder5@outlook.com>
Date: Wed, 19 Nov 2025 13:23:51 +0530
Subject: [PATCH 4/4] fix: cargo fmt

Signed-off-by: AvhiMaz <avhimazumder5@outlook.com>
---
 unified-scheduler-pool/src/lib.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/unified-scheduler-pool/src/lib.rs b/unified-scheduler-pool/src/lib.rs
index 6fe33857e6475e..3e8f6532983812 100644
--- a/unified-scheduler-pool/src/lib.rs
+++ b/unified-scheduler-pool/src/lib.rs
@@ -2886,7 +2886,8 @@ mod tests {
         // - Providing a large enough window to avoid the race condition where the second
         //   scheduler could also be freed before we can assert only one remains
         const TEST_MAX_POOLING_DURATION_MS: u64 = 300;
-        const TEST_MAX_POOLING_DURATION: Duration = Duration::from_millis(TEST_MAX_POOLING_DURATION_MS);
+        const TEST_MAX_POOLING_DURATION: Duration =
+            Duration::from_millis(TEST_MAX_POOLING_DURATION_MS);
         const TEST_WAIT_FOR_IDLE_MS: u64 = TEST_MAX_POOLING_DURATION_MS + 200;
         const TEST_WAIT_FOR_IDLE: Duration = Duration::from_millis(TEST_WAIT_FOR_IDLE_MS);
         let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64));