diff --git a/Cargo.lock b/Cargo.lock index 08c2fa97d31ef7..b9e63af2280fcb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -820,7 +820,7 @@ dependencies = [ "lazy_static", "lazycell", "peeking_take_while", - "prettyplease 0.2.4", + "prettyplease 0.2.16", "proc-macro2", "quote", "regex", @@ -829,6 +829,29 @@ dependencies = [ "syn 2.0.52", ] +[[package]] +name = "bindgen" +version = "0.69.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c69fae65a523209d34240b60abe0c42d33d1045d445c0839d8a4894a736e2d" +dependencies = [ + "bitflags 2.4.2", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "prettyplease 0.2.16", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.52", + "which", +] + [[package]] name = "bit-set" version = "0.5.2" @@ -1450,7 +1473,7 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c278839b831783b70278b14df4d45e1beb1aad306c07bb796637de9a0e323e8e" dependencies = [ - "crossbeam-utils", + "crossbeam-utils 0.8.18", ] [[package]] @@ -1546,6 +1569,16 @@ dependencies = [ "winapi 0.2.8", ] +[[package]] +name = "cpu-time" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9e393a7668fe1fad3075085b86c781883000b4ede868f43627b34a87c8b7ded" +dependencies = [ + "libc", + "winapi 0.3.9", +] + [[package]] name = "cpufeatures" version = "0.2.7" @@ -1615,11 +1648,10 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95" +version = "0.5.11" +source = "git+https://github.com/ryoqun/crossbeam?rev=438ec7cdaf6c6a8f593e50344c725fef8a13c7a5#438ec7cdaf6c6a8f593e50344c725fef8a13c7a5" dependencies = [ - "crossbeam-utils", + "crossbeam-utils 0.8.19", ] [[package]] @@ -1630,7 +1662,7 @@ checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" dependencies = [ "cfg-if 1.0.0", "crossbeam-epoch", - "crossbeam-utils", + "crossbeam-utils 0.8.18", ] [[package]] @@ -1639,7 +1671,7 @@ version = "0.9.5" source = "git+https://github.com/solana-labs/crossbeam?rev=fd279d707025f0e60951e429bf778b4813d1b6bf#fd279d707025f0e60951e429bf778b4813d1b6bf" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils", + "crossbeam-utils 0.8.18", "lazy_static", "memoffset 0.6.4", "scopeguard", @@ -1654,6 +1686,11 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "git+https://github.com/ryoqun/crossbeam?rev=438ec7cdaf6c6a8f593e50344c725fef8a13c7a5#438ec7cdaf6c6a8f593e50344c725fef8a13c7a5" + [[package]] name = "crunchy" version = "0.2.2" @@ -1720,6 +1757,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "cty" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35" + [[package]] name = "curve25519-dalek" version = "3.2.1" @@ -1802,6 +1845,15 @@ dependencies = [ "rusticata-macros", ] +[[package]] +name = "deranged" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +dependencies = [ + "powerfmt", +] + [[package]] name = "derivation-path" version = "0.2.0" @@ -2798,6 +2850,43 @@ dependencies = [ "tokio-native-tls", ] +[[package]] +name = "iai-callgrind" +version = "0.10.2" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e99bf26f496b13ac6273014f40afda46a233fbfb0289ce50fb4daaad2f2ffc80" +dependencies = [ + "bincode", + "bindgen 0.69.2", + "cc", + "cfg-if 1.0.0", + "cty", + "iai-callgrind-macros", + "iai-callgrind-runner", + "regex", +] + +[[package]] +name = "iai-callgrind-macros" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2a4bb39225592c0a28cfca6f70af52ebd8da23f533c2cdd0a3329c1fa252d56" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.52", +] + +[[package]] +name = "iai-callgrind-runner" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c23a951b9eccaa1e38556d27473d1462a9c247a27961812edcaac156af861282" +dependencies = [ + "serde", +] + [[package]] name = "iana-time-zone" version = "0.1.46" @@ -3177,7 +3266,7 @@ version = "0.11.0+8.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3386f101bcb4bd252d8e9d2fb41ec3b0862a15a62b478c355b2982efa469e3e" dependencies = [ - "bindgen", + "bindgen 0.65.1", "bzip2-sys", "cc", "glob", @@ -3665,15 +3754,6 @@ dependencies = [ "syn 2.0.52", ] -[[package]] -name = "num_threads" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97ba99ba6393e2c3734791401b66902d981cb03bf190af674ca69949b6d5fb15" -dependencies = [ - "libc", -] - [[package]] name = "number_prefix" version = "0.4.0" @@ -4076,6 +4156,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc59d1bcc64fc5d021d67521f818db868368028108d37f0e98d74e33f68297b5" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.15" @@ -4130,9 +4216,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.4" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" dependencies = [ "proc-macro2", "syn 2.0.52", @@ -4195,6 +4281,32 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "procfs" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" +dependencies = [ + "bitflags 2.4.2", + "chrono", + "flate2", + "hex", + "lazy_static", + "procfs-core", + "rustix", +] + +[[package]] +name = "procfs-core" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" +dependencies = [ + "bitflags 2.4.2", + "chrono", + "hex", +] + [[package]] name = "proptest" version = "1.4.0" @@ -4503,7 +4615,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", - "crossbeam-utils", + "crossbeam-utils 0.8.18", ] [[package]] @@ -4752,9 +4864,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.31" +version = "0.38.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +checksum = 
"65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" dependencies = [ "bitflags 2.4.2", "errno", @@ -5020,8 +5132,13 @@ version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07ff71d2c147a7b57362cead5e22f772cd52f6ab31cfcd9edcd7f6aeb2a0afbe" dependencies = [ + "base64 0.13.1", + "chrono", + "hex", "serde", + "serde_json", "serde_with_macros", + "time", ] [[package]] @@ -7559,8 +7676,12 @@ name = "solana-unified-scheduler-logic" version = "2.0.0" dependencies = [ "assert_matches", + "iai-callgrind", + "qualifier_attr", "solana-sdk", + "solana-unified-scheduler-logic", "static_assertions", + "triomphe", ] [[package]] @@ -7568,18 +7689,30 @@ name = "solana-unified-scheduler-pool" version = "2.0.0" dependencies = [ "assert_matches", + "bincode", + "cpu-time", + "criterion", "crossbeam-channel", "dashmap", "derivative", "log", + "procfs", "qualifier_attr", + "rand 0.8.5", + "rustix", + "serde_json", "solana-ledger", "solana-logger", + "solana-measure", + "solana-metrics", + "solana-nohash-hasher", "solana-program-runtime", "solana-runtime", "solana-sdk", "solana-unified-scheduler-logic", + "solana-unified-scheduler-pool", "solana-vote", + "tikv-jemallocator", ] [[package]] @@ -8020,6 +8153,12 @@ dependencies = [ "spl-program-error", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" @@ -8374,21 +8513,32 @@ dependencies = [ [[package]] name = "time" -version = "0.3.9" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" dependencies = [ + "deranged", "itoa", - "libc", - "num_threads", + "powerfmt", + "serde", + "time-core", "time-macros", ] +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + [[package]] name = "time-macros" -version = "0.2.4" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" +checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" +dependencies = [ + "time-core", +] [[package]] name = "tiny-bip39" @@ -8758,6 +8908,16 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0de5f738ceab88e2491a94ddc33c3feeadfa95fedc60363ef110845df12f3878" +[[package]] +name = "triomphe" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "859eb650cfee7434994602c3a68b25d77ad9e68c8a6cd491616ef86661382eb3" +dependencies = [ + "serde", + "stable_deref_trait", +] + [[package]] name = "try-lock" version = "0.2.3" diff --git a/Cargo.toml b/Cargo.toml index f96d072d931633..95814a448d6683 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -181,9 +181,10 @@ console_error_panic_hook = "0.1.7" console_log = "0.2.2" const_format = "0.2.32" core_affinity = "0.5.10" +cpu-time = "1.0.0" criterion = "0.5.1" criterion-stats = "0.3.0" -crossbeam-channel = "0.5.12" +crossbeam-channel = "0.5.11" csv = "1.3.0" ctrlc = "3.4.4" curve25519-dalek = "3.2.1" @@ -269,6 +270,7 @@ predicates = "2.1" pretty-hex = "0.3.0" 
prio-graph = "0.2.1" proc-macro2 = "1.0.79" +procfs = "0.16.0" proptest = "1.4" prost = "0.11.9" prost-build = "0.11.9" @@ -289,6 +291,7 @@ reqwest = { version = "0.11.23", default-features = false } rolling-file = "0.2.0" rpassword = "7.3" rustc_version = "0.4" +rustix = "0.38.32" rustls = { version = "0.21.11", default-features = false, features = ["quic"] } rustversion = "1.0.14" scopeguard = "1.2.0" @@ -443,6 +446,8 @@ zstd = "0.11.2" # for details, see https://github.com/solana-labs/crossbeam/commit/fd279d707025f0e60951e429bf778b4813d1b6bf crossbeam-epoch = { git = "https://github.com/solana-labs/crossbeam", rev = "fd279d707025f0e60951e429bf778b4813d1b6bf" } +crossbeam-channel = { git = "https://github.com/ryoqun/crossbeam", rev = "438ec7cdaf6c6a8f593e50344c725fef8a13c7a5" } + # We include the following crates as our dependencies above from crates.io: # # * spl-associated-token-account diff --git a/ci/test-bench.sh b/ci/test-bench.sh index aacc82cffbb0a6..1444405bcccf5c 100755 --- a/ci/test-bench.sh +++ b/ci/test-bench.sh @@ -56,6 +56,10 @@ _ $cargoNightly bench --manifest-path gossip/Cargo.toml ${V:+--verbose} \ _ $cargoNightly bench --manifest-path poh/Cargo.toml ${V:+--verbose} \ -- -Z unstable-options --format=json | tee -a "$BENCH_FILE" +# Run scheduler-pool benches +_ $cargoNightly bench --manifest-path scheduler-pool/Cargo.toml ${V:+--verbose} \ + -- -Z unstable-options --format=json | tee -a "$BENCH_FILE" + # Run core benches _ $cargoNightly bench --manifest-path core/Cargo.toml ${V:+--verbose} \ -- -Z unstable-options --format=json | tee -a "$BENCH_FILE" diff --git a/core/src/validator.rs b/core/src/validator.rs index 47153dd2706dd9..17a277c041e201 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -149,8 +149,8 @@ const WAIT_FOR_WEN_RESTART_SUPERMAJORITY_THRESHOLD_PERCENT: u64 = #[derive(Clone, EnumString, EnumVariantNames, Default, IntoStaticStr, Display)] #[strum(serialize_all = "kebab-case")] pub enum BlockVerificationMethod { - #[default] BlockstoreProcessor, + #[default] UnifiedScheduler, } @@ -1462,8 +1462,10 @@ impl Validator { // Used for notifying many nodes in parallel to exit pub fn exit(&mut self) { + info!("exit1"); self.validator_exit.write().unwrap().exit(); + info!("exit2"); // drop all signals in blockstore self.blockstore.drop_signal(); } @@ -1499,24 +1501,29 @@ impl Validator { } pub fn join(self) { - drop(self.bank_forks); + info!("join1"); drop(self.cluster_info); + info!("join2"); self.poh_service.join().expect("poh_service"); drop(self.poh_recorder); + info!("join3"); if let Some(json_rpc_service) = self.json_rpc_service { json_rpc_service.join().expect("rpc_service"); } + info!("join4"); if let Some(pubsub_service) = self.pubsub_service { pubsub_service.join().expect("pubsub_service"); } + info!("join5"); self.rpc_completed_slots_service .join() .expect("rpc_completed_slots_service"); + info!("join6"); if let Some(optimistically_confirmed_bank_tracker) = self.optimistically_confirmed_bank_tracker { @@ -1525,96 +1532,126 @@ impl Validator { .expect("optimistically_confirmed_bank_tracker"); } + info!("join7"); if let Some(transaction_status_service) = self.transaction_status_service { transaction_status_service .join() .expect("transaction_status_service"); } + info!("join8"); if let Some(rewards_recorder_service) = self.rewards_recorder_service { rewards_recorder_service .join() .expect("rewards_recorder_service"); } + info!("join9"); if let Some(cache_block_meta_service) = self.cache_block_meta_service { cache_block_meta_service .join() 
.expect("cache_block_meta_service"); } + info!("join10"); if let Some(system_monitor_service) = self.system_monitor_service { system_monitor_service .join() .expect("system_monitor_service"); } + info!("join11"); if let Some(sample_performance_service) = self.sample_performance_service { sample_performance_service .join() .expect("sample_performance_service"); } + info!("join12"); if let Some(entry_notifier_service) = self.entry_notifier_service { entry_notifier_service .join() .expect("entry_notifier_service"); } + info!("join13"); if let Some(s) = self.snapshot_packager_service { s.join().expect("snapshot_packager_service"); } + info!("join14"); self.gossip_service.join().expect("gossip_service"); if let Some(repair_quic_endpoint) = &self.repair_quic_endpoint { repair::quic_endpoint::close_quic_endpoint(repair_quic_endpoint); } + info!("join15"); self.serve_repair_service .join() .expect("serve_repair_service"); + info!("join15"); if let Some(repair_quic_endpoint_join_handle) = self.repair_quic_endpoint_join_handle { self.repair_quic_endpoint_runtime .map(|runtime| runtime.block_on(repair_quic_endpoint_join_handle)) .transpose() .unwrap(); }; + info!("join16"); self.stats_reporter_service .join() .expect("stats_reporter_service"); + info!("join17"); self.blockstore_metric_report_service .join() .expect("ledger_metric_report_service"); + info!("join18"); self.accounts_background_service .join() .expect("accounts_background_service"); + info!("join19"); self.accounts_hash_verifier .join() .expect("accounts_hash_verifier"); + info!("join20"); if let Some(turbine_quic_endpoint) = &self.turbine_quic_endpoint { solana_turbine::quic_endpoint::close_quic_endpoint(turbine_quic_endpoint); } + info!("join21"); self.tpu.join().expect("tpu"); + info!("join22"); self.tvu.join().expect("tvu"); + info!("join23"); if let Some(turbine_quic_endpoint_join_handle) = self.turbine_quic_endpoint_join_handle { self.turbine_quic_endpoint_runtime .map(|runtime| runtime.block_on(turbine_quic_endpoint_join_handle)) .transpose() .unwrap(); } + info!("join24"); self.completed_data_sets_service .join() .expect("completed_data_sets_service"); + info!("join25"); if let Some(ip_echo_server) = self.ip_echo_server { ip_echo_server.shutdown_background(); } + info!("join26"); if let Some(geyser_plugin_service) = self.geyser_plugin_service { geyser_plugin_service.join().expect("geyser_plugin_service"); } + info!("join27"); self.poh_timing_report_service .join() .expect("poh_timing_report_service"); + info!("join28"); + self.bank_forks.write().unwrap().prepare_to_drop(); + let sc = Arc::strong_count(&self.bank_forks); + if let Some(bank_forks) = Arc::into_inner(self.bank_forks) { + drop::(bank_forks.into_inner().unwrap()); + } else { + warn!("seems bankforks are leaking...{}:", sc); + } } } diff --git a/core/tests/unified_scheduler.rs b/core/tests/unified_scheduler.rs index fae6f3cccfe698..fa79cbcfb030c3 100644 --- a/core/tests/unified_scheduler.rs +++ b/core/tests/unified_scheduler.rs @@ -18,7 +18,8 @@ use { solana_program_runtime::timings::ExecuteTimings, solana_runtime::{ accounts_background_service::AbsRequestSender, bank::Bank, bank_forks::BankForks, - genesis_utils::GenesisConfigInfo, prioritization_fee_cache::PrioritizationFeeCache, + genesis_utils::GenesisConfigInfo, installed_scheduler_pool::DefaultScheduleExecutionArg, + prioritization_fee_cache::PrioritizationFeeCache, }, solana_sdk::{ hash::Hash, @@ -27,11 +28,14 @@ use { transaction::{Result, SanitizedTransaction}, }, solana_unified_scheduler_pool::{ - 
DefaultTaskHandler, HandlerContext, PooledScheduler, SchedulerPool, TaskHandler, + DefaultTaskHandler, HandlerContext, PooledScheduler, SchedulerPool, SpawnableScheduler, + TaskHandler, }, std::{ collections::HashMap, sync::{Arc, Mutex}, + thread::sleep, + time::Duration, }, }; @@ -41,10 +45,11 @@ fn test_scheduler_waited_by_drop_bank_service() { static LOCK_TO_STALL: Mutex<()> = Mutex::new(()); - #[derive(Debug)] + #[derive(Clone, Debug)] struct StallingHandler; - impl TaskHandler for StallingHandler { + impl TaskHandler for StallingHandler { fn handle( + &self, result: &mut Result<()>, timings: &mut ExecuteTimings, bank: &Arc, @@ -55,10 +60,24 @@ fn test_scheduler_waited_by_drop_bank_service() { info!("Stalling at StallingHandler::handle()..."); *LOCK_TO_STALL.lock().unwrap(); // Wait a bit for the replay stage to prune banks - std::thread::sleep(std::time::Duration::from_secs(3)); + sleep(Duration::from_secs(3)); info!("Now entering into DefaultTaskHandler::handle()..."); - DefaultTaskHandler::handle(result, timings, bank, transaction, index, handler_context); + >::handle( + &DefaultTaskHandler, + result, + timings, + bank, + transaction, + index, + handler_context, + ); + } + + fn create>( + _pool: &SchedulerPool, + ) -> Self { + Self } } @@ -72,7 +91,7 @@ fn test_scheduler_waited_by_drop_bank_service() { let genesis_bank = Bank::new_for_tests(&genesis_config); let bank_forks = BankForks::new_rw_arc(genesis_bank); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool_raw = SchedulerPool::, _>::new( + let pool_raw = SchedulerPool::, _, _>::new( None, None, None, @@ -107,7 +126,9 @@ fn test_scheduler_waited_by_drop_bank_service() { // Delay transaction execution to ensure transaction execution happens after termintion has // been started let lock_to_stall = LOCK_TO_STALL.lock().unwrap(); - pruned_bank.schedule_transaction_executions([(&tx, &0)].into_iter()); + pruned_bank + .schedule_transaction_executions([(&tx, &0)].into_iter()) + .unwrap(); drop(pruned_bank); assert_eq!(pool_raw.pooled_scheduler_count(), 0); drop(lock_to_stall); diff --git a/ledger-tool/src/main.rs b/ledger-tool/src/main.rs index 814c30a4f7b384..236c9f9f060d5c 100644 --- a/ledger-tool/src/main.rs +++ b/ledger-tool/src/main.rs @@ -864,7 +864,6 @@ fn main() { .takes_value(true) .possible_values(BlockVerificationMethod::cli_names()) .global(true) - .hidden(hidden_unless_forced()) .help(BlockVerificationMethod::cli_message()), ) .arg( @@ -874,7 +873,6 @@ fn main() { .takes_value(true) .validator(|s| is_within_range(s, 1..)) .global(true) - .hidden(hidden_unless_forced()) .help(DefaultSchedulerPool::cli_message()), ) .arg( @@ -1823,6 +1821,8 @@ fn main() { exit_signal.store(true, Ordering::Relaxed); system_monitor_service.join().unwrap(); + bank_forks.write().unwrap().prepare_to_drop(); + drop::(Arc::into_inner(bank_forks).unwrap().into_inner().unwrap()); } ("graph", Some(arg_matches)) => { let output_file = value_t_or_exit!(arg_matches, "graph_filename", String); diff --git a/ledger/src/blockstore_processor.rs b/ledger/src/blockstore_processor.rs index bc2a60efb2de87..bdd7a188b3df12 100644 --- a/ledger/src/blockstore_processor.rs +++ b/ledger/src/blockstore_processor.rs @@ -341,8 +341,7 @@ fn process_batches( // scheduling always succeeds here without being blocked on actual transaction executions. // The transaction execution errors will be collected via the blocking fn called // BankWithScheduler::wait_for_completed_scheduler(), if any. 
- schedule_batches_for_execution(bank, batches); - Ok(()) + schedule_batches_for_execution(bank, batches) } else { debug!( "process_batches()/rebatch_and_execute_batches({} batches)", @@ -364,7 +363,7 @@ fn process_batches( fn schedule_batches_for_execution( bank: &BankWithScheduler, batches: &[TransactionBatchWithIndexes], -) { +) -> Result<()> { for TransactionBatchWithIndexes { batch, transaction_indexes, @@ -375,8 +374,9 @@ fn schedule_batches_for_execution( .sanitized_transactions() .iter() .zip(transaction_indexes.iter()), - ); + )?; } + Ok(()) } fn rebatch_transactions<'a>( @@ -445,9 +445,7 @@ fn rebatch_and_execute_batches( { let mut cost_tracker = bank.write_cost_tracker().unwrap(); for tx_cost in &tx_costs { - cost_tracker - .try_add(tx_cost) - .map_err(TransactionError::from)?; + cost_tracker.try_add(tx_cost)?; } } @@ -2149,6 +2147,7 @@ pub mod tests { instruction::{Instruction, InstructionError}, native_token::LAMPORTS_PER_SOL, pubkey::Pubkey, + scheduling::SchedulingMode, signature::{Keypair, Signer}, system_instruction::SystemError, system_transaction, @@ -4751,7 +4750,7 @@ pub mod tests { .. } = create_genesis_config_with_leader(500, &dummy_leader_pubkey, 100); let bank = Arc::new(Bank::new_for_tests(&genesis_config)); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::new(SchedulingMode::BlockVerification, bank.clone()); let txs = create_test_transactions(&mint_keypair, &genesis_config.hash()); @@ -4766,7 +4765,7 @@ pub mod tests { mocked_scheduler .expect_schedule_execution() .times(txs.len()) - .returning(|_| ()); + .returning(|_| Ok(())); mocked_scheduler .expect_wait_for_termination() .with(mockall::predicate::eq(true)) diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs index 3ddd2aa19dcaa8..36b2379e77033a 100644 --- a/local-cluster/tests/local_cluster.rs +++ b/local-cluster/tests/local_cluster.rs @@ -4,7 +4,7 @@ use { crossbeam_channel::{unbounded, Receiver}, gag::BufferRedirect, log::*, - rand::seq::IteratorRandom, + rand::seq::SliceRandom, serial_test::serial, solana_accounts_db::{ hardened_unpack::open_genesis_config, utils::create_accounts_run_and_snapshot_dirs, @@ -5675,12 +5675,14 @@ fn test_randomly_mixed_block_verification_methods_between_bootstrap_and_not() { ); // Randomly switch to use unified scheduler - config - .validator_configs - .iter_mut() - .choose(&mut rand::thread_rng()) - .unwrap() - .block_verification_method = BlockVerificationMethod::UnifiedScheduler; + let mut methods = [ + BlockVerificationMethod::UnifiedScheduler, + BlockVerificationMethod::BlockstoreProcessor, + ]; + methods.shuffle(&mut rand::thread_rng()); + for (validator_config, method) in config.validator_configs.iter_mut().zip(methods) { + validator_config.block_verification_method = method; + } let local = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified); cluster_tests::spend_and_verify_all_nodes( diff --git a/metrics/src/datapoint.rs b/metrics/src/datapoint.rs index e2740ce3aecc47..8a13a112da0636 100644 --- a/metrics/src/datapoint.rs +++ b/metrics/src/datapoint.rs @@ -60,6 +60,15 @@ impl DataPoint { } } + pub fn at(timestamp: SystemTime, name: &'static str) -> Self { + DataPoint { + name, + timestamp, + tags: vec![], + fields: vec![], + } + } + pub fn add_tag(&mut self, name: &'static str, value: &str) -> &mut Self { self.tags.push((name, value.to_string())); self @@ -160,6 +169,56 @@ macro_rules! create_datapoint { }; } +#[macro_export] +macro_rules! 
create_datapoint_at { + (@field $point:ident $name:expr, $string:expr, String) => { + $point.add_field_str($name, &$string); + }; + (@field $point:ident $name:expr, $value:expr, i64) => { + $point.add_field_i64($name, $value as i64); + }; + (@field $point:ident $name:expr, $value:expr, f64) => { + $point.add_field_f64($name, $value as f64); + }; + (@field $point:ident $name:expr, $value:expr, bool) => { + $point.add_field_bool($name, $value as bool); + }; + (@tag $point:ident $tag_name:expr, $tag_value:expr) => { + $point.add_tag($tag_name, &$tag_value); + }; + + (@fields $point:ident) => {}; + + // process tags + (@fields $point:ident $tag_name:expr => $tag_value:expr, $($rest:tt)*) => { + $crate::create_datapoint!(@tag $point $tag_name, $tag_value); + $crate::create_datapoint!(@fields $point $($rest)*); + }; + (@fields $point:ident $tag_name:expr => $tag_value:expr) => { + $crate::create_datapoint!(@tag $point $tag_name, $tag_value); + }; + + // process fields + (@fields $point:ident ($name:expr, $value:expr, $type:ident) , $($rest:tt)*) => { + $crate::create_datapoint!(@field $point $name, $value, $type); + $crate::create_datapoint!(@fields $point $($rest)*); + }; + (@fields $point:ident ($name:expr, $value:expr, $type:ident)) => { + $crate::create_datapoint!(@field $point $name, $value, $type); + }; + + (@point $name:expr, $at:expr, $($fields:tt)+) => { + { + let mut point = $crate::datapoint::DataPoint::at($at, &$name); + $crate::create_datapoint!(@fields point $($fields)+); + point + } + }; + (@point $name:expr, $at:expr) => { + $crate::datapoint::DataPoint::at($at, &$name) + }; +} + #[macro_export] macro_rules! datapoint { ($level:expr, $name:expr, $($fields:tt)+) => { @@ -168,6 +227,21 @@ macro_rules! datapoint { } }; } + +#[macro_export] +macro_rules! datapoint_at { + ($level:expr, $at:expr, $name:expr) => { + if log::log_enabled!($level) { + $crate::submit($crate::create_datapoint_at!(@point $name, $at), $level); + } + }; + ($level:expr, $at:expr, $name:expr, $($fields:tt)+) => { + if log::log_enabled!($level) { + $crate::submit($crate::create_datapoint_at!(@point $name, $at, $($fields)+), $level); + } + }; +} + #[macro_export] macro_rules! datapoint_error { ($name:expr, $($fields:tt)+) => { @@ -189,6 +263,16 @@ macro_rules! datapoint_info { }; } +#[macro_export] +macro_rules! datapoint_info_at { + ($at:expr, $name:expr) => { + $crate::datapoint_at!(log::Level::Info, $at, $name); + }; + ($at:expr, $name:expr, $($fields:tt)+) => { + $crate::datapoint_at!(log::Level::Info, $at, $name, $($fields)+); + }; +} + #[macro_export] macro_rules! 
datapoint_debug { ($name:expr, $($fields:tt)+) => { diff --git a/metrics/src/metrics.rs b/metrics/src/metrics.rs index aae2dabb364077..069aa2531a74ba 100644 --- a/metrics/src/metrics.rs +++ b/metrics/src/metrics.rs @@ -181,7 +181,7 @@ impl Default for MetricsAgent { Self::new( Arc::new(InfluxDbMetricsWriter::new()), - Duration::from_secs(10), + Duration::from_secs(1), max_points_per_sec, ) } diff --git a/program-runtime/src/loaded_programs.rs b/program-runtime/src/loaded_programs.rs index e5ee034e753a2d..dde03acb6da83f 100644 --- a/program-runtime/src/loaded_programs.rs +++ b/program-runtime/src/loaded_programs.rs @@ -791,6 +791,10 @@ impl ProgramCache { self.fork_graph = Some(fork_graph); } + pub fn unset_fork_graph(&mut self) { + self.fork_graph = None; + } + /// Returns the current environments depending on the given epoch pub fn get_environments_for_epoch(&self, epoch: Epoch) -> &ProgramRuntimeEnvironments { if epoch != self.latest_root_epoch { diff --git a/programs/sbf/Cargo.lock b/programs/sbf/Cargo.lock index 2ccdc186b0eb6d..432beb9053dabf 100644 --- a/programs/sbf/Cargo.lock +++ b/programs/sbf/Cargo.lock @@ -1113,7 +1113,7 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c278839b831783b70278b14df4d45e1beb1aad306c07bb796637de9a0e323e8e" dependencies = [ - "crossbeam-utils", + "crossbeam-utils 0.8.18", ] [[package]] @@ -1189,6 +1189,16 @@ dependencies = [ "winapi 0.2.8", ] +[[package]] +name = "cpu-time" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9e393a7668fe1fad3075085b86c781883000b4ede868f43627b34a87c8b7ded" +dependencies = [ + "libc", + "winapi 0.3.9", +] + [[package]] name = "cpufeatures" version = "0.2.7" @@ -1209,11 +1219,10 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95" +version = "0.5.11" +source = "git+https://github.com/ryoqun/crossbeam?rev=438ec7cdaf6c6a8f593e50344c725fef8a13c7a5#438ec7cdaf6c6a8f593e50344c725fef8a13c7a5" dependencies = [ - "crossbeam-utils", + "crossbeam-utils 0.8.19", ] [[package]] @@ -1224,7 +1233,7 @@ checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" dependencies = [ "cfg-if 1.0.0", "crossbeam-epoch", - "crossbeam-utils", + "crossbeam-utils 0.8.18", ] [[package]] @@ -1234,7 +1243,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils", + "crossbeam-utils 0.8.18", "lazy_static", "memoffset 0.6.4", "scopeguard", @@ -1249,6 +1258,11 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "git+https://github.com/ryoqun/crossbeam?rev=438ec7cdaf6c6a8f593e50344c725fef8a13c7a5#438ec7cdaf6c6a8f593e50344c725fef8a13c7a5" + [[package]] name = "crunchy" version = "0.2.2" @@ -1367,6 +1381,15 @@ dependencies = [ "rusticata-macros", ] +[[package]] +name = "deranged" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eb30d70a07a3b04884d2677f06bec33509dc67ca60d92949e5535352d3191dc" +dependencies = [ + "powerfmt", +] + [[package]] name = "derivation-path" version = "0.2.0" @@ -2116,6 +2139,12 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "histogram" version = "0.6.9" @@ -3180,15 +3209,6 @@ dependencies = [ "syn 2.0.52", ] -[[package]] -name = "num_threads" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aba1801fb138d8e85e11d0fc70baf4fe1cdfffda7c6cd34a854905df588e5ed0" -dependencies = [ - "libc", -] - [[package]] name = "number_prefix" version = "0.4.0" @@ -3550,6 +3570,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc59d1bcc64fc5d021d67521f818db868368028108d37f0e98d74e33f68297b5" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.8" @@ -3670,6 +3696,32 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "procfs" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" +dependencies = [ + "bitflags 2.4.2", + "chrono", + "flate2", + "hex", + "lazy_static", + "procfs-core", + "rustix", +] + +[[package]] +name = "procfs-core" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" +dependencies = [ + "bitflags 2.4.2", + "chrono", + "hex", +] + [[package]] name = "prost" version = "0.11.9" @@ -3907,7 +3959,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", - "crossbeam-utils", + "crossbeam-utils 0.8.18", ] [[package]] @@ -4134,9 +4186,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.31" +version = "0.38.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" dependencies = [ "bitflags 2.4.2", "errno", @@ -4360,8 +4412,13 @@ version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07ff71d2c147a7b57362cead5e22f772cd52f6ab31cfcd9edcd7f6aeb2a0afbe" dependencies = [ + "base64 0.13.1", + "chrono", + "hex", "serde", + "serde_json", "serde_with_macros", + "time", ] [[package]] @@ -6538,6 +6595,7 @@ name = "solana-unified-scheduler-logic" version = "2.0.0" dependencies = [ "assert_matches", + "qualifier_attr", "solana-sdk", "static_assertions", ] @@ -6547,17 +6605,24 @@ name = "solana-unified-scheduler-pool" version = "2.0.0" dependencies = [ "assert_matches", + "cpu-time", "crossbeam-channel", "dashmap", "derivative", "log", + "procfs", "qualifier_attr", + "rustix", + "serde_json", "solana-ledger", + "solana-measure", + "solana-metrics", "solana-program-runtime", "solana-runtime", "solana-sdk", "solana-unified-scheduler-logic", "solana-vote", + "tikv-jemallocator", ] [[package]] @@ -7210,21 +7275,32 @@ dependencies = [ [[package]] name = "time" -version = "0.3.9" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd" +checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e" dependencies = [ + "deranged", "itoa", - "libc", - "num_threads", + "powerfmt", + "serde", + "time-core", "time-macros", ] +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + [[package]] name = "time-macros" -version = "0.2.4" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" +checksum = "26197e33420244aeb70c3e8c78376ca46571bc4e701e4791c2cd9f57dcb3a43f" +dependencies = [ + "time-core", +] [[package]] name = "tiny-bip39" diff --git a/programs/sbf/Cargo.toml b/programs/sbf/Cargo.toml index 830b57d8e5359b..349a0e4867c853 100644 --- a/programs/sbf/Cargo.toml +++ b/programs/sbf/Cargo.toml @@ -166,6 +166,8 @@ members = [ targets = ["x86_64-unknown-linux-gnu"] [patch.crates-io] +crossbeam-channel = { git = "https://github.com/ryoqun/crossbeam", rev = "438ec7cdaf6c6a8f593e50344c725fef8a13c7a5" } + # We include the following crates as our dependencies from crates.io: # # * spl-associated-token-account diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index 33e4fa63bc0d19..1332e8acf490b6 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -823,7 +823,7 @@ pub struct Bank { epoch_reward_status: EpochRewardStatus, - transaction_processor: TransactionBatchProcessor, + pub transaction_processor: TransactionBatchProcessor, check_program_modification_slot: bool, diff --git a/runtime/src/bank_forks.rs b/runtime/src/bank_forks.rs index 46d9516d9e7b2e..6752b3b5e84636 100644 --- a/runtime/src/bank_forks.rs +++ b/runtime/src/bank_forks.rs @@ -5,7 +5,8 @@ use { accounts_background_service::{AbsRequestSender, SnapshotRequest, SnapshotRequestKind}, bank::{epoch_accounts_hash_utils, Bank, SquashTiming}, installed_scheduler_pool::{ - BankWithScheduler, InstalledSchedulerPoolArc, SchedulingContext, + BankWithScheduler, DefaultScheduleExecutionArg, InstalledSchedulerPoolArc, + SchedulingContext, }, snapshot_config::SnapshotConfig, }, @@ -16,6 +17,7 @@ use { solana_sdk::{ clock::{Epoch, Slot}, hash::Hash, + scheduling::SchedulingMode, timing, }, std::{ @@ -81,7 +83,13 @@ pub struct BankForks { last_accounts_hash_slot: Slot, in_vote_only_mode: Arc, highest_slot_at_startup: Slot, - scheduler_pool: Option, + scheduler_pool: Option>, +} + +impl Drop for BankForks { + fn drop(&mut self) { + info!("BankForks::drop(): successfully dropped"); + } } impl Index for BankForks { @@ -215,7 +223,10 @@ impl BankForks { self[self.root()].clone() } - pub fn install_scheduler_pool(&mut self, pool: InstalledSchedulerPoolArc) { + pub fn install_scheduler_pool( + &mut self, + pool: InstalledSchedulerPoolArc, + ) { info!("Installed new scheduler_pool into bank_forks: {:?}", pool); assert!( self.scheduler_pool.replace(pool).is_none(), @@ -223,6 +234,27 @@ impl BankForks { ); } + pub fn uninstall_scheduler_pool(&mut self) { + // hint scheduler pool to cut circular references of Arc + if let Some(sp) = self.scheduler_pool.take() { + sp.uninstalled_from_bank_forks(); + } + } + + pub fn prepare_to_drop(&mut self) { + let root_bank = self.root_bank(); + // drop all non root BankWithScheduler, which causes all schedulers wind down. 
+ self.banks.clear(); + self.uninstall_scheduler_pool(); + // this cuts circular references of BankForks... + root_bank + .transaction_processor + .program_cache + .write() + .unwrap() + .unset_fork_graph(); + } + pub fn insert(&mut self, mut bank: Bank) -> BankWithScheduler { if self.root.load(Ordering::Relaxed) < self.highest_slot_at_startup { bank.check_program_modification_slot(); @@ -230,7 +262,7 @@ impl BankForks { let bank = Arc::new(bank); let bank = if let Some(scheduler_pool) = &self.scheduler_pool { - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::new(SchedulingMode::BlockVerification, bank.clone()); let scheduler = scheduler_pool.take_scheduler(context); BankWithScheduler::new(bank, Some(scheduler)) } else { diff --git a/runtime/src/installed_scheduler_pool.rs b/runtime/src/installed_scheduler_pool.rs index aaf3ea98f1b9aa..6dde48f1e424b3 100644 --- a/runtime/src/installed_scheduler_pool.rs +++ b/runtime/src/installed_scheduler_pool.rs @@ -25,21 +25,25 @@ use { log::*, solana_program_runtime::timings::ExecuteTimings, solana_sdk::{ + clock::Slot, hash::Hash, - slot_history::Slot, + scheduling::{SchedulingMode, WithSchedulingMode}, transaction::{Result, SanitizedTransaction}, }, std::{ + borrow::Borrow, fmt::Debug, ops::Deref, sync::{Arc, RwLock}, + thread, }, }; #[cfg(feature = "dev-context-only-utils")] use {mockall::automock, qualifier_attr::qualifiers}; -pub trait InstalledSchedulerPool: Send + Sync + Debug { - fn take_scheduler(&self, context: SchedulingContext) -> InstalledSchedulerBox; +pub trait InstalledSchedulerPool: Send + Sync + Debug { + fn take_scheduler(&self, context: SchedulingContext) -> Box>; + fn uninstalled_from_bank_forks(self: Arc); } #[cfg_attr(doc, aquamarine::aquamarine)] @@ -97,15 +101,15 @@ pub trait InstalledSchedulerPool: Send + Sync + Debug { feature = "dev-context-only-utils", allow(unused_attributes, clippy::needless_lifetimes) )] -pub trait InstalledScheduler: Send + Sync + Debug + 'static { +pub trait InstalledScheduler: Send + Sync + Debug + 'static { fn id(&self) -> SchedulerId; fn context(&self) -> &SchedulingContext; // Calling this is illegal as soon as wait_for_termination is called. fn schedule_execution<'a>( &'a self, - transaction_with_index: &'a (&'a SanitizedTransaction, usize), - ); + transaction_with_index: SEA::TransactionWithIndex<'a>, + ) -> Result<()>; /// Wait for a scheduler to terminate after processing. /// @@ -135,13 +139,47 @@ pub trait UninstalledScheduler: Send + Sync + Debug + 'static { fn return_to_pool(self: Box); } -pub type InstalledSchedulerBox = Box; +pub type InstalledSchedulerBox = Box>; pub type UninstalledSchedulerBox = Box; -pub type InstalledSchedulerPoolArc = Arc; +pub type InstalledSchedulerPoolArc = Arc>; pub type SchedulerId = u64; +pub trait WithTransactionAndIndex: Send + Sync + Debug { + fn with_transaction_and_index( + &self, + callback: impl FnOnce(&SanitizedTransaction, usize) -> R, + ) -> R; +} + +impl< + T: Send + Sync + Debug + Borrow, + U: Send + Sync + Debug + Borrow, + Z: Send + Sync + Debug + Deref, + > WithTransactionAndIndex for Z +{ + fn with_transaction_and_index( + &self, + callback: impl FnOnce(&SanitizedTransaction, usize) -> R, + ) -> R { + callback(self.0.borrow(), *self.1.borrow()) + } +} + +pub trait ScheduleExecutionArg: Send + Sync + Debug + 'static { + // GAT is used to make schedule_execution parametric even supporting references + // under the object-safety req. of InstalledScheduler trait... 
+ type TransactionWithIndex<'tx>: WithTransactionAndIndex; +} + +#[derive(Debug, Default, Clone)] +pub struct DefaultScheduleExecutionArg; + +impl ScheduleExecutionArg for DefaultScheduleExecutionArg { + type TransactionWithIndex<'tx> = &'tx (&'tx SanitizedTransaction, usize); +} + /// A small context to propagate a bank and its scheduling mode to the scheduler subsystem. /// /// Note that this isn't called `SchedulerContext` because the contexts aren't associated with @@ -153,13 +191,19 @@ pub type SchedulerId = u64; /// `SchedulingContext`s. #[derive(Clone, Debug)] pub struct SchedulingContext { - // mode: SchedulingMode, // this will be added later. + mode: SchedulingMode, bank: Arc, } +impl WithSchedulingMode for SchedulingContext { + fn mode(&self) -> SchedulingMode { + self.mode + } +} + impl SchedulingContext { - pub fn new(bank: Arc) -> Self { - Self { bank } + pub fn new(mode: SchedulingMode, bank: Arc) -> Self { + Self { mode, bank } } pub fn bank(&self) -> &Arc { @@ -246,9 +290,14 @@ impl BankWithScheduler { pub(crate) fn new(bank: Arc, scheduler: Option) -> Self { if let Some(bank_in_context) = scheduler .as_ref() - .map(|scheduler| scheduler.context().bank()) + .map(|scheduler| scheduler.context().bank().clone()) { - assert!(Arc::ptr_eq(&bank, bank_in_context)); + assert!( + Arc::ptr_eq(&bank, &bank_in_context), + "different bank!? {} {}", + bank.slot(), + bank_in_context.slot() + ); } Self { @@ -290,7 +339,7 @@ impl BankWithScheduler { pub fn schedule_transaction_executions<'a>( &self, transactions_with_indexes: impl ExactSizeIterator, - ) { + ) -> Result<()> { trace!( "schedule_transaction_executions(): {} txs", transactions_with_indexes.len() @@ -300,8 +349,10 @@ impl BankWithScheduler { let scheduler = scheduler_guard.as_ref().unwrap(); for (sanitized_transaction, &index) in transactions_with_indexes { - scheduler.schedule_execution(&(sanitized_transaction, index)); + scheduler.schedule_execution(&(sanitized_transaction, index))?; } + + Ok(()) } // take needless &mut only to communicate its semantic mutability to humans... 
@@ -356,7 +407,7 @@ impl BankWithSchedulerInner { "wait_for_scheduler_termination(slot: {}, reason: {:?}): started at {:?}...", bank.slot(), reason, - std::thread::current(), + thread::current(), ); let mut scheduler = scheduler.write().unwrap(); @@ -378,14 +429,14 @@ impl BankWithSchedulerInner { reason, was_noop, result_with_timings.as_ref().map(|(result, _)| result), - std::thread::current(), + thread::current(), ); result_with_timings } fn drop_scheduler(&self) { - if std::thread::panicking() { + if thread::panicking() { error!( "BankWithSchedulerInner::drop_scheduler(): slot: {} skipping due to already panicking...", self.bank.slot(), @@ -438,7 +489,7 @@ mod tests { fn setup_mocked_scheduler_with_extra( bank: Arc, is_dropped_flags: impl Iterator, - f: Option, + f: Option)>, ) -> InstalledSchedulerBox { let mut mock = MockInstalledScheduler::new(); let seq = Arc::new(Mutex::new(Sequence::new())); @@ -446,7 +497,10 @@ mod tests { mock.expect_context() .times(1) .in_sequence(&mut seq.lock().unwrap()) - .return_const(SchedulingContext::new(bank)); + .return_const(SchedulingContext::new( + SchedulingMode::BlockVerification, + bank, + )); for wait_reason in is_dropped_flags { let seq_cloned = seq.clone(); @@ -482,7 +536,7 @@ mod tests { setup_mocked_scheduler_with_extra( bank, is_dropped_flags, - None:: ()>, + None::) -> ()>, ) } @@ -538,12 +592,14 @@ mod tests { Some(setup_mocked_scheduler_with_extra( bank, [false].into_iter(), - Some(|mocked: &mut MockInstalledScheduler| { - mocked - .expect_pause_for_recent_blockhash() - .times(1) - .returning(|| ()); - }), + Some( + |mocked: &mut MockInstalledScheduler| { + mocked + .expect_pause_for_recent_blockhash() + .times(1) + .returning(|| ()); + }, + ), )), ); goto_end_of_slot_with_scheduler(&bank); @@ -569,15 +625,18 @@ mod tests { let mocked_scheduler = setup_mocked_scheduler_with_extra( bank.clone(), [true].into_iter(), - Some(|mocked: &mut MockInstalledScheduler| { - mocked - .expect_schedule_execution() - .times(1) - .returning(|(_, _)| ()); - }), + Some( + |mocked: &mut MockInstalledScheduler| { + mocked + .expect_schedule_execution() + .times(1) + .returning(|(_, _)| Ok(())); + }, + ), ); let bank = BankWithScheduler::new(bank, Some(mocked_scheduler)); - bank.schedule_transaction_executions([(&tx0, &0)].into_iter()); + bank.schedule_transaction_executions([(&tx0, &0)].into_iter()) + .unwrap(); } } diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml index 034a98623419a8..a81c62115a6935 100644 --- a/sdk/Cargo.toml +++ b/sdk/Cargo.toml @@ -69,7 +69,7 @@ serde = { workspace = true } serde_bytes = { workspace = true } serde_derive = { workspace = true } serde_json = { workspace = true, optional = true } -serde_with = { workspace = true, features = ["macros"] } +serde_with = { workspace = true, features = ["macros", "alloc"] } sha2 = { workspace = true } sha3 = { workspace = true, optional = true } siphasher = { workspace = true } diff --git a/sdk/src/lib.rs b/sdk/src/lib.rs index 12cc8ac7a232bc..64079236c1eb35 100644 --- a/sdk/src/lib.rs +++ b/sdk/src/lib.rs @@ -98,6 +98,7 @@ pub mod reserved_account_keys; pub mod reward_info; pub mod reward_type; pub mod rpc_port; +pub mod scheduling; pub mod secp256k1_instruction; pub mod shred_version; pub mod signature; diff --git a/sdk/src/scheduling.rs b/sdk/src/scheduling.rs new file mode 100644 index 00000000000000..aa39f7a8b08e8d --- /dev/null +++ b/sdk/src/scheduling.rs @@ -0,0 +1,11 @@ +//! 
Primitive types relevant to transaction scheduling +#![cfg(feature = "full")] + +#[derive(Debug, Clone, Copy)] +pub enum SchedulingMode { + BlockVerification, +} + +pub trait WithSchedulingMode { + fn mode(&self) -> SchedulingMode; +} diff --git a/sdk/src/transaction/sanitized.rs b/sdk/src/transaction/sanitized.rs index fe951c7ff57147..d01e56ed5be8d7 100644 --- a/sdk/src/transaction/sanitized.rs +++ b/sdk/src/transaction/sanitized.rs @@ -38,11 +38,17 @@ pub struct SanitizedTransaction { } /// Set of accounts that must be locked for safe transaction processing -#[derive(Debug, Clone, Default, Eq, PartialEq)] +use serde_with::serde_as; +use serde_with::DisplayFromStr; + +#[serde_as] +#[derive(Debug, Clone, Default, Eq, PartialEq, Serialize)] pub struct TransactionAccountLocks<'a> { /// List of readonly account key locks + #[serde_as(as = "Vec")] pub readonly: Vec<&'a Pubkey>, /// List of writable account key locks + #[serde_as(as = "Vec")] pub writable: Vec<&'a Pubkey>, } diff --git a/unified-scheduler-logic/Cargo.toml b/unified-scheduler-logic/Cargo.toml index b05cec41a7c862..e1dd176a2bd510 100644 --- a/unified-scheduler-logic/Cargo.toml +++ b/unified-scheduler-logic/Cargo.toml @@ -11,5 +11,25 @@ edition = { workspace = true } [dependencies] assert_matches = { workspace = true } +qualifier_attr = { workspace = true } solana-sdk = { workspace = true } static_assertions = { workspace = true } +#[[bench]] +#name = "bench-with-iai-callgrind" +#harness = false + +[dev-dependencies] +# See order-crates-for-publishing.py for using this unusual `path = "."` +solana-unified-scheduler-logic = { path = ".", features = ["dev-context-only-utils"] } +triomphe = { version = "0.1.11" } + +[target."cfg(target_os = \"linux\")".dev-dependencies] +iai-callgrind = { version = "0.10.2", features = [ + "client_requests" +] } + +[target."cfg(not(target_os = \"linux\"))".dev-dependencies] +iai-callgrind = { version = "0.10.2" } + +[features] +dev-context-only-utils = [] diff --git a/unified-scheduler-logic/benches/bench-with-iai-callgrind.rs b/unified-scheduler-logic/benches/bench-with-iai-callgrind.rs new file mode 100644 index 00000000000000..a267874ef51cbb --- /dev/null +++ b/unified-scheduler-logic/benches/bench-with-iai-callgrind.rs @@ -0,0 +1,668 @@ +#![cfg(feature = "dummy")] +#![allow(clippy::arithmetic_side_effects)] + +#[global_allocator] +static GLOBAL: B = B; + +struct A(T); + +unsafe impl std::marker::Sync for A {} + +static LOCAL_ALLOCATOR: A> = A(std::cell::UnsafeCell::new(BL::new())); + +struct BL { + cursor: *mut u8, + limit: *mut u8, + bytes: [u8; Self::BLOCK_SIZE], +} + +impl BL { + const BLOCK_SIZE: usize = 100_000_000; + + const fn new() -> Self { + Self { + cursor: usize::max_value() as _, + limit: usize::max_value() as _, + bytes: [0; Self::BLOCK_SIZE], + } + } + + #[inline(always)] + pub fn alloc2(&mut self, bytes: usize) -> *mut u8 { + loop { + self.cursor = unsafe { (((self.cursor.sub(bytes)) as usize) & !15) as _ }; + if self.cursor >= self.limit { + return self.cursor; + } else if self.limit == usize::max_value() as _ { + self.limit = self.bytes.as_mut_ptr(); + self.cursor = unsafe { self.limit.add(Self::BLOCK_SIZE) }; + continue; + } else { + panic!("out of memory form BL"); + } + } + } +} + +use std::{ + alloc::{GlobalAlloc, Layout}, + hint::black_box, +}; + +struct B; + +unsafe impl GlobalAlloc for B { + #[inline(always)] + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + (*LOCAL_ALLOCATOR.0.get()).alloc2(layout.size()) + } + + #[inline(always)] + unsafe fn dealloc(&self, _ptr: 
*mut u8, _layout: Layout) {} +} + +use { + assert_matches::assert_matches, + iai_callgrind::{ + client_requests::callgrind::toggle_collect, library_benchmark, library_benchmark_group, + main, + }, + solana_sdk::{ + instruction::{AccountMeta, Instruction}, + message::Message, + pubkey::Pubkey, + signature::Signer, + signer::keypair::Keypair, + transaction::{SanitizedTransaction, Transaction}, + }, + solana_unified_scheduler_logic::{SchedulingStateMachine, UsageQueue}, +}; + +#[library_benchmark] +#[bench::min(0)] +#[bench::one(1)] +#[bench::two(2)] +#[bench::three(3)] +#[bench::normal(32)] +#[bench::large(64)] +#[bench::max(128)] +fn bench_schedule_task(account_count: usize) { + toggle_collect(); + let mut accounts = vec![]; + for i in 0..account_count { + if i % 2 == 0 { + accounts.push(AccountMeta::new(Keypair::new().pubkey(), true)); + } else { + accounts.push(AccountMeta::new_readonly(Keypair::new().pubkey(), true)); + } + } + + let payer = Keypair::new(); + let memo_ix = Instruction { + program_id: Pubkey::default(), + accounts, + data: vec![0x00], + }; + let mut ixs = vec![]; + for _ in 0..1 { + ixs.push(memo_ix.clone()); + } + let msg = Message::new(&ixs, Some(&payer.pubkey())); + let txn = Transaction::new_unsigned(msg); + //panic!("{:?}", txn); + //assert_eq!(wire_txn.len(), 3); + let tx0 = SanitizedTransaction::from_transaction_for_tests(txn); + let task = SchedulingStateMachine::create_task(tx0, 0, &mut |_| UsageQueue::default()); + let mut scheduler = + unsafe { SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() }; + toggle_collect(); + let task = scheduler.schedule_task(task); + toggle_collect(); + task.unwrap(); +} + +#[library_benchmark] +#[bench::min(0)] +#[bench::one(1)] +#[bench::two(2)] +#[bench::three(3)] +#[bench::normal(32)] +#[bench::large(64)] +#[bench::max(128)] +fn bench_drop_task(account_count: usize) { + toggle_collect(); + let mut accounts = vec![]; + for _ in 0..account_count { + accounts.push(AccountMeta::new(Keypair::new().pubkey(), true)); + } + + let payer = Keypair::new(); + let memo_ix = Instruction { + program_id: Pubkey::default(), + accounts, + data: vec![0x00], + }; + let mut ixs = vec![]; + for _ in 0..1 { + ixs.push(memo_ix.clone()); + } + let msg = Message::new(&ixs, Some(&payer.pubkey())); + let txn = Transaction::new_unsigned(msg); + //panic!("{:?}", txn); + //assert_eq!(wire_txn.len(), 3); + let tx0 = SanitizedTransaction::from_transaction_for_tests(txn); + let task = SchedulingStateMachine::create_task(tx0, 0, &mut |_| UsageQueue::default()); + + toggle_collect(); + drop(task); + toggle_collect(); +} + +#[library_benchmark] +#[bench::one(1)] +fn bench_insert_task(account_count: usize) { + toggle_collect(); + let mut accounts = vec![]; + for _ in 0..account_count { + accounts.push(AccountMeta::new(Keypair::new().pubkey(), true)); + } + + let payer = Keypair::new(); + let memo_ix = Instruction { + program_id: Pubkey::default(), + accounts, + data: vec![0x00], + }; + let mut ixs = vec![]; + for _ in 0..1 { + ixs.push(memo_ix.clone()); + } + let msg = Message::new(&ixs, Some(&payer.pubkey())); + let txn = Transaction::new_unsigned(msg); + //panic!("{:?}", txn); + //assert_eq!(wire_txn.len(), 3); + let tx0 = SanitizedTransaction::from_transaction_for_tests(txn); + let task = SchedulingStateMachine::create_task(tx0, 0, &mut |_| UsageQueue::default()); + + let mut b = std::collections::BTreeMap::new(); + toggle_collect(); + b.insert(task.index, task.clone()); + b.insert(task.index + 1, task.clone()); + 
b.remove(&task.index); + b.remove(&(task.index + 1)); + //b.insert(task.index + 4, task); + toggle_collect(); + drop(b); +} + +#[library_benchmark] +#[bench::arc_new(1)] +#[bench::arc_new_and_clone(2)] +#[bench::rc_new(3)] +#[bench::rc_new_and_clone(4)] +fn bench_arc(account_count: usize) { + toggle_collect(); + + { + let b; + match account_count { + 1 => { + toggle_collect(); + b = black_box(std::sync::Arc::new(black_box(3_u32))); + } + 2 => { + b = black_box(std::sync::Arc::new(black_box(3_u32))); + toggle_collect(); + std::mem::forget(black_box(b.clone())); + } + _ => { + let b; + match account_count { + 3 => { + toggle_collect(); + b = black_box(std::rc::Rc::new(black_box(3_u32))); + } + 4 => { + toggle_collect(); + b = black_box(std::rc::Rc::new(black_box(3_u32))); + black_box(b.clone()); + } + _ => panic!(), + } + toggle_collect(); + drop(b); + return; + } + } + toggle_collect(); + drop(b); + } +} + +#[library_benchmark] +#[bench::arc_new(1)] +#[bench::arc_new_and_clone(2)] +#[bench::rc_new(3)] +#[bench::rc_new_and_clone(4)] +fn bench_triomphe_arc(account_count: usize) { + toggle_collect(); + + { + let b; + match account_count { + 1 => { + toggle_collect(); + b = black_box(triomphe::Arc::new(black_box(3_u32))); + } + 2 => { + b = black_box(triomphe::Arc::new(black_box(3_u32))); + toggle_collect(); + std::mem::forget(black_box(b.clone())); + } + _ => { + let b; + match account_count { + 3 => { + toggle_collect(); + b = black_box(std::rc::Rc::new(black_box(3_u32))); + } + 4 => { + toggle_collect(); + b = black_box(std::rc::Rc::new(black_box(3_u32))); + black_box(b.clone()); + } + _ => panic!(), + } + toggle_collect(); + drop(b); + return; + } + } + toggle_collect(); + drop(b); + } +} + +#[library_benchmark] +#[bench::one(1)] +fn bench_heaviest_task(account_count: usize) { + toggle_collect(); + let mut accounts = vec![]; + for _ in 0..account_count { + accounts.push(AccountMeta::new(Keypair::new().pubkey(), true)); + } + + let payer = Keypair::new(); + let memo_ix = Instruction { + program_id: Pubkey::default(), + accounts, + data: vec![0x00], + }; + let mut ixs = vec![]; + for _ in 0..1 { + ixs.push(memo_ix.clone()); + } + let msg = Message::new(&ixs, Some(&payer.pubkey())); + let txn = Transaction::new_unsigned(msg); + //panic!("{:?}", txn); + //assert_eq!(wire_txn.len(), 3); + let tx0 = SanitizedTransaction::from_transaction_for_tests(txn); + let task = SchedulingStateMachine::create_task(tx0, 0, &mut |_| UsageQueue::default()); + + let mut b = std::collections::BTreeMap::new(); + b.insert(task.index, task.clone()); + b.insert(task.index + 1, task.clone()); + b.insert(task.index + 2, task.clone()); + let mut c = std::collections::BTreeMap::new(); + c.insert(task.index + 3, task.clone()); + c.insert(task.index + 4, task.clone()); + c.insert(task.index + 5, task.clone()); + + toggle_collect(); + let d = b.first_key_value(); + let e = c.first_key_value(); + let f = std::cmp::min_by(d, e, |x, y| x.map(|x| x.0).cmp(&y.map(|y| y.0))).map(|x| x.1); + assert_matches!(f.map(|f| f.task_index()), Some(0)); + toggle_collect(); + dbg!(f); + + drop(b); +} + +#[library_benchmark] +#[bench::min(0)] +#[bench::one(1)] +#[bench::two(2)] +#[bench::three(3)] +#[bench::normal(32)] +#[bench::large(64)] +#[bench::max(128)] +fn bench_schedule_task_conflicting(account_count: usize) { + toggle_collect(); + let mut accounts = vec![]; + for _ in 0..account_count { + accounts.push(AccountMeta::new(Keypair::new().pubkey(), true)); + } + + let payer = Keypair::new(); + let memo_ix = Instruction { + program_id: 
Pubkey::default(), + accounts, + data: vec![0x00], + }; + let mut ixs = vec![]; + for _ in 0..1 { + ixs.push(memo_ix.clone()); + } + let msg = Message::new(&ixs, Some(&payer.pubkey())); + let txn = Transaction::new_unsigned(msg); + //panic!("{:?}", txn); + //assert_eq!(wire_txn.len(), 3); + let tx0 = SanitizedTransaction::from_transaction_for_tests(txn); + let task = SchedulingStateMachine::create_task(tx0, 0, &mut |_| UsageQueue::default()); + let mut scheduler = + unsafe { SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() }; + let task = scheduler.schedule_task(task).unwrap(); + let task2 = task.clone(); + toggle_collect(); + assert_matches!(scheduler.schedule_task(task2), None); + toggle_collect(); + drop(task); +} + +#[library_benchmark] +#[bench::min(3, 0)] +#[bench::one(3, 1)] +#[bench::two(2, 2)] +#[bench::three(3, 3)] +#[bench::normal(3, 32)] +#[bench::large(3, 64)] +#[bench::large2(3, 128)] +#[bench::large3(3, 256)] +#[bench::large4(3, 1024)] +#[bench::large5(3, 2048)] +fn bench_schedule_task_conflicting_hot(account_count: usize, task_count: usize) { + toggle_collect(); + let mut accounts = vec![]; + for _ in 0..account_count { + accounts.push(AccountMeta::new(Keypair::new().pubkey(), true)); + } + + let payer = Keypair::new(); + let memo_ix = Instruction { + program_id: Pubkey::default(), + accounts, + data: vec![0x00], + }; + let mut ixs = vec![]; + for _ in 0..1 { + ixs.push(memo_ix.clone()); + } + let msg = Message::new(&ixs, Some(&payer.pubkey())); + let txn = Transaction::new_unsigned(msg); + //panic!("{:?}", txn); + //assert_eq!(wire_txn.len(), 3); + let tx0 = SanitizedTransaction::from_transaction_for_tests(txn); + + let mut scheduler = + unsafe { SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() }; + + let mut usage_queues: std::collections::HashMap = + std::collections::HashMap::new(); + let task = SchedulingStateMachine::create_task(tx0.clone(), 0, &mut |address| { + usage_queues.entry(address).or_default().clone() + }); + scheduler.schedule_task(task).unwrap(); + for i in 1..=task_count { + let task = SchedulingStateMachine::create_task(tx0.clone(), i, &mut |address| { + usage_queues.entry(address).or_default().clone() + }); + assert_matches!(scheduler.schedule_task(task), None); + } + + let task = SchedulingStateMachine::create_task(tx0.clone(), task_count + 1, &mut |address| { + usage_queues.entry(address).or_default().clone() + }); + let task2 = task.clone(); + + toggle_collect(); + assert_matches!(scheduler.schedule_task(task2), None); + toggle_collect(); + + drop(task); +} + +#[library_benchmark] +#[bench::min(0)] +#[bench::one(1)] +#[bench::two(2)] +#[bench::three(3)] +#[bench::normal(32)] +#[bench::large(64)] +#[bench::max(128)] +fn bench_deschedule_task_conflicting(account_count: usize) { + toggle_collect(); + let mut accounts = vec![]; + for _ in 0..account_count { + accounts.push(AccountMeta::new(Keypair::new().pubkey(), true)); + } + + let payer = Keypair::new(); + let memo_ix = Instruction { + program_id: Pubkey::default(), + accounts, + data: vec![0x00], + }; + let mut ixs = vec![]; + for _ in 0..1 { + ixs.push(memo_ix.clone()); + } + let msg = Message::new(&ixs, Some(&payer.pubkey())); + let txn = Transaction::new_unsigned(msg); + //panic!("{:?}", txn); + //assert_eq!(wire_txn.len(), 3); + let tx0 = SanitizedTransaction::from_transaction_for_tests(txn); + let task = SchedulingStateMachine::create_task(tx0, 0, &mut |_| UsageQueue::default()); + let mut scheduler = + unsafe { 
SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() }; + let task = scheduler.schedule_task(task).unwrap(); + assert_matches!(scheduler.schedule_task(task.clone()), None); + + toggle_collect(); + scheduler.deschedule_task(&task); + toggle_collect(); + + drop(task); +} + +#[library_benchmark] +#[bench::min(0)] +#[bench::one(1)] +#[bench::two(2)] +#[bench::three(3)] +#[bench::normal(32)] +#[bench::large(64)] +#[bench::max(128)] +fn bench_schedule_unblocked_task(account_count: usize) { + toggle_collect(); + let mut accounts = vec![]; + for _ in 0..account_count { + accounts.push(AccountMeta::new(Keypair::new().pubkey(), true)); + } + + let payer = Keypair::new(); + let memo_ix = Instruction { + program_id: Pubkey::default(), + accounts, + data: vec![0x00], + }; + let mut ixs = vec![]; + for _ in 0..1 { + ixs.push(memo_ix.clone()); + } + let msg = Message::new(&ixs, Some(&payer.pubkey())); + let txn = Transaction::new_unsigned(msg); + //panic!("{:?}", txn); + //assert_eq!(wire_txn.len(), 3); + let tx0 = SanitizedTransaction::from_transaction_for_tests(txn); + let mut usage_queues: std::collections::HashMap = + std::collections::HashMap::new(); + let task = SchedulingStateMachine::create_task(tx0.clone(), 0, &mut |address| { + usage_queues.entry(address).or_default().clone() + }); + let task2 = SchedulingStateMachine::create_task(tx0, 1, &mut |address| { + usage_queues.entry(address).or_default().clone() + }); + let mut scheduler = + unsafe { SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() }; + let task = scheduler.schedule_task(task).unwrap(); + assert_matches!(scheduler.schedule_task(task2), None); + scheduler.deschedule_task(&task); + toggle_collect(); + let retried_task = scheduler.schedule_next_unblocked_task(); + toggle_collect(); + let retried_task = retried_task.unwrap(); + assert_eq!(task.transaction(), retried_task.transaction()); + drop(task); +} + +#[library_benchmark] +#[bench::min(0)] +#[bench::one(1)] +#[bench::two(2)] +#[bench::three(3)] +#[bench::small(16)] +#[bench::normal(32)] +#[bench::large(64)] +//#[bench::max(128)] +fn bench_end_to_end_worst(account_count: usize) { + toggle_collect(); + let mut accounts = vec![]; + for _ in 0..account_count { + accounts.push(AccountMeta::new(Keypair::new().pubkey(), true)); + } + + let payer = Keypair::new(); + let memo_ix = Instruction { + program_id: Pubkey::default(), + accounts, + data: vec![0x00], + }; + let mut ixs = vec![]; + for _ in 0..1 { + ixs.push(memo_ix.clone()); + } + let msg = Message::new(&ixs, Some(&payer.pubkey())); + let txn = Transaction::new_unsigned(msg); + //panic!("{:?}", txn); + //assert_eq!(wire_txn.len(), 3); + let tx0 = SanitizedTransaction::from_transaction_for_tests(txn); + let mut usage_queues: std::collections::HashMap = + std::collections::HashMap::new(); + let task = SchedulingStateMachine::create_task(tx0.clone(), 0, &mut |address| { + usage_queues.entry(address).or_default().clone() + }); + let mut scheduler = + unsafe { SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() }; + + let task = scheduler.schedule_task(task).unwrap(); + for i in 1..account_count { + let mut accounts = vec![memo_ix.accounts[i].clone()]; + //let mut accounts = vec![AccountMeta::new(Keypair::new().pubkey(), true)]; + for _ in 0..account_count { + accounts.push(AccountMeta::new(Keypair::new().pubkey(), true)); + } + + let payer = Keypair::new(); + let memo_ix = Instruction { + program_id: Pubkey::default(), + accounts, + data: vec![0x00], + }; + 
let ixs = vec![memo_ix]; + let msg = Message::new(&ixs, Some(&payer.pubkey())); + let txn = Transaction::new_unsigned(msg); + //panic!("{:?}", txn); + //assert_eq!(wire_txn.len(), 3); + let tx0 = SanitizedTransaction::from_transaction_for_tests(txn); + let task2 = SchedulingStateMachine::create_task(tx0, i, &mut |address| { + usage_queues.entry(address).or_default().clone() + }); + toggle_collect(); + let scheduled_task = scheduler.schedule_task(task2.clone()); + toggle_collect(); + drop(scheduled_task); + } + + toggle_collect(); + scheduler.deschedule_task(&task); + if let Some(_cc) = account_count.checked_sub(1) { + //assert_eq!(scheduler.unblocked_task_count(), cc); + //let mut c = 0; + while let Some(retried_task) = scheduler.schedule_next_unblocked_task() { + //c += 1; + //scheduler.deschedule_task(&retried_task); + toggle_collect(); + drop::(retried_task); + toggle_collect(); + } + //assert_eq!(c, cc); + } + toggle_collect(); + + //assert_eq!(task2.task_index(), retried_task.task_index()); + drop(task); +} + +#[library_benchmark] +#[bench::min(0)] +#[bench::one(1)] +#[bench::two(2)] +#[bench::three(3)] +#[bench::normal(32)] +#[bench::large(64)] +#[bench::max(128)] +fn bench_deschedule_task(account_count: usize) { + toggle_collect(); + let mut accounts = vec![]; + for i in 0..account_count { + if i % 2 == 0 { + accounts.push(AccountMeta::new(Keypair::new().pubkey(), true)); + } else { + accounts.push(AccountMeta::new_readonly(Keypair::new().pubkey(), true)); + } + } + + let payer = Keypair::new(); + let memo_ix = Instruction { + program_id: Pubkey::default(), + accounts, + data: vec![0x00], + }; + let mut ixs = vec![]; + for _ in 0..1 { + ixs.push(memo_ix.clone()); + } + let msg = Message::new(&ixs, Some(&payer.pubkey())); + let txn = Transaction::new_unsigned(msg); + //panic!("{:?}", txn); + //assert_eq!(wire_txn.len(), 3); + let tx0 = SanitizedTransaction::from_transaction_for_tests(txn); + let task = SchedulingStateMachine::create_task(tx0, 0, &mut |_| UsageQueue::default()); + let mut scheduler = + unsafe { SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() }; + let task = scheduler.schedule_task(task).unwrap(); + toggle_collect(); + scheduler.deschedule_task(&task); + toggle_collect(); + drop(task); +} + +library_benchmark_group!( + name = bench_scheduling_state_machine; + benchmarks = bench_end_to_end_worst, bench_arc, bench_triomphe_arc, bench_drop_task, bench_insert_task, bench_heaviest_task, bench_schedule_task, bench_schedule_task_conflicting, bench_schedule_task_conflicting_hot, bench_deschedule_task, bench_deschedule_task_conflicting, bench_schedule_unblocked_task + //benchmarks = bench_arc, bench_triomphe_arc + //benchmarks = bench_end_to_end_worst +); + +main!(library_benchmark_groups = bench_scheduling_state_machine); diff --git a/unified-scheduler-logic/src/lib.rs b/unified-scheduler-logic/src/lib.rs index 2bae4f603582f5..2f82baf693acff 100644 --- a/unified-scheduler-logic/src/lib.rs +++ b/unified-scheduler-logic/src/lib.rs @@ -95,6 +95,8 @@ //! susceptible to the buffer bloat problem by itself as explained by the description and validated //! by the mentioned benchmark above. Thus, this should be solved elsewhere, specifically at the //! scheduler pool. +#[cfg(feature = "dev-context-only-utils")] +use qualifier_attr::field_qualifiers; use { crate::utils::{ShortCounter, Token, TokenCell}, assert_matches::assert_matches, @@ -105,6 +107,8 @@ use { /// Internal utilities. Namely this contains [`ShortCounter`] and [`TokenCell`]. 
mod utils { + #[cfg(feature = "dev-context-only-utils")] + use qualifier_attr::qualifiers; use std::{ any::{self, TypeId}, cell::{RefCell, UnsafeCell}, @@ -116,6 +120,7 @@ mod utils { /// A really tiny counter to hide `.checked_{add,sub}` all over the place. /// /// It's caller's reponsibility to ensure this (backed by [`u32`]) never overflow. + #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))] #[derive(Debug, Clone, Copy)] pub(super) struct ShortCounter(u32); @@ -249,6 +254,7 @@ mod utils { /// existence of mutable access over them by requiring the token itself to be mutably borrowed /// to get a mutable reference to the internal value of `TokenCell`. // *mut is used to make this type !Send and !Sync + #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))] pub(super) struct Token(PhantomData<*mut V>); impl Token { @@ -411,6 +417,7 @@ type BlockedUsageCountToken = Token; const_assert_eq!(mem::size_of::(), 0); /// Internal scheduling data about a particular task. +#[cfg_attr(feature = "dev-context-only-utils", field_qualifiers(index(pub)))] #[derive(Debug)] pub struct TaskInner { transaction: SanitizedTransaction, @@ -614,6 +621,7 @@ const_assert_eq!(mem::size_of::(), 8); /// A high-level `struct`, managing the overall scheduling of [tasks](Task), to be used by /// `solana-unified-scheduler-pool`. +#[cfg_attr(feature = "dev-context-only-utils", field_qualifiers(count_token(pub)))] pub struct SchedulingStateMachine { unblocked_task_queue: VecDeque, active_task_count: ShortCounter, diff --git a/unified-scheduler-pool/Cargo.toml b/unified-scheduler-pool/Cargo.toml index 1d57a9307f7a47..a0ae095ec8b3d4 100644 --- a/unified-scheduler-pool/Cargo.toml +++ b/unified-scheduler-pool/Cargo.toml @@ -11,22 +11,43 @@ edition = { workspace = true } [dependencies] assert_matches = { workspace = true } +cpu-time = { workspace = true } crossbeam-channel = { workspace = true } dashmap = { workspace = true } derivative = { workspace = true } log = { workspace = true } qualifier_attr = { workspace = true } +rustix = { workspace = true } +serde_json = { workspace = true } solana-ledger = { workspace = true } +solana-measure = { workspace = true } +solana-metrics = { workspace = true } solana-program-runtime = { workspace = true } solana-runtime = { workspace = true } solana-sdk = { workspace = true } solana-unified-scheduler-logic = { workspace = true } solana-vote = { workspace = true } +[target."cfg(target_os = \"linux\")".dependencies] +procfs = { workspace = true } + [dev-dependencies] -assert_matches = { workspace = true } +bincode = { workspace = true } +criterion = "0.5.1" +log = { workspace = true } +rand = { workspace = true } solana-logger = { workspace = true } +solana-nohash-hasher = { workspace = true } solana-runtime = { workspace = true, features = ["dev-context-only-utils"] } +# See order-crates-for-publishing.py for using this unusual `path = "."` +solana-unified-scheduler-pool = { path = ".", features = ["dev-context-only-utils"] } + +[target.'cfg(not(target_env = "msvc"))'.dependencies] +jemallocator = { workspace = true } + +[[bench]] +name = "lib" +harness = false [features] dev-context-only-utils = [] diff --git a/unified-scheduler-pool/benches/lib.rs b/unified-scheduler-pool/benches/lib.rs new file mode 100644 index 00000000000000..7284eec8b03065 --- /dev/null +++ b/unified-scheduler-pool/benches/lib.rs @@ -0,0 +1,217 @@ +#![allow(unused_imports, dead_code)] +#![feature(test)] + +extern crate test; + +#[cfg(not(target_env = "msvc"))] +use jemallocator::Jemalloc; + 
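+// Presumably, jemalloc is wired in as the global allocator below so that allocation-heavy bench +// iterations see more consistent timings than they would with the system allocator.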
+#[cfg(not(target_env = "msvc"))] +#[global_allocator] +static GLOBAL: Jemalloc = Jemalloc; + +use { + solana_program_runtime::timings::ExecuteTimings, + solana_runtime::{ + bank::Bank, + bank_forks::BankForks, + genesis_utils::{create_genesis_config, GenesisConfigInfo}, + installed_scheduler_pool::{ + DefaultScheduleExecutionArg, InstalledScheduler, SchedulingContext, + }, + prioritization_fee_cache::PrioritizationFeeCache, + }, + solana_sdk::{ + scheduling::SchedulingMode, + transaction::{Result, SanitizedTransaction}, + }, + solana_unified_scheduler_logic::{SchedulingStateMachine, UsageQueue}, + solana_unified_scheduler_pool::{ + HandlerContext, PooledScheduler, SchedulerPool, SpawnableScheduler, TaskHandler, + }, + std::sync::Arc, +}; + +#[derive(Debug, Clone)] +struct DummyTaskHandler; + +impl TaskHandler for DummyTaskHandler { + fn handle( + &self, + _result: &mut Result<()>, + _timings: &mut ExecuteTimings, + _bank: &Arc, + _transaction: &SanitizedTransaction, + _index: usize, + _handler_context: &HandlerContext, + ) { + } + + fn create>( + _pool: &SchedulerPool, + ) -> Self { + Self + } +} + +fn setup_dummy_fork_graph(bank: Bank) -> Arc { + let slot = bank.slot(); + let bank_fork = BankForks::new_rw_arc(bank); + let bank = bank_fork.read().unwrap().get(slot).unwrap(); + bank.transaction_processor + .program_cache + .write() + .unwrap() + .set_fork_graph(bank_fork); + bank +} + +use solana_sdk::{ + instruction::{AccountMeta, Instruction}, + message::Message, + pubkey::Pubkey, + signature::Signer, + signer::keypair::Keypair, + transaction::Transaction, +}; + +fn do_bench_tx_throughput(label: &str, bencher: &mut Criterion) { + solana_logger::setup(); + + /* + let GenesisConfigInfo { + genesis_config, + .. + } = create_genesis_config(10_000); + */ + let payer = Keypair::new(); + + let mut accounts = vec![]; + for i in 0..100 { + if i % 2 == 0 { + accounts.push(AccountMeta::new(Keypair::new().pubkey(), true)); + } else { + accounts.push(AccountMeta::new_readonly(Keypair::new().pubkey(), true)); + } + } + + let memo_ix = Instruction { + program_id: Pubkey::default(), + accounts, + data: vec![0x00], + }; + let mut ixs = vec![]; + for _ in 0..1 { + ixs.push(memo_ix.clone()); + } + let msg = Message::new(&ixs, Some(&payer.pubkey())); + let txn = Transaction::new_unsigned(msg); + //assert_eq!(wire_txn.len(), 3); + let tx0 = SanitizedTransaction::from_transaction_for_tests(txn); + /* + let bank = Bank::new_for_tests(&genesis_config); + let bank = setup_dummy_fork_graph(bank); + let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); + let pool = SchedulerPool::, _, _>::new( + None, + None, + None, + ignored_prioritization_fee_cache, + ); + let context = SchedulingContext::new(SchedulingMode::BlockVerification, bank.clone()); + */ + + let (s, r) = crossbeam_channel::bounded(1000); + + use std::sync::atomic::AtomicUsize; + let i = Arc::new(AtomicUsize::default()); + use std::sync::Mutex; + let usage_queues: Arc< + Mutex>, + > = Arc::new(Mutex::new(std::collections::HashMap::new())); + /* + for _ in 0..5 { + std::thread::Builder::new() + .name("solScGen".to_owned()) + .spawn({ + let usage_queues = usage_queues.clone(); + let i = i.clone(); + let tx1 = tx0.clone(); + let s = s.clone(); + move || loop { + let tasks = std::iter::repeat_with(|| SchedulingStateMachine::create_task(tx1.clone(), i.fetch_add(1, std::sync::atomic::Ordering::Relaxed), &mut |address| { + usage_queues.lock().unwrap().entry(address).or_default().clone() + })).take(100).collect::>(); + if 
s.send(tasks).is_err() { + break; + } + } + }) + .unwrap(); + } + std::thread::sleep(std::time::Duration::from_secs(5)); + */ + + //assert_eq!(bank.transaction_count(), 0); + //let mut scheduler = pool.do_take_scheduler(context); + + let mut scheduler = + unsafe { SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() }; + + let tasks = std::iter::repeat_with(|| { + SchedulingStateMachine::create_task( + tx0.clone(), + i.fetch_add(1, std::sync::atomic::Ordering::Relaxed), + &mut |address| { + usage_queues + .lock() + .unwrap() + .entry(address) + .or_default() + .clone() + }, + ) + }) + .take(100) + .collect::>(); + s.send(tasks).unwrap(); + + bencher.bench_function(label, |b| { + b.iter(|| { + for _ in 0..600 { + let mut first_task = None; + let tt = r.recv().unwrap(); + let mut new_tasks = Vec::with_capacity(tt.len()); + for t in tt { + /* + scheduler.schedule_task(t); + */ + if let Some(task) = scheduler.schedule_task(t) { + first_task = Some(task); + } + } + scheduler.deschedule_task(first_task.as_ref().unwrap()); + new_tasks.push(first_task.unwrap()); + while let Some(unblocked_task) = scheduler.schedule_next_unblocked_task() { + scheduler.deschedule_task(&unblocked_task); + new_tasks.push(unblocked_task); + } + assert!(scheduler.has_no_active_task()); + s.send(new_tasks).unwrap(); + } + /* + scheduler.pause_for_recent_blockhash(); + scheduler.clear_session_result_with_timings(); + scheduler.restart_session(); + */ + }) + }); +} + +fn bench_entrypoint(bencher: &mut Criterion) { + do_bench_tx_throughput("bench_tx_throughput", bencher) +} + +use criterion::{criterion_group, criterion_main, Criterion}; +criterion_group!(benches, bench_entrypoint); +criterion_main!(benches); diff --git a/unified-scheduler-pool/benches/scheduler.rs b/unified-scheduler-pool/benches/scheduler.rs new file mode 100644 index 00000000000000..aab32811352bea --- /dev/null +++ b/unified-scheduler-pool/benches/scheduler.rs @@ -0,0 +1,923 @@ +#![cfg(feature = "dummy")] +#![feature(test)] +#![allow(clippy::arithmetic_side_effects)] + +#[cfg(not(target_env = "msvc"))] +#[global_allocator] +static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc; + +extern crate test; + +use { + assert_matches::assert_matches, + log::*, + rand::{thread_rng, Rng}, + solana_program_runtime::timings::ExecuteTimings, + solana_runtime::{ + bank::Bank, + genesis_utils::{create_genesis_config, GenesisConfigInfo}, + installed_scheduler_pool::{ + InstalledScheduler, ResultWithTimings, ScheduleExecutionArg, SchedulerId, + SchedulingContext, SchedulingMode, WithTransactionAndIndex, + }, + prioritization_fee_cache::PrioritizationFeeCache, + }, + solana_sdk::{ + scheduling::SchedulingMode, + system_transaction, + transaction::{Result, SanitizedTransaction}, + }, + solana_unified_scheduler_pool::{ + PooledScheduler, SchedulerPool, SpawnableScheduler, TaskHandler, + }, + std::{ + fmt::Debug, + marker::{PhantomData, Send, Sync}, + mem, + sync::Arc, + }, + test::Bencher, +}; + +const TX_COUNT: usize = 10_000; + +#[derive(Debug, Default, Clone)] +struct ScheduleExecutionArgForBench; + +// use Arc-ed transaction for very cheap .clone() so that the consumer is never starved for +// incoming transactions. 
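+// As a rough illustration: cloning an Arc<(SanitizedTransaction, usize)> only bumps a reference count, +// so the producing side can cheaply re-send the same transaction without rebuilding or re-sanitizing it.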
+type TransactionWithIndexForBench = Arc<(SanitizedTransaction, usize)>; + +impl ScheduleExecutionArg for ScheduleExecutionArgForBench { + type TransactionWithIndex<'_tx> = TransactionWithIndexForBench; +} + +#[derive(Debug, Default, Clone)] +struct BenchFriendlyHandler( + PhantomData, +); + +impl TaskHandler + for BenchFriendlyHandler +{ + fn create>(_pool: &SchedulerPool) -> Self { + Self(PhantomData) + } + + fn handle>( + &self, + _result: &mut Result<()>, + _timings: &mut ExecuteTimings, + bank: &Arc, + transaction: &SanitizedTransaction, + _index: usize, + _pool: &SchedulerPool, + ) { + //std::hint::black_box(bank.clone()); + let mut i = 0; + for _ in 0..10 { + if MUTATE_ARC { + //for _ in 0..2 { + std::hint::black_box((Arc::downgrade(bank)).upgrade().unwrap()); + //} + } + // call a random one of Bank's lightweight-and-very-multi-threaded-friendly methods which takes a + // transaction inside this artificial tight loop. + i += bank.get_fee_for_message_with_lamports_per_signature(transaction.message(), i) + } + std::hint::black_box(i); + } +} + +type BenchFriendlyHandlerWithArcMutation = BenchFriendlyHandler; +type BenchFriendlyHandlerWithoutArcMutation = + BenchFriendlyHandler; + +fn run_bench< + F: FnOnce(Arc>, SchedulingContext) -> I, + I: SpawnableScheduler, + TH: TaskHandler, +>( + bencher: &mut Bencher, + create_scheduler: F, +) { + solana_logger::setup(); + + let GenesisConfigInfo { + genesis_config, + mint_keypair, + .. + } = create_genesis_config(1_000_000_000); + let bank = &Arc::new(Bank::new_for_tests(&genesis_config)); + let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); + let pool = SchedulerPool::new(None, None, None, ignored_prioritization_fee_cache); + let context = SchedulingContext::new(SchedulingMode::BlockVerification, bank.clone()); + + let mut scheduler = create_scheduler(pool, context.clone()); + let tx0 = &SanitizedTransaction::from_transaction_for_tests(system_transaction::transfer( + &mint_keypair, + &solana_sdk::pubkey::new_rand(), + 2, + genesis_config.hash(), + )); + let tx_with_index = TransactionWithIndexForBench::new((tx0.clone(), 0)); + bencher.iter(|| { + for _ in 0..TX_COUNT { + scheduler.schedule_execution(tx_with_index.clone()); + } + assert_matches!(scheduler.wait_for_termination(false), Some((Ok(()), _))); + scheduler.replace_context(context.clone()); + }); +} + +mod blocking_ref { + use {super::*, solana_runtime::installed_scheduler_pool::DefaultScheduleExecutionArg}; + + #[bench] + fn bench_without_arc_mutation(bencher: &mut Bencher) { + solana_logger::setup(); + + let GenesisConfigInfo { + genesis_config, + mint_keypair, + ..
+ } = create_genesis_config(1_000_000_000); + let bank = &Arc::new(Bank::new_for_tests(&genesis_config)); + let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); + let pool = SchedulerPool::new(None, None, None, ignored_prioritization_fee_cache); + let context = SchedulingContext::new(SchedulingMode::BlockVerification, bank.clone()); + + let mut scheduler = PooledScheduler::<_, DefaultScheduleExecutionArg>::do_spawn( + pool, + context.clone(), + BenchFriendlyHandler::<_, false>::default(), + ); + let tx0 = &SanitizedTransaction::from_transaction_for_tests(system_transaction::transfer( + &mint_keypair, + &solana_sdk::pubkey::new_rand(), + 2, + genesis_config.hash(), + )); + let tx_with_index = &(tx0, 0); + bencher.iter(|| { + for _ in 0..TX_COUNT { + scheduler.schedule_execution(tx_with_index); + } + assert_matches!(scheduler.wait_for_termination(false), Some((Ok(()), _))); + scheduler.replace_context(context.clone()); + }); + } +} + +mod blocking { + use super::*; + + type BlockingScheduler = PooledScheduler; + + #[bench] + fn bench_with_arc_mutation(bencher: &mut Bencher) { + run_bench(bencher, |pool, context| { + BlockingScheduler::do_spawn( + pool, + context, + BenchFriendlyHandlerWithArcMutation::default(), + ) + }); + } + + #[bench] + fn bench_without_arc_mutation(bencher: &mut Bencher) { + run_bench(bencher, |pool, context| { + BlockingScheduler::do_spawn( + pool, + context, + BenchFriendlyHandlerWithoutArcMutation::default(), + ) + }); + } +} + +mod nonblocking { + use super::*; + + #[derive(Debug)] + pub(super) struct NonblockingScheduler + Clone> { + id: SchedulerId, + pub(crate) pool: Arc>, + transaction_sender: crossbeam_channel::Sender, + result_receiver: crossbeam_channel::Receiver<(Result<()>, ExecuteTimings, usize)>, + lane_count: usize, + context: SchedulingContext, + _phantom: PhantomData, + } + + enum ChainedChannel { + Payload(TransactionWithIndexForBench), + NextContext(SchedulingContext), + NextChannel(Box), + } + + type ChannelPair = ( + crossbeam_channel::Receiver, + crossbeam_channel::Sender<(Result<()>, ExecuteTimings, usize)>, + ); + + trait WithChannelPair { + fn unwrap_channel_pair(&mut self) -> ChannelPair; + } + + struct ChannelPairOption(Option); + + impl WithChannelPair for ChannelPairOption { + fn unwrap_channel_pair(&mut self) -> ChannelPair { + self.0.take().unwrap() + } + } + + impl + Clone> + SpawnableScheduler for NonblockingScheduler + { + fn spawn( + _pool: Arc>, + _initial_context: SchedulingContext, + _handler: H, + ) -> Self { + unimplemented!(); + } + + fn retire_if_stale(&mut self) -> bool { + unimplemented!(); + } + } + + impl + Clone> NonblockingScheduler { + pub(super) fn spawn( + pool: Arc>, + initial_context: SchedulingContext, + lane_count: usize, + handler: H, + ) -> Self { + let (transaction_sender, transaction_receiver) = + crossbeam_channel::unbounded::(); + let (result_sender, result_receiver) = crossbeam_channel::unbounded(); + + for _ in 0..lane_count { + let mut bank = Arc::clone(initial_context.bank()); + let mut transaction_receiver = transaction_receiver.clone(); + let mut result_sender = result_sender.clone(); + std::thread::spawn({ + let pool = pool.clone(); + let handler = handler.clone(); + move || { + let mut result = Ok(()); + let mut timings = ExecuteTimings::default(); + let mut count = 0; + while let Ok(message) = transaction_receiver.recv() { + match message { + ChainedChannel::Payload(with_transaction_and_index) => { + count += 1; + with_transaction_and_index.with_transaction_and_index( + 
|transaction, index| { + H::handle( + &handler, + &mut result, + &mut timings, + &bank, + transaction, + index, + &pool, + ); + }, + ); + } + ChainedChannel::NextContext(next_context) => { + bank = next_context.bank().clone(); + } + ChainedChannel::NextChannel(mut next_receiver_box) => { + result_sender + .send(( + mem::replace(&mut result, Ok(())), + mem::take(&mut timings), + mem::take(&mut count), + )) + .unwrap(); + (transaction_receiver, result_sender) = + next_receiver_box.unwrap_channel_pair(); + } + } + } + } + }); + } + + Self { + id: thread_rng().gen::(), + pool, + transaction_sender, + result_receiver, + lane_count, + context: initial_context, + _phantom: PhantomData, + } + } + } + impl + Clone> + InstalledScheduler for NonblockingScheduler + { + fn id(&self) -> SchedulerId { + self.id + } + + fn context(&self) -> &SchedulingContext { + &self.context + } + + fn schedule_execution(&self, transaction_with_index: TransactionWithIndexForBench) { + self.transaction_sender + .send(ChainedChannel::Payload(transaction_with_index)) + .unwrap(); + } + + fn wait_for_termination(&mut self, _is_dropped: bool) -> Option { + let (next_transaction_sender, next_transaction_receiver) = + crossbeam_channel::unbounded::(); + let (next_result_sender, next_result_receiver) = crossbeam_channel::unbounded(); + for _ in 0..self.lane_count { + let (next_transaction_receiver, next_result_sender) = ( + next_transaction_receiver.clone(), + next_result_sender.clone(), + ); + self.transaction_sender + .send(ChainedChannel::NextChannel(Box::new(ChannelPairOption( + Some((next_transaction_receiver, next_result_sender)), + )))) + .unwrap(); + } + self.transaction_sender = next_transaction_sender; + + let mut overall_result = Ok(()); + let mut overall_timings = ExecuteTimings::default(); + + while let Ok((result, timings, count)) = self.result_receiver.recv() { + match result { + Ok(()) => {} + Err(e) => overall_result = Err(e), + } + overall_timings.accumulate(&timings); + trace!("received: {count:?}"); + } + self.result_receiver = next_result_receiver; + + Some((overall_result, overall_timings)) + } + + /* + fn return_to_pool(self: Box) { + self.pool.clone().return_scheduler(self) + } + */ + fn pause_for_recent_blockhash(&mut self) { + todo!() + } + } + + #[bench] + fn bench_with_01_thread_with_arc_mutation(bencher: &mut Bencher) { + run_bench(bencher, |pool, context| { + NonblockingScheduler::spawn( + pool, + context, + 1, + BenchFriendlyHandlerWithArcMutation::default(), + ) + }); + } + + #[bench] + fn bench_with_01_thread_without_arc_mutation(bencher: &mut Bencher) { + run_bench(bencher, |pool, context| { + NonblockingScheduler::spawn( + pool, + context, + 1, + BenchFriendlyHandlerWithoutArcMutation::default(), + ) + }); + } + + #[bench] + fn bench_with_04_threads_with_arc_mutation(bencher: &mut Bencher) { + run_bench(bencher, |pool, context| { + NonblockingScheduler::spawn( + pool, + context, + 4, + BenchFriendlyHandlerWithArcMutation::default(), + ) + }); + } + + #[bench] + fn bench_with_04_threads_without_arc_mutation(bencher: &mut Bencher) { + run_bench(bencher, |pool, context| { + NonblockingScheduler::spawn( + pool, + context, + 4, + BenchFriendlyHandlerWithoutArcMutation::default(), + ) + }); + } + + #[bench] + fn bench_with_08_threads_with_arc_mutation(bencher: &mut Bencher) { + run_bench(bencher, |pool, context| { + NonblockingScheduler::spawn( + pool, + context, + 8, + BenchFriendlyHandlerWithArcMutation::default(), + ) + }); + } + + #[bench] + fn bench_with_08_threads_without_arc_mutation(bencher: 
&mut Bencher) { + run_bench(bencher, |pool, context| { + NonblockingScheduler::spawn( + pool, + context, + 8, + BenchFriendlyHandlerWithoutArcMutation::default(), + ) + }); + } + + #[bench] + fn bench_with_16_threads_with_arc_mutation(bencher: &mut Bencher) { + run_bench(bencher, |pool, context| { + NonblockingScheduler::spawn( + pool, + context, + 16, + BenchFriendlyHandlerWithArcMutation::default(), + ) + }); + } + + #[bench] + fn bench_with_16_threads_without_arc_mutation(bencher: &mut Bencher) { + run_bench(bencher, |pool, context| { + NonblockingScheduler::spawn( + pool, + context, + 16, + BenchFriendlyHandlerWithoutArcMutation::default(), + ) + }); + } +} + +// demonstrate a meaningfully differing performance profile regarding multi worker thread utilization +// with saturated transaction execution for each bench scenario, with/without the existence of +// artificial and needless synchronizations. +// conversely, the whole InstallableScheduler machinery can be justified as it can cut these +// synchronizations down to the bare minimum (i.e. bank freeze). +#[cfg(feature = "dummy")] +mod thread_utilization { + use { + super::*, + crate::nonblocking::NonblockingScheduler, + solana_nohash_hasher::IntSet, + solana_sdk::{ + signature::Signature, signer::keypair::Keypair, + system_instruction::SystemInstruction::Transfer, transaction::TransactionAccountLocks, + }, + std::{collections::HashMap, sync::Mutex, thread::sleep, time::Duration}, + }; + + #[derive(Debug, Clone)] + struct SleepyHandler; + + impl TaskHandler for SleepyHandler { + fn create>(_pool: &SchedulerPool) -> Self { + Self + } + + fn handle>( + &self, + _result: &mut Result<()>, + _timings: &mut ExecuteTimings, + _bank: &Arc, + transaction: &SanitizedTransaction, + _index: usize, + _pool: &SchedulerPool, + ) { + let Ok(Transfer { lamports: sleep_ms }) = + bincode::deserialize(&transaction.message().instructions()[0].data) + else { + panic!() + }; + + sleep(Duration::from_millis(sleep_ms)); + } + } + + enum Step { + Batch(Vec), + // mimic periodic or contention-induced synchronization with this artificial blocking + MaySynchronize, + } + + const WORKER_THREAD_COUNT: usize = 10; + + fn simulate_synchronization_point>( + scheduler: &mut T, + context: SchedulingContext, + ) { + assert_matches!(scheduler.wait_for_termination(false), Some((Ok(()), _))); + scheduler.replace_context(context); + } + + fn run_scenario_and_finalize>( + bencher: &mut Bencher, + really_synchronize: bool, + scheduler: &mut T, + context: SchedulingContext, + create_scenario: impl Fn() -> Vec, + ) { + let scenario = &create_scenario(); + bencher.iter(|| { + for step in scenario { + match step { + Step::Batch(txes) => { + for tx in txes { + scheduler.schedule_execution(tx.clone()); + } + } + Step::MaySynchronize => { + if really_synchronize { + simulate_synchronization_point(scheduler, context.clone()); + } + } + } + } + simulate_synchronization_point(scheduler, context.clone()); + }) + } + + // frequent synchronization creates non-zero idling time among some of the worker threads, given + // batches with mixed transactions. Then, it adds up as these kinds of synchronizations occur over + // the course of processing + fn bench_random_execution_durations(bencher: &mut Bencher, really_synchronize: bool) { + let GenesisConfigInfo { + genesis_config, + mint_keypair, + ..
+ } = create_genesis_config(1_000_000_000); + let bank = &Arc::new(Bank::new_for_tests(&genesis_config)); + + let create_tx_with_index = |index| { + let tx0 = + SanitizedTransaction::from_transaction_for_tests(system_transaction::transfer( + &mint_keypair, + &solana_sdk::pubkey::new_rand(), + // simulate somewhat realistic work load; txes finish at different timings + thread_rng().gen_range(1..10), + genesis_config.hash(), + )); + TransactionWithIndexForBench::new((tx0, index)) + }; + + let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); + let pool = SchedulerPool::new(None, None, None, ignored_prioritization_fee_cache); + let context = SchedulingContext::new(SchedulingMode::BlockVerification, bank.clone()); + let mut scheduler = + NonblockingScheduler::spawn(pool, context.clone(), WORKER_THREAD_COUNT, SleepyHandler); + + run_scenario_and_finalize(bencher, really_synchronize, &mut scheduler, context, || { + const TX_PER_BATCH: usize = 20; + const SYNCHRONIZATION_PER_BENCH_ITER: usize = 10; + + (0..SYNCHRONIZATION_PER_BENCH_ITER) + .flat_map(|_| { + [ + Step::Batch((0..TX_PER_BATCH).map(create_tx_with_index).collect()), + Step::MaySynchronize, + ] + }) + .collect() + }); + } + + #[bench] + fn bench_random_execution_durations_with_interleaved_synchronization(bencher: &mut Bencher) { + bench_random_execution_durations(bencher, true); + } + + #[bench] + fn bench_random_execution_durations_without_interleaved_synchronization(bencher: &mut Bencher) { + bench_random_execution_durations(bencher, false); + } + + #[derive(Debug, Clone)] + struct SleepyHandlerWithCompletionSignal(crossbeam_channel::Sender); + + impl TaskHandler for SleepyHandlerWithCompletionSignal { + fn create>(_pool: &SchedulerPool) -> Self { + // not needed for bench... + unimplemented!(); + } + + fn handle>( + &self, + _result: &mut Result<()>, + _timings: &mut ExecuteTimings, + _bank: &Arc, + transaction: &SanitizedTransaction, + _index: usize, + _pool: &SchedulerPool, + ) { + let Ok(Transfer { lamports: sleep_ms }) = + bincode::deserialize(&transaction.message().instructions()[0].data) + else { + panic!() + }; + + sleep(Duration::from_millis(sleep_ms)); + + self.0.send(*transaction.signature()).unwrap(); + } + } + + // a wrapper InstallableScheduler to integrate with dep graph scheduling logic + #[derive(Debug)] + struct NonblockingSchedulerWithDepGraph { + inner_scheduler: NonblockingScheduler, + pending_transactions: Mutex>, + completion_receiver: crossbeam_channel::Receiver, + } + + impl InstalledScheduler for NonblockingSchedulerWithDepGraph { + fn id(&self) -> SchedulerId { + self.inner_scheduler.id() + } + + fn context(&self) -> &SchedulingContext { + self.inner_scheduler.context() + } + + fn schedule_execution(&self, transaction_with_index: TransactionWithIndexForBench) { + // just buffer all the txes to work with the dep graph outer loop nicely, which needs + // some buffering to schedule efficiently + // note taht the prompt execution as soon as entering into schedule_execution() isn't + // needed for these particular bench purposes. so, buffering is okay in that regard. + self.pending_transactions + .lock() + .unwrap() + .push(transaction_with_index.0.clone()); + } + + fn wait_for_termination(&mut self, is_dropped: bool) -> Option { + // execute all the pending transactions now! 
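+        // (Roughly, as implemented in execute_batches() below: the transactions buffered by +        // schedule_execution() are drained, a dependency graph over their account locks is built, and +        // each one is handed to the inner scheduler only after all of its dependencies have signaled +        // completion through the completion channel.)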
+ self.execute_batches( + self.context().bank(), + &std::mem::take(&mut *self.pending_transactions.lock().unwrap()), + &self.completion_receiver, + ) + .unwrap(); + + self.inner_scheduler.wait_for_termination(is_dropped) + } + + /* + fn return_to_pool(self: Box) { + Box::new(self.inner_scheduler).return_to_pool() + } + */ + } + + /* + impl InstallableScheduler for NonblockingSchedulerWithDepGraph { + fn replace_context(&mut self, context: SchedulingContext) { + self.inner_scheduler.replace_context(context) + } + } + */ + + // adapted from https://github.com/jito-foundation/jito-solana/pull/294; retained to be as-is + // as much as possible by the use of some wrapper type hackery. + impl NonblockingSchedulerWithDepGraph { + // for each index, builds a transaction dependency graph of indices that need to execute before + // the current one. + // The returned Vec> is a 1:1 mapping for the indices that need to be executed + // before that index can be executed + fn build_dependency_graph( + tx_account_locks: &[TransactionAccountLocks], + ) -> Vec> { + // build a map whose key is a pubkey + value is a sorted vector of all indices that + // lock that account + let mut indices_read_locking_account = HashMap::new(); + let mut indicies_write_locking_account = HashMap::new(); + tx_account_locks + .iter() + .enumerate() + .for_each(|(idx, tx_account_locks)| { + for account in &tx_account_locks.readonly { + indices_read_locking_account + .entry(**account) + .and_modify(|indices: &mut Vec| indices.push(idx)) + .or_insert_with(|| vec![idx]); + } + for account in &tx_account_locks.writable { + indicies_write_locking_account + .entry(**account) + .and_modify(|indices: &mut Vec| indices.push(idx)) + .or_insert_with(|| vec![idx]); + } + }); + + tx_account_locks + .iter() + .enumerate() + .map(|(idx, account_locks)| { + let mut dep_graph: IntSet = IntSet::default(); + + let readlock_conflict_accs = account_locks.writable.iter(); + let writelock_conflict_accs = account_locks + .readonly + .iter() + .chain(account_locks.writable.iter()); + + for acc in readlock_conflict_accs { + if let Some(indices) = indices_read_locking_account.get(acc) { + dep_graph.extend(indices.iter().take_while(|l_idx| **l_idx < idx)); + } + } + + for acc in writelock_conflict_accs { + if let Some(indices) = indicies_write_locking_account.get(acc) { + dep_graph.extend(indices.iter().take_while(|l_idx| **l_idx < idx)); + } + } + dep_graph + }) + .collect() + } + + fn execute_batches( + &self, + bank: &Arc, + pending_transactions: &[SanitizedTransaction], + receiver: &crossbeam_channel::Receiver, + ) -> Result<()> { + if pending_transactions.is_empty() { + return Ok(()); + } + + let mut tx_account_locks: Vec<_> = Vec::with_capacity(pending_transactions.len()); + for tx in pending_transactions { + tx_account_locks + .push(tx.get_account_locks(bank.get_transaction_account_lock_limit())?); + } + + // the dependency graph contains the indices that must be executed (marked with + // State::Done) before they can be executed + let dependency_graph = Self::build_dependency_graph(&tx_account_locks); + + #[derive(Clone)] + enum State { + Blocked, + Processing, + Done, + } + + let mut processing_states: Vec = vec![State::Blocked; dependency_graph.len()]; + let mut signature_indices: HashMap<&Signature, usize> = + HashMap::with_capacity(dependency_graph.len()); + signature_indices.extend( + pending_transactions + .iter() + .enumerate() + .map(|(idx, tx)| (tx.signature(), idx)), + ); + + loop { + let mut is_done = true; + for idx in 
0..processing_states.len() { + match processing_states[idx] { + State::Blocked => { + is_done = false; + + // if all the dependent txs are executed, this transaction can be + // scheduled for execution. + if dependency_graph[idx] + .iter() + .all(|idx| matches!(processing_states[*idx], State::Done)) + { + self.inner_scheduler.schedule_execution(Arc::new(( + pending_transactions[idx].clone(), + idx, + ))); + // this idx can be scheduled and moved to processing + processing_states[idx] = State::Processing; + } + } + State::Processing => { + is_done = false; + } + State::Done => {} + } + } + + if is_done { + break; + } + + let mut executor_responses: Vec<_> = vec![receiver.recv().unwrap()]; + executor_responses.extend(receiver.try_iter()); + for r in &executor_responses { + processing_states[*signature_indices.get(r).unwrap()] = State::Done; + } + } + Ok(()) + } + } + + // frequent synchronizations hamper efficient (= parallelizable) scheduling of several chunks + // of txes which are tied together by common account locks. Ideally those independent chunks can be + // executed in parallel, each consuming one worker thread as a form of serialized runs + // of processing. However, should a synchronization occur between the boundaries of those chunks' + // arrivals, it cannot schedule the later-coming one because it first flushes out the first + // one. + // in other words, this is just a re-manifestation of the perf. issue coming from write barriers in + // general. + fn bench_long_serialized_runs(bencher: &mut Bencher, really_synchronize: bool) { + let GenesisConfigInfo { genesis_config, .. } = create_genesis_config(1_000_000_000); + let bank = &Arc::new(Bank::new_for_tests(&genesis_config)); + let (kp1, kp2) = (Keypair::new(), Keypair::new()); + + let create_tx_of_serialized_run1 = || { + let tx0 = + SanitizedTransaction::from_transaction_for_tests(system_transaction::transfer( + &kp1, + &solana_sdk::pubkey::new_rand(), + 10, + genesis_config.hash(), + )); + TransactionWithIndexForBench::new((tx0, 0)) + }; + let create_tx_of_serialized_run2 = || { + let tx0 = + SanitizedTransaction::from_transaction_for_tests(system_transaction::transfer( + &kp2, + &solana_sdk::pubkey::new_rand(), + 10, + genesis_config.hash(), + )); + TransactionWithIndexForBench::new((tx0, 0)) + }; + + let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); + let pool = SchedulerPool::new(None, None, None, ignored_prioritization_fee_cache); + let context = SchedulingContext::new(SchedulingMode::BlockVerification, bank.clone()); + let (completion_sender, completion_receiver) = crossbeam_channel::unbounded(); + let handler = SleepyHandlerWithCompletionSignal(completion_sender); + let tx_lock_ignoring_scheduler = + NonblockingScheduler::spawn(pool, context.clone(), WORKER_THREAD_COUNT, handler); + let tx_lock_adhering_scheduler = NonblockingSchedulerWithDepGraph { + inner_scheduler: tx_lock_ignoring_scheduler, + pending_transactions: Mutex::new(Vec::default()), + completion_receiver, + }; + let mut scheduler = tx_lock_adhering_scheduler; + run_scenario_and_finalize(bencher, really_synchronize, &mut scheduler, context, || { + (0..1) + .flat_map(|_| { + [ + Step::Batch(vec![create_tx_of_serialized_run1()]), + Step::Batch(vec![create_tx_of_serialized_run1()]), + Step::Batch(vec![create_tx_of_serialized_run1()]), + Step::Batch(vec![create_tx_of_serialized_run1()]), + Step::MaySynchronize, + Step::Batch(vec![create_tx_of_serialized_run2()]), + Step::Batch(vec![create_tx_of_serialized_run2()]), + 
Step::Batch(vec![create_tx_of_serialized_run2()]), + Step::Batch(vec![create_tx_of_serialized_run2()]), + Step::MaySynchronize, + ] + }) + .collect() + }); + } + + #[bench] + fn bench_long_serialized_runs_with_interleaved_synchronization(bencher: &mut Bencher) { + bench_long_serialized_runs(bencher, true); + } + + #[bench] + fn bench_long_serialized_runs_without_interleaved_synchronization(bencher: &mut Bencher) { + bench_long_serialized_runs(bencher, false); + } +} diff --git a/unified-scheduler-pool/src/lib.rs b/unified-scheduler-pool/src/lib.rs index 0b7c5495b0accc..1ef0c5b053c1c4 100644 --- a/unified-scheduler-pool/src/lib.rs +++ b/unified-scheduler-pool/src/lib.rs @@ -1,8 +1,3 @@ -//! NOTE: While the unified scheduler is fully functional and moderately performant even with -//! mainnet-beta, it has known resource-exhaustion related security issues for replaying -//! specially-crafted blocks produced by malicious leaders. Thus, this experimental and -//! nondefault functionality is exempt from the bug bounty program for now. -//! //! Transaction scheduling code. //! //! This crate implements 3 solana-runtime traits (`InstalledScheduler`, `UninstalledScheduler` and @@ -17,37 +12,47 @@ use qualifier_attr::qualifiers; use { assert_matches::assert_matches, - crossbeam_channel::{self, never, select, Receiver, RecvError, SendError, Sender}, + cpu_time::ThreadTime, + crossbeam_channel::{ + self, disconnected, never, select_biased, Receiver, RecvError, RecvTimeoutError, SendError, + Sender, TryRecvError, + }, dashmap::DashMap, derivative::Derivative, log::*, solana_ledger::blockstore_processor::{ execute_batch, TransactionBatchWithIndexes, TransactionStatusSender, }, + solana_measure::measure::Measure, + solana_metrics::datapoint_info_at, solana_program_runtime::timings::ExecuteTimings, solana_runtime::{ bank::Bank, + compute_budget_details::GetComputeBudgetDetails, installed_scheduler_pool::{ - InstalledScheduler, InstalledSchedulerBox, InstalledSchedulerPool, - InstalledSchedulerPoolArc, ResultWithTimings, SchedulerId, SchedulingContext, - UninstalledScheduler, UninstalledSchedulerBox, + DefaultScheduleExecutionArg, InstalledScheduler, InstalledSchedulerPool, + InstalledSchedulerPoolArc, ResultWithTimings, ScheduleExecutionArg, SchedulerId, + SchedulingContext, UninstalledScheduler, UninstalledSchedulerBox, + WithTransactionAndIndex, }, prioritization_fee_cache::PrioritizationFeeCache, }, solana_sdk::{ + clock::Slot, pubkey::Pubkey, - transaction::{Result, SanitizedTransaction}, + transaction::{Result, SanitizedTransaction, TransactionError}, }, solana_unified_scheduler_logic::{SchedulingStateMachine, Task, UsageQueue}, solana_vote::vote_sender_types::ReplayVoteSender, std::{ + env, fmt::Debug, - marker::PhantomData, sync::{ atomic::{AtomicU64, Ordering::Relaxed}, - Arc, Mutex, OnceLock, Weak, + Arc, Mutex, OnceLock, RwLock, RwLockReadGuard, Weak, }, thread::{self, JoinHandle}, + time::{Duration, Instant, SystemTime}, }, }; @@ -57,7 +62,12 @@ type AtomicSchedulerId = AtomicU64; // contains some internal fields, whose types aren't available in solana-runtime (currently // TransactionStatusSender; also, PohRecorder in the future)... #[derive(Debug)] -pub struct SchedulerPool, TH: TaskHandler> { +pub struct SchedulerPool +where + S: SpawnableScheduler, + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ scheduler_inners: Mutex>, handler_count: usize, handler_context: HandlerContext, @@ -73,7 +83,11 @@ pub struct SchedulerPool, TH: TaskHandler> { // memory increase. 
weak_self: Weak, next_scheduler_id: AtomicSchedulerId, - _phantom: PhantomData, + // prune schedulers, stop idling scheduler's threads, sanity check on the + // usage queue loader after scheduler is returned. + cleaner_sender: Sender>>>, + cleaner_exit_signal_sender: Sender<()>, + cleaner_thread: Mutex>>, } #[derive(Debug)] @@ -84,13 +98,117 @@ pub struct HandlerContext { prioritization_fee_cache: Arc, } -pub type DefaultSchedulerPool = - SchedulerPool, DefaultTaskHandler>; +pub type DefaultSchedulerPool = SchedulerPool< + PooledScheduler, + DefaultTaskHandler, + DefaultScheduleExecutionArg, +>; + +struct WatchedThreadManager +where + S: SpawnableScheduler, + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ + thread_manager: Weak>>, + #[cfg(target_os = "linux")] + tick: u64, + #[cfg(target_os = "linux")] + updated_at: Instant, +} -impl SchedulerPool +impl WatchedThreadManager where - S: SpawnableScheduler, - TH: TaskHandler, + S: SpawnableScheduler, + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ + fn new(thread_manager: Weak>>) -> Self { + Self { + thread_manager, + #[cfg(target_os = "linux")] + tick: 0, + #[cfg(target_os = "linux")] + updated_at: Instant::now(), + } + } + + fn retire_if_stale(&mut self) -> bool { + #[cfg_attr(not(target_os = "linux"), allow(unused_variables))] + let Some(thread_manager) = self.thread_manager.upgrade() else { + return false; + }; + + // The following linux-only code implements an eager native thread reclaiming, which is + // only useful if the solana-validator sees many unrooted forks. Such hostile situations + // should NEVER happen on remotely-uncontrollable ledgers created by solana-test-validator. + // And it's generally not expected mainnet-beta validators (or any live clusters for that + // matter) to be run on non-linux OSes at all. + // + // Thus, this OS-specific implementation can be justified because this enables the hot-path + // (the scheduler main thread) to omit VDSO calls and timed-out futex syscalls by relying on + // this out-of-bound cleaner for a defensive thread reclaiming. 
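+        // In short, as implemented below: the cleaner samples the scheduler thread's cumulative +        // utime + stime ticks via procfs; if the tick count stops advancing for the idle duration, +        // the thread manager is suspended so its native threads can be reclaimed.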
+ #[cfg(target_os = "linux")] + { + let Some(tid) = thread_manager.read().unwrap().active_tid_if_not_primary() else { + self.tick = 0; + self.updated_at = Instant::now(); + return true; + }; + + let pid = std::process::id(); + let task = procfs::process::Process::new(pid.try_into().unwrap()) + .unwrap() + .task_from_tid(tid) + .unwrap(); + let stat = task.stat().unwrap(); + let current_tick = stat.utime.checked_add(stat.stime).unwrap(); + if current_tick > self.tick { + self.tick = current_tick; + self.updated_at = Instant::now(); + } else { + // 5x of 400ms block time + const IDLE_DURATION_FOR_EAGER_THREAD_RECLAIM: Duration = Duration::from_secs(2); + + let elapsed = self.updated_at.elapsed(); + if elapsed > IDLE_DURATION_FOR_EAGER_THREAD_RECLAIM { + const BITS_PER_HEX_DIGIT: usize = 4; + let thread_manager = &mut thread_manager.write().unwrap(); + info!( + "[sch_{:0width$x}]: cleaner: retire_if_stale(): stopping thread manager ({tid}/{} <= {}/{:?})...", + thread_manager.scheduler_id, + current_tick, + self.tick, + elapsed, + width = SchedulerId::BITS as usize / BITS_PER_HEX_DIGIT, + ); + thread_manager.suspend(); + self.tick = 0; + self.updated_at = Instant::now(); + } + } + } + + true + } +} + +impl Drop for SchedulerPool +where + S: SpawnableScheduler, + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ + fn drop(&mut self) { + info!("SchedulerPool::drop() is successfully called"); + } +} + +impl SchedulerPool +where + S: SpawnableScheduler, + TH: TaskHandler, + SEA: ScheduleExecutionArg, { // Some internal impl and test code want an actual concrete type, NOT the // `dyn InstalledSchedulerPool`. So don't merge this into `Self::new_dyn()`. @@ -105,7 +223,64 @@ where let handler_count = handler_count.unwrap_or(Self::default_handler_count()); assert!(handler_count >= 1); - Arc::new_cyclic(|weak_self| Self { + let (scheduler_pool_sender, scheduler_pool_receiver) = crossbeam_channel::bounded(1); + let (cleaner_sender, cleaner_receiver) = crossbeam_channel::unbounded(); + let (cleaner_exit_signal_sender, cleaner_exit_signal_receiver) = + crossbeam_channel::unbounded(); + + let cleaner_main_loop = || { + move || { + let scheduler_pool: Arc = scheduler_pool_receiver.recv().unwrap(); + drop(scheduler_pool_receiver); + + let mut thread_managers: Vec> = vec![]; + + 'outer: loop { + let mut schedulers = scheduler_pool.scheduler_inners.lock().unwrap(); + let schedulers_len_pre_retain = schedulers.len(); + schedulers.retain_mut(|scheduler| scheduler.retire_if_stale()); + let schedulers_len_post_retain = schedulers.len(); + drop(schedulers); + + let thread_manager_len_pre_retain = thread_managers.len(); + thread_managers.retain_mut(|thread_manager| thread_manager.retire_if_stale()); + + let thread_manager_len_pre_push = thread_managers.len(); + 'inner: loop { + match cleaner_receiver.try_recv() { + Ok(thread_manager) => { + thread_managers.push(WatchedThreadManager::new(thread_manager)) + } + Err(TryRecvError::Disconnected) => break 'outer, + Err(TryRecvError::Empty) => break 'inner, + } + } + + info!( + "cleaner: unused schedulers in the pool: {} => {}, all thread managers: {} => {} => {}", + schedulers_len_pre_retain, + schedulers_len_post_retain, + thread_manager_len_pre_retain, + thread_manager_len_pre_push, + thread_managers.len(), + ); + // wait for signal with timeout here instead of recv_timeout() to write all the + // preceeding logs at once. 
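+                    // Note: the 1-second timeout below also sets the polling cadence of the retain +                    // passes above.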
+ match cleaner_exit_signal_receiver.recv_timeout(Duration::from_secs(1)) { + Ok(()) | Err(RecvTimeoutError::Disconnected) => break 'outer, + Err(RecvTimeoutError::Timeout) => continue, + } + } + info!("cleaner thread terminating!"); + } + }; + + let cleaner_thread = thread::Builder::new() + .name("solScCleaner".to_owned()) + .spawn(cleaner_main_loop()) + .unwrap(); + + let scheduler_pool = Arc::new_cyclic(|weak_self| Self { scheduler_inners: Mutex::default(), handler_count, handler_context: HandlerContext { @@ -115,9 +290,13 @@ where prioritization_fee_cache, }, weak_self: weak_self.clone(), - next_scheduler_id: AtomicSchedulerId::default(), - _phantom: PhantomData, - }) + next_scheduler_id: AtomicSchedulerId::new(PRIMARY_SCHEDULER_ID), + cleaner_thread: Mutex::new(Some(cleaner_thread)), + cleaner_sender, + cleaner_exit_signal_sender, + }); + scheduler_pool_sender.send(scheduler_pool.clone()).unwrap(); + scheduler_pool } // This apparently-meaningless wrapper is handy, because some callers explicitly want @@ -128,7 +307,7 @@ where transaction_status_sender: Option, replay_vote_sender: Option, prioritization_fee_cache: Arc, - ) -> InstalledSchedulerPoolArc { + ) -> InstalledSchedulerPoolArc { Self::new( handler_count, log_messages_bytes_limit, @@ -156,16 +335,21 @@ where .push(scheduler); } + #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))] fn do_take_scheduler(&self, context: SchedulingContext) -> S { // pop is intentional for filo, expecting relatively warmed-up scheduler due to having been // returned recently - if let Some(inner) = self.scheduler_inners.lock().expect("not poisoned").pop() { - S::from_inner(inner, context) + if let Some(pooled_inner) = self.scheduler_inners.lock().expect("not poisoned").pop() { + S::from_inner(pooled_inner, context) } else { - S::spawn(self.self_arc(), context) + S::spawn(self.self_arc(), context, TH::create(self)) } } + fn register_to_cleaner(&self, thread_manager: Weak>>) { + self.cleaner_sender.send(thread_manager).unwrap(); + } + #[cfg(feature = "dev-context-only-utils")] pub fn pooled_scheduler_count(&self) -> usize { self.scheduler_inners.lock().expect("not poisoned").len() @@ -203,18 +387,41 @@ where } } -impl InstalledSchedulerPool for SchedulerPool +impl InstalledSchedulerPool for SchedulerPool where - S: SpawnableScheduler, - TH: TaskHandler, + S: SpawnableScheduler, + TH: TaskHandler, + SEA: ScheduleExecutionArg, { - fn take_scheduler(&self, context: SchedulingContext) -> InstalledSchedulerBox { + fn take_scheduler(&self, context: SchedulingContext) -> Box> { Box::new(self.do_take_scheduler(context)) } + + fn uninstalled_from_bank_forks(self: Arc) { + self.scheduler_inners.lock().unwrap().clear(); + self.cleaner_exit_signal_sender.send(()).unwrap(); + let () = self + .cleaner_thread + .lock() + .unwrap() + .take() + .unwrap() + .join() + .unwrap(); + info!( + "SchedulerPool::uninstalled_from_bank_forks(): joined cleaner thread at {:?}...", + thread::current() + ); + } } -pub trait TaskHandler: Send + Sync + Debug + Sized + 'static { +pub trait TaskHandler: + Send + Sync + Debug + Sized + Clone + 'static +{ + fn create>(pool: &SchedulerPool) -> Self; + fn handle( + &self, result: &mut Result<()>, timings: &mut ExecuteTimings, bank: &Arc, @@ -224,11 +431,16 @@ pub trait TaskHandler: Send + Sync + Debug + Sized + 'static { ); } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct DefaultTaskHandler; -impl TaskHandler for DefaultTaskHandler { +impl TaskHandler for DefaultTaskHandler { + fn create>(_pool: &SchedulerPool) -> Self { 
+ Self + } + fn handle( + &self, result: &mut Result<()>, timings: &mut ExecuteTimings, bank: &Arc, @@ -259,15 +471,31 @@ impl TaskHandler for DefaultTaskHandler { struct ExecutedTask { task: Task, result_with_timings: ResultWithTimings, + slot: Slot, + thx: usize, + handler_timings: Option, +} + +pub struct HandlerTimings { + finish_time: SystemTime, + execution_us: u64, + execution_cpu_us: u128, } impl ExecutedTask { - fn new_boxed(task: Task) -> Box { + fn new_boxed(task: Task, thx: usize, slot: Slot) -> Box { Box::new(Self { task, result_with_timings: initialized_result_with_timings(), + slot, + thx, + handler_timings: None, }) } + + fn is_err(&self) -> bool { + self.result_with_timings.0.is_err() + } } // A very tiny generic message type to signal about opening and closing of subchannels, which are @@ -283,6 +511,7 @@ enum SubchanneledPayload { } type NewTaskPayload = SubchanneledPayload; +type RetiredTaskPayload = SubchanneledPayload, ()>; // A tiny generic message type to synchronize multiple threads everytime some contextual data needs // to be switched (ie. SchedulingContext), just using a single communication channel. @@ -370,6 +599,14 @@ mod chained_channel { self.aux_sender = chained_aux_sender; Ok(()) } + + pub(super) fn len(&self) -> usize { + self.sender.len() + } + + pub(super) fn aux_len(&self) -> usize { + self.aux_sender.len() + } } // P doesn't need to be `: Clone`, yet rustc derive can't handle it. @@ -447,14 +684,14 @@ impl UsageQueueLoader { pub fn load(&self, address: Pubkey) -> UsageQueue { self.usage_queues.entry(address).or_default().clone() } -} -// (this is slow needing atomic mem reads. However, this can be turned into a lot faster -// optimizer-friendly version as shown in this crossbeam pr: -// https://github.com/crossbeam-rs/crossbeam/pull/1047) -fn disconnected() -> Receiver { - // drop the sender residing at .0, returning an always-disconnected receiver. - crossbeam_channel::unbounded().1 + pub fn usage_queue_count(&self) -> usize { + self.usage_queues.len() + } + + pub fn clear(&self) { + self.usage_queues.clear(); + } } fn initialized_result_with_timings() -> ResultWithTimings { @@ -462,49 +699,174 @@ fn initialized_result_with_timings() -> ResultWithTimings { } #[derive(Debug)] -pub struct PooledScheduler { - inner: PooledSchedulerInner, +pub struct PooledScheduler +where + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ + inner: PooledSchedulerInner, context: SchedulingContext, } #[derive(Debug)] -pub struct PooledSchedulerInner, TH: TaskHandler> { - thread_manager: ThreadManager, +pub struct PooledSchedulerInner +where + S: SpawnableScheduler, + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ + thread_manager: Arc>>, usage_queue_loader: UsageQueueLoader, + pooled_at: Instant, +} + +impl PooledSchedulerInner +where + S: SpawnableScheduler, + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ + fn pooled_since(&self) -> Duration { + self.pooled_at.elapsed() + } + + fn suspend_thread_manager(&mut self) { + debug!("suspend_thread_manager()"); + self.thread_manager.write().unwrap().suspend(); + } + + fn id(&self) -> SchedulerId { + self.thread_manager.read().unwrap().scheduler_id + } } +type Tid = i32; +// The linux's tid (essentially is in the pid name space) is guaranteed to be non-zero; so +// using 0 for special purpose at user-land is totally safe. 
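`HandlerTimings` above records both wall-clock and per-thread CPU time for a handled task. Below is a small sketch of capturing those two measurements with the `cpu-time` crate this change adds to the lockfile; `busy_work` stands in for the real transaction execution and is not part of the patch:

```rust
use {
    cpu_time::ThreadTime,
    std::time::{Instant, SystemTime},
};

fn busy_work() -> u64 {
    (0..1_000_000u64).fold(0, |acc, i| acc.wrapping_add(i))
}

fn main() {
    let wall = Instant::now();
    let cpu = ThreadTime::now();

    std::hint::black_box(busy_work());

    // Wall time includes blocking and descheduling, so it is normally >= CPU time.
    let execution_us = wall.elapsed().as_micros() as u64;
    let execution_cpu_us = cpu.elapsed().as_micros();
    let finish_time = SystemTime::now();

    println!("{finish_time:?}: wall={execution_us}us cpu={execution_cpu_us}us");
}
```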
+#[cfg_attr(target_os = "linux", allow(dead_code))] +const DUMMY_TID: Tid = 0; + +#[derive(Default)] +struct LogInterval(usize); + +impl LogInterval { + fn increment(&mut self) -> bool { + let should_log = self.0 % 1000 == 0; + self.0 = self.0.checked_add(1).unwrap(); + should_log + } +} + +const PRIMARY_SCHEDULER_ID: SchedulerId = 0; + // This type manages the OS threads for scheduling and executing transactions. The term // `session` is consistently used to mean a group of Tasks scoped under a single SchedulingContext. // This is equivalent to a particular bank for block verification. However, new terms is introduced // here to mean some continuous time over multiple continuous banks/slots for the block production, // which is planned to be implemented in the future. #[derive(Debug)] -struct ThreadManager, TH: TaskHandler> { +struct ThreadManager +where + S: SpawnableScheduler, + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ scheduler_id: SchedulerId, - pool: Arc>, + pool: Arc>, + handler: TH, new_task_sender: Sender, - new_task_receiver: Receiver, + new_task_receiver: Option>, session_result_sender: Sender>, session_result_receiver: Receiver>, session_result_with_timings: Option, - scheduler_thread: Option>, + scheduler_thread_and_tid: Option<(JoinHandle>, Tid)>, handler_threads: Vec>, + accumulator_thread: Option>, } -impl PooledScheduler { - fn do_spawn(pool: Arc>, initial_context: SchedulingContext) -> Self { +impl PooledScheduler +where + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ + fn do_spawn( + pool: Arc>, + initial_context: SchedulingContext, + handler: TH, + ) -> Self { Self::from_inner( - PooledSchedulerInner:: { - thread_manager: ThreadManager::new(pool), + PooledSchedulerInner { + thread_manager: Arc::new(RwLock::new(ThreadManager::new(pool.clone(), handler))), usage_queue_loader: UsageQueueLoader::default(), + pooled_at: Instant::now(), }, initial_context, ) } + + #[cfg(feature = "dev-context-only-utils")] + pub fn clear_session_result_with_timings(&mut self) { + assert_matches!( + self.inner + .thread_manager + .write() + .unwrap() + .take_session_result_with_timings(), + (Ok(_), _) + ); + } + + #[cfg(feature = "dev-context-only-utils")] + pub fn restart_session(&mut self) { + self.inner + .thread_manager + .write() + .unwrap() + .start_session(&self.context); + } + + #[cfg(feature = "dev-context-only-utils")] + pub fn schedule_task(&self, task: Task) { + self.inner.thread_manager.read().unwrap().send_task(task); + } + + fn ensure_thread_manager_resumed( + &self, + context: &SchedulingContext, + ) -> std::result::Result>, TransactionError> + { + let mut was_already_active = false; + loop { + let read = self.inner.thread_manager.read().unwrap(); + if !read.is_suspended() { + debug!( + "{}", + if was_already_active { + "ensure_thread_manager_resumed(): was already active." + } else { + "ensure_thread_manager_resumed(): wasn't already active..." 
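`LogInterval` above is a tiny throttle: every call increments a counter, but only every 1000th call (including the very first) tells the caller to emit the scheduler status log line. A standalone restatement with a usage check:

```rust
#[derive(Default)]
struct LogInterval(usize);

impl LogInterval {
    fn increment(&mut self) -> bool {
        // Log on counts 0, 1000, 2000, ...; checked_add turns overflow into a panic
        // instead of silently wrapping.
        let should_log = self.0 % 1000 == 0;
        self.0 = self.0.checked_add(1).unwrap();
        should_log
    }
}

fn main() {
    let mut interval = LogInterval::default();
    let logged = (0..2500).filter(|_| interval.increment()).count();
    assert_eq!(logged, 3); // events 0, 1000 and 2000
}
```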
+ } + ); + return Ok(read); + } else { + debug!("ensure_thread_manager_resumed(): will start threads..."); + drop(read); + let mut write = self.inner.thread_manager.write().unwrap(); + write.start_or_try_resume_threads(context)?; + drop(write); + was_already_active = false; + } + } + } } -impl, TH: TaskHandler> ThreadManager { - fn new(pool: Arc>) -> Self { +impl ThreadManager +where + S: SpawnableScheduler, + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ + fn new(pool: Arc>, handler: TH) -> Self { let (new_task_sender, new_task_receiver) = crossbeam_channel::unbounded(); let (session_result_sender, session_result_receiver) = crossbeam_channel::unbounded(); let handler_count = pool.handler_count; @@ -512,23 +874,40 @@ impl, TH: TaskHandler> ThreadManager { Self { scheduler_id: pool.new_scheduler_id(), pool, + handler, new_task_sender, - new_task_receiver, + new_task_receiver: Some(new_task_receiver), session_result_sender, session_result_receiver, session_result_with_timings: None, - scheduler_thread: None, + scheduler_thread_and_tid: None, handler_threads: Vec::with_capacity(handler_count), + accumulator_thread: None, } } + fn is_suspended(&self) -> bool { + self.scheduler_thread_and_tid.is_none() + } + + pub fn take_scheduler_thread(&mut self) -> Option>> { + self.scheduler_thread_and_tid + .take() + .map(|(thread, _tid)| thread) + } + fn execute_task_with_handler( + handler: &TH, bank: &Arc, executed_task: &mut Box, handler_context: &HandlerContext, + send_metrics: bool, ) { + let handler_timings = + send_metrics.then_some((Measure::start("process_message_time"), ThreadTime::now())); debug!("handling task at {:?}", thread::current()); TH::handle( + handler, &mut executed_task.result_with_timings.0, &mut executed_task.result_with_timings.1, bank, @@ -536,30 +915,76 @@ impl, TH: TaskHandler> ThreadManager { executed_task.task.task_index(), handler_context, ); + if let Some((mut wall_time, cpu_time)) = handler_timings { + executed_task.handler_timings = Some(HandlerTimings { + finish_time: SystemTime::now(), + execution_cpu_us: cpu_time.elapsed().as_micros(), + execution_us: { + // make wall time is longer than cpu time, always + wall_time.stop(); + wall_time.as_us() + }, + }); + } } fn accumulate_result_with_timings( - (result, timings): &mut ResultWithTimings, + (_result, timings): &mut ResultWithTimings, executed_task: Box, ) { - match executed_task.result_with_timings.0 { - Ok(()) => {} - Err(error) => { - error!("error is detected while accumulating....: {error:?}"); - // Override errors intentionally for simplicity, not retaining the - // first error unlike the block verification in the - // blockstore_processor. This will be addressed with more - // full-fledged impl later. 
- *result = Err(error); - } + assert_matches!(executed_task.result_with_timings.0, Ok(())); + + if let Some(handler_timings) = &executed_task.handler_timings { + let thread = format!("solScExLane{:02}", executed_task.thx); + let signature = executed_task.task.transaction().signature().to_string(); + let account_locks_in_json = serde_json::to_string( + &executed_task + .task + .transaction() + .get_account_locks_unchecked(), + ) + .unwrap(); + let status = format!("{:?}", executed_task.result_with_timings.0); + let compute_unit_price = executed_task + .task + .transaction() + .get_compute_budget_details(false) + .map(|d| d.compute_unit_price) + .unwrap_or_default(); + + datapoint_info_at!( + handler_timings.finish_time, + "transaction_timings", + ("slot", executed_task.slot, i64), + ("index", executed_task.task.task_index(), i64), + ("thread", thread, String), + ("signature", signature, String), + ("account_locks_in_json", account_locks_in_json, String), + ("status", status, String), + ("duration", handler_timings.execution_us, i64), + ("cpu_duration", handler_timings.execution_cpu_us, i64), + ("compute_units", 0 /*task.cu*/, i64), + ("priority", compute_unit_price, i64), // old name is kept for compat... + ); } timings.accumulate(&executed_task.result_with_timings.1); + drop(executed_task); } fn take_session_result_with_timings(&mut self) -> ResultWithTimings { self.session_result_with_timings.take().unwrap() } + fn reset_session_on_error(&mut self) -> Result<()> { + let err = self + .session_result_with_timings + .replace(initialized_result_with_timings()) + .unwrap() + .0; + assert_matches!(err, Err(_)); + err + } + fn put_session_result_with_timings(&mut self, result_with_timings: ResultWithTimings) { assert_matches!( self.session_result_with_timings @@ -568,7 +993,24 @@ impl, TH: TaskHandler> ThreadManager { ); } - fn start_threads(&mut self, context: &SchedulingContext) { + fn start_or_try_resume_threads(&mut self, context: &SchedulingContext) -> Result<()> { + if !self.is_suspended() { + // this can't be promoted to panic! as read => write upgrade isn't completely + // race-free in ensure_thread_manager_resumed()... + warn!("try_resume(): already resumed"); + return Ok(()); + } else if self + .session_result_with_timings + .as_ref() + .map(|(result, _)| result.is_err()) + .unwrap_or(false) + { + warn!("try_resume(): skipping resuming due to err, while resetting session result"); + return self.reset_session_on_error(); + } + debug!("try_resume(): doing now"); + + let send_metrics = env::var("SOLANA_TRANSACTION_TIMINGS").is_ok(); // Firstly, setup bi-directional messaging between the scheduler and handlers to pass // around tasks, by creating 2 channels (one for to-be-handled tasks from the scheduler to // the handlers and the other for finished tasks from the handlers to the scheduler). @@ -660,7 +1102,14 @@ impl, TH: TaskHandler> ThreadManager { let (finished_idle_task_sender, finished_idle_task_receiver) = crossbeam_channel::unbounded::>(); - let mut result_with_timings = self.session_result_with_timings.take(); + let (retired_task_sender, retired_task_receiver) = + crossbeam_channel::unbounded::(); + let (accumulated_result_sender, accumulated_result_receiver) = + crossbeam_channel::unbounded::>(); + + let scheduler_id = self.scheduler_id; + let mut slot = context.bank().slot(); + let (tid_sender, tid_receiver) = crossbeam_channel::bounded(1); // High-level flow of new tasks: // 1. the replay stage thread send a new task. 
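`start_or_try_resume_threads()` above also has the freshly spawned scheduler thread report its OS thread id back over a bounded(1) channel before entering its main loop, falling back to `DUMMY_TID` (0) off Linux. A sketch of that one-shot handshake, assuming a Linux-only `rustix` dependency as in the patch:

```rust
use {crossbeam_channel::bounded, std::thread};

fn main() {
    let (tid_sender, tid_receiver) = bounded::<i32>(1);

    let worker = thread::spawn(move || {
        #[cfg(target_os = "linux")]
        let tid = rustix::thread::gettid().as_raw_nonzero().get();
        #[cfg(not(target_os = "linux"))]
        let tid = 0; // stand-in for DUMMY_TID

        // Report the tid exactly once, before doing any real work, so the
        // spawner never observes a half-initialized thread.
        tid_sender.send(tid).unwrap();
        // ... main loop would run here ...
    });

    let tid = tid_receiver.recv().unwrap();
    println!("worker thread id: {tid}");
    worker.join().unwrap();
}
```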
@@ -669,12 +1118,16 @@ impl, TH: TaskHandler> ThreadManager { // 4. the handler thread processes the dispatched task. // 5. the handler thread reply back to the scheduler thread as an executed task. // 6. the scheduler thread post-processes the executed task. + // 7. the scheduler thread send the executed task to the accumulator thread. + // 8. the accumulator thread examines the executed task's result and accumulate its timing, + // finally dropping the transaction inside the executed task. let scheduler_main_loop = || { let handler_count = self.pool.handler_count; let session_result_sender = self.session_result_sender.clone(); - let new_task_receiver = self.new_task_receiver.clone(); + let mut new_task_receiver = self.new_task_receiver.take().unwrap(); let mut session_ending = false; + let mut thread_suspending = false; // Now, this is the main loop for the scheduler thread, which is a special beast. // @@ -724,24 +1177,43 @@ impl, TH: TaskHandler> ThreadManager { } }; + const BITS_PER_HEX_DIGIT: usize = 4; let mut state_machine = unsafe { SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() }; - - loop { - if let Ok(NewTaskPayload::OpenSubchannel(context)) = new_task_receiver.recv() { - // signal about new SchedulingContext to handler threads - runnable_task_sender - .send_chained_channel(context, handler_count) - .unwrap(); - assert_matches!( - result_with_timings.replace(initialized_result_with_timings()), - None + let mut log_interval = LogInterval::default(); + // hint compiler about inline[never] and unlikely? + macro_rules! log_scheduler { + ($prefix:tt) => { + info!( + "[sch_{:0width$x}]: slot: {}[{:12}]({}{}): state_machine(({}(+{})=>{})/{}|{}) channels(<{} >{}+{} <{}+{})", + scheduler_id, slot, + (if ($prefix) == "step" { "interval" } else { $prefix }), + (if session_ending {"S"} else {"-"}), (if thread_suspending {"T"} else {"-"}), + state_machine.active_task_count(), state_machine.unblocked_task_queue_count(), state_machine.handled_task_count(), + state_machine.total_task_count(), + state_machine.unblocked_task_count(), + new_task_receiver.len(), + runnable_task_sender.len(), runnable_task_sender.aux_len(), + finished_blocked_task_receiver.len(), finished_idle_task_receiver.len(), + width = SchedulerId::BITS as usize / BITS_PER_HEX_DIGIT, ); - } else { - unreachable!(); - } + }; + } + trace!("solScheduler thread is running at: {:?}", thread::current()); + tid_sender + .send({ + #[cfg(not(target_os = "linux"))] + let tid = DUMMY_TID; + #[cfg(target_os = "linux")] + let tid = rustix::thread::gettid().as_raw_nonzero().get(); + tid + }) + .unwrap(); + log_scheduler!("T:started"); + + while !thread_suspending { let mut is_finished = false; while !is_finished { // ALL recv selectors are eager-evaluated ALWAYS by current crossbeam impl, @@ -751,9 +1223,6 @@ impl, TH: TaskHandler> ThreadManager { let dummy_unblocked_task_receiver = dummy_receiver(state_machine.has_unblocked_task()); - // (Assume this is biased; i.e. select_biased! in this crossbeam pr: - // https://github.com/rust-lang/futures-rs/pull/1976) - // // There's something special called dummy_unblocked_task_receiver here. // This odd pattern was needed to react to newly unblocked tasks from // _not-crossbeam-channel_ event sources, precisely at the specified @@ -764,13 +1233,25 @@ impl, TH: TaskHandler> ThreadManager { // consistent. Note that unified scheduler will go // into busy looping to seek lowest latency eventually. 
However, not now, // to measure _actual_ cpu usage easily with the select approach. - select! { + let state_change = select_biased! { recv(finished_blocked_task_receiver) -> executed_task => { let executed_task = executed_task.unwrap(); - state_machine.deschedule_task(&executed_task.task); - let result_with_timings = result_with_timings.as_mut().unwrap(); - Self::accumulate_result_with_timings(result_with_timings, executed_task); + if executed_task.is_err() { + log_scheduler!("S+T:aborted"); + // MUST: clear the usage queue loader before reusing this scheduler + // ... + session_result_sender.send(None).unwrap(); + // be explicit about specifically dropping this receiver + drop(new_task_receiver); + // this timings aren't for the accumulated one. but + // caller doesn't care. + return Some(executed_task.result_with_timings); + } else { + state_machine.deschedule_task(&executed_task.task); + retired_task_sender.send_buffered(RetiredTaskPayload::Payload(executed_task)).unwrap(); + } + "step" }, recv(dummy_unblocked_task_receiver) -> dummy => { assert_matches!(dummy, Err(RecvError)); @@ -779,89 +1260,229 @@ impl, TH: TaskHandler> ThreadManager { .schedule_next_unblocked_task() .expect("unblocked task"); runnable_task_sender.send_payload(task).unwrap(); + "step" }, recv(new_task_receiver) -> message => { - assert!(!session_ending); + assert!(message.is_err() || (!session_ending && !thread_suspending)); - match message.unwrap() { - NewTaskPayload::Payload(task) => { + match message { + Ok(NewTaskPayload::Payload(task)) => { if let Some(task) = state_machine.schedule_task(task) { runnable_task_sender.send_aux_payload(task).unwrap(); } + "step" } - NewTaskPayload::CloseSubchannel => { + Ok(NewTaskPayload::CloseSubchannel) => { session_ending = true; + "S:ending" } - NewTaskPayload::OpenSubchannel(_context) => { + Ok(NewTaskPayload::OpenSubchannel(_context)) => { unreachable!(); } + Err(_) => { + assert!(!thread_suspending); + thread_suspending = true; + + // Err(_) on new_task_receiver guarantees + // that there's no live sender and no messages to be + // received anymore; so dropping by overriding it with + // never() should pose no possibility of missed messages. + new_task_receiver = never(); + + "T:suspending" + } } }, recv(finished_idle_task_receiver) -> executed_task => { let executed_task = executed_task.unwrap(); - state_machine.deschedule_task(&executed_task.task); - let result_with_timings = result_with_timings.as_mut().unwrap(); - Self::accumulate_result_with_timings(result_with_timings, executed_task); + if executed_task.is_err() { + log_scheduler!("S+T:aborted"); + session_result_sender.send(None).unwrap(); + // be explicit about specifically dropping this receiver + drop(new_task_receiver); + // this timings aren't for the accumulated one. but + // caller doesn't care. 
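When the new-task channel disconnects, the scheduler loop above swaps the receiver for `crossbeam_channel::never()` so the remaining channels keep being serviced without repeatedly hitting `Err(Disconnected)`. A sketch of the same idea with the stock `select!` macro (the patch itself uses the fork's `select_biased!`); the channel names here are illustrative:

```rust
use crossbeam_channel::{never, select, unbounded};

fn main() {
    let (task_sender, mut task_receiver) = unbounded::<u32>();
    let (result_sender, result_receiver) = unbounded::<u32>();

    task_sender.send(7).unwrap();
    drop(task_sender); // simulate the task-sending side going away
    result_sender.send(99).unwrap();

    let mut seen = Vec::new();
    while seen.len() < 2 {
        let task_channel_disconnected = select! {
            recv(task_receiver) -> message => {
                match message {
                    Ok(task) => {
                        seen.push(task);
                        false
                    }
                    Err(_) => true,
                }
            },
            recv(result_receiver) -> message => {
                if let Ok(result) = message {
                    seen.push(result);
                }
                false
            },
        };
        if task_channel_disconnected {
            // Stop selecting on the dead channel; never() is never ready.
            task_receiver = never();
        }
    }
    assert!(seen.contains(&7) && seen.contains(&99));
}
```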
+ return Some(executed_task.result_with_timings); + } else { + state_machine.deschedule_task(&executed_task.task); + retired_task_sender.send_buffered(RetiredTaskPayload::Payload(executed_task)).unwrap(); + } + "step" }, }; + if state_change != "step" || log_interval.increment() { + log_scheduler!(state_change); + } - is_finished = session_ending && state_machine.has_no_active_task(); + is_finished = (session_ending || thread_suspending) + && state_machine.has_no_active_task(); } if session_ending { + log_scheduler!("S:ended"); state_machine.reinitialize(); + log_interval = LogInterval::default(); + retired_task_sender + .send(RetiredTaskPayload::CloseSubchannel) + .unwrap(); session_result_sender .send(Some( - result_with_timings - .take() + accumulated_result_receiver + .recv() + .unwrap() .unwrap_or_else(initialized_result_with_timings), )) .unwrap(); - session_ending = false; + if !thread_suspending { + session_ending = false; + } + } + + if !thread_suspending { + match new_task_receiver.recv() { + Ok(NewTaskPayload::OpenSubchannel(context)) => { + slot = context.bank().slot(); + // signal about new SchedulingContext to handler threads + runnable_task_sender + .send_chained_channel(context, handler_count) + .unwrap(); + retired_task_sender + .send(RetiredTaskPayload::OpenSubchannel(())) + .unwrap(); + log_scheduler!("S:started"); + } + Err(_) => { + assert!(!thread_suspending); + thread_suspending = true; + log_scheduler!("T:suspending"); + continue; + } + Ok(_) => { + unreachable!(); + } + } } } + + log_scheduler!("T:suspended"); + let scheduler_result_with_timings = if session_ending { + None + } else { + retired_task_sender + .send(RetiredTaskPayload::CloseSubchannel) + .unwrap(); + accumulated_result_receiver.recv().unwrap() + }; + trace!( + "solScheduler thread is terminating at: {:?}", + thread::current() + ); + scheduler_result_with_timings } }; - let handler_main_loop = || { + let handler_main_loop = |thx| { let pool = self.pool.clone(); + let handler = self.handler.clone(); let mut runnable_task_receiver = runnable_task_receiver.clone(); let finished_blocked_task_sender = finished_blocked_task_sender.clone(); let finished_idle_task_sender = finished_idle_task_sender.clone(); - move || loop { - let (task, sender) = select! { - recv(runnable_task_receiver.for_select()) -> message => { - if let Some(task) = runnable_task_receiver.after_select(message.unwrap()) { - (task, &finished_blocked_task_sender) - } else { - continue; - } - }, - recv(runnable_task_receiver.aux_for_select()) -> task => { - if let Ok(task) = task { - (task, &finished_idle_task_sender) - } else { - continue; - } - }, - }; - let mut task = ExecutedTask::new_boxed(task); - Self::execute_task_with_handler( - runnable_task_receiver.context().bank(), - &mut task, - &pool.handler_context, + move || { + trace!( + "solScHandler{:02} thread is running at: {:?}", + thx, + thread::current() ); - sender.send(task).unwrap(); + loop { + let (task, sender) = select_biased! 
{ + recv(runnable_task_receiver.for_select()) -> message => { + match message { + Ok(message) => { + if let Some(task) = runnable_task_receiver.after_select(message) { + (task, &finished_blocked_task_sender) + } else { + continue; + } + }, + Err(_) => break, + } + }, + recv(runnable_task_receiver.aux_for_select()) -> task => { + if let Ok(task) = task { + (task, &finished_idle_task_sender) + } else { + continue; + } + }, + }; + let bank = runnable_task_receiver.context().bank(); + let mut task = ExecutedTask::new_boxed(task, thx, bank.slot()); + Self::execute_task_with_handler( + &handler, + bank, + &mut task, + &pool.handler_context, + send_metrics, + ); + if sender.send(task).is_err() { + break; + } + } + trace!( + "solScHandler{:02} thread is terminating at: {:?}", + thx, + thread::current() + ); + } + }; + + let mut accumulator_result_with_timings = self.session_result_with_timings.take(); + + let accumulator_main_loop = || { + move || 'outer: loop { + match retired_task_receiver.recv_timeout(Duration::from_millis(40)) { + Ok(RetiredTaskPayload::Payload(executed_task)) => { + Self::accumulate_result_with_timings( + accumulator_result_with_timings.as_mut().unwrap(), + executed_task, + ); + } + Ok(RetiredTaskPayload::OpenSubchannel(())) => { + assert_matches!( + accumulator_result_with_timings + .replace(initialized_result_with_timings()), + None + ); + } + Ok(RetiredTaskPayload::CloseSubchannel) => { + if accumulated_result_sender + .send(accumulator_result_with_timings.take()) + .is_err() + { + break 'outer; + } + } + Err(RecvTimeoutError::Disconnected) => break 'outer, + Err(RecvTimeoutError::Timeout) => continue, + } } }; - self.scheduler_thread = Some( + self.scheduler_thread_and_tid = Some(( thread::Builder::new() .name("solScheduler".to_owned()) .spawn(scheduler_main_loop()) .unwrap(), + tid_receiver.recv().unwrap(), + )); + + self.accumulator_thread = Some( + thread::Builder::new() + .name("solScAccmltr".to_owned()) + .spawn(accumulator_main_loop()) + .unwrap(), ); self.handler_threads = (0..self.pool.handler_count) @@ -869,97 +1490,194 @@ impl, TH: TaskHandler> ThreadManager { |thx| { thread::Builder::new() .name(format!("solScHandler{:02}", thx)) - .spawn(handler_main_loop()) + .spawn(handler_main_loop(thx)) .unwrap() } }) .collect(); + Ok(()) } - fn send_task(&self, task: Task) { + fn send_task(&self, task: Task) -> bool { debug!("send_task()"); self.new_task_sender .send(NewTaskPayload::Payload(task)) - .unwrap() + .is_err() } fn end_session(&mut self) { - if self.session_result_with_timings.is_some() { + debug!("end_session(): will end session..."); + if self.is_suspended() { + debug!("end_session(): no threads.."); + assert_matches!(self.session_result_with_timings, Some(_)); + return; + } else if self.session_result_with_timings.is_some() { debug!("end_session(): already result resides within thread manager.."); return; } - debug!("end_session(): will end session..."); - self.new_task_sender + let mut abort_detected = self + .new_task_sender .send(NewTaskPayload::CloseSubchannel) - .unwrap(); + .is_err(); if let Some(result_with_timings) = self.session_result_receiver.recv().unwrap() { + assert!(!abort_detected); self.put_session_result_with_timings(result_with_timings); + } else { + abort_detected = true; + } + + if abort_detected { + self.suspend(); } } fn start_session(&mut self, context: &SchedulingContext) { - assert_matches!(self.session_result_with_timings, None); - self.new_task_sender - .send(NewTaskPayload::OpenSubchannel(context.clone())) - .unwrap(); + if 
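The new accumulator thread speaks the same single-channel subchannel protocol as `NewTaskPayload`/`RetiredTaskPayload` above: an open message resets the per-session state, payloads are folded in one by one, and a close message ships the session result back. A reduced sketch of that protocol with plain integers standing in for executed tasks (the simplified `OpenSubchannel` variant here carries no context, unlike the real type):

```rust
use {crossbeam_channel::unbounded, std::thread};

enum SubchanneledPayload<P> {
    Payload(P),
    OpenSubchannel,
    CloseSubchannel,
}

fn main() {
    let (subchannel_sender, subchannel_receiver) = unbounded::<SubchanneledPayload<u64>>();
    let (total_sender, total_receiver) = unbounded::<u64>();

    let accumulator = thread::Builder::new()
        .name("accumulator-example".to_owned())
        .spawn(move || {
            let mut total = None;
            while let Ok(message) = subchannel_receiver.recv() {
                match message {
                    SubchanneledPayload::OpenSubchannel => {
                        // A session must not already be open.
                        assert!(total.replace(0).is_none());
                    }
                    SubchanneledPayload::Payload(value) => {
                        *total.as_mut().unwrap() += value;
                    }
                    SubchanneledPayload::CloseSubchannel => {
                        total_sender.send(total.take().unwrap()).unwrap();
                    }
                }
            }
        })
        .unwrap();

    subchannel_sender.send(SubchanneledPayload::OpenSubchannel).unwrap();
    for value in [1, 2, 3] {
        subchannel_sender.send(SubchanneledPayload::Payload(value)).unwrap();
    }
    subchannel_sender.send(SubchanneledPayload::CloseSubchannel).unwrap();
    assert_eq!(total_receiver.recv().unwrap(), 6);

    drop(subchannel_sender); // disconnect ends the accumulator loop
    accumulator.join().unwrap();
}
```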
!self.is_suspended() { + assert_matches!(self.session_result_with_timings, None); + self.new_task_sender + .send(NewTaskPayload::OpenSubchannel(context.clone())) + .unwrap(); + } else { + self.put_session_result_with_timings(initialized_result_with_timings()); + assert_matches!(self.start_or_try_resume_threads(context), Ok(())); + } + } + + fn suspend(&mut self) { + let Some(scheduler_thread) = self.take_scheduler_thread() else { + warn!("suspend(): already suspended..."); + return; + }; + debug!("suspend(): terminating threads by {:?}", thread::current()); + + let (s, r) = crossbeam_channel::unbounded(); + (self.new_task_sender, self.new_task_receiver) = (s, Some(r)); + + let () = self.accumulator_thread.take().unwrap().join().unwrap(); + for thread in self.handler_threads.drain(..) { + debug!("joining...: {:?}", thread); + () = thread.join().unwrap(); + } + if let Some(result_with_timings) = scheduler_thread.join().unwrap() { + self.put_session_result_with_timings(result_with_timings); + } + + debug!( + "suspend(): successfully suspended threads by {:?}", + thread::current() + ); + } + + fn is_primary(&self) -> bool { + self.scheduler_id == PRIMARY_SCHEDULER_ID + } + + #[cfg(target_os = "linux")] + fn active_tid_if_not_primary(&self) -> Option { + if self.is_primary() { + // always exempt from cleaner... + None + } else { + self.scheduler_thread_and_tid.as_ref().map(|&(_, tid)| tid) + } } } -pub trait SpawnableScheduler: InstalledScheduler { - type Inner: Debug + Send + Sync; +pub trait SpawnableScheduler: InstalledScheduler +where + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ + type Inner: Debug + Send + Sync + RetirableSchedulerInner; fn into_inner(self) -> (ResultWithTimings, Self::Inner); fn from_inner(inner: Self::Inner, context: SchedulingContext) -> Self; - fn spawn(pool: Arc>, initial_context: SchedulingContext) -> Self + fn spawn( + pool: Arc>, + initial_context: SchedulingContext, + handler: TH, + ) -> Self where Self: Sized; } -impl SpawnableScheduler for PooledScheduler { - type Inner = PooledSchedulerInner; +pub trait RetirableSchedulerInner { + fn retire_if_stale(&mut self) -> bool; +} - fn into_inner(mut self) -> (ResultWithTimings, Self::Inner) { +impl SpawnableScheduler for PooledScheduler +where + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ + type Inner = PooledSchedulerInner; + + fn into_inner(self) -> (ResultWithTimings, Self::Inner) { let result_with_timings = { - let manager = &mut self.inner.thread_manager; + let manager = &mut self.inner.thread_manager.write().unwrap(); manager.end_session(); manager.take_session_result_with_timings() }; (result_with_timings, self.inner) } - fn from_inner(mut inner: Self::Inner, context: SchedulingContext) -> Self { - inner.thread_manager.start_session(&context); + fn from_inner(inner: Self::Inner, context: SchedulingContext) -> Self { + inner + .thread_manager + .write() + .unwrap() + .start_session(&context); Self { inner, context } } - fn spawn(pool: Arc>, initial_context: SchedulingContext) -> Self { - let mut scheduler = Self::do_spawn(pool, initial_context); - scheduler - .inner - .thread_manager - .start_threads(&scheduler.context); + fn spawn( + pool: Arc>, + initial_context: SchedulingContext, + handler: TH, + ) -> Self { + let scheduler = Self::do_spawn(pool.clone(), initial_context, handler); + pool.register_to_cleaner(Arc::downgrade(&scheduler.inner.thread_manager)); scheduler } } -impl InstalledScheduler for PooledScheduler { +impl InstalledScheduler for PooledScheduler +where + TH: TaskHandler, + SEA: 
ScheduleExecutionArg, +{ fn id(&self) -> SchedulerId { - self.inner.thread_manager.scheduler_id + self.inner.id() } fn context(&self) -> &SchedulingContext { &self.context } - fn schedule_execution(&self, &(transaction, index): &(&SanitizedTransaction, usize)) { - let task = SchedulingStateMachine::create_task(transaction.clone(), index, &mut |pubkey| { - self.inner.usage_queue_loader.load(pubkey) - }); - self.inner.thread_manager.send_task(task); + fn schedule_execution( + &self, + transaction_with_index: SEA::TransactionWithIndex<'_>, + ) -> Result<()> { + transaction_with_index.with_transaction_and_index(|transaction, index| { + let task = + SchedulingStateMachine::create_task(transaction.clone(), index, &mut |pubkey| { + self.inner.usage_queue_loader.load(pubkey) + }); + let abort_detected = self + .ensure_thread_manager_resumed(&self.context)? + .send_task(task); + if abort_detected { + let thread_manager = &mut self.inner.thread_manager.write().unwrap(); + thread_manager.suspend(); + thread_manager.reset_session_on_error() + } else { + Ok(()) + } + }) } fn wait_for_termination( @@ -971,17 +1689,78 @@ impl InstalledScheduler for PooledScheduler { } fn pause_for_recent_blockhash(&mut self) { - self.inner.thread_manager.end_session(); + self.inner.thread_manager.write().unwrap().end_session(); + } +} + +impl UninstalledScheduler for PooledSchedulerInner +where + S: SpawnableScheduler>, + TH: TaskHandler, + SEA: ScheduleExecutionArg, +{ + fn return_to_pool(mut self: Box) { + let pool = self.thread_manager.write().unwrap().pool.clone(); + self.pooled_at = Instant::now(); + pool.return_scheduler(*self) } } -impl UninstalledScheduler for PooledSchedulerInner +impl RetirableSchedulerInner for PooledSchedulerInner where - S: SpawnableScheduler>, - TH: TaskHandler, + S: SpawnableScheduler>, + TH: TaskHandler, + SEA: ScheduleExecutionArg, { - fn return_to_pool(self: Box) { - self.thread_manager.pool.clone().return_scheduler(*self) + fn retire_if_stale(&mut self) -> bool { + // reap threads after 10mins of inactivity for any pooled (idle) schedulers. The primary + // scheduler is special-cased to empty its usage queue loader book instead, for easier + // monitoring to accumulate os-level thread metrics. The duration is chosen based on the + // rough estimation from the frequency of short-lived forks on the mainnet-beta, with + // consideration of some increased forking at epoch boundaries. 
+ const IDLE_DURATION_FOR_LAZY_THREAD_RECLAIM: Duration = Duration::from_secs(600); + + const BITS_PER_HEX_DIGIT: usize = 4; + let usage_queue_count = self.usage_queue_loader.usage_queue_count(); + if usage_queue_count < 200_000 { + info!( + "[sch_{:0width$x}]: cleaner: usage queue loader book size: {usage_queue_count}...", + self.id(), + width = SchedulerId::BITS as usize / BITS_PER_HEX_DIGIT, + ); + } else if self.thread_manager.read().unwrap().is_primary() { + info!( + "[sch_{:0width$x}]: cleaner: too big usage queue loader book size: {usage_queue_count}...; emptying the primary scheduler", + self.id(), + width = SchedulerId::BITS as usize / BITS_PER_HEX_DIGIT, + ); + self.usage_queue_loader.clear(); + return true; + } else { + info!( + "[sch_{:0width$x}]: cleaner: too big usage queue loader book size: {usage_queue_count}...; retiring scheduler", + self.id(), + width = SchedulerId::BITS as usize / BITS_PER_HEX_DIGIT, + ); + self.suspend_thread_manager(); + return false; + } + + let pooled_duration = self.pooled_since(); + if pooled_duration <= IDLE_DURATION_FOR_LAZY_THREAD_RECLAIM { + true + } else if !self.thread_manager.read().unwrap().is_primary() { + info!( + "[sch_{:0width$x}]: cleaner: retiring unused scheduler after {:?}...", + self.id(), + pooled_duration, + width = SchedulerId::BITS as usize / BITS_PER_HEX_DIGIT, + ); + self.suspend_thread_manager(); + false + } else { + true + } } } @@ -989,7 +1768,6 @@ where mod tests { use { super::*, - assert_matches::assert_matches, solana_runtime::{ bank::Bank, bank_forks::BankForks, @@ -1000,11 +1778,17 @@ mod tests { solana_sdk::{ clock::{Slot, MAX_PROCESSING_AGE}, pubkey::Pubkey, + scheduling::SchedulingMode, signer::keypair::Keypair, system_transaction, transaction::{SanitizedTransaction, TransactionError}, }, - std::{sync::Arc, thread::JoinHandle}, + std::{ + mem, + sync::Arc, + thread::{self, sleep, JoinHandle}, + time::Duration, + }, }; #[test] @@ -1017,7 +1801,10 @@ mod tests { // this indirectly proves that there should be circular link because there's only one Arc // at this moment now - assert_eq!((Arc::strong_count(&pool), Arc::weak_count(&pool)), (1, 1)); + assert_eq!( + (Arc::strong_count(&pool), Arc::weak_count(&pool)), + (1 + 1 /* todo */, 1) + ); let debug = format!("{pool:#?}"); assert!(!debug.is_empty()); } @@ -1030,7 +1817,7 @@ mod tests { let pool = DefaultSchedulerPool::new_dyn(None, None, None, None, ignored_prioritization_fee_cache); let bank = Arc::new(Bank::default_for_tests()); - let context = SchedulingContext::new(bank); + let context = SchedulingContext::new(SchedulingMode::BlockVerification, bank); let scheduler = pool.take_scheduler(context); let debug = format!("{scheduler:#?}"); @@ -1045,7 +1832,7 @@ mod tests { let pool = DefaultSchedulerPool::new(None, None, None, None, ignored_prioritization_fee_cache); let bank = Arc::new(Bank::default_for_tests()); - let context = &SchedulingContext::new(bank); + let context = &SchedulingContext::new(SchedulingMode::BlockVerification, bank); let scheduler1 = pool.do_take_scheduler(context.clone()); let scheduler_id1 = scheduler1.id(); @@ -1074,7 +1861,7 @@ mod tests { let pool = DefaultSchedulerPool::new(None, None, None, None, ignored_prioritization_fee_cache); let bank = Arc::new(Bank::default_for_tests()); - let context = &SchedulingContext::new(bank); + let context = &SchedulingContext::new(SchedulingMode::BlockVerification, bank); let mut scheduler = pool.do_take_scheduler(context.clone()); // should never panic. 
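`retire_if_stale()` above keeps a scheduler pooled unless its usage-queue book has grown past 200,000 entries (the primary scheduler merely empties its book instead of retiring) or it has sat idle for more than ten minutes. A pure-function restatement of that decision; the `MAX_USAGE_QUEUE_COUNT` name is mine, the thresholds are the patch's:

```rust
use std::time::Duration;

const IDLE_DURATION_FOR_LAZY_THREAD_RECLAIM: Duration = Duration::from_secs(600);
const MAX_USAGE_QUEUE_COUNT: usize = 200_000;

/// true = keep the scheduler pooled, false = retire (suspend) it.
fn should_keep(usage_queue_count: usize, is_primary: bool, pooled_for: Duration) -> bool {
    if usage_queue_count >= MAX_USAGE_QUEUE_COUNT {
        // The primary scheduler only empties its oversized usage-queue book;
        // any other scheduler with an oversized book is retired outright.
        return is_primary;
    }
    // Small book: retire non-primary schedulers that sat idle for too long.
    is_primary || pooled_for <= IDLE_DURATION_FOR_LAZY_THREAD_RECLAIM
}

fn main() {
    assert!(should_keep(10, false, Duration::from_secs(30)));
    assert!(should_keep(10, true, Duration::from_secs(601)));
    assert!(should_keep(300_000, true, Duration::from_secs(30)));
    assert!(!should_keep(300_000, false, Duration::from_secs(30)));
    assert!(!should_keep(10, false, Duration::from_secs(601)));
}
```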
@@ -1096,8 +1883,10 @@ mod tests { let new_bank = &Arc::new(Bank::default_for_tests()); assert!(!Arc::ptr_eq(old_bank, new_bank)); - let old_context = &SchedulingContext::new(old_bank.clone()); - let new_context = &SchedulingContext::new(new_bank.clone()); + let old_context = + &SchedulingContext::new(SchedulingMode::BlockVerification, old_bank.clone()); + let new_context = + &SchedulingContext::new(SchedulingMode::BlockVerification, new_bank.clone()); let scheduler = pool.do_take_scheduler(old_context.clone()); let scheduler_id = scheduler.id(); @@ -1114,11 +1903,14 @@ mod tests { let bank = Bank::default_for_tests(); let bank_forks = BankForks::new_rw_arc(bank); - let mut bank_forks = bank_forks.write().unwrap(); + let mut bank_forks_write = bank_forks.write().unwrap(); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); let pool = DefaultSchedulerPool::new_dyn(None, None, None, None, ignored_prioritization_fee_cache); - bank_forks.install_scheduler_pool(pool); + bank_forks_write.install_scheduler_pool(pool); + bank_forks_write.prepare_to_drop(); + drop(bank_forks_write); + drop::(Arc::into_inner(bank_forks).unwrap().into_inner().unwrap()); } #[test] @@ -1182,11 +1974,11 @@ mod tests { let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); let pool = DefaultSchedulerPool::new_dyn(None, None, None, None, ignored_prioritization_fee_cache); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::new(SchedulingMode::BlockVerification, bank.clone()); assert_eq!(bank.transaction_count(), 0); let scheduler = pool.take_scheduler(context); - scheduler.schedule_execution(&(tx0, 0)); + scheduler.schedule_execution(&(tx0, 0)).unwrap(); let bank = BankWithScheduler::new(bank, Some(scheduler)); assert_matches!(bank.wait_for_completed_scheduler(), Some((Ok(()), _))); assert_eq!(bank.transaction_count(), 1); @@ -1207,7 +1999,7 @@ mod tests { let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); let pool = DefaultSchedulerPool::new_dyn(None, None, None, None, ignored_prioritization_fee_cache); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::new(SchedulingMode::BlockVerification, bank.clone()); let mut scheduler = pool.take_scheduler(context); let unfunded_keypair = Keypair::new(); @@ -1219,9 +2011,9 @@ mod tests { genesis_config.hash(), )); assert_eq!(bank.transaction_count(), 0); - scheduler.schedule_execution(&(bad_tx, 0)); + scheduler.schedule_execution(&(bad_tx, 0)).unwrap(); // simulate the task-sending thread is stalled for some reason. - std::thread::sleep(std::time::Duration::from_secs(1)); + sleep(Duration::from_secs(1)); assert_eq!(bank.transaction_count(), 0); let good_tx_after_bad_tx = @@ -1237,25 +2029,25 @@ mod tests { .result, Ok(_) ); - scheduler.schedule_execution(&(good_tx_after_bad_tx, 1)); + sleep(Duration::from_secs(3)); + scheduler + .schedule_execution(&(good_tx_after_bad_tx, 1)) + .unwrap_err(); + error!("last pause!"); scheduler.pause_for_recent_blockhash(); // transaction_count should remain same as scheduler should be bailing out. // That's because we're testing the serialized failing execution case in this test. - // However, currently threaded impl can't properly abort in this situtation.. - // so, 1 should be observed, intead of 0. 
// Also note that bank.transaction_count() is generally racy by nature, because // blockstore_processor and unified_scheduler both tend to process non-conflicting batches // in parallel as part of the normal operation. - assert_eq!(bank.transaction_count(), 1); + assert_eq!(bank.transaction_count(), 0); let bank = BankWithScheduler::new(bank, Some(scheduler)); assert_matches!( bank.wait_for_completed_scheduler(), - Some(( - Err(solana_sdk::transaction::TransactionError::AccountNotFound), - _timings - )) + Some((Ok(()), _timings)) ); + pool.uninstalled_from_bank_forks(); } #[test] @@ -1266,10 +2058,17 @@ mod tests { const BLOCKED_TRANSACTION_INDEX: usize = 1; static LOCK_TO_STALL: Mutex<()> = Mutex::new(()); - #[derive(Debug)] + #[derive(Debug, Clone)] struct StallingHandler; - impl TaskHandler for StallingHandler { + impl TaskHandler for StallingHandler { + fn create>( + _pool: &SchedulerPool, + ) -> Self { + Self + } + fn handle( + &self, result: &mut Result<()>, timings: &mut ExecuteTimings, bank: &Arc, @@ -1282,7 +2081,8 @@ mod tests { BLOCKED_TRANSACTION_INDEX => {} _ => unreachable!(), }; - DefaultTaskHandler::handle( + >::handle( + &DefaultTaskHandler, result, timings, bank, @@ -1316,25 +2116,27 @@ mod tests { let bank = Bank::new_for_tests(&genesis_config); let bank = setup_dummy_fork_graph(bank); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = SchedulerPool::, _>::new_dyn( - None, - None, - None, - None, - ignored_prioritization_fee_cache, - ); - let context = SchedulingContext::new(bank.clone()); + let pool = SchedulerPool::< + PooledScheduler, + _, + _, + >::new_dyn(None, None, None, None, ignored_prioritization_fee_cache); + let context = SchedulingContext::new(SchedulingMode::BlockVerification, bank.clone()); assert_eq!(bank.transaction_count(), 0); let scheduler = pool.take_scheduler(context); // Stall handling tx0 and tx1 let lock_to_stall = LOCK_TO_STALL.lock().unwrap(); - scheduler.schedule_execution(&(tx0, STALLED_TRANSACTION_INDEX)); - scheduler.schedule_execution(&(tx1, BLOCKED_TRANSACTION_INDEX)); + scheduler + .schedule_execution(&(tx0, STALLED_TRANSACTION_INDEX)) + .unwrap(); + scheduler + .schedule_execution(&(tx1, BLOCKED_TRANSACTION_INDEX)) + .unwrap(); // Wait a bit for the scheduler thread to decide to block tx1 - std::thread::sleep(std::time::Duration::from_secs(1)); + sleep(Duration::from_secs(1)); // Resume handling by unlocking LOCK_TO_STALL drop(lock_to_stall); @@ -1347,10 +2149,17 @@ mod tests { fn test_scheduler_mismatched_scheduling_context_race() { solana_logger::setup(); - #[derive(Debug)] + #[derive(Debug, Clone)] struct TaskAndContextChecker; - impl TaskHandler for TaskAndContextChecker { + impl TaskHandler for TaskAndContextChecker { + fn create>( + _pool: &SchedulerPool, + ) -> Self { + Self + } + fn handle( + &self, _result: &mut Result<()>, _timings: &mut ExecuteTimings, bank: &Arc, @@ -1379,7 +2188,11 @@ mod tests { )); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = SchedulerPool::, _>::new( + let pool = SchedulerPool::< + PooledScheduler, + _, + _, + >::new( Some(4), // spawn 4 threads None, None, @@ -1395,8 +2208,8 @@ mod tests { 2, genesis_config.hash(), )); - let context0 = &SchedulingContext::new(bank0.clone()); - let context1 = &SchedulingContext::new(bank1.clone()); + let context0 = &SchedulingContext::new(SchedulingMode::BlockVerification, bank0.clone()); + let context1 = &SchedulingContext::new(SchedulingMode::BlockVerification, 
bank1.clone()); // Exercise the scheduler by busy-looping to expose the race condition for (context, index) in [(context0, 0), (context1, 1)] @@ -1405,7 +2218,7 @@ mod tests { .take(10000) { let scheduler = pool.take_scheduler(context.clone()); - scheduler.schedule_execution(&(dummy_tx, index)); + scheduler.schedule_execution(&(dummy_tx, index)).unwrap(); scheduler.wait_for_termination(false).1.return_to_pool(); } } @@ -1415,7 +2228,7 @@ mod tests { Mutex, Mutex>>, SchedulingContext, - Arc>, + Arc>, ); impl AsyncScheduler { @@ -1434,7 +2247,7 @@ mod tests { } } - impl InstalledScheduler + impl InstalledScheduler for AsyncScheduler { fn id(&self) -> SchedulerId { @@ -1445,20 +2258,24 @@ mod tests { &self.2 } - fn schedule_execution(&self, &(transaction, index): &(&SanitizedTransaction, usize)) { + fn schedule_execution( + &self, + &(transaction, index): &(&SanitizedTransaction, usize), + ) -> Result<()> { let transaction_and_index = (transaction.clone(), index); let context = self.context().clone(); let pool = self.3.clone(); - self.1.lock().unwrap().push(std::thread::spawn(move || { + self.1.lock().unwrap().push(thread::spawn(move || { // intentionally sleep to simulate race condition where register_recent_blockhash // is handle before finishing executing scheduled transactions - std::thread::sleep(std::time::Duration::from_secs(1)); + sleep(Duration::from_secs(1)); let mut result = Ok(()); let mut timings = ExecuteTimings::default(); - ::handle( + >::handle( + &DefaultTaskHandler, &mut result, &mut timings, context.bank(), @@ -1468,6 +2285,8 @@ mod tests { ); (result, timings) })); + + Ok(()) } fn wait_for_termination( @@ -1475,7 +2294,7 @@ mod tests { _is_dropped: bool, ) -> (ResultWithTimings, UninstalledSchedulerBox) { self.do_wait(); - let result_with_timings = std::mem::replace( + let result_with_timings = mem::replace( &mut *self.0.lock().unwrap(), initialized_result_with_timings(), ); @@ -1500,7 +2319,8 @@ mod tests { } } - impl SpawnableScheduler + impl + SpawnableScheduler for AsyncScheduler { // well, i wish i can use ! (never type)..... 
@@ -1515,8 +2335,9 @@ mod tests { } fn spawn( - pool: Arc>, + pool: Arc>, initial_context: SchedulingContext, + _handler: DefaultTaskHandler, ) -> Self { AsyncScheduler::( Mutex::new(initialized_result_with_timings()), @@ -1527,6 +2348,14 @@ mod tests { } } + impl RetirableSchedulerInner + for AsyncScheduler + { + fn retire_if_stale(&mut self) -> bool { + unimplemented!(); + } + } + fn do_test_scheduler_schedule_execution_recent_blockhash_edge_case< const TRIGGER_RACE_CONDITION: bool, >() { @@ -1556,24 +2385,24 @@ mod tests { ); } let bank = setup_dummy_fork_graph(bank); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::new(SchedulingMode::BlockVerification, bank.clone()); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = - SchedulerPool::, DefaultTaskHandler>::new_dyn( - None, - None, - None, - None, - ignored_prioritization_fee_cache, - ); + let pool = SchedulerPool::, _, _>::new_dyn( + None, + None, + None, + None, + ignored_prioritization_fee_cache, + ); let scheduler = pool.take_scheduler(context); let bank = BankWithScheduler::new(bank, Some(scheduler)); assert_eq!(bank.transaction_count(), 0); // schedule but not immediately execute transaction - bank.schedule_transaction_executions([(&very_old_valid_tx, &0)].into_iter()); + bank.schedule_transaction_executions([(&very_old_valid_tx, &0)].into_iter()) + .unwrap(); // this calls register_recent_blockhash internally bank.fill_bank_with_ticks_for_tests(); @@ -1648,7 +2477,15 @@ mod tests { prioritization_fee_cache, }; - DefaultTaskHandler::handle(result, timings, bank, tx, 0, handler_context); + >::handle( + &DefaultTaskHandler, + result, + timings, + bank, + tx, + 0, + handler_context, + ); assert_matches!(result, Err(TransactionError::AccountLoadedTwice)); } } diff --git a/validator/src/cli.rs b/validator/src/cli.rs index 0eed324a9a9d0c..948c32f0e6cb3f 100644 --- a/validator/src/cli.rs +++ b/validator/src/cli.rs @@ -1514,7 +1514,6 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> { .arg( Arg::with_name("block_verification_method") .long("block-verification-method") - .hidden(hidden_unless_forced()) .value_name("METHOD") .takes_value(true) .possible_values(BlockVerificationMethod::cli_names()) @@ -1531,7 +1530,6 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> { .arg( Arg::with_name("unified_scheduler_handler_threads") .long("unified-scheduler-handler-threads") - .hidden(hidden_unless_forced()) .value_name("COUNT") .takes_value(true) .validator(|s| is_within_range(s, 1..))