From 464ce8b793dc0be08b2160f0af3e18bdd446311a Mon Sep 17 00:00:00 2001 From: Chris O'Neil Date: Mon, 13 Apr 2026 14:29:48 +0100 Subject: [PATCH] fix: shard channel saturation causing upload degradation at 1000-node scale MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After ~18-20 hours of running a 1000-node testnet with continuous uploads, identity exchange timeouts accumulate and upload times degrade from ~3 min to 13+ min. Root cause: the per-shard inbound message channels (32 slots) fill up because the shard consumer holds write locks across async calls, blocking all 8 consumers. Messages — including identity announces — are dropped, causing 15-second timeouts on every new connection. Fix 1: Increase channel capacity - MESSAGE_RECV_CHANNEL_CAPACITY: 256 -> 2048 (256 per shard vs 32) - MIN_SHARD_CHANNEL_CAPACITY: 16 -> 128 Fix 2: Don't hold write locks across .await - Scope the peer_to_channel write lock to a block that drops immediately after extracting is_new_peer and inserted - The async register_connection_peer_id() call now runs without holding any write locks, eliminating head-of-line blocking across all shards - Trades a microsecond consistency window for dramatically better throughput under sustained load Evidence from testnet rc-17 (1000 nodes, 20+ hours): - Nodes showed 6,000-18,000+ dropped messages (total_drops counter) - Identity exchange timeouts: 119 in a single upload attempt - Upload degradation: 229s -> 527s over 9 hours (hourly averages) - CPU was not saturated (20-39%), confirming lock contention not compute - Bandwidth dropped 40-70% as nodes processed fewer messages Co-Authored-By: Claude Opus 4.6 (1M context) --- src/network.rs | 2 +- src/transport_handle.rs | 39 +++++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/network.rs b/src/network.rs index 02aae27..45ee365 100644 --- a/src/network.rs +++ b/src/network.rs @@ -108,7 +108,7 @@ pub fn is_dht_participant(user_agent: &str) -> bool { } /// Capacity of the internal channel used by the message receiving system. -pub(crate) const MESSAGE_RECV_CHANNEL_CAPACITY: usize = 256; +pub(crate) const MESSAGE_RECV_CHANNEL_CAPACITY: usize = 2048; /// Maximum number of concurrent in-flight request/response operations. pub(crate) const MAX_ACTIVE_REQUESTS: usize = 256; diff --git a/src/transport_handle.rs b/src/transport_handle.rs index 0447b63..020bea2 100644 --- a/src/transport_handle.rs +++ b/src/transport_handle.rs @@ -1722,18 +1722,22 @@ impl TransportHandle { if let Some(ref app_id) = authenticated_node_id && *app_id != self_peer_id { - // Hold `peer_to_channel` across the transport-level - // `register_connection_peer_id` call so the app-level - // map and the transport's internal addr→peer map are - // consistent for any concurrent reader. Without this, - // there is a microsecond window in which a hole-punch - // lookup that depends on the registration can fail - // spuriously even though the app-level map already - // says the peer is known. - let mut p2c = peer_to_channel.write().await; - let is_new_peer = !p2c.contains_key(app_id); - let channels = p2c.entry(*app_id).or_default(); - let inserted = channels.insert(channel_id.clone()); + // Update app-level peer↔channel mappings under write locks, + // then drop all locks before the async transport registration. + // This trades a microsecond consistency window (hole-punch + // lookups may briefly miss a peer that the app-level map + // already knows) for dramatically better throughput under + // load — the previous pattern held a write lock across an + // async call, blocking all 8 shard consumers on every + // authenticated message. + let (is_new_peer, inserted) = { + let mut p2c = peer_to_channel.write().await; + let is_new = !p2c.contains_key(app_id); + let channels = p2c.entry(*app_id).or_default(); + let ins = channels.insert(channel_id.clone()); + (is_new, ins) + }; // p2c lock dropped here + if inserted { channel_to_peers .write() @@ -1743,17 +1747,12 @@ impl TransportHandle { .insert(*app_id); } - // Register peer ID at the low-level transport - // endpoint so PUNCH_ME_NOW relay can find this - // peer by identity instead of socket address. + // Register peer ID at the low-level transport endpoint. + // Now runs without holding any write locks. dual_node_for_peer_reg .register_connection_peer_id(from_addr, *app_id.to_bytes()) .await; - // Drop the lock before emitting events so subscribers - // that re-enter the registry don't deadlock. - drop(p2c); - if is_new_peer { peer_user_agents .write() @@ -1853,7 +1852,7 @@ const MESSAGE_DISPATCH_SHARDS: usize = 8; /// MESSAGE_DISPATCH_SHARDS`, but when that division rounds to something /// too small for healthy bursts we floor it at this value so each shard /// retains a reasonable amount of buffering headroom. -const MIN_SHARD_CHANNEL_CAPACITY: usize = 16; +const MIN_SHARD_CHANNEL_CAPACITY: usize = 128; /// Log a warning every Nth dropped message in the dispatcher. ///