From 50f3241950320d9a1f68e245721bf57ae73221b5 Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Sat, 4 Apr 2026 17:00:53 +0200 Subject: [PATCH 1/3] fix: remove references to deleted ConnectionHealth API in tests Tests were using ConnectionHealth enum and connection_health() method that were removed from PeerConnection/Node. Rewrote health tests to use is_connected() and removed stale health ping/pong fields from struct literals. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/constrained_integration.rs | 4 -- tests/simultaneous_connect_dedup.rs | 65 ++++++++++------------------- 2 files changed, 23 insertions(+), 46 deletions(-) diff --git a/tests/constrained_integration.rs b/tests/constrained_integration.rs index 1f55b96b5..2b30b6a14 100644 --- a/tests/constrained_integration.rs +++ b/tests/constrained_integration.rs @@ -756,8 +756,6 @@ fn test_peer_connection_transport_addr() { authenticated: true, connected_at: Instant::now(), last_activity: Instant::now(), - last_health_ping_sent: None, - last_health_pong_received: None, }; assert_eq!(peer_conn_udp.remote_addr, udp_addr); assert_eq!( @@ -776,8 +774,6 @@ fn test_peer_connection_transport_addr() { authenticated: false, connected_at: Instant::now(), last_activity: Instant::now(), - last_health_ping_sent: None, - last_health_pong_received: None, }; assert_eq!(peer_conn_ble.remote_addr, ble_addr); assert_eq!( diff --git a/tests/simultaneous_connect_dedup.rs b/tests/simultaneous_connect_dedup.rs index 09377a22c..b5f8fe0c5 100644 --- a/tests/simultaneous_connect_dedup.rs +++ b/tests/simultaneous_connect_dedup.rs @@ -10,7 +10,7 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] -use saorsa_transport::{ConnectionHealth, Node}; +use saorsa_transport::Node; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; use std::time::Duration; use tokio::time::timeout; @@ -375,10 +375,9 @@ async fn test_connect_after_failure_succeeds() { // Phase 1.3: Phantom Connection Detection & Recovery Tests // ============================================================================ -/// Test that two connected nodes have healthy connection status after -/// the health check PING/PONG cycle runs. +/// Test that two connected nodes report as connected via is_connected. #[tokio::test] -async fn test_connection_health_status() { +async fn test_connection_status() { let node_a = create_localhost_node().await; let node_b = create_localhost_node().await; @@ -400,36 +399,18 @@ async fn test_connection_health_status() { let conn_addr = remote_socket_addr(&conn); - // Immediately after connect, health should be Healthy (not yet probed) - let health = node_a.connection_health(&conn_addr).await; - assert_eq!( - health, - Some(ConnectionHealth::Healthy), - "Newly connected peer should be Healthy" - ); - - // Wait for at least one health check cycle (30s reaper interval + margin) - // The reaper will send a PING, and the reader task on the remote end - // responds with a PONG. - tokio::time::sleep(Duration::from_secs(35)).await; - - // After one cycle, the peer should still be Healthy (PONG received) - let health_after = node_a.connection_health(&conn_addr).await; + // Immediately after connect, node_a should see the peer as connected assert!( - matches!( - health_after, - Some(ConnectionHealth::Healthy) | Some(ConnectionHealth::Checking) - ), - "After one health cycle, peer should be Healthy or Checking, got {:?}", - health_after + node_a.is_connected(&conn_addr).await, + "Newly connected peer should be reported as connected" ); - // node_a should still have exactly 1 peer (not evicted) + // node_a should have exactly 1 peer let peers = node_a.connected_peers().await; assert_eq!( peers.len(), 1, - "Healthy connection should not be evicted, but got {} peers", + "Should have exactly 1 connected peer, but got {} peers", peers.len() ); @@ -437,21 +418,23 @@ async fn test_connection_health_status() { node_b.shutdown().await; } -/// Test that connection_health returns None for unknown peers. +/// Test that is_connected returns false for unknown peers. #[tokio::test] -async fn test_connection_health_unknown_peer() { +async fn test_connection_status_unknown_peer() { let node = create_localhost_node().await; let unknown_addr: SocketAddr = "127.0.0.1:59999".parse().unwrap(); - let health = node.connection_health(&unknown_addr).await; - assert_eq!(health, None, "Unknown peer should return None"); + assert!( + !node.is_connected(&unknown_addr).await, + "Unknown peer should not be reported as connected" + ); node.shutdown().await; } -/// Test that after disconnect, connection_health returns None. +/// Test that after disconnect, is_connected returns false. #[tokio::test] -async fn test_connection_health_after_disconnect() { +async fn test_connection_status_after_disconnect() { let node_a = create_localhost_node().await; let node_b = create_localhost_node().await; @@ -472,9 +455,9 @@ async fn test_connection_health_after_disconnect() { let conn_addr = remote_socket_addr(&conn); // Verify connected - assert_eq!( - node_a.connection_health(&conn_addr).await, - Some(ConnectionHealth::Healthy), + assert!( + node_a.is_connected(&conn_addr).await, + "Peer should be connected" ); // Disconnect @@ -483,12 +466,10 @@ async fn test_connection_health_after_disconnect() { .await .expect("disconnect should succeed"); - // After disconnect, health should be None - let health = node_a.connection_health(&conn_addr).await; - assert_eq!( - health, None, - "Disconnected peer should return None, got {:?}", - health + // After disconnect, should no longer be connected + assert!( + !node_a.is_connected(&conn_addr).await, + "Disconnected peer should not be reported as connected" ); node_a.shutdown().await; From 56e89b67523baa18de28f4a4d64ae51b19ddf241 Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Sat, 4 Apr 2026 17:57:52 +0200 Subject: [PATCH 2/3] perf: tighten network timeouts for faster connection establishment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce connection strategy timeouts based on RTT analysis (300ms worst-case cross-globe RTT). The previous values were overly conservative, causing up to 89s worst-case connection time to unreachable peers. Changes: - Direct connection: 5s → 2s (1-2 RTTs + margin) - Hole-punch round: 15s → 3s (3 RTTs + margin) - Max hole-punch rounds: 3 → 2 - Relay establishment: 30s → 10s - Post-hole-punch direct retry: 3s → 1s (warm NAT pinhole) - Default send ACK timeout: 1s → 500ms - Remove redundant hardcoded 15s deadline in try_hole_punch; outer strategy.holepunch_timeout() is now the single source of truth Worst-case connection path drops from 89s to ~21s. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/config/nat_timeouts.rs | 2 +- src/connection_strategy.rs | 20 ++++++++++---------- src/p2p_endpoint.rs | 23 ++++++++++++----------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/config/nat_timeouts.rs b/src/config/nat_timeouts.rs index 861f7fa13..f07011c27 100644 --- a/src/config/nat_timeouts.rs +++ b/src/config/nat_timeouts.rs @@ -132,7 +132,7 @@ impl Default for RelayTimeouts { } /// Default time to wait for the peer to acknowledge stream data after a send. -const DEFAULT_SEND_ACK_TIMEOUT: Duration = Duration::from_secs(1); +const DEFAULT_SEND_ACK_TIMEOUT: Duration = Duration::from_millis(500); /// Fast-network send ACK timeout. const FAST_SEND_ACK_TIMEOUT: Duration = Duration::from_millis(500); diff --git a/src/connection_strategy.rs b/src/connection_strategy.rs index 11c9bed8b..12e1b4fb7 100644 --- a/src/connection_strategy.rs +++ b/src/connection_strategy.rs @@ -178,11 +178,11 @@ pub struct StrategyConfig { impl Default for StrategyConfig { fn default() -> Self { Self { - ipv4_timeout: Duration::from_secs(5), - ipv6_timeout: Duration::from_secs(5), - holepunch_timeout: Duration::from_secs(15), - relay_timeout: Duration::from_secs(30), - max_holepunch_rounds: 3, + ipv4_timeout: Duration::from_secs(2), + ipv6_timeout: Duration::from_secs(2), + holepunch_timeout: Duration::from_secs(3), + relay_timeout: Duration::from_secs(10), + max_holepunch_rounds: 2, ipv6_enabled: true, relay_enabled: true, coordinator: None, @@ -484,11 +484,11 @@ mod tests { #[test] fn test_default_config() { let config = StrategyConfig::default(); - assert_eq!(config.ipv4_timeout, Duration::from_secs(5)); - assert_eq!(config.ipv6_timeout, Duration::from_secs(5)); - assert_eq!(config.holepunch_timeout, Duration::from_secs(15)); - assert_eq!(config.relay_timeout, Duration::from_secs(30)); - assert_eq!(config.max_holepunch_rounds, 3); + assert_eq!(config.ipv4_timeout, Duration::from_secs(2)); + assert_eq!(config.ipv6_timeout, Duration::from_secs(2)); + assert_eq!(config.holepunch_timeout, Duration::from_secs(3)); + assert_eq!(config.relay_timeout, Duration::from_secs(10)); + assert_eq!(config.max_holepunch_rounds, 2); assert!(config.ipv6_enabled); assert!(config.relay_enabled); } diff --git a/src/p2p_endpoint.rs b/src/p2p_endpoint.rs index 2c1b25f07..ca07f5e9d 100644 --- a/src/p2p_endpoint.rs +++ b/src/p2p_endpoint.rs @@ -87,6 +87,11 @@ const EVENT_CHANNEL_CAPACITY: usize = 256; /// event-driven reader-exit detection. const STALE_REAPER_INTERVAL: Duration = Duration::from_secs(10); +/// Quick direct connection attempt after a failed hole-punch round. +/// If the target's outgoing packets created a NAT binding, a QUIC handshake +/// through the pinhole needs only 1-2 RTTs (~600ms at 300ms worst-case RTT). +const POST_HOLEPUNCH_DIRECT_RETRY_TIMEOUT: Duration = Duration::from_secs(1); + use crate::SHUTDOWN_DRAIN_TIMEOUT; /// Extract the raw SPKI (SubjectPublicKeyInfo) bytes from a QUIC connection's @@ -1544,7 +1549,8 @@ impl P2pEndpoint { // the target's outgoing packets even though our // try_hole_punch didn't detect the connection. if let Ok(Ok(peer_conn)) = - timeout(Duration::from_secs(3), self.connect(target)).await + timeout(POST_HOLEPUNCH_DIRECT_RETRY_TIMEOUT, self.connect(target)) + .await { info!("✓ Post-hole-punch direct connect succeeded to {}", target); return Ok(( @@ -1578,7 +1584,8 @@ impl P2pEndpoint { Err(_) => { // Same: try a quick direct connect after timeout if let Ok(Ok(peer_conn)) = - timeout(Duration::from_secs(3), self.connect(target)).await + timeout(POST_HOLEPUNCH_DIRECT_RETRY_TIMEOUT, self.connect(target)) + .await { info!("✓ Post-hole-punch direct connect succeeded to {}", target); return Ok(( @@ -1816,7 +1823,8 @@ impl P2pEndpoint { // Poll for the connection to appear. The target node will receive // the relayed PUNCH_ME_NOW and initiate a QUIC connection to us, // which gets accepted by saorsa-core's transport handler. - let deadline = tokio::time::Instant::now() + Duration::from_secs(15); + // No internal deadline — the outer strategy.holepunch_timeout() + // cancels this future when it expires. let mut poll_count = 0u32; loop { @@ -1873,7 +1881,7 @@ impl P2pEndpoint { } } - // Wait briefly then re-check, or timeout + // Wait briefly then re-check; the outer timeout cancels us on expiry tokio::select! { _ = self.inner.connection_notify().notified() => { debug!("try_hole_punch: connection_notify fired for {}", target); @@ -1881,13 +1889,6 @@ impl P2pEndpoint { _ = self.shutdown.cancelled() => { return Err(EndpointError::ShuttingDown); } - _ = tokio::time::sleep_until(deadline) => { - info!( - "try_hole_punch: TIMEOUT after 15s for {} (polled {} times)", - target, poll_count - ); - return Err(EndpointError::Timeout); - } // Wake periodically to drive session and re-check connections _ = tokio::time::sleep(Duration::from_millis(500)) => {} } From eb1eb64e0745087834ff35c3ab18fed3def29969 Mon Sep 17 00:00:00 2001 From: Warm Beer Date: Sat, 4 Apr 2026 18:13:37 +0200 Subject: [PATCH 3/3] =?UTF-8?q?fix:=20differentiate=20FAST=5FSEND=5FACK=5F?= =?UTF-8?q?TIMEOUT=20from=20default=20(500ms=20=E2=86=92=20250ms)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After lowering DEFAULT_SEND_ACK_TIMEOUT to 500ms, both constants held the same value, making TimeoutConfig::fast() identical to default for send_ack_timeout. Lower the fast variant to 250ms to match the halved-interval pattern used throughout the fast profile. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/config/nat_timeouts.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/config/nat_timeouts.rs b/src/config/nat_timeouts.rs index f07011c27..ceba2347d 100644 --- a/src/config/nat_timeouts.rs +++ b/src/config/nat_timeouts.rs @@ -134,8 +134,8 @@ impl Default for RelayTimeouts { /// Default time to wait for the peer to acknowledge stream data after a send. const DEFAULT_SEND_ACK_TIMEOUT: Duration = Duration::from_millis(500); -/// Fast-network send ACK timeout. -const FAST_SEND_ACK_TIMEOUT: Duration = Duration::from_millis(500); +/// Fast-network send ACK timeout (halved from default, matching the fast profile pattern). +const FAST_SEND_ACK_TIMEOUT: Duration = Duration::from_millis(250); /// Master timeout configuration #[derive(Debug, Clone, Serialize, Deserialize)]