diff --git a/src/endpoint.rs b/src/endpoint.rs index 1291d4f2..99cf2417 100644 --- a/src/endpoint.rs +++ b/src/endpoint.rs @@ -385,9 +385,7 @@ impl Endpoint { .connections .iter() .filter_map(|(_, meta)| { - meta.peer_id - .as_ref() - .map(|pid| hex::encode(&pid.0[..8])) + meta.peer_id.as_ref().map(|pid| hex::encode(&pid.0[..8])) }) .collect(); tracing::warn!( diff --git a/src/nat_traversal_api.rs b/src/nat_traversal_api.rs index e5152058..f039af35 100644 --- a/src/nat_traversal_api.rs +++ b/src/nat_traversal_api.rs @@ -161,7 +161,7 @@ impl TransportCandidate { } } -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, error, info, warn}; use std::sync::atomic::{AtomicBool, Ordering}; // Use parking_lot for faster, non-poisoning locks that work better with async code @@ -298,10 +298,6 @@ pub struct NatTraversalEndpoint { /// MASQUE relay server - every node provides relay services (symmetric P2P) /// Per ADR-004: All nodes are equal and participate in relaying with resource budgets relay_server: Option>, - /// Successful candidate pairs discovered via hole punching - /// Maps remote SocketAddr to the validated address that successfully responded - /// Uses DashMap for fine-grained concurrent access without blocking workers - successful_candidates: Arc>, /// Transport candidates received from peers (multi-transport support) /// Maps remote SocketAddr to all known transport candidates for that peer /// Enables routing decisions based on transport type and capabilities @@ -571,34 +567,6 @@ impl BootstrapNode { } } -/// A candidate pair for hole punching (ICE-like) -#[derive(Debug, Clone)] -pub struct CandidatePair { - /// Local candidate address - pub local_candidate: CandidateAddress, - /// Remote candidate address - pub remote_candidate: CandidateAddress, - /// Combined priority for this pair - pub priority: u64, - /// Current state of this candidate pair - pub state: CandidatePairState, -} - -/// State of a candidate pair during hole punching 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum CandidatePairState { - /// Waiting to be checked - Waiting, - /// Currently being checked - InProgress, - /// Check succeeded - Succeeded, - /// Check failed - Failed, - /// Cancelled due to higher priority success - Cancelled, -} - /// Active NAT traversal session state #[derive(Debug)] struct NatTraversalSession { @@ -1393,7 +1361,6 @@ impl NatTraversalEndpoint { relay_manager, relay_sessions: Arc::new(dashmap::DashMap::new()), relay_server, - successful_candidates: Arc::new(dashmap::DashMap::new()), transport_candidates: Arc::new(dashmap::DashMap::new()), transport_registry, peer_address_update_rx: TokioMutex::new(peer_addr_rx), @@ -1803,7 +1770,6 @@ impl NatTraversalEndpoint { relay_manager, relay_sessions: Arc::new(dashmap::DashMap::new()), relay_server, - successful_candidates: Arc::new(dashmap::DashMap::new()), transport_candidates: Arc::new(dashmap::DashMap::new()), transport_registry, peer_address_update_rx: TokioMutex::new(peer_addr_rx), @@ -3242,163 +3208,12 @@ impl NatTraversalEndpoint { Ok(connection) } - /// Attempt connection with automatic fallback strategies - /// - /// Connection attempts follow this priority order: - /// 1. **Direct connection** - simple QUIC connect to the target address - /// 2. **Hole punching** - coordinated NAT traversal with candidate discovery - /// 3. 
**Relay** - last resort via MASQUE through connected peers (symmetric P2P) - /// - /// # Symmetric P2P Relay Strategy - /// When relay is needed: - /// - First try connected peers as relays (any peer can relay) - /// - Fall back to configured relay_nodes (for bootstrap scenarios only) - pub async fn connect_with_fallback( - &self, - server_name: &str, - remote_addr: SocketAddr, - ) -> Result { - // Step 1: Try direct connection first - info!("Attempting direct connection to {}", remote_addr); - match self.connect_to(server_name, remote_addr).await { - Ok(conn) => { - info!("Direct connection to {} succeeded", remote_addr); - return Ok(conn); - } - Err(e) => { - info!( - "Direct connection to {} failed ({:?}), trying hole punching", - remote_addr, e - ); - } - } - - // Step 2: Try hole punching (coordinated NAT traversal) - info!("Attempting hole punching for {}", remote_addr); - match self.attempt_hole_punching(remote_addr) { - Ok(()) => { - // Hole punching succeeded - NAT mappings are established - // Now try to connect again using the discovered path - info!( - "Hole punching succeeded for {}, retrying connection", - remote_addr - ); - - // Get the successful candidate pair address if available - let connect_addr = self - .get_successful_candidate_address(remote_addr) - .unwrap_or(remote_addr); - - match self.connect_to(server_name, connect_addr).await { - Ok(conn) => { - info!("Connection via hole punching to {} succeeded", remote_addr); - return Ok(conn); - } - Err(e) => { - info!( - "Connection after hole punching failed ({:?}), trying relay", - e - ); - } - } - } - Err(e) => { - info!( - "Hole punching for {} failed ({:?}), trying relay", - remote_addr, e - ); - } - } - - // Step 3: Relay is the last resort - info!( - "Attempting relay connection to {} (last resort)", - remote_addr - ); - - // Symmetric P2P: Collect connected peers to use as potential relays - // Any connected peer can provide relay services - // DashMap provides lock-free concurrent access 
- let connected_peers: Vec = self - .connections - .iter() - .filter(|entry| entry.value().close_reason().is_none()) // Only active connections - .map(|entry| entry.value().remote_address()) - .filter(|addr| *addr != remote_addr) // Don't try to relay through the target - .collect(); - - info!( - "Found {} connected peers to try as relays", - connected_peers.len() - ); - - // Also add configured relay nodes as fallback (for bootstrapping) - let mut relay_candidates: Vec = connected_peers; - if let Some(ref manager) = self.relay_manager { - let configured_relays = manager.available_relays().await; - for relay in configured_relays { - if !relay_candidates.contains(&relay) { - relay_candidates.push(relay); - } - } - } - - if relay_candidates.is_empty() { - return Err(NatTraversalError::ConnectionFailed( - "No connected peers or relay nodes available".to_string(), - )); - } - - // Try each relay in order - let mut last_error = None; - for relay_addr in relay_candidates { - info!("Attempting connection via relay: {}", relay_addr); - - // Establish relay session (CONNECT-UDP Bind) - match self.establish_relay_session(relay_addr).await { - Ok(public_addr) => { - info!( - "Relay session established via {} with public address {:?}", - relay_addr, public_addr - ); - - // Now attempt the connection through the relay - // The relay session is stored and the connection can use datagram forwarding - // For now, we attempt a direct connection to the peer using our relay public address - // The peer should be able to reach us through the relay - - // Try connecting to the peer - the relay will forward our traffic - match self.connect_to(server_name, remote_addr).await { - Ok(conn) => { - info!( - "Connected to {} via relay {} (public addr: {:?})", - remote_addr, relay_addr, public_addr - ); - return Ok(conn); - } - Err(e) => { - warn!( - "Connection via relay {} failed: {:?}, trying next relay", - relay_addr, e - ); - last_error = Some(e); - } - } - } - Err(e) => { - warn!( - 
"Failed to establish relay session with {}: {:?}", - relay_addr, e - ); - last_error = Some(e); - } - } - } - - Err(last_error.unwrap_or_else(|| { - NatTraversalError::ConnectionFailed("All relay attempts failed".to_string()) - })) - } + // Removed: the duplicate `NatTraversalEndpoint::connect_with_fallback`. + // Production hole-punch fallback lives in + // `crate::p2p_endpoint::P2pEndpoint::connect_with_fallback`, reached via + // `LinkTransport::dial_addr` and the `saorsa-transport` example binary. + // See the tombstone further down this file for the deleted helpers and + // why they could never have worked. /// Get the relay manager for advanced relay operations /// @@ -4820,291 +4635,25 @@ impl NatTraversalEndpoint { Ok(frame) } - #[allow(dead_code)] - fn attempt_hole_punching(&self, target_addr: SocketAddr) -> Result<(), NatTraversalError> { - debug!("Attempting hole punching for {}", target_addr); - - // Get candidate pairs for this target - let candidate_pairs = self.get_candidate_pairs_for_addr(target_addr)?; - - if candidate_pairs.is_empty() { - return Err(NatTraversalError::NoCandidatesFound); - } - - info!( - "Generated {} candidate pairs for hole punching with {}", - candidate_pairs.len(), - target_addr - ); - - // Attempt hole punching with each candidate pair - - self.attempt_quic_hole_punching(target_addr, candidate_pairs) - } - - /// Generate candidate pairs for hole punching based on ICE-like algorithm - #[allow(dead_code)] - fn get_candidate_pairs_for_addr( - &self, - target_addr: SocketAddr, - ) -> Result, NatTraversalError> { - let discovery_session_id = DiscoverySessionId::Remote(target_addr); - - // Get discovered candidates from the discovery manager - // parking_lot::Mutex doesn't poison - let discovery_candidates = { - let discovery = self.discovery_manager.lock(); - discovery.get_candidates(discovery_session_id) - }; - - if discovery_candidates.is_empty() { - return Err(NatTraversalError::NoCandidatesFound); - } - - // Create candidate 
pairs with priorities (ICE-like pairing) - let mut candidate_pairs = Vec::new(); - let local_candidates = discovery_candidates - .iter() - .filter(|c| matches!(c.source, CandidateSource::Local)) - .collect::>(); - let remote_candidates = discovery_candidates - .iter() - .filter(|c| !matches!(c.source, CandidateSource::Local)) - .collect::>(); - - // Pair each local candidate with each remote candidate - // Skip cross-family pairs (IPv4 ↔ IPv6) as they cannot connect at the socket level - for local in &local_candidates { - for remote in &remote_candidates { - // Cross-family pairs will always fail - skip them - let local_is_v4 = local.address.ip().is_ipv4(); - let remote_is_v4 = remote.address.ip().is_ipv4(); - if local_is_v4 != remote_is_v4 { - trace!( - "Skipping cross-family candidate pair: {} ↔ {}", - local.address, remote.address - ); - continue; - } - - let pair_priority = self.calculate_candidate_pair_priority(local, remote); - candidate_pairs.push(CandidatePair { - local_candidate: (*local).clone(), - remote_candidate: (*remote).clone(), - priority: pair_priority, - state: CandidatePairState::Waiting, - }); - } - } - - // Sort by priority (highest first) - candidate_pairs.sort_by(|a, b| b.priority.cmp(&a.priority)); - - // Limit to reasonable number for initial attempts - candidate_pairs.truncate(8); - - Ok(candidate_pairs) - } - - /// Calculate candidate pair priority using ICE algorithm - #[allow(dead_code)] - fn calculate_candidate_pair_priority( - &self, - local: &CandidateAddress, - remote: &CandidateAddress, - ) -> u64 { - // ICE candidate pair priority formula: min(G,D) * 2^32 + max(G,D) * 2 + (G>D ? 1 : 0) - // Where G is controlling agent priority, D is controlled agent priority - - let local_type_preference = match local.source { - CandidateSource::Local => 126, - CandidateSource::Observed { .. 
} => 100, - CandidateSource::Predicted => 75, - CandidateSource::Peer => 50, - }; - - let remote_type_preference = match remote.source { - CandidateSource::Local => 126, - CandidateSource::Observed { .. } => 100, - CandidateSource::Predicted => 75, - CandidateSource::Peer => 50, - }; - - // Simplified priority calculation - let local_priority = (local_type_preference as u64) << 8 | local.priority as u64; - let remote_priority = (remote_type_preference as u64) << 8 | remote.priority as u64; - - let min_priority = local_priority.min(remote_priority); - let max_priority = local_priority.max(remote_priority); - - (min_priority << 32) - | (max_priority << 1) - | if local_priority > remote_priority { - 1 - } else { - 0 - } - } - - /// Real QUIC-based hole punching implementation - #[allow(dead_code)] - fn attempt_quic_hole_punching( - &self, - target_addr: SocketAddr, - candidate_pairs: Vec, - ) -> Result<(), NatTraversalError> { - let _endpoint = self.inner_endpoint.as_ref().ok_or_else(|| { - NatTraversalError::ConfigError("QUIC endpoint not initialized".to_string()) - })?; - - for pair in candidate_pairs { - debug!( - "Attempting hole punch with candidate pair: {} -> {}", - pair.local_candidate.address, pair.remote_candidate.address - ); - - // Create PATH_CHALLENGE frame data (8 random bytes) - let mut challenge_data = [0u8; 8]; - for byte in &mut challenge_data { - *byte = rand::random(); - } - - // Create a raw UDP socket bound to the local candidate address - let local_socket = - std::net::UdpSocket::bind(pair.local_candidate.address).map_err(|e| { - NatTraversalError::NetworkError(format!( - "Failed to bind to local candidate: {e}" - )) - })?; - - // Craft a minimal QUIC packet with PATH_CHALLENGE frame - let path_challenge_packet = self.create_path_challenge_packet(challenge_data)?; - - // Send the packet to the remote candidate address - match local_socket.send_to(&path_challenge_packet, pair.remote_candidate.address) { - Ok(bytes_sent) => { - debug!( - "Sent {} 
bytes for hole punch from {} to {}", - bytes_sent, pair.local_candidate.address, pair.remote_candidate.address - ); - - // Set a short timeout for response - local_socket - .set_read_timeout(Some(Duration::from_millis(100))) - .map_err(|e| { - NatTraversalError::NetworkError(format!("Failed to set timeout: {e}")) - })?; - - // Try to receive a response - let mut response_buffer = [0u8; 1024]; - match local_socket.recv_from(&mut response_buffer) { - Ok((_bytes_received, response_addr)) => { - if response_addr == pair.remote_candidate.address { - info!( - "Hole punch succeeded for {}: {} <-> {}", - target_addr, - pair.local_candidate.address, - pair.remote_candidate.address - ); - - // Store successful candidate pair for connection establishment - self.store_successful_candidate_pair(target_addr, pair)?; - return Ok(()); - } else { - debug!( - "Received response from unexpected address: {}", - response_addr - ); - } - } - Err(e) - if e.kind() == std::io::ErrorKind::WouldBlock - || e.kind() == std::io::ErrorKind::TimedOut => - { - debug!("No response received for hole punch attempt"); - } - Err(e) => { - debug!("Error receiving hole punch response: {}", e); - } - } - } - Err(e) => { - debug!("Failed to send hole punch packet: {}", e); - } - } - } - - // If we get here, all hole punch attempts failed - Err(NatTraversalError::HolePunchingFailed) - } - - /// Create a minimal QUIC packet with PATH_CHALLENGE frame for hole punching - fn create_path_challenge_packet( - &self, - challenge_data: [u8; 8], - ) -> Result, NatTraversalError> { - // Create a minimal QUIC packet structure - // This is a simplified implementation - in production, you'd use proper QUIC packet construction - let mut packet = Vec::new(); - - // QUIC packet header (simplified) - packet.push(0x40); // Short header, fixed bit set - packet.extend_from_slice(&[0, 0, 0, 1]); // Connection ID (simplified) - - // PATH_CHALLENGE frame - packet.push(0x1a); // PATH_CHALLENGE frame type - 
packet.extend_from_slice(&challenge_data); // 8-byte challenge data - - Ok(packet) - } - - /// Store successful candidate pair for later connection establishment - fn store_successful_candidate_pair( - &self, - target_addr: SocketAddr, - pair: CandidatePair, - ) -> Result<(), NatTraversalError> { - debug!( - "Storing successful candidate pair for {}: {} <-> {}", - target_addr, pair.local_candidate.address, pair.remote_candidate.address - ); - - // Store the successful remote address for use in connection establishment - // DashMap provides lock-free .insert() - self.successful_candidates - .insert(target_addr, pair.remote_candidate.address); - info!( - "Stored successful candidate for {}: {}", - target_addr, pair.remote_candidate.address - ); - - // Emit events to notify the application - if let Some(ref callback) = self.event_callback { - callback(NatTraversalEvent::PathValidated { - remote_address: target_addr, - rtt: Duration::from_millis(50), // Estimated RTT - }); - - callback(NatTraversalEvent::TraversalSucceeded { - remote_address: target_addr, - final_address: pair.remote_candidate.address, - total_time: Duration::from_secs(1), // Estimated total time - }); - } - - Ok(()) - } - - /// Get the successful candidate address for a target (discovered via hole punching) - /// - /// Returns the remote address that successfully responded during hole punching. - /// This address should be used for establishing the actual QUIC connection. - fn get_successful_candidate_address(&self, target_addr: SocketAddr) -> Option { - // DashMap provides lock-free .get() that returns Option> - self.successful_candidates - .get(&target_addr) - .map(|r| *r.value()) - } + // Removed: the dead `attempt_hole_punching` chain + // (`attempt_quic_hole_punching`, `get_candidate_pairs_for_addr`, + // `calculate_candidate_pair_priority`, `create_path_challenge_packet`, + // `store_successful_candidate_pair`, `get_successful_candidate_address`). 
+ // Only ever called from the duplicate + // `NatTraversalEndpoint::connect_with_fallback` (also removed). Could + // not have worked in production: it bound a fresh `std::net::UdpSocket` + // to a port Quinn already owned (UDP binds are exclusive), then sent a + // hand-rolled `0x40 [0,0,0,1] 0x1a <8 random>` byte sequence that is + // not a valid encrypted QUIC packet (any receiver drops it), then + // blocked the async runtime in a 100 ms `recv_from` for a response no + // compliant peer would ever send. The `#[allow(dead_code)]` markers on + // four of these seven functions disguised this from grep-driven debugging. + // + // Production hole-punch coordination lives in + // `crate::p2p_endpoint::P2pEndpoint::connect_with_fallback_inner`, + // which drives the coordinator-mediated PUNCH_ME_NOW flow whose + // server-side helpers (`send_coordination_request_with_peer_id`, etc.) + // are defined later in this file. /// Attempt connection to a specific candidate address fn attempt_connection_to_candidate( @@ -6058,10 +5607,13 @@ impl NatTraversalEndpoint { let target_wire_id = target_peer_id.unwrap_or_else(|| Self::wire_id_from_addr(target_addr)); info!( "Sending PUNCH_ME_NOW coordination request for {} to coordinator {} (wire_id={}, from_peer_id={}, from_addr={})", - target_addr, coordinator, + target_addr, + coordinator, hex::encode(&target_wire_id[..8]), - target_peer_id.map(|p| hex::encode(&p[..8])).unwrap_or_else(|| "none".to_string()), - !target_peer_id.is_some(), + target_peer_id + .map(|p| hex::encode(&p[..8])) + .unwrap_or_else(|| "none".to_string()), + target_peer_id.is_none(), ); // Get our external address - this is where the target peer should punch to diff --git a/src/p2p_endpoint.rs b/src/p2p_endpoint.rs index a3d7afc9..9c45749b 100644 --- a/src/p2p_endpoint.rs +++ b/src/p2p_endpoint.rs @@ -1822,7 +1822,10 @@ impl P2pEndpoint { if let Some(ref pid) = target_peer_id { info!( "try_hole_punch: calling initiate_nat_traversal({}, {}) with peer ID {} (dashmap key={})", 
- target, coordinator, hex::encode(&pid[..8]), target + target, + coordinator, + hex::encode(&pid[..8]), + target ); } else { info!(