diff --git a/crates/networking/p2p/sync/full.rs b/crates/networking/p2p/sync/full.rs index 561a09b080..2bc5878b97 100644 --- a/crates/networking/p2p/sync/full.rs +++ b/crates/networking/p2p/sync/full.rs @@ -5,7 +5,7 @@ use std::cmp::min; use std::sync::Arc; -use std::time::Duration; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; use ethrex_blockchain::{ BatchBlockProcessingFailure, Blockchain, @@ -25,6 +25,34 @@ use crate::snap::constants::{MAX_BLOCK_BODIES_TO_REQUEST, MAX_HEADER_FETCH_ATTEM use super::{EXECUTE_BATCH_SIZE, SyncError}; +/// Forkchoice heads older than this (in seconds) trigger a "consensus is behind" +/// warning during sync. A synced consensus client always advertises a head only +/// a few seconds old, so a large age means the consensus client itself is lagging +/// chain head and is the sync bottleneck. +const STALE_FORKCHOICE_HEAD_SECS: u64 = 1800; + +/// Distance (in blocks) below which the node is considered to be following head. +/// Below this we suppress the per-cycle sync-target logging to avoid noise on an +/// already-synced node, which runs a sync cycle on every slot. +const FOLLOW_DISTANCE: u64 = 8; + +/// Render a duration in seconds as a compact human string, e.g. "13d 4h". +fn humanize_secs(secs: u64) -> String { + if secs < 60 { + return "< 1m".to_string(); + } + let days = secs / 86_400; + let hours = (secs % 86_400) / 3_600; + let mins = (secs % 3_600) / 60; + if days > 0 { + format!("{days}d {hours}h") + } else if hours > 0 { + format!("{hours}h {mins}m") + } else { + format!("{mins}m") + } +} + /// Performs full sync cycle - fetches and executes all blocks between current head and sync head /// /// # Returns @@ -50,6 +78,13 @@ pub async fn sync_cycle_full( pending_blocks.insert(0, block); } + // The consensus-provided forkchoice head, captured before `sync_head` is rewound + // over the pending blocks above. Used for sync-target diagnostics so we report the + // actual head rather than the rewound ancestor we end up requesting headers from. + let fcu_head = pending_blocks + .last() + .map(|block| (block.header.number, block.header.timestamp)); + // Request all block headers between the sync head and our local chain // We will begin from the sync head so that we download the latest state first, ensuring we follow the correct chain // This step is not parallelized @@ -60,6 +95,12 @@ pub async fn sync_cycle_full( let mut attempts = 0; + // Tracks whether this cycle started meaningfully behind the consensus-provided + // head, so we can log progress and a final "caught up" message without spamming + // a synced node. Set on the first batch of headers we fetch. + let mut started_behind = false; + let mut sync_target_logged = false; + // Request and store all block headers from the advertised sync head loop { let Some(mut block_headers) = peers @@ -92,6 +133,35 @@ pub async fn sync_cycle_full( first_header.number, last_header.number, ); + + // On the first batch, report the distance to the consensus-provided head and + // warn if that head is stale (a strong signal the consensus client is behind). + if !sync_target_logged { + sync_target_logged = true; + let (target, target_ts) = + fcu_head.unwrap_or((first_header.number, first_header.timestamp)); + let local_head = store.get_latest_block_number().await?; + let behind = target.saturating_sub(local_head); + if behind > FOLLOW_DISTANCE { + started_behind = true; + info!( + "Sync target from consensus forkchoice: block {target} ({behind} blocks ahead of local head {local_head})" + ); + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let age = now.saturating_sub(target_ts); + if age > STALE_FORKCHOICE_HEAD_SECS { + warn!( + "Consensus forkchoice head (block {target}) is {} old. This can happen while the consensus client is still catching up to chain head; \ + if so, execution will only advance as fast as it does. If sync seems slow, it may be worth checking the consensus client's sync status.", + humanize_secs(age) + ); + } + } + } + end_block_number = end_block_number.max(first_header.number); start_block_number = last_header.number; @@ -266,6 +336,15 @@ pub async fn sync_cycle_full( .await?; } + // If this cycle started behind, announce that we've caught up to the head the + // consensus client gave us, so the operator can tell idle-waiting from a hang. + if started_behind { + let local_head = store.get_latest_block_number().await?; + info!( + "Reached consensus-provided head at block {local_head}. Waiting for the next forkchoice update from the consensus client." + ); + } + store.clear_fullsync_headers().await?; Ok(()) } diff --git a/crates/networking/rpc/eth/client.rs b/crates/networking/rpc/eth/client.rs index 6ae2bfeb34..f5ea6a7160 100644 --- a/crates/networking/rpc/eth/client.rs +++ b/crates/networking/rpc/eth/client.rs @@ -58,13 +58,26 @@ impl RpcHandler for Syncing { if context.blockchain.is_synced() { Ok(Value::Bool(!context.blockchain.is_synced())) } else { + let current_block = context.storage.get_latest_block_number().await?; + // `get_last_fcu_head` returns the head *hash* from the last forkchoiceUpdated. + // Resolve it to a block number. If the header isn't canonical yet it may still + // be a pending block whose number we can read; only when neither is available + // (e.g. mid snap-sync, target not downloaded) fall back to the current block + // instead of reporting garbage. + let head_hash = syncer + .get_last_fcu_head() + .map_err(|error| RpcErr::Internal(error.to_string()))?; + let highest_block = match context.storage.get_block_number(head_hash).await? { + Some(number) => number, + None => match context.storage.get_pending_block(head_hash).await? { + Some(block) => block.header.number, + None => current_block, + }, + }; let syncing_status = SyncingStatusRpc { starting_block: context.storage.get_earliest_block_number().await?, - current_block: context.storage.get_latest_block_number().await?, - highest_block: syncer - .get_last_fcu_head() - .map_err(|error| RpcErr::Internal(error.to_string()))? - .to_low_u64_be(), + current_block, + highest_block, }; serde_json::to_value(syncing_status) .map_err(|error| RpcErr::Internal(error.to_string()))