From 8dde0faefbe744dcd6146f4d5b9e0ea2153a7d28 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Thu, 9 Apr 2026 19:44:20 -0300 Subject: [PATCH 01/31] =?UTF-8?q?feat(l1):=20add=20snap=20sync=20observabi?= =?UTF-8?q?lity=20=E2=80=94=20metrics,=20RPC=20endpoints,=20monitor=20diag?= =?UTF-8?q?nostics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add multi-layered observability for snap sync to diagnose intermittent pivot update failures on mainnet: - Prometheus metrics: eligible peers, inflight requests, pivot age, phase tracking, pivot update/storage request/header resolution outcome counters - Admin RPC: admin_peerScores (full peer table with scores, eligibility) and admin_syncStatus (phase, pivot, staleness, recent events) - Log instrumentation: TRACE per-peer dialogue, DEBUG for staleness, pivot updates with peer pool state, error classification - Docker monitor: polls new RPC endpoints, keeps rolling snapshot buffer, dumps on degradation detection with adaptive frequency - REPL: admin.peerScores and admin.syncStatus commands --- crates/blockchain/metrics/api.rs | 8 +- crates/blockchain/metrics/mod.rs | 2 + crates/blockchain/metrics/sync.rs | 154 ++++++++++++++++++ crates/networking/p2p/network.rs | 10 ++ crates/networking/p2p/peer_handler.rs | 26 ++- crates/networking/p2p/peer_table.rs | 59 ++++++- crates/networking/p2p/snap/client.rs | 13 ++ crates/networking/p2p/sync.rs | 75 ++++++++- crates/networking/p2p/sync/snap_sync.rs | 109 ++++++++++++- crates/networking/p2p/sync_manager.rs | 16 +- crates/networking/rpc/admin/mod.rs | 2 +- crates/networking/rpc/admin/peers.rs | 37 +++++ crates/networking/rpc/rpc.rs | 2 + tooling/repl/src/commands/admin.rs | 14 ++ tooling/sync/docker_monitor.py | 205 +++++++++++++++++++++++- 15 files changed, 718 insertions(+), 14 deletions(-) create mode 100644 crates/blockchain/metrics/sync.rs diff --git a/crates/blockchain/metrics/api.rs b/crates/blockchain/metrics/api.rs index 
bda3b98f735..e57451274ff 100644 --- a/crates/blockchain/metrics/api.rs +++ b/crates/blockchain/metrics/api.rs @@ -2,7 +2,7 @@ use axum::{Router, routing::get}; use crate::{ MetricsApiError, blocks::METRICS_BLOCKS, gather_default_metrics, node::METRICS_NODE, - p2p::METRICS_P2P, process::METRICS_PROCESS, transactions::METRICS_TX, + p2p::METRICS_P2P, process::METRICS_PROCESS, sync::METRICS_SYNC, transactions::METRICS_TX, }; pub async fn start_prometheus_metrics_api( @@ -54,6 +54,12 @@ pub(crate) async fn get_metrics() -> String { Err(_) => tracing::error!("Failed to gather METRICS_P2P"), }; + ret_string.push('\n'); + match METRICS_SYNC.gather_metrics() { + Ok(s) => ret_string.push_str(&s), + Err(_) => tracing::error!("Failed to gather METRICS_SYNC"), + }; + ret_string.push('\n'); if let Some(node_metrics) = METRICS_NODE.get() { match node_metrics.gather_metrics() { diff --git a/crates/blockchain/metrics/mod.rs b/crates/blockchain/metrics/mod.rs index 47f188a09ca..82df7e57065 100644 --- a/crates/blockchain/metrics/mod.rs +++ b/crates/blockchain/metrics/mod.rs @@ -14,6 +14,8 @@ pub mod process; pub mod profiling; #[cfg(feature = "api")] pub mod rpc; +#[cfg(any(feature = "api", feature = "metrics"))] +pub mod sync; #[cfg(any(feature = "api", feature = "transactions"))] pub mod transactions; diff --git a/crates/blockchain/metrics/sync.rs b/crates/blockchain/metrics/sync.rs new file mode 100644 index 00000000000..f51ce96a9e6 --- /dev/null +++ b/crates/blockchain/metrics/sync.rs @@ -0,0 +1,154 @@ +use prometheus::{Encoder, IntCounterVec, IntGauge, Opts, Registry, TextEncoder}; +use std::sync::LazyLock; + +use crate::MetricsError; + +pub static METRICS_SYNC: LazyLock = LazyLock::new(MetricsSync::default); + +#[derive(Debug, Clone)] +pub struct MetricsSync { + // Gauges — current state + eligible_peers: IntGauge, + snap_peers: IntGauge, + inflight_requests: IntGauge, + pivot_age_seconds: IntGauge, + current_phase: IntGauge, + + // Counters — cumulative outcomes + 
pivot_updates: IntCounterVec, + storage_requests: IntCounterVec, + header_resolution: IntCounterVec, +} + +impl Default for MetricsSync { + fn default() -> Self { + Self::new() + } +} + +impl MetricsSync { + pub fn new() -> Self { + MetricsSync { + eligible_peers: IntGauge::new( + "ethrex_sync_eligible_peers", + "Number of peers eligible for requests (passing can_try_more_requests)", + ) + .expect("Failed to create eligible_peers metric"), + snap_peers: IntGauge::new( + "ethrex_sync_snap_peers", + "Number of connected peers supporting the snap protocol", + ) + .expect("Failed to create snap_peers metric"), + inflight_requests: IntGauge::new( + "ethrex_sync_inflight_requests", + "Total inflight requests across all peers", + ) + .expect("Failed to create inflight_requests metric"), + pivot_age_seconds: IntGauge::new( + "ethrex_sync_pivot_age_seconds", + "Age of the current pivot block in seconds", + ) + .expect("Failed to create pivot_age_seconds metric"), + current_phase: IntGauge::new( + "ethrex_sync_current_phase", + "Current snap sync phase (0=idle, 1=headers, 2=account_ranges, 3=account_insertion, 4=storage_ranges, 5=storage_insertion, 6=healing, 7=bytecodes)", + ) + .expect("Failed to create current_phase metric"), + pivot_updates: IntCounterVec::new( + Opts::new( + "ethrex_sync_pivot_updates_total", + "Total pivot update attempts by outcome", + ), + &["outcome"], + ) + .expect("Failed to create pivot_updates metric"), + storage_requests: IntCounterVec::new( + Opts::new( + "ethrex_sync_storage_requests_total", + "Total storage range requests by outcome", + ), + &["outcome"], + ) + .expect("Failed to create storage_requests metric"), + header_resolution: IntCounterVec::new( + Opts::new( + "ethrex_sync_header_resolution_total", + "Total header resolution attempts by outcome", + ), + &["outcome"], + ) + .expect("Failed to create header_resolution metric"), + } + } + + // --- Gauge setters --- + + pub fn set_eligible_peers(&self, count: i64) { + 
self.eligible_peers.set(count); + } + + pub fn set_snap_peers(&self, count: i64) { + self.snap_peers.set(count); + } + + pub fn set_inflight_requests(&self, count: i64) { + self.inflight_requests.set(count); + } + + pub fn set_pivot_age_seconds(&self, age: i64) { + self.pivot_age_seconds.set(age); + } + + pub fn set_current_phase(&self, phase: i64) { + self.current_phase.set(phase); + } + + // --- Counter incrementers --- + + pub fn inc_pivot_update(&self, outcome: &str) { + self.pivot_updates.with_label_values(&[outcome]).inc(); + } + + pub fn inc_storage_request(&self, outcome: &str) { + self.storage_requests.with_label_values(&[outcome]).inc(); + } + + pub fn inc_header_resolution(&self, outcome: &str) { + self.header_resolution.with_label_values(&[outcome]).inc(); + } + + // --- Gather --- + + pub fn gather_metrics(&self) -> Result { + let r = Registry::new(); + + r.register(Box::new(self.eligible_peers.clone())) + .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; + r.register(Box::new(self.snap_peers.clone())) + .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; + r.register(Box::new(self.inflight_requests.clone())) + .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; + r.register(Box::new(self.pivot_age_seconds.clone())) + .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; + r.register(Box::new(self.current_phase.clone())) + .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; + r.register(Box::new(self.pivot_updates.clone())) + .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; + r.register(Box::new(self.storage_requests.clone())) + .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; + r.register(Box::new(self.header_resolution.clone())) + .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; + + let encoder = TextEncoder::new(); + let metric_families = r.gather(); + + let mut buffer = Vec::new(); + encoder + .encode(&metric_families, &mut buffer) + .map_err(|e| 
MetricsError::PrometheusErr(e.to_string()))?; + + let res = String::from_utf8(buffer)?; + + Ok(res) + } +} diff --git a/crates/networking/p2p/network.rs b/crates/networking/p2p/network.rs index 3fc2bac185f..7d224c7cac3 100644 --- a/crates/networking/p2p/network.rs +++ b/crates/networking/p2p/network.rs @@ -725,6 +725,16 @@ pub async fn periodically_show_peer_stats_after_sync(peer_table: &PeerTable) { }) .count(); info!("Snap Peers: {snap_active_peers} / Total Peers: {active_peers}"); + #[cfg(feature = "metrics")] + { + ethrex_metrics::sync::METRICS_SYNC.set_snap_peers(snap_active_peers as i64); + // Compute eligible peers via diagnostics (which calls can_try_more_requests) + let diag = peer_table.get_peer_diagnostics().await.unwrap_or_default(); + let eligible = diag.iter().filter(|p| p.eligible).count(); + let inflight: i64 = diag.iter().map(|p| p.inflight_requests).sum(); + ethrex_metrics::sync::METRICS_SYNC.set_eligible_peers(eligible as i64); + ethrex_metrics::sync::METRICS_SYNC.set_inflight_requests(inflight); + } interval.tick().await; } } diff --git a/crates/networking/p2p/peer_handler.rs b/crates/networking/p2p/peer_handler.rs index a921bc975a4..915cbf35b10 100644 --- a/crates/networking/p2p/peer_handler.rs +++ b/crates/networking/p2p/peer_handler.rs @@ -1,7 +1,7 @@ use crate::rlpx::initiator::RLPxInitiator; use crate::{ metrics::{CurrentStepValue, METRICS}, - peer_table::{PeerData, PeerTable, PeerTableServerProtocol as _}, + peer_table::{PeerData, PeerDiagnostics, PeerTable, PeerTableServerProtocol as _}, rlpx::{ connection::server::PeerConnection, error::PeerConnectionError, @@ -170,6 +170,16 @@ impl PeerHandler { .get_peer_connections(SUPPORTED_ETH_CAPABILITIES.to_vec()) .await?; + let selected_peers: Vec<_> = peer_connection + .iter() + .take(MAX_PEERS_TO_ASK) + .map(|(id, _)| *id) + .collect(); + debug!( + retry = retries, + peers_selected = ?selected_peers, + "request_block_headers: resolving sync head with peers" + ); for (peer_id, mut connection) in 
peer_connection.into_iter().take(MAX_PEERS_TO_ASK) { match ask_peer_head_number( peer_id, @@ -183,10 +193,16 @@ impl PeerHandler { Ok(number) => { sync_head_number = number; if number != 0 { + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.inc_header_resolution("found"); break; } + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.inc_header_resolution("unknown"); } Err(err) => { + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.inc_header_resolution("timeout"); debug!( "Sync Log 13: Failed to retrieve sync head block number from peer {peer_id}: {err}" ); @@ -578,6 +594,14 @@ impl PeerHandler { } Ok(None) } + /// Returns diagnostic snapshots for all connected peers (scores, requests, eligibility). + pub async fn read_peer_diagnostics(&self) -> Vec { + self.peer_table + .get_peer_diagnostics() + .await + .unwrap_or_default() + } + /// Returns the PeerData for each connected Peer pub async fn read_connected_peers(&mut self) -> Vec { self.peer_table diff --git a/crates/networking/p2p/peer_table.rs b/crates/networking/p2p/peer_table.rs index 7c33022032f..23f3f841f72 100644 --- a/crates/networking/p2p/peer_table.rs +++ b/crates/networking/p2p/peer_table.rs @@ -187,6 +187,8 @@ pub struct PeerData { score: i64, /// Track the amount of concurrent requests this peer is handling requests: i64, + /// Timestamp (seconds since UNIX epoch) of the last successful response from this peer + pub last_response_time: Option, } impl PeerData { @@ -204,10 +206,25 @@ impl PeerData { connection, score: Default::default(), requests: Default::default(), + last_response_time: None, } } } +/// Diagnostic snapshot of a peer's state, used by admin RPC endpoints. 
+#[derive(Debug, Clone, serde::Serialize)] +pub struct PeerDiagnostics { + pub peer_id: H256, + pub score: i64, + pub inflight_requests: i64, + pub eligible: bool, + pub capabilities: Vec, + pub ip: IpAddr, + pub client_version: String, + pub connection_direction: String, + pub last_response_time: Option, +} + /// Result of contact validation. #[derive(Debug, Clone)] pub enum ContactValidation { @@ -303,6 +320,7 @@ pub trait PeerTableServerProtocol: Send + Sync { capabilities: Vec, ) -> Response>; fn get_session_info(&self, node_id: H256) -> Response>; + fn get_peer_diagnostics(&self) -> Response>; } #[derive(Debug)] @@ -451,9 +469,14 @@ impl PeerTableServer { msg: peer_table_server_protocol::RecordSuccess, _ctx: &Context, ) { - self.peers - .entry(msg.node_id) - .and_modify(|peer_data| peer_data.score = (peer_data.score + 1).min(MAX_SCORE)); + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + self.peers.entry(msg.node_id).and_modify(|peer_data| { + peer_data.score = (peer_data.score + 1).min(MAX_SCORE); + peer_data.last_response_time = Some(now); + }); } #[send_handler] @@ -808,6 +831,36 @@ impl PeerTableServer { .or_else(|| self.contacts.get(&msg.node_id)?.session.clone()) } + #[request_handler] + async fn handle_get_peer_diagnostics( + &mut self, + _msg: peer_table_server_protocol::GetPeerDiagnostics, + _ctx: &Context, + ) -> Vec { + self.peers + .iter() + .map(|(id, peer_data)| PeerDiagnostics { + peer_id: *id, + score: peer_data.score, + inflight_requests: peer_data.requests, + eligible: self.can_try_more_requests(&peer_data.score, &peer_data.requests), + capabilities: peer_data + .supported_capabilities + .iter() + .map(|c| format!("{}/{}", c.protocol(), c.version)) + .collect(), + ip: peer_data.node.ip, + client_version: peer_data.node.version.clone().unwrap_or_default(), + connection_direction: if peer_data.is_connection_inbound { + "inbound".to_string() + } else { + "outbound".to_string() 
+ }, + last_response_time: peer_data.last_response_time, + }) + .collect() + } + // === Private helper methods === // Weighting function used to select best peer diff --git a/crates/networking/p2p/snap/client.rs b/crates/networking/p2p/snap/client.rs index 927032b0c41..08e01f70be2 100644 --- a/crates/networking/p2p/snap/client.rs +++ b/crates/networking/p2p/snap/client.rs @@ -98,6 +98,7 @@ pub async fn request_account_range( account_state_snapshots_dir: &Path, pivot_header: &mut BlockHeader, block_sync_state: &mut SnapBlockSyncState, + diagnostics: &std::sync::Arc>, ) -> Result<(), SnapError> { METRICS .current_step @@ -256,6 +257,7 @@ pub async fn request_account_range( pivot_header.timestamp, peers, block_sync_state, + diagnostics, ) .await .expect("Should be able to update pivot") @@ -1277,6 +1279,7 @@ async fn request_storage_ranges_worker( limit_hash: task.end_hash.unwrap_or(HASH_MAX), response_bytes: MAX_RESPONSE_BYTES, }); + tracing::trace!(peer_id = %peer_id, msg_type = "GetStorageRanges", "Sending storage range request"); let Ok(RLPxMessage::StorageRanges(StorageRanges { id: _, slots, @@ -1287,11 +1290,17 @@ async fn request_storage_ranges_worker( .outgoing_request(request, PEER_REPLY_TIMEOUT) .await else { + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.inc_storage_request("timeout"); + tracing::trace!(peer_id = %peer_id, msg_type = "GetStorageRanges", outcome = "timeout", "Storage range request failed"); tracing::debug!("Failed to get storage range"); tx.send(empty_task_result).await.ok(); return Ok(()); }; if slots.is_empty() && proof.is_empty() { + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.inc_storage_request("empty"); + tracing::trace!(peer_id = %peer_id, msg_type = "StorageRanges", outcome = "empty", "Storage range response empty"); tx.send(empty_task_result).await.ok(); tracing::debug!("Received empty storage range"); return Ok(()); @@ -1387,6 +1396,7 @@ async fn request_storage_ranges_worker( } else { 
(start + account_storages.len(), end, H256::zero()) }; + let slot_count: usize = account_storages.iter().map(|s| s.len()).sum(); let task_result = StorageTaskResult { start_index: start, account_storages, @@ -1395,6 +1405,9 @@ async fn request_storage_ranges_worker( remaining_end, remaining_hash_range: (remaining_start_hash, task.end_hash), }; + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.inc_storage_request("success"); + tracing::trace!(peer_id = %peer_id, msg_type = "StorageRanges", outcome = "success", slots = slot_count, "Storage range response received"); tx.send(task_result).await.ok(); Ok::<(), SnapError>(()) } diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 8f1ff36fcf1..95a6e9f809c 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -29,7 +29,7 @@ use std::sync::{ use tokio::sync::mpsc::error::SendError; use tokio::time::Instant; use tokio_util::sync::CancellationToken; -use tracing::{error, info}; +use tracing::{debug, error, info}; // Re-export types used by submodules pub use snap_sync::{ @@ -53,6 +53,56 @@ pub enum SyncMode { Snap, } +/// Diagnostic snapshot of the sync state, used by admin RPC endpoints. 
+#[derive(Debug, Clone, Default, serde::Serialize)] +pub struct SyncDiagnostics { + pub sync_mode: String, + pub current_phase: String, + pub pivot_block_number: Option, + pub pivot_timestamp: Option, + pub pivot_age_seconds: Option, + pub staleness_threshold_seconds: u64, + pub phase_progress: std::collections::HashMap, + pub recent_pivot_changes: std::collections::VecDeque, + pub recent_errors: std::collections::VecDeque, +} + +#[derive(Debug, Clone, serde::Serialize)] +pub struct PivotChangeEvent { + pub timestamp: u64, + pub old_pivot_number: u64, + pub new_pivot_number: u64, + pub outcome: String, + pub failure_reason: Option, +} + +#[derive(Debug, Clone, serde::Serialize)] +pub struct SyncErrorEvent { + pub timestamp: u64, + pub error_type: String, + pub error_message: String, + pub recoverable: bool, +} + +impl SyncDiagnostics { + const MAX_PIVOT_CHANGES: usize = 10; + const MAX_ERRORS: usize = 20; + + pub fn push_pivot_change(&mut self, event: PivotChangeEvent) { + if self.recent_pivot_changes.len() >= Self::MAX_PIVOT_CHANGES { + self.recent_pivot_changes.pop_front(); + } + self.recent_pivot_changes.push_back(event); + } + + pub fn push_error(&mut self, event: SyncErrorEvent) { + if self.recent_errors.len() >= Self::MAX_ERRORS { + self.recent_errors.pop_front(); + } + self.recent_errors.push_back(event); + } +} + /// Manager in charge the sync process #[derive(Debug)] pub struct Syncer { @@ -66,6 +116,7 @@ pub struct Syncer { /// This string indicates a folder where the snap algorithm will store temporary files that are /// used during the syncing process datadir: PathBuf, + diagnostics: Arc>, } impl Syncer { @@ -75,6 +126,7 @@ impl Syncer { cancel_token: CancellationToken, blockchain: Arc, datadir: PathBuf, + diagnostics: Arc>, ) -> Self { Self { snap_enabled, @@ -82,6 +134,7 @@ impl Syncer { cancel_token, blockchain, datadir, + diagnostics, } } @@ -97,6 +150,7 @@ impl Syncer { let start_time = Instant::now(); match self.sync_cycle(sync_head, store).await 
{ Ok(()) => { + self.diagnostics.write().await.current_phase = "idle".to_string(); info!( time_elapsed_s = start_time.elapsed().as_secs(), %sync_head, @@ -106,7 +160,23 @@ impl Syncer { // If the error is irrecoverable, we exit ethrex Err(error) => { - match error.is_recoverable() { + let recoverable = error.is_recoverable(); + debug!( + error_type = %error, + recoverable = recoverable, + action = if recoverable { "retry" } else { "exit" }, + "Sync cycle error classification" + ); + self.diagnostics.write().await.push_error(SyncErrorEvent { + timestamp: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(), + error_type: format!("{:?}", std::mem::discriminant(&error)), + error_message: error.to_string(), + recoverable, + }); + match recoverable { false => { // We exit the node, as we can't recover this error error!( @@ -144,6 +214,7 @@ impl Syncer { sync_head, store, &self.datadir, + &self.diagnostics, ) .await; METRICS.disable().await; diff --git a/crates/networking/p2p/sync/snap_sync.rs b/crates/networking/p2p/sync/snap_sync.rs index 519f1c73389..8755563c261 100644 --- a/crates/networking/p2p/sync/snap_sync.rs +++ b/crates/networking/p2p/sync/snap_sync.rs @@ -105,6 +105,7 @@ pub async fn sync_cycle_snap( sync_head: H256, store: Store, datadir: &Path, + diagnostics: &Arc>, ) -> Result<(), SyncError> { // Request all block headers between the current head and the sync head // We will begin from the current head so that we download the earliest state first @@ -241,7 +242,7 @@ pub async fn sync_cycle_snap( }; } - snap_sync(peers, &store, &mut block_sync_state, datadir).await?; + snap_sync(peers, &store, &mut block_sync_state, datadir, diagnostics).await?; store.clear_snap_state().await?; snap_enabled.store(false, Ordering::Relaxed); @@ -255,6 +256,7 @@ pub async fn snap_sync( store: &Store, block_sync_state: &mut SnapBlockSyncState, datadir: &Path, + diagnostics: &Arc>, ) -> Result<(), SyncError> { // snap-sync: 
launch tasks to fetch blocks and state in parallel // - Fetch each block's body and its receipt via eth p2p requests @@ -274,6 +276,7 @@ pub async fn snap_sync( pivot_header.timestamp, peers, block_sync_state, + diagnostics, ) .await?; } @@ -281,6 +284,17 @@ pub async fn snap_sync( "Selected block {} as pivot for snap sync", pivot_header.number ); + { + let mut diag = diagnostics.write().await; + diag.pivot_block_number = Some(pivot_header.number); + diag.pivot_timestamp = Some(pivot_header.timestamp); + let pivot_age = current_unix_time().saturating_sub(pivot_header.timestamp); + diag.pivot_age_seconds = Some(pivot_age); + diag.staleness_threshold_seconds = (SNAP_LIMIT as u64) * SECONDS_PER_BLOCK; + diag.sync_mode = "snap".to_string(); + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.set_pivot_age_seconds(pivot_age as i64); + } let state_root = pivot_header.state_root; let account_state_snapshots_dir = get_account_state_snapshots_dir(datadir); @@ -303,6 +317,9 @@ pub async fn snap_sync( // The function request_account_range writes the leafs into files in // account_state_snapshots_dir + diagnostics.write().await.current_phase = "account_ranges".to_string(); + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.set_current_phase(2); info!("Starting to download account ranges from peers"); request_account_range( peers, @@ -311,10 +328,23 @@ pub async fn snap_sync( account_state_snapshots_dir.as_ref(), &mut pivot_header, block_sync_state, + diagnostics, ) .await?; info!("Finish downloading account ranges from peers"); + { + let mut diag = diagnostics.write().await; + diag.current_phase = "account_insertion".to_string(); + diag.phase_progress.insert( + "account_ranges_downloaded".to_string(), + METRICS + .downloaded_account_tries + .load(std::sync::atomic::Ordering::Relaxed), + ); + } + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.set_current_phase(3); *METRICS.account_tries_insert_start_time.lock().await = 
Some(SystemTime::now()); METRICS .current_step @@ -341,6 +371,9 @@ pub async fn snap_sync( info!("Original state root: {state_root:?}"); info!("Computed state root after request_account_rages: {computed_state_root:?}"); + diagnostics.write().await.current_phase = "storage_ranges".to_string(); + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.set_current_phase(4); *METRICS.storage_tries_download_start_time.lock().await = Some(SystemTime::now()); // We start downloading the storage leafs. To do so, we need to be sure that the storage root // is correct. To do so, we always heal the state trie before requesting storage rates @@ -354,6 +387,7 @@ pub async fn snap_sync( pivot_header.timestamp, peers, block_sync_state, + diagnostics, ) .await?; } @@ -429,6 +463,9 @@ pub async fn snap_sync( info!("Finished request_storage_ranges"); *METRICS.storage_tries_download_end_time.lock().await = Some(SystemTime::now()); + diagnostics.write().await.current_phase = "storage_insertion".to_string(); + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.set_current_phase(5); *METRICS.storage_tries_insert_start_time.lock().await = Some(SystemTime::now()); METRICS .current_step @@ -448,6 +485,9 @@ pub async fn snap_sync( info!("Finished storing storage tries"); } + diagnostics.write().await.current_phase = "healing".to_string(); + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.set_current_phase(6); *METRICS.heal_start_time.lock().await = Some(SystemTime::now()); info!("Starting Healing Process"); let mut global_state_leafs_healed: u64 = 0; @@ -461,6 +501,7 @@ pub async fn snap_sync( pivot_header.timestamp, peers, block_sync_state, + diagnostics, ) .await?; } @@ -506,6 +547,9 @@ pub async fn snap_sync( let mut seen_code_hashes = HashSet::new(); let mut code_hashes_to_download = Vec::new(); + diagnostics.write().await.current_phase = "bytecodes".to_string(); + #[cfg(feature = "metrics")] + 
ethrex_metrics::sync::METRICS_SYNC.set_current_phase(7); info!("Starting download code hashes from peers"); for entry in std::fs::read_dir(&code_hashes_dir) .map_err(|_| SyncError::CodeHashesSnapshotsDirNotFound)? @@ -635,6 +679,7 @@ pub async fn update_pivot( block_timestamp: u64, peers: &mut PeerHandler, block_sync_state: &mut SnapBlockSyncState, + diagnostics: &Arc>, ) -> Result { const MAX_RETRIES_PER_PEER: u64 = 3; const MAX_TOTAL_FAILURES: u64 = 15; @@ -656,6 +701,18 @@ pub async fn update_pivot( loop { if total_failures >= MAX_TOTAL_FAILURES { + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.inc_pivot_update("max_failures"); + diagnostics + .write() + .await + .push_pivot_change(super::PivotChangeEvent { + timestamp: current_unix_time(), + old_pivot_number: block_number, + new_pivot_number: new_pivot_block_number, + outcome: "max_failures".to_string(), + failure_reason: Some(format!("Exhausted {MAX_TOTAL_FAILURES} total failures")), + }); return Err(SyncError::PeerHandler( crate::peer_handler::PeerHandlerError::BlockHeaders, )); @@ -679,6 +736,8 @@ pub async fn update_pivot( .await? 
else { debug!("We tried to get peers during update_pivot, but we found no free peers"); + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.inc_pivot_update("no_peers"); consecutive_failures = consecutive_failures.saturating_add(1); total_failures = total_failures.saturating_add(1); continue; @@ -701,6 +760,18 @@ pub async fn update_pivot( } let peer_score = peers.peer_table.get_score(peer_id).await?; + let diag = peers.read_peer_diagnostics().await; + let eligible_count = diag.iter().filter(|p| p.eligible).count(); + let total_count = diag.len(); + debug!( + eligible_peers = eligible_count, + total_peers = total_count, + selected_peer = %peer_id, + peer_score = peer_score, + consecutive_failures = consecutive_failures, + total_failures = total_failures, + "update_pivot: attempting with peer" + ); info!( "Trying to update pivot to {new_pivot_block_number} with peer {peer_id} (score: {peer_score})" ); @@ -714,6 +785,8 @@ pub async fn update_pivot( warn!( "Received None pivot from peer {peer_id} (score after penalizing: {peer_score}). 
Retrying" ); + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.inc_pivot_update("peer_none"); last_failed_peer = Some(peer_id); consecutive_failures = consecutive_failures.saturating_add(1); total_failures = total_failures.saturating_add(1); @@ -722,7 +795,25 @@ pub async fn update_pivot( // Reward peer peers.peer_table.record_success(peer_id)?; + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.inc_pivot_update("success"); info!("Succesfully updated pivot"); + { + let mut diag = diagnostics.write().await; + diag.push_pivot_change(super::PivotChangeEvent { + timestamp: current_unix_time(), + old_pivot_number: block_number, + new_pivot_number: pivot.number, + outcome: "success".to_string(), + failure_reason: None, + }); + diag.pivot_block_number = Some(pivot.number); + diag.pivot_timestamp = Some(pivot.timestamp); + let pivot_age = current_unix_time().saturating_sub(pivot.timestamp); + diag.pivot_age_seconds = Some(pivot_age); + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.set_pivot_age_seconds(pivot_age as i64); + } let block_headers = peers .request_block_headers(block_number + 1, pivot.hash()) .await? 
@@ -736,7 +827,21 @@ pub async fn update_pivot( } pub fn block_is_stale(block_header: &BlockHeader) -> bool { - calculate_staleness_timestamp(block_header.timestamp) < current_unix_time() + let threshold = calculate_staleness_timestamp(block_header.timestamp); + let now = current_unix_time(); + let is_stale = threshold < now; + if is_stale { + let pivot_age = now.saturating_sub(block_header.timestamp); + let staleness_limit = (SNAP_LIMIT as u64) * SECONDS_PER_BLOCK; + debug!( + pivot_number = block_header.number, + pivot_timestamp = block_header.timestamp, + pivot_age_seconds = pivot_age, + staleness_threshold_seconds = staleness_limit, + "Pivot block detected as stale" + ); + } + is_stale } pub fn calculate_staleness_timestamp(timestamp: u64) -> u64 { diff --git a/crates/networking/p2p/sync_manager.rs b/crates/networking/p2p/sync_manager.rs index 59e126f8963..5bc2e4462cf 100644 --- a/crates/networking/p2p/sync_manager.rs +++ b/crates/networking/p2p/sync_manager.rs @@ -18,7 +18,7 @@ use tracing::{error, info, warn}; use crate::{ peer_handler::PeerHandler, - sync::{SyncMode, Syncer}, + sync::{SyncDiagnostics, SyncMode, Syncer}, }; /// Abstraction to interact with the active sync process without disturbing it @@ -30,6 +30,7 @@ pub struct SyncManager { syncer: Arc>, last_fcu_head: Arc>, store: Store, + diagnostics: Arc>, } impl SyncManager { @@ -76,18 +77,21 @@ impl SyncManager { } } + let diagnostics = Arc::new(tokio::sync::RwLock::new(SyncDiagnostics::default())); let syncer = Arc::new(Mutex::new(Syncer::new( peer_handler, snap_enabled.clone(), cancel_token, blockchain, datadir, + diagnostics.clone(), ))); let sync_manager = Self { snap_enabled, syncer, last_fcu_head: Arc::new(Mutex::new(H256::zero())), store: store.clone(), + diagnostics, }; // If the node was in the middle of a sync and then re-started we must resume syncing // Otherwise we will incorreclty assume the node is already synced and work on invalid state @@ -120,6 +124,16 @@ impl SyncManager { 
self.snap_enabled.store(false, Ordering::Relaxed); } + /// Returns a snapshot of the current sync diagnostics. + pub async fn get_sync_diagnostics(&self) -> SyncDiagnostics { + self.diagnostics.read().await.clone() + } + + /// Returns a reference to the diagnostics RwLock for updating from the sync code. + pub fn diagnostics(&self) -> &Arc> { + &self.diagnostics + } + /// Updates the last fcu head. This may be used on the next sync cycle if needed fn set_head(&self, fcu_head: H256) { if let Ok(mut latest_fcu_head) = self.last_fcu_head.try_lock() { diff --git a/crates/networking/rpc/admin/mod.rs b/crates/networking/rpc/admin/mod.rs index 16e16c6535c..2f3668a6176 100644 --- a/crates/networking/rpc/admin/mod.rs +++ b/crates/networking/rpc/admin/mod.rs @@ -10,7 +10,7 @@ use crate::{ utils::{RpcErr, RpcRequest}, }; mod peers; -pub use peers::{add_peer, peers}; +pub use peers::{add_peer, peer_scores, peers, sync_status}; #[derive(Serialize, Debug)] #[serde(rename_all = "camelCase")] diff --git a/crates/networking/rpc/admin/peers.rs b/crates/networking/rpc/admin/peers.rs index 795229f5a94..2ee28fbe72b 100644 --- a/crates/networking/rpc/admin/peers.rs +++ b/crates/networking/rpc/admin/peers.rs @@ -152,6 +152,43 @@ async fn peer_is_connected(peer_handler: &mut PeerHandler, enode_url: &str) -> b .any(|peer| peer.node.enode_url() == *enode_url) } +pub async fn peer_scores(context: &mut RpcApiContext) -> Result { + let Some(peer_handler) = &context.peer_handler else { + return Err(RpcErr::Internal("Peer handler not initialized".to_string())); + }; + + let diagnostics = peer_handler.read_peer_diagnostics().await; + let total = diagnostics.len(); + let eligible = diagnostics.iter().filter(|p| p.eligible).count(); + let avg_score = if total > 0 { + diagnostics.iter().map(|p| p.score).sum::() / total as i64 + } else { + 0 + }; + let total_inflight: i64 = diagnostics.iter().map(|p| p.inflight_requests).sum(); + + let response = serde_json::json!({ + "peers": diagnostics, + 
"summary": { + "total_peers": total, + "eligible_peers": eligible, + "average_score": avg_score, + "total_inflight_requests": total_inflight, + } + }); + + Ok(response) +} + +pub async fn sync_status(context: &mut RpcApiContext) -> Result { + let Some(syncer) = &context.syncer else { + return Err(RpcErr::Internal("Sync manager not initialized".to_string())); + }; + + let diag = syncer.get_sync_diagnostics().await; + serde_json::to_value(diag).map_err(|e| RpcErr::Internal(e.to_string())) +} + // TODO: Adapt the test to the new P2P architecture. #[cfg(test)] mod tests { diff --git a/crates/networking/rpc/rpc.rs b/crates/networking/rpc/rpc.rs index b50efa05de7..d2465fe6c09 100644 --- a/crates/networking/rpc/rpc.rs +++ b/crates/networking/rpc/rpc.rs @@ -890,6 +890,8 @@ pub async fn map_admin_requests( match req.method.as_str() { "admin_nodeInfo" => admin::node_info(context.storage, &context.node_data).await, "admin_peers" => admin::peers(&mut context).await, + "admin_peerScores" => admin::peer_scores(&mut context).await, + "admin_syncStatus" => admin::sync_status(&mut context).await, "admin_setLogLevel" => admin::set_log_level(req, &context.log_filter_handler), "admin_addPeer" => admin::add_peer(&mut context, req).await, unknown_admin_method => Err(RpcErr::MethodNotFound(unknown_admin_method.to_owned())), diff --git a/tooling/repl/src/commands/admin.rs b/tooling/repl/src/commands/admin.rs index f1ae2c7d599..e6a79a9dad6 100644 --- a/tooling/repl/src/commands/admin.rs +++ b/tooling/repl/src/commands/admin.rs @@ -48,5 +48,19 @@ pub fn commands() -> Vec { params: ENODE, description: "Adds a peer by enode URL", }, + CommandDef { + namespace: "admin", + name: "peerScores", + rpc_method: "admin_peerScores", + params: NO_PARAMS, + description: "Returns peer diagnostics: scores, inflight requests, eligibility", + }, + CommandDef { + namespace: "admin", + name: "syncStatus", + rpc_method: "admin_syncStatus", + params: NO_PARAMS, + description: "Returns sync diagnostics: phase, 
pivot, staleness, recent events", + }, ] } diff --git a/tooling/sync/docker_monitor.py b/tooling/sync/docker_monitor.py index 0ab1c4254c0..cd5327cc2b4 100644 --- a/tooling/sync/docker_monitor.py +++ b/tooling/sync/docker_monitor.py @@ -61,6 +61,191 @@ } +# Diagnostics polling configuration +DIAGNOSTICS_NORMAL_INTERVAL = 30 # seconds between polls during normal operation +DIAGNOSTICS_DEGRADED_INTERVAL = 5 # seconds between polls during degradation +DIAGNOSTICS_NORMAL_BUFFER_SIZE = 20 # snapshots kept in normal mode +DIAGNOSTICS_DEGRADED_BUFFER_SIZE = 60 # snapshots kept in degraded mode +DEGRADATION_ELIGIBLE_PEERS_THRESHOLD = 5 # trigger if eligible peers below this +DEGRADATION_STALL_TIMEOUT = 60 # trigger if zero progress for this many seconds +DEGRADATION_STALENESS_RATIO = 0.8 # trigger if pivot age > 80% of threshold +DEGRADATION_RECOVERY_TIMEOUT = 60 # seconds of health before leaving degraded mode + + +class DiagnosticsTracker: + """Polls admin_peerScores and admin_syncStatus, keeps rolling buffer, dumps on degradation.""" + + def __init__(self, instances: list): + self.instances = instances + self.buffers: dict[str, list[dict]] = {inst.name: [] for inst in instances} + self.degraded: dict[str, bool] = {inst.name: False for inst in instances} + self.degraded_since: dict[str, float] = {inst.name: 0 for inst in instances} + self.healthy_since: dict[str, float] = {inst.name: 0 for inst in instances} + self.last_poll: dict[str, float] = {inst.name: 0 for inst in instances} + self.events: list[dict] = [] # degradation events across all networks + self.dumped_for_run: dict[str, bool] = {inst.name: False for inst in instances} + self._last_progress: dict[str, Optional[str]] = {inst.name: None for inst in instances} + + def poll_interval(self, name: str) -> float: + return DIAGNOSTICS_DEGRADED_INTERVAL if self.degraded[name] else DIAGNOSTICS_NORMAL_INTERVAL + + def buffer_limit(self, name: str) -> int: + return DIAGNOSTICS_DEGRADED_BUFFER_SIZE if self.degraded[name] 
else DIAGNOSTICS_NORMAL_BUFFER_SIZE + + def should_poll(self, name: str) -> bool: + return (time.time() - self.last_poll[name]) >= self.poll_interval(name) + + def poll(self, inst) -> None: + """Poll diagnostics RPC endpoints for a single instance.""" + if inst.status in ("success", "failed", "waiting"): + return + if not self.should_poll(inst.name): + return + + self.last_poll[inst.name] = time.time() + peer_scores = rpc_call(inst.rpc_url, "admin_peerScores") + sync_status = rpc_call(inst.rpc_url, "admin_syncStatus") + + if peer_scores is None and sync_status is None: + return # node not reachable, skip + + snapshot = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "epoch": time.time(), + "peer_scores": peer_scores, + "sync_status": sync_status, + } + + buf = self.buffers[inst.name] + buf.append(snapshot) + # Trim buffer to limit + limit = self.buffer_limit(inst.name) + while len(buf) > limit: + buf.pop(0) + + self._check_degradation(inst, snapshot) + + def _check_degradation(self, inst, snapshot: dict) -> None: + """Check for degradation conditions and trigger dump if needed.""" + now = time.time() + name = inst.name + reasons = [] + + # Check eligible peers + if snapshot.get("peer_scores") and isinstance(snapshot["peer_scores"], dict): + summary = snapshot["peer_scores"].get("summary", {}) + eligible = summary.get("eligible_peers", 999) + if eligible < DEGRADATION_ELIGIBLE_PEERS_THRESHOLD: + reasons.append(f"eligible_peers={eligible}") + + # Check sync progress stall + if snapshot.get("sync_status") and isinstance(snapshot["sync_status"], dict): + phase = snapshot["sync_status"].get("current_phase", "idle") + progress_key = str(snapshot["sync_status"].get("phase_progress", {})) + if phase not in ("idle", ""): + if self._last_progress[name] is not None and self._last_progress[name] == progress_key: + # No progress change — but we only flag after DEGRADATION_STALL_TIMEOUT + pass # tracked by the outer loop via last_block + self._last_progress[name] = 
progress_key + + # Check staleness ratio + pivot_age = snapshot["sync_status"].get("pivot_age_seconds") + threshold = snapshot["sync_status"].get("staleness_threshold_seconds", 0) + if pivot_age and threshold and threshold > 0: + ratio = pivot_age / threshold + if ratio > DEGRADATION_STALENESS_RATIO: + reasons.append(f"staleness_ratio={ratio:.2f}") + + if reasons: + if not self.degraded[name]: + self.degraded[name] = True + self.degraded_since[name] = now + self.healthy_since[name] = 0 + event = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "network": name, + "event_type": "degradation_start", + "reasons": reasons, + "eligible_peers": snapshot.get("peer_scores", {}).get("summary", {}).get("eligible_peers"), + "phase": snapshot.get("sync_status", {}).get("current_phase"), + } + self.events.append(event) + print(f"⚠️ [{name}] Degradation detected: {', '.join(reasons)} — increasing poll frequency") + # Dump snapshots on degradation + if not self.dumped_for_run.get(name): + self._dump_snapshots(name) + else: + # Healthy — check if we can exit degraded mode + if self.degraded[name]: + if self.healthy_since[name] == 0: + self.healthy_since[name] = now + elif (now - self.healthy_since[name]) >= DEGRADATION_RECOVERY_TIMEOUT: + self.degraded[name] = False + self.healthy_since[name] = 0 + event = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "network": name, + "event_type": "degradation_end", + } + self.events.append(event) + print(f"✅ [{name}] Degradation resolved — resuming normal poll frequency") + + def on_failure(self, name: str) -> None: + """Called when a network fails — dump snapshots if not already dumped.""" + if not self.dumped_for_run.get(name): + self._dump_snapshots(name) + event = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "network": name, + "event_type": "failure", + } + self.events.append(event) + + def _dump_snapshots(self, name: str) -> None: + """Dump the rolling buffer to disk.""" + self.dumped_for_run[name] = True + buf 
= self.buffers[name] + if not buf: + return + # Find the current run's log directory + run_dirs = sorted(LOGS_DIR.glob("run_*"), key=lambda p: p.name, reverse=True) + if not run_dirs: + return + out_path = run_dirs[0] / f"{name}_peer_snapshots.json" + try: + import json + out_path.write_text(json.dumps(buf, indent=2, default=str)) + print(f"📸 [{name}] Dumped {len(buf)} diagnostic snapshots to {out_path}") + except Exception as e: + print(f"⚠️ [{name}] Failed to dump snapshots: {e}") + + def format_degradation_events(self) -> str: + """Format degradation events for the summary.txt.""" + if not self.events: + return "" + lines = ["\n Degradation Events:"] + for ev in self.events: + ts = ev["timestamp"] + net = ev.get("network", "?") + evt = ev.get("event_type", "?") + reasons = ev.get("reasons", []) + detail = f" ({', '.join(reasons)})" if reasons else "" + lines.append(f" {ts} [{net}] {evt}{detail}") + return "\n".join(lines) + + def reset(self) -> None: + """Reset state for a new run.""" + for name in self.buffers: + self.buffers[name] = [] + self.degraded[name] = False + self.degraded_since[name] = 0 + self.healthy_since[name] = 0 + self.last_poll[name] = 0 + self.dumped_for_run[name] = False + self._last_progress[name] = None + self.events = [] + + @dataclass class Instance: name: str @@ -421,7 +606,7 @@ def save_all_logs(instances: list[Instance], run_id: str, compose_file: str): print(f"📁 Logs saved to {LOGS_DIR}/run_{run_id}/\n") -def log_run_result(run_id: str, run_count: int, instances: list[Instance], hostname: str, branch: str, commit: str, build_profile: str = ""): +def log_run_result(run_id: str, run_count: int, instances: list[Instance], hostname: str, branch: str, commit: str, build_profile: str = "", diagnostics_tracker: Optional['DiagnosticsTracker'] = None): """Append run result to the persistent log file.""" ensure_logs_dir() all_success = all(i.status == "success" for i in instances) @@ -477,6 +662,12 @@ def log_run_result(run_id: str, run_count: 
int, instances: list[Instance], hostn for name, count, duration in phases: lines.append(f" {name:<{max_name_len}} {duration} ({count})") + # Include degradation events if any + if diagnostics_tracker: + degradation_text = diagnostics_tracker.format_degradation_events() + if degradation_text: + lines.append(degradation_text) + lines.append("") # Append to log file with open(RUN_LOG_FILE, "a") as f: @@ -699,7 +890,8 @@ def main(): containers = [f"ethrex-{n}" for n in names] instances = [Instance(n, p, c) for n, p, c in zip(names, ports, containers)] - + tracker = DiagnosticsTracker(instances) + # Detect state of already-running containers for inst in instances: if t := container_start_time(inst.container): @@ -763,6 +955,7 @@ def main(): # Reset instances since we restarted for inst in instances: reset_instance(inst) + tracker.reset() time.sleep(30) # Wait for containers to start print(f"{'='*60}\n") @@ -770,6 +963,12 @@ def main(): last_print = 0 while True: changed = any(update_instance(i, args.timeout) for i in instances) + # Poll diagnostics endpoints + for inst in instances: + tracker.poll(inst) + # Trigger dump on failure + if inst.status == "failed" and changed: + tracker.on_failure(inst.name) if changed or (time.time() - last_print) > STATUS_PRINT_INTERVAL: print_status(instances) last_print = time.time() @@ -779,7 +978,7 @@ def main(): time.sleep(CHECK_INTERVAL) # Log the run result and save container logs BEFORE any restart save_all_logs(instances, run_id, args.compose_file) - log_run_result(run_id, run_count, instances, hostname, branch, commit, args.build_profile) + log_run_result(run_id, run_count, instances, hostname, branch, commit, args.build_profile, tracker) # Send a single Slack summary notification for the run if not args.no_slack: slack_notify(run_id, run_count, instances, hostname, branch, commit, args.build_profile) From e363561c58a0e484b2c039d3981e00718ae5aa66 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Thu, 9 Apr 2026 20:09:51 
-0300 Subject: [PATCH 02/31] fix(l1): add diagnostics for header download phase in snap sync --- crates/networking/p2p/sync/snap_sync.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/crates/networking/p2p/sync/snap_sync.rs b/crates/networking/p2p/sync/snap_sync.rs index 8755563c261..c5edd21e61b 100644 --- a/crates/networking/p2p/sync/snap_sync.rs +++ b/crates/networking/p2p/sync/snap_sync.rs @@ -119,6 +119,13 @@ pub async fn sync_cycle_snap( .get_block_number(current_head) .await? .ok_or(SyncError::BlockNumber(current_head))?; + { + let mut diag = diagnostics.write().await; + diag.current_phase = "headers".to_string(); + diag.sync_mode = "snap".to_string(); + } + #[cfg(feature = "metrics")] + ethrex_metrics::sync::METRICS_SYNC.set_current_phase(1); info!( "Syncing from current head {:?} to sync_head {:?}", current_head, sync_head @@ -237,6 +244,15 @@ pub async fn sync_cycle_snap( .await?; } + // Update diagnostics with header progress + { + let mut diag = diagnostics.write().await; + diag.phase_progress.insert( + "headers_downloaded".to_string(), + block_sync_state.block_hashes.len() as u64, + ); + } + if sync_head_found { break; }; From b6681a51169840c66dae83a49e3389514ac21753 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Fri, 10 Apr 2026 09:40:35 -0300 Subject: [PATCH 03/31] =?UTF-8?q?feat(l1):=20dynamic=20log=20level=20on=20?= =?UTF-8?q?degradation=20=E2=80=94=20TRACE=20for=20peer=20comms=20when=20i?= =?UTF-8?q?ssues=20detected?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docker monitor bumps log level to TRACE via admin_setLogLevel RPC when degradation is detected, capturing detailed per-peer dialogue in container logs. Restores to DEBUG on recovery. Also sets default RUST_LOG in docker-compose to enable DEBUG-level sync events (staleness, pivot updates, error classification). 
--- tooling/sync/docker-compose.multisync.yaml | 2 ++ tooling/sync/docker_monitor.py | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/tooling/sync/docker-compose.multisync.yaml b/tooling/sync/docker-compose.multisync.yaml index 200f91326a7..f5ab6888b7d 100644 --- a/tooling/sync/docker-compose.multisync.yaml +++ b/tooling/sync/docker-compose.multisync.yaml @@ -28,6 +28,8 @@ x-ethrex-common: ðrex-common image: "${ETHREX_IMAGE:-ghcr.io/lambdaclass/ethrex:main}" pull_policy: "${ETHREX_PULL_POLICY:-always}" + environment: + RUST_LOG: "${RUST_LOG:-info,ethrex_p2p::sync=debug}" ulimits: nofile: 1000000 restart: unless-stopped diff --git a/tooling/sync/docker_monitor.py b/tooling/sync/docker_monitor.py index cd5327cc2b4..a96cd7f3570 100644 --- a/tooling/sync/docker_monitor.py +++ b/tooling/sync/docker_monitor.py @@ -70,6 +70,8 @@ DEGRADATION_STALL_TIMEOUT = 60 # trigger if zero progress for this many seconds DEGRADATION_STALENESS_RATIO = 0.8 # trigger if pivot age > 80% of threshold DEGRADATION_RECOVERY_TIMEOUT = 60 # seconds of health before leaving degraded mode +LOG_LEVEL_NORMAL = "info,ethrex_p2p::sync=debug" +LOG_LEVEL_DEGRADED = "info,ethrex_p2p=trace" class DiagnosticsTracker: @@ -171,6 +173,11 @@ def _check_degradation(self, inst, snapshot: dict) -> None: } self.events.append(event) print(f"⚠️ [{name}] Degradation detected: {', '.join(reasons)} — increasing poll frequency") + # Bump log level to TRACE for detailed peer comms + if rpc_set_log_level(inst.rpc_url, LOG_LEVEL_DEGRADED): + print(f"🔍 [{name}] Log level bumped to TRACE for peer diagnostics") + else: + print(f"⚠️ [{name}] Failed to bump log level") # Dump snapshots on degradation if not self.dumped_for_run.get(name): self._dump_snapshots(name) @@ -189,6 +196,11 @@ def _check_degradation(self, inst, snapshot: dict) -> None: } self.events.append(event) print(f"✅ [{name}] Degradation resolved — resuming normal poll frequency") + # Restore log level to normal + if 
rpc_set_log_level(inst.rpc_url, LOG_LEVEL_NORMAL): + print(f"📝 [{name}] Log level restored to DEBUG") + else: + print(f"⚠️ [{name}] Failed to restore log level") def on_failure(self, name: str) -> None: """Called when a network fails — dump snapshots if not already dumped.""" @@ -459,6 +471,15 @@ def rpc_call(url: str, method: str) -> Optional[Any]: return None +def rpc_set_log_level(url: str, level: str) -> bool: + """Set the node's log level via admin_setLogLevel RPC.""" + try: + resp = requests.post(url, json={"jsonrpc": "2.0", "method": "admin_setLogLevel", "params": [level], "id": 1}, timeout=5).json() + return resp.get("result") is not None and "error" not in resp + except Exception: + return False + + def parse_phase_timings(run_id: str, container: str) -> list[tuple[str, str, str]]: """Parse phase completion times from saved container logs. From bc7b0491993becf625835da64aa7cab27715e17d Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Fri, 10 Apr 2026 11:51:26 -0300 Subject: [PATCH 04/31] fix(l1): compute live sync diagnostics on query instead of stale snapshots Read pivot age from current time and progress counters from METRICS atomics on each admin_syncStatus call, so the RPC always returns fresh values instead of stale data from the last phase transition. --- crates/networking/p2p/sync_manager.rs | 46 +++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/crates/networking/p2p/sync_manager.rs b/crates/networking/p2p/sync_manager.rs index 5bc2e4462cf..6866f02cc42 100644 --- a/crates/networking/p2p/sync_manager.rs +++ b/crates/networking/p2p/sync_manager.rs @@ -124,9 +124,51 @@ impl SyncManager { self.snap_enabled.store(false, Ordering::Relaxed); } - /// Returns a snapshot of the current sync diagnostics. + /// Returns a snapshot of the current sync diagnostics with live values. 
pub async fn get_sync_diagnostics(&self) -> SyncDiagnostics { - self.diagnostics.read().await.clone() + use crate::metrics::METRICS; + use std::sync::atomic::Ordering::Relaxed; + + let mut diag = self.diagnostics.read().await.clone(); + + // Compute live pivot age + if let Some(ts) = diag.pivot_timestamp { + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + diag.pivot_age_seconds = Some(now.saturating_sub(ts)); + } + + // Populate live progress from METRICS atomics + let headers = METRICS.downloaded_headers.get() as u64; + let accounts_downloaded = METRICS.downloaded_account_tries.load(Relaxed); + let accounts_inserted = METRICS.account_tries_inserted.load(Relaxed); + let storage_downloaded = METRICS.storage_leaves_downloaded.get() as u64; + let storage_inserted = METRICS.storage_leaves_inserted.get() as u64; + + if headers > 0 { + diag.phase_progress + .insert("headers_downloaded".into(), headers); + } + if accounts_downloaded > 0 { + diag.phase_progress + .insert("accounts_downloaded".into(), accounts_downloaded); + } + if accounts_inserted > 0 { + diag.phase_progress + .insert("accounts_inserted".into(), accounts_inserted); + } + if storage_downloaded > 0 { + diag.phase_progress + .insert("storage_slots_downloaded".into(), storage_downloaded); + } + if storage_inserted > 0 { + diag.phase_progress + .insert("storage_slots_inserted".into(), storage_inserted); + } + + diag } /// Returns a reference to the diagnostics RwLock for updating from the sync code. From f20c95f7f912ed5dd16108544a861b2b78703da1 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Fri, 10 Apr 2026 12:20:03 -0300 Subject: [PATCH 05/31] feat(l1): render arrays of objects as tables in REPL formatter When a JSON response contains an array of objects (like admin_peerScores peers list), render as an aligned table with headers instead of collapsing to a single line. Makes diagnostic endpoints readable in the REPL. 
--- tooling/repl/src/formatter.rs | 83 +++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/tooling/repl/src/formatter.rs b/tooling/repl/src/formatter.rs index b2975a62e13..0abb4939b87 100644 --- a/tooling/repl/src/formatter.rs +++ b/tooling/repl/src/formatter.rs @@ -112,6 +112,7 @@ fn format_object_box(map: &serde_json::Map, title: &str) -> Strin /// Flatten a JSON object into (key, plain-text-value) pairs. /// Nested objects are expanded with dot-separated keys. +/// Arrays of objects are rendered as inline tables. fn flatten_object(map: &serde_json::Map, prefix: &str) -> Vec<(String, String)> { let mut rows = Vec::new(); for (key, value) in map { @@ -124,6 +125,10 @@ fn flatten_object(map: &serde_json::Map, prefix: &str) -> Vec<(St Value::Object(nested) if !nested.is_empty() => { rows.extend(flatten_object(nested, &full_key)); } + Value::Array(arr) if !arr.is_empty() && arr.iter().all(|v| v.is_object()) => { + // Render array of objects as a table + rows.push((full_key, format_object_array_table(arr))); + } Value::Array(arr) => { let items: Vec = arr.iter().map(inline_value).collect(); rows.push((full_key, items.join(", "))); @@ -136,6 +141,84 @@ fn flatten_object(map: &serde_json::Map, prefix: &str) -> Vec<(St rows } +/// Render an array of objects as a compact table with headers. 
+fn format_object_array_table(arr: &[Value]) -> String { + if arr.is_empty() { + return "[]".to_string(); + } + + // Collect all keys from all objects to build columns + let mut columns: Vec = Vec::new(); + for item in arr { + if let Value::Object(map) = item { + for key in map.keys() { + if !columns.contains(key) { + columns.push(key.clone()); + } + } + } + } + + if columns.is_empty() { + return "[]".to_string(); + } + + // Compute column widths + let col_values: Vec> = arr + .iter() + .map(|item| { + columns + .iter() + .map(|col| item.get(col).map(|v| inline_value(v)).unwrap_or_default()) + .collect() + }) + .collect(); + + let col_widths: Vec = columns + .iter() + .enumerate() + .map(|(i, header)| { + let max_val = col_values.iter().map(|row| row[i].len()).max().unwrap_or(0); + header.len().max(max_val).min(30) + }) + .collect(); + + let mut out = String::new(); + + // Header + out.push('\n'); + let header_parts: Vec = columns + .iter() + .zip(&col_widths) + .map(|(h, w)| format!("{:>width$}", h, width = *w)) + .collect(); + out.push_str(&format!(" {}", header_parts.join(" "))); + + // Separator + let sep_parts: Vec = col_widths.iter().map(|w| "─".repeat(*w)).collect(); + out.push_str(&format!("\n {}", sep_parts.join("──"))); + + // Rows + for row in &col_values { + let parts: Vec = row + .iter() + .zip(&col_widths) + .map(|(val, w)| { + let truncated = if val.len() > *w { + format!("{}…", &val[..*w - 1]) + } else { + val.clone() + }; + format!("{:>width$}", truncated, width = *w) + }) + .collect(); + out.push_str(&format!("\n {}", parts.join(" "))); + } + + out.push_str(&format!("\n ({} items)", arr.len())); + out +} + /// Convert a Value to a plain-text string for table cells. 
fn inline_value(value: &Value) -> String { match value { From 4eda5a8d185c50cdf62b39edbe6e0ef3a4b0564c Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Fri, 10 Apr 2026 16:29:24 -0300 Subject: [PATCH 06/31] =?UTF-8?q?fix(l1):=20improve=20monitor=20failure=20?= =?UTF-8?q?capture=20=E2=80=94=20force=20dump=20on=20failure,=20detect=20h?= =?UTF-8?q?ealing=20phase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - on_failure: do a final RPC poll and bump log level before dumping - _dump_snapshots: support force=True to re-dump even if already dumped - Detect healing phase as degradation trigger to get 5s polling during the high-risk window where pivot updates happen - Always capture peer state at time of failure for post-mortem analysis --- tooling/repl/src/formatter.rs | 77 ++++++++++++++++++++-------------- tooling/sync/docker_monitor.py | 26 ++++++++---- 2 files changed, 64 insertions(+), 39 deletions(-) diff --git a/tooling/repl/src/formatter.rs b/tooling/repl/src/formatter.rs index 0abb4939b87..f7c1049fe9e 100644 --- a/tooling/repl/src/formatter.rs +++ b/tooling/repl/src/formatter.rs @@ -70,42 +70,57 @@ fn format_object_box(map: &serde_json::Map, title: &str) -> Strin let rows = flatten_object(map, ""); - let key_w = rows.iter().map(|(k, _)| k.len()).max().unwrap_or(0); - let val_w = rows - .iter() - .map(|(_, v)| v.len()) - .max() - .unwrap_or(0) - .min(MAX_VALUE_DISPLAY_LEN); - let content_w = key_w + 3 + val_w; - let box_w = content_w + 4; // "│ " + content + " │" + // Separate scalar rows from table sections (arrays of objects rendered below the box) + let mut scalar_rows = Vec::new(); + let mut table_sections: Vec<(String, String)> = Vec::new(); + for (key, value) in rows { + if value.starts_with('\n') && value.contains("items)") { + table_sections.push((key, value)); + } else { + scalar_rows.push((key, value)); + } + } let mut out = String::new(); - // Top border - if title.is_empty() { - 
out.push_str(&format!("┌{}┐\n", "─".repeat(box_w - 2))); - } else { - let fill = (box_w - 2).saturating_sub(title.len() + 1); - out.push_str(&format!("┌─{}{}┐\n", title.bold(), "─".repeat(fill))); + if !scalar_rows.is_empty() { + let key_w = scalar_rows.iter().map(|(k, _)| k.len()).max().unwrap_or(0); + let val_w = scalar_rows + .iter() + .map(|(_, v)| v.len()) + .max() + .unwrap_or(0) + .min(MAX_VALUE_DISPLAY_LEN); + let content_w = key_w + 3 + val_w; + let box_w = content_w + 4; + + if title.is_empty() { + out.push_str(&format!("┌{}┐\n", "─".repeat(box_w - 2))); + } else { + let fill = (box_w - 2).saturating_sub(title.len() + 1); + out.push_str(&format!("┌─{}{}┐\n", title.bold(), "─".repeat(fill))); + } + + for (key, value) in &scalar_rows { + let display_val = truncate_middle(value, val_w); + let key_pad = " ".repeat(key_w.saturating_sub(key.len())); + let val_pad = " ".repeat(val_w.saturating_sub(display_val.len())); + out.push_str(&format!( + "│ {}{} {}{} │\n", + key_pad, + key.cyan(), + colorize_inline(&display_val), + val_pad, + )); + } + + out.push_str(&format!("└{}┘", "─".repeat(box_w - 2))); } - // Rows - for (key, value) in &rows { - let display_val = truncate_middle(value, val_w); - let key_pad = " ".repeat(key_w.saturating_sub(key.len())); - let val_pad = " ".repeat(val_w.saturating_sub(display_val.len())); - out.push_str(&format!( - "│ {}{} {}{} │\n", - key_pad, - key.cyan(), - colorize_inline(&display_val), - val_pad, - )); - } - - // Bottom border - out.push_str(&format!("└{}┘", "─".repeat(box_w - 2))); + // Render table sections below the box + for (key, table) in &table_sections { + out.push_str(&format!("\n {}:{}", key.cyan(), table)); + } out } diff --git a/tooling/sync/docker_monitor.py b/tooling/sync/docker_monitor.py index a96cd7f3570..206be4b0e47 100644 --- a/tooling/sync/docker_monitor.py +++ b/tooling/sync/docker_monitor.py @@ -158,6 +158,10 @@ def _check_degradation(self, inst, snapshot: dict) -> None: if ratio > 
DEGRADATION_STALENESS_RATIO: reasons.append(f"staleness_ratio={ratio:.2f}") + # Healing phase is high-risk for pivot failures — increase polling + if phase == "healing": + reasons.append("healing_phase") + if reasons: if not self.degraded[name]: self.degraded[name] = True @@ -179,8 +183,7 @@ def _check_degradation(self, inst, snapshot: dict) -> None: else: print(f"⚠️ [{name}] Failed to bump log level") # Dump snapshots on degradation - if not self.dumped_for_run.get(name): - self._dump_snapshots(name) + self._dump_snapshots(name) else: # Healthy — check if we can exit degraded mode if self.degraded[name]: @@ -202,10 +205,15 @@ def _check_degradation(self, inst, snapshot: dict) -> None: else: print(f"⚠️ [{name}] Failed to restore log level") - def on_failure(self, name: str) -> None: - """Called when a network fails — dump snapshots if not already dumped.""" - if not self.dumped_for_run.get(name): - self._dump_snapshots(name) + def on_failure(self, inst, name: str) -> None: + """Called when a network fails — do a final poll and dump snapshots.""" + # Do one last poll to capture the state at failure time + self.last_poll[name] = 0 # force immediate poll + self.poll(inst) + # Bump log level to capture any post-failure details + rpc_set_log_level(inst.rpc_url, LOG_LEVEL_DEGRADED) + # Always dump on failure, even if previously dumped for degradation + self._dump_snapshots(name, force=True) event = { "timestamp": datetime.utcnow().isoformat() + "Z", "network": name, @@ -213,8 +221,10 @@ def on_failure(self, name: str) -> None: } self.events.append(event) - def _dump_snapshots(self, name: str) -> None: + def _dump_snapshots(self, name: str, force: bool = False) -> None: """Dump the rolling buffer to disk.""" + if not force and self.dumped_for_run.get(name): + return self.dumped_for_run[name] = True buf = self.buffers[name] if not buf: @@ -989,7 +999,7 @@ def main(): tracker.poll(inst) # Trigger dump on failure if inst.status == "failed" and changed: - 
tracker.on_failure(inst.name) + tracker.on_failure(inst, inst.name) if changed or (time.time() - last_print) > STATUS_PRINT_INTERVAL: print_status(instances) last_print = time.time() From 5066523baf9bc7bc9640905852a92c7ac25bb24b Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Fri, 10 Apr 2026 16:33:48 -0300 Subject: [PATCH 07/31] refactor(l1): rename _check_degradation to _check_alert_conditions The method now detects both actual degradation (low eligible peers, staleness) and high-risk phases (healing), so the name should reflect the broader scope. --- tooling/sync/docker_monitor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tooling/sync/docker_monitor.py b/tooling/sync/docker_monitor.py index 206be4b0e47..02f731cdbfd 100644 --- a/tooling/sync/docker_monitor.py +++ b/tooling/sync/docker_monitor.py @@ -125,9 +125,9 @@ def poll(self, inst) -> None: while len(buf) > limit: buf.pop(0) - self._check_degradation(inst, snapshot) + self._check_alert_conditions(inst, snapshot) - def _check_degradation(self, inst, snapshot: dict) -> None: + def _check_alert_conditions(self, inst, snapshot: dict) -> None: """Check for degradation conditions and trigger dump if needed.""" now = time.time() name = inst.name From 3ca7e075b6c4a5e1bcd47db33828e545b6d0318a Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Sat, 11 Apr 2026 17:12:48 -0300 Subject: [PATCH 08/31] fix(l1): create run directory early so snapshot dumps work during degradation The monitor was trying to dump peer snapshots to the run directory during degradation events, but the directory didn't exist yet (created at run end by save_all_logs). Now set_run_id() creates the directory immediately. Also adds peer_top.py live viewer script. 
--- tooling/sync/docker_monitor.py | 17 ++- tooling/sync/peer_top.py | 236 +++++++++++++++++++++++++++++++++ tooling/sync/peer_top.sh | 28 ++++ 3 files changed, 277 insertions(+), 4 deletions(-) create mode 100644 tooling/sync/peer_top.py create mode 100644 tooling/sync/peer_top.sh diff --git a/tooling/sync/docker_monitor.py b/tooling/sync/docker_monitor.py index 02f731cdbfd..bd5902f5c27 100644 --- a/tooling/sync/docker_monitor.py +++ b/tooling/sync/docker_monitor.py @@ -221,6 +221,13 @@ def on_failure(self, inst, name: str) -> None: } self.events.append(event) + def set_run_id(self, run_id: str) -> None: + """Set the current run ID so snapshots go to the right directory.""" + self.run_id = run_id + # Ensure the directory exists now, not at the end of the run + run_dir = LOGS_DIR / f"run_{run_id}" + run_dir.mkdir(parents=True, exist_ok=True) + def _dump_snapshots(self, name: str, force: bool = False) -> None: """Dump the rolling buffer to disk.""" if not force and self.dumped_for_run.get(name): @@ -229,11 +236,11 @@ def _dump_snapshots(self, name: str, force: bool = False) -> None: buf = self.buffers[name] if not buf: return - # Find the current run's log directory - run_dirs = sorted(LOGS_DIR.glob("run_*"), key=lambda p: p.name, reverse=True) - if not run_dirs: + if not hasattr(self, 'run_id') or not self.run_id: return - out_path = run_dirs[0] / f"{name}_peer_snapshots.json" + run_dir = LOGS_DIR / f"run_{self.run_id}" + run_dir.mkdir(parents=True, exist_ok=True) + out_path = run_dir / f"{name}_peer_snapshots.json" try: import json out_path.write_text(json.dumps(buf, indent=2, default=str)) @@ -954,6 +961,7 @@ def main(): # Get run count from existing logs (persists across restarts) run_count = get_next_run_count() run_id = generate_run_id() + tracker.set_run_id(run_id) print(f"📁 Logs will be saved to {LOGS_DIR.absolute()}") print(f"📝 Run history: {RUN_LOG_FILE.absolute()}") @@ -1021,6 +1029,7 @@ def main(): # Prepare for another run run_count += 1 run_id = 
generate_run_id() # New run ID for the new cycle + tracker.set_run_id(run_id) # If auto-update is enabled, the loop will pull/build/restart # Otherwise, just restart containers now diff --git a/tooling/sync/peer_top.py b/tooling/sync/peer_top.py new file mode 100644 index 00000000000..185a2b7130a --- /dev/null +++ b/tooling/sync/peer_top.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +"""Live peer table viewer — like top for ethrex peers.""" +import os, signal, sys, time +import requests as req + +ENDPOINT = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:18547" +INTERVAL = float(sys.argv[2]) if len(sys.argv) > 2 else 1.0 + +# ANSI colors +RED = "\033[31m" +GREEN = "\033[32m" +YELLOW = "\033[33m" +CYAN = "\033[36m" +DIM = "\033[2m" +BOLD = "\033[1m" +RESET = "\033[0m" + +# Track previous scores for delta coloring +prev_scores: dict[str, int] = {} + + +def fetch(method): + try: + r = req.post( + ENDPOINT, + json={"jsonrpc": "2.0", "method": method, "params": [], "id": 1}, + timeout=3, + ) + return r.json().get("result") + except Exception: + return None + + +start_time = time.time() + + +def color_score(peer_id: str, score: int) -> str: + """Color the score based on value and delta from previous tick.""" + prev = prev_scores.get(peer_id) + if score <= -30: + color = RED + elif score <= 0: + color = YELLOW + else: + color = GREEN + + if prev is not None and prev != score: + if score > prev: + # Score went up — bright green arrow + return f"{GREEN}{BOLD}{score:>4} \u2191{RESET}" + else: + # Score went down — bright red arrow + return f"{RED}{BOLD}{score:>4} \u2193{RESET}" + return f"{color}{score:>4} {RESET}" + + +def render(): + global prev_scores + lines = [] + elapsed = int(time.time() - start_time) + h, m, s = elapsed // 3600, (elapsed % 3600) // 60, elapsed % 60 + now_str = time.strftime("%H:%M:%S") + lines.append( + f"{BOLD}peer_top{RESET} {DIM}— {now_str} — up {h:02d}:{m:02d}:{s:02d} — {ENDPOINT}{RESET}" + ) + lines.append("") + sync = 
fetch("admin_syncStatus") + data = fetch("admin_peerScores") + + if sync: + phase = sync.get("current_phase") or "idle" + pivot = sync.get("pivot_block_number") or "?" + age = sync.get("pivot_age_seconds") + threshold = sync.get("staleness_threshold_seconds", 0) + progress = sync.get("phase_progress", {}) + age_str = f"{age}s" if age else "?" + + # Color staleness margin + if age and threshold: + margin_secs = threshold - age + if margin_secs < 0: + margin_color = RED + elif margin_secs < 300: + margin_color = YELLOW + else: + margin_color = GREEN + margin = f"{margin_color}({margin_secs}s to stale){RESET}" + else: + margin = "" + + lines.append( + f"{BOLD}Phase:{RESET} {CYAN}{phase}{RESET} " + f"{BOLD}Pivot:{RESET} {pivot} " + f"{BOLD}Age:{RESET} {age_str} {margin}" + ) + if progress: + parts = [f"{k}={v:,}" for k, v in progress.items()] + lines.append(f"{DIM}Progress: {', '.join(parts)}{RESET}") + + # Pivot update history + pivot_changes = sync.get("recent_pivot_changes", []) + if pivot_changes: + lines.append("") + lines.append(f"{BOLD}Pivot History:{RESET} (last {len(pivot_changes)})") + for pc in pivot_changes[-5:]: # show last 5 + ts = pc.get("timestamp", 0) + ts_str = time.strftime("%H:%M:%S", time.localtime(ts)) if ts else "?" + old_n = pc.get("old_pivot_number", "?") + new_n = pc.get("new_pivot_number", "?") + outcome = pc.get("outcome", "?") + reason = pc.get("failure_reason", "") + if outcome == "success": + icon = f"{GREEN}\u2713{RESET}" + else: + icon = f"{RED}\u2717{RESET}" + if reason: + reason = f" {RED}{reason}{RESET}" + lines.append( + f" {icon} {DIM}{ts_str}{RESET} {old_n} \u2192 {new_n} [{outcome}]{reason}" + ) + + # Recent errors + errors = sync.get("recent_errors", []) + if errors: + lines.append("") + lines.append(f"{BOLD}Recent Errors:{RESET} (last {len(errors)})") + for err in errors[-3:]: # show last 3 + ts = err.get("timestamp", 0) + ts_str = time.strftime("%H:%M:%S", time.localtime(ts)) if ts else "?" 
+ msg = err.get("error_message", "?")[:60] + recov = f"{GREEN}recoverable{RESET}" if err.get("recoverable") else f"{RED}irrecoverable{RESET}" + lines.append(f" {DIM}{ts_str}{RESET} {msg} [{recov}]") + + lines.append("") + + if not data: + lines.append(f"{RED}Node not reachable{RESET}") + return lines + + s = data["summary"] + peers = data["peers"] + + # Color eligible count + elig_count = s["eligible_peers"] + if elig_count < 5: + elig_color = RED + elif elig_count < 20: + elig_color = YELLOW + else: + elig_color = GREEN + + lines.append( + f"{BOLD}Peers:{RESET} {s['total_peers']} " + f"{BOLD}Eligible:{RESET} {elig_color}{elig_count}{RESET} " + f"{BOLD}Avg Score:{RESET} {s['average_score']} " + f"{BOLD}Inflight:{RESET} {s['total_inflight_requests']}" + ) + lines.append("") + lines.append( + f"{DIM}{'Peer ID':>14} {'Score':>6} {'Reqs':>5} {'Elig':>5}" + f" {'Capabilities':>22} {'Dir':>4} {'Client':>35}{RESET}" + ) + lines.append(f"{DIM}{'-' * 97}{RESET}") + + new_scores = {} + for p in sorted(peers, key=lambda x: x["score"], reverse=True): + pid_full = p["peer_id"] + pid = pid_full[:6] + ".." + pid_full[-4:] + score = p["score"] + new_scores[pid_full] = score + + score_str = color_score(pid_full, score) + + # Group capabilities by protocol + by_proto = {} + for c in p["capabilities"]: + parts = c.split("/") + proto = parts[0] + ver = parts[1] if len(parts) > 1 else "?" 
+ by_proto.setdefault(proto, []).append(ver) + caps = " ".join(f"{k}/{','.join(vs)}" for k, vs in by_proto.items()) + client = p["client_version"][:35] + d = p["connection_direction"][:3] + + if p["eligible"]: + elig = f"{GREEN}\u2713{RESET}" + else: + elig = f"{RED}\u2717{RESET}" + + reqs = p["inflight_requests"] + reqs_str = f"{YELLOW}{reqs:>5}{RESET}" if reqs > 0 else f"{reqs:>5}" + + lines.append( + f"{pid:>14} {score_str} {reqs_str}" + f" {elig:>14} {caps:>22} {d:>4} {DIM}{client:>35}{RESET}" + ) + + prev_scores = new_scores + return lines + + +def cleanup(*_): + sys.stdout.write("\033[?1049l\033[?25h") + sys.stdout.flush() + sys.exit(0) + + +signal.signal(signal.SIGINT, cleanup) +signal.signal(signal.SIGTERM, cleanup) + +sys.stdout.write("\033[?1049h\033[?25l\033[2J") +sys.stdout.flush() + +try: + prev_line_count = 0 + while True: + lines = render() + try: + term_rows = os.get_terminal_size().lines + except OSError: + term_rows = 40 + if len(lines) > term_rows - 2: + hidden = len(lines) - term_rows + 3 + lines = lines[: term_rows - 3] + lines.append(f" {DIM}... 
{hidden} more peers (resize terminal to see all){RESET}") + buf = "\033[H" + for line in lines: + buf += f"{line}\033[K\n" + for _ in range(max(0, prev_line_count - len(lines))): + buf += "\033[K\n" + sys.stdout.write(buf) + sys.stdout.flush() + prev_line_count = len(lines) + time.sleep(INTERVAL) +except Exception: + cleanup() diff --git a/tooling/sync/peer_top.sh b/tooling/sync/peer_top.sh new file mode 100644 index 00000000000..ba8bf5cb6a2 --- /dev/null +++ b/tooling/sync/peer_top.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Usage: ./peer_top.sh [endpoint] [interval] +# Example: ./peer_top.sh http://localhost:18547 1 + +ENDPOINT="${1:-http://localhost:18547}" +INTERVAL="${2:-1}" + +watch -n "$INTERVAL" "curl -s -X POST $ENDPOINT \ + -H 'Content-Type: application/json' \ + -d '{\"jsonrpc\":\"2.0\",\"method\":\"admin_peerScores\",\"params\":[],\"id\":1}' \ + | python3 -c ' +import json, sys +try: + d = json.load(sys.stdin)[\"result\"] + s = d[\"summary\"] + print(f\"Peers: {s[\"total_peers\"]} Eligible: {s[\"eligible_peers\"]} Avg Score: {s[\"average_score\"]} Inflight: {s[\"total_inflight_requests\"]}\") + print() + print(f\"{\"Peer ID\":>14} {\"Score\":>6} {\"Reqs\":>5} {\"Elig\":>5} {\"Caps\":>12} {\"Dir\":>8} {\"Client\":>30}\") + print(\"-\" * 86) + for p in sorted(d[\"peers\"], key=lambda x: x[\"score\"], reverse=True): + pid = p[\"peer_id\"][:6] + \"..\" + p[\"peer_id\"][-4:] + caps = \",\".join(p[\"capabilities\"]) + client = p[\"client_version\"][:30] + d2 = p[\"connection_direction\"][:3] + print(f\"{pid:>14} {p[\"score\"]:>6} {p[\"inflight_requests\"]:>5} {str(p[\"eligible\"]):>5} {caps:>12} {d2:>8} {client:>30}\") +except Exception as e: + print(f\"Error: {e}\") +'" From 3bbfdc8bb5132a8f8b46ce9330515c0a99dcd602 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Mon, 13 Apr 2026 18:32:09 -0300 Subject: [PATCH 09/31] =?UTF-8?q?feat(l1):=20peer=5Ftop=20responsive=20lay?= =?UTF-8?q?out=20=E2=80=94=20use=20full=20terminal=20width?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tooling/sync/peer_top.py | 116 +++++++++++++++++++++++++-------------- 1 file changed, 75 insertions(+), 41 deletions(-) diff --git a/tooling/sync/peer_top.py b/tooling/sync/peer_top.py index 185a2b7130a..48a1b49daa6 100644 --- a/tooling/sync/peer_top.py +++ b/tooling/sync/peer_top.py @@ -54,7 +54,18 @@ def color_score(peer_id: str, score: int) -> str: return f"{color}{score:>4} {RESET}" -def render(): +def trim_client(client: str, width: int) -> str: + """Trim client/version string to width. When very tight, show just the client name.""" + if len(client) <= width: + return client + if width < 10: + # Space is tight — drop version info, keep the client name + name = client.split("/")[0] + return name[:width] + return client[: width - 1] + "\u2026" + + +def render(term_cols: int): global prev_scores lines = [] elapsed = int(time.time() - start_time) @@ -156,11 +167,23 @@ def render(): f"{BOLD}Inflight:{RESET} {s['total_inflight_requests']}" ) lines.append("") + + # Column widths — fixed columns + dynamic Capabilities / Client + # Layout: PID Score Reqs Elig Caps Dir Client + W_PID, W_SCORE, W_REQS, W_ELIG, W_DIR = 14, 6, 5, 4, 4 + SEPARATORS = 6 # one space between each of the 7 columns + fixed = W_PID + W_SCORE + W_REQS + W_ELIG + W_DIR + SEPARATORS # = 39 + # Budget for Caps + Client. Leave 1 char right-margin. 
+ budget = max(20, term_cols - fixed - 1) + W_CAPS = max(12, min(22, budget - 10)) # caps capped at 22, min 12 + W_CLIENT = max(8, budget - W_CAPS) + lines.append( - f"{DIM}{'Peer ID':>14} {'Score':>6} {'Reqs':>5} {'Elig':>5}" - f" {'Capabilities':>22} {'Dir':>4} {'Client':>35}{RESET}" + f"{DIM}{'Peer ID':>{W_PID}} {'Score':>{W_SCORE}} {'Reqs':>{W_REQS}}" + f" {'Elig':>{W_ELIG}} {'Capabilities':<{W_CAPS}} {'Dir':>{W_DIR}}" + f" {'Client':<{W_CLIENT}}{RESET}" ) - lines.append(f"{DIM}{'-' * 97}{RESET}") + lines.append(f"{DIM}{'-' * (fixed + W_CAPS + W_CLIENT)}{RESET}") new_scores = {} for p in sorted(peers, key=lambda x: x["score"], reverse=True): @@ -179,20 +202,23 @@ def render(): ver = parts[1] if len(parts) > 1 else "?" by_proto.setdefault(proto, []).append(ver) caps = " ".join(f"{k}/{','.join(vs)}" for k, vs in by_proto.items()) - client = p["client_version"][:35] + if len(caps) > W_CAPS: + caps = caps[: W_CAPS - 1] + "\u2026" + client = trim_client(p["client_version"], W_CLIENT) d = p["connection_direction"][:3] - if p["eligible"]: - elig = f"{GREEN}\u2713{RESET}" - else: - elig = f"{RED}\u2717{RESET}" + elig_char = "\u2713" if p["eligible"] else "\u2717" + elig_col = GREEN if p["eligible"] else RED + # Visible-width 1, right-aligned in W_ELIG column + elig_str = f"{' ' * (W_ELIG - 1)}{elig_col}{elig_char}{RESET}" reqs = p["inflight_requests"] - reqs_str = f"{YELLOW}{reqs:>5}{RESET}" if reqs > 0 else f"{reqs:>5}" + reqs_str = f"{YELLOW}{reqs:>{W_REQS}}{RESET}" if reqs > 0 else f"{reqs:>{W_REQS}}" lines.append( - f"{pid:>14} {score_str} {reqs_str}" - f" {elig:>14} {caps:>22} {d:>4} {DIM}{client:>35}{RESET}" + f"{pid:>{W_PID}} {score_str} {reqs_str}" + f" {elig_str} {caps:<{W_CAPS}} {d:>{W_DIR}}" + f" {DIM}{client:<{W_CLIENT}}{RESET}" ) prev_scores = new_scores @@ -205,32 +231,40 @@ def cleanup(*_): sys.exit(0) -signal.signal(signal.SIGINT, cleanup) -signal.signal(signal.SIGTERM, cleanup) - -sys.stdout.write("\033[?1049h\033[?25l\033[2J") -sys.stdout.flush() - 
-try: - prev_line_count = 0 - while True: - lines = render() - try: - term_rows = os.get_terminal_size().lines - except OSError: - term_rows = 40 - if len(lines) > term_rows - 2: - hidden = len(lines) - term_rows + 3 - lines = lines[: term_rows - 3] - lines.append(f" {DIM}... {hidden} more peers (resize terminal to see all){RESET}") - buf = "\033[H" - for line in lines: - buf += f"{line}\033[K\n" - for _ in range(max(0, prev_line_count - len(lines))): - buf += "\033[K\n" - sys.stdout.write(buf) - sys.stdout.flush() - prev_line_count = len(lines) - time.sleep(INTERVAL) -except Exception: - cleanup() +def main(): + signal.signal(signal.SIGINT, cleanup) + signal.signal(signal.SIGTERM, cleanup) + + sys.stdout.write("\033[?1049h\033[?25l\033[2J") + sys.stdout.flush() + + try: + prev_line_count = 0 + while True: + try: + size = os.get_terminal_size() + term_rows, term_cols = size.lines, size.columns + except OSError: + term_rows, term_cols = 40, 120 + lines = render(term_cols) + if len(lines) > term_rows - 2: + hidden = len(lines) - term_rows + 3 + lines = lines[: term_rows - 3] + lines.append( + f" {DIM}... 
{hidden} more peers (resize terminal to see all){RESET}" + ) + buf = "\033[H" + for line in lines: + buf += f"{line}\033[K\n" + for _ in range(max(0, prev_line_count - len(lines))): + buf += "\033[K\n" + sys.stdout.write(buf) + sys.stdout.flush() + prev_line_count = len(lines) + time.sleep(INTERVAL) + except Exception: + cleanup() + + +if __name__ == "__main__": + main() From 32695435e6bd4a02325c71fe35631bbb8fba16bd Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 08:29:27 -0300 Subject: [PATCH 10/31] =?UTF-8?q?fix(l1):=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20clippy,=20metrics=20pattern,=20monitor=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - fix clippy redundant closure in formatter.rs - fix unnecessary u64 casts in sync_manager.rs - migrate MetricsSync to default prometheus registry (Pattern B) so new sync metrics register once at init instead of per-gather. Removes gather_metrics() — exported via gather_default_metrics() automatically - fix admin_syncStatus reporting stale phase after recoverable error - fix on_failure() poll bypass — add force param so final poll executes even when instance status is already "failed" - fix peer_top.sh Python 3.12+ f-string syntax — use str.format() - remove dead DEGRADATION_STALL_TIMEOUT constant --- crates/blockchain/metrics/api.rs | 9 +-- crates/blockchain/metrics/sync.rs | 90 ++++++++------------------- crates/networking/p2p/sync.rs | 1 + crates/networking/p2p/sync_manager.rs | 6 +- tooling/repl/src/formatter.rs | 2 +- tooling/sync/docker_monitor.py | 13 ++-- tooling/sync/peer_top.sh | 26 +++++--- 7 files changed, 54 insertions(+), 93 deletions(-) diff --git a/crates/blockchain/metrics/api.rs b/crates/blockchain/metrics/api.rs index e57451274ff..a08d458585d 100644 --- a/crates/blockchain/metrics/api.rs +++ b/crates/blockchain/metrics/api.rs @@ -2,7 +2,7 @@ use axum::{Router, routing::get}; use crate::{ MetricsApiError, 
blocks::METRICS_BLOCKS, gather_default_metrics, node::METRICS_NODE, - p2p::METRICS_P2P, process::METRICS_PROCESS, sync::METRICS_SYNC, transactions::METRICS_TX, + p2p::METRICS_P2P, process::METRICS_PROCESS, transactions::METRICS_TX, }; pub async fn start_prometheus_metrics_api( @@ -54,11 +54,8 @@ pub(crate) async fn get_metrics() -> String { Err(_) => tracing::error!("Failed to gather METRICS_P2P"), }; - ret_string.push('\n'); - match METRICS_SYNC.gather_metrics() { - Ok(s) => ret_string.push_str(&s), - Err(_) => tracing::error!("Failed to gather METRICS_SYNC"), - }; + // METRICS_SYNC uses the default Prometheus registry (Pattern B), + // so its metrics are already included in gather_default_metrics() above. ret_string.push('\n'); if let Some(node_metrics) = METRICS_NODE.get() { diff --git a/crates/blockchain/metrics/sync.rs b/crates/blockchain/metrics/sync.rs index f51ce96a9e6..e0d14fc2743 100644 --- a/crates/blockchain/metrics/sync.rs +++ b/crates/blockchain/metrics/sync.rs @@ -1,7 +1,8 @@ -use prometheus::{Encoder, IntCounterVec, IntGauge, Opts, Registry, TextEncoder}; +use prometheus::{IntCounterVec, IntGauge, register_int_counter_vec, register_int_gauge}; use std::sync::LazyLock; -use crate::MetricsError; +// Metrics defined in this module register into the Prometheus default registry. +// The metrics API exposes them via `gather_default_metrics()`. 
pub static METRICS_SYNC: LazyLock = LazyLock::new(MetricsSync::default); @@ -29,53 +30,47 @@ impl Default for MetricsSync { impl MetricsSync { pub fn new() -> Self { MetricsSync { - eligible_peers: IntGauge::new( + eligible_peers: register_int_gauge!( "ethrex_sync_eligible_peers", - "Number of peers eligible for requests (passing can_try_more_requests)", + "Number of peers eligible for requests (passing can_try_more_requests)" ) .expect("Failed to create eligible_peers metric"), - snap_peers: IntGauge::new( + snap_peers: register_int_gauge!( "ethrex_sync_snap_peers", - "Number of connected peers supporting the snap protocol", + "Number of connected peers supporting the snap protocol" ) .expect("Failed to create snap_peers metric"), - inflight_requests: IntGauge::new( + inflight_requests: register_int_gauge!( "ethrex_sync_inflight_requests", - "Total inflight requests across all peers", + "Total inflight requests across all peers" ) .expect("Failed to create inflight_requests metric"), - pivot_age_seconds: IntGauge::new( + pivot_age_seconds: register_int_gauge!( "ethrex_sync_pivot_age_seconds", - "Age of the current pivot block in seconds", + "Age of the current pivot block in seconds" ) .expect("Failed to create pivot_age_seconds metric"), - current_phase: IntGauge::new( + current_phase: register_int_gauge!( "ethrex_sync_current_phase", - "Current snap sync phase (0=idle, 1=headers, 2=account_ranges, 3=account_insertion, 4=storage_ranges, 5=storage_insertion, 6=healing, 7=bytecodes)", + "Current snap sync phase (0=idle, 1=headers, 2=account_ranges, 3=account_insertion, 4=storage_ranges, 5=storage_insertion, 6=healing, 7=bytecodes)" ) .expect("Failed to create current_phase metric"), - pivot_updates: IntCounterVec::new( - Opts::new( - "ethrex_sync_pivot_updates_total", - "Total pivot update attempts by outcome", - ), - &["outcome"], + pivot_updates: register_int_counter_vec!( + "ethrex_sync_pivot_updates_total", + "Total pivot update attempts by outcome", + 
&["outcome"] ) .expect("Failed to create pivot_updates metric"), - storage_requests: IntCounterVec::new( - Opts::new( - "ethrex_sync_storage_requests_total", - "Total storage range requests by outcome", - ), - &["outcome"], + storage_requests: register_int_counter_vec!( + "ethrex_sync_storage_requests_total", + "Total storage range requests by outcome", + &["outcome"] ) .expect("Failed to create storage_requests metric"), - header_resolution: IntCounterVec::new( - Opts::new( - "ethrex_sync_header_resolution_total", - "Total header resolution attempts by outcome", - ), - &["outcome"], + header_resolution: register_int_counter_vec!( + "ethrex_sync_header_resolution_total", + "Total header resolution attempts by outcome", + &["outcome"] ) .expect("Failed to create header_resolution metric"), } @@ -116,39 +111,4 @@ impl MetricsSync { pub fn inc_header_resolution(&self, outcome: &str) { self.header_resolution.with_label_values(&[outcome]).inc(); } - - // --- Gather --- - - pub fn gather_metrics(&self) -> Result { - let r = Registry::new(); - - r.register(Box::new(self.eligible_peers.clone())) - .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; - r.register(Box::new(self.snap_peers.clone())) - .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; - r.register(Box::new(self.inflight_requests.clone())) - .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; - r.register(Box::new(self.pivot_age_seconds.clone())) - .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; - r.register(Box::new(self.current_phase.clone())) - .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; - r.register(Box::new(self.pivot_updates.clone())) - .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; - r.register(Box::new(self.storage_requests.clone())) - .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; - r.register(Box::new(self.header_resolution.clone())) - .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; - - let encoder = TextEncoder::new(); 
- let metric_families = r.gather(); - - let mut buffer = Vec::new(); - encoder - .encode(&metric_families, &mut buffer) - .map_err(|e| MetricsError::PrometheusErr(e.to_string()))?; - - let res = String::from_utf8(buffer)?; - - Ok(res) - } } diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 95a6e9f809c..2d0fca5cbf3 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -161,6 +161,7 @@ impl Syncer { // If the error is irrecoverable, we exit ethrex Err(error) => { let recoverable = error.is_recoverable(); + self.diagnostics.write().await.current_phase = "idle".to_string(); debug!( error_type = %error, recoverable = recoverable, diff --git a/crates/networking/p2p/sync_manager.rs b/crates/networking/p2p/sync_manager.rs index 6866f02cc42..c124d581c0d 100644 --- a/crates/networking/p2p/sync_manager.rs +++ b/crates/networking/p2p/sync_manager.rs @@ -141,11 +141,11 @@ impl SyncManager { } // Populate live progress from METRICS atomics - let headers = METRICS.downloaded_headers.get() as u64; + let headers = METRICS.downloaded_headers.get(); let accounts_downloaded = METRICS.downloaded_account_tries.load(Relaxed); let accounts_inserted = METRICS.account_tries_inserted.load(Relaxed); - let storage_downloaded = METRICS.storage_leaves_downloaded.get() as u64; - let storage_inserted = METRICS.storage_leaves_inserted.get() as u64; + let storage_downloaded = METRICS.storage_leaves_downloaded.get(); + let storage_inserted = METRICS.storage_leaves_inserted.get(); if headers > 0 { diag.phase_progress diff --git a/tooling/repl/src/formatter.rs b/tooling/repl/src/formatter.rs index f7c1049fe9e..e343a27be7e 100644 --- a/tooling/repl/src/formatter.rs +++ b/tooling/repl/src/formatter.rs @@ -184,7 +184,7 @@ fn format_object_array_table(arr: &[Value]) -> String { .map(|item| { columns .iter() - .map(|col| item.get(col).map(|v| inline_value(v)).unwrap_or_default()) + .map(|col| item.get(col).map(inline_value).unwrap_or_default()) 
.collect() }) .collect(); diff --git a/tooling/sync/docker_monitor.py b/tooling/sync/docker_monitor.py index bd5902f5c27..dc879449bba 100644 --- a/tooling/sync/docker_monitor.py +++ b/tooling/sync/docker_monitor.py @@ -67,7 +67,6 @@ DIAGNOSTICS_NORMAL_BUFFER_SIZE = 20 # snapshots kept in normal mode DIAGNOSTICS_DEGRADED_BUFFER_SIZE = 60 # snapshots kept in degraded mode DEGRADATION_ELIGIBLE_PEERS_THRESHOLD = 5 # trigger if eligible peers below this -DEGRADATION_STALL_TIMEOUT = 60 # trigger if zero progress for this many seconds DEGRADATION_STALENESS_RATIO = 0.8 # trigger if pivot age > 80% of threshold DEGRADATION_RECOVERY_TIMEOUT = 60 # seconds of health before leaving degraded mode LOG_LEVEL_NORMAL = "info,ethrex_p2p::sync=debug" @@ -97,11 +96,11 @@ def buffer_limit(self, name: str) -> int: def should_poll(self, name: str) -> bool: return (time.time() - self.last_poll[name]) >= self.poll_interval(name) - def poll(self, inst) -> None: + def poll(self, inst, force: bool = False) -> None: """Poll diagnostics RPC endpoints for a single instance.""" - if inst.status in ("success", "failed", "waiting"): + if not force and inst.status in ("success", "failed", "waiting"): return - if not self.should_poll(inst.name): + if not force and not self.should_poll(inst.name): return self.last_poll[inst.name] = time.time() @@ -146,8 +145,7 @@ def _check_alert_conditions(self, inst, snapshot: dict) -> None: progress_key = str(snapshot["sync_status"].get("phase_progress", {})) if phase not in ("idle", ""): if self._last_progress[name] is not None and self._last_progress[name] == progress_key: - # No progress change — but we only flag after DEGRADATION_STALL_TIMEOUT - pass # tracked by the outer loop via last_block + pass # stall detection not yet implemented self._last_progress[name] = progress_key # Check staleness ratio @@ -208,8 +206,7 @@ def _check_alert_conditions(self, inst, snapshot: dict) -> None: def on_failure(self, inst, name: str) -> None: """Called when a network fails 
— do a final poll and dump snapshots.""" # Do one last poll to capture the state at failure time - self.last_poll[name] = 0 # force immediate poll - self.poll(inst) + self.poll(inst, force=True) # Bump log level to capture any post-failure details rpc_set_log_level(inst.rpc_url, LOG_LEVEL_DEGRADED) # Always dump on failure, even if previously dumped for degradation diff --git a/tooling/sync/peer_top.sh b/tooling/sync/peer_top.sh index ba8bf5cb6a2..e6cef0ae100 100644 --- a/tooling/sync/peer_top.sh +++ b/tooling/sync/peer_top.sh @@ -1,28 +1,34 @@ #!/bin/bash +# Lightweight peer viewer (no 'requests' dependency) — uses watch + curl. +# For the full TUI, use: python3 peer_top.py [endpoint] +# # Usage: ./peer_top.sh [endpoint] [interval] # Example: ./peer_top.sh http://localhost:18547 1 ENDPOINT="${1:-http://localhost:18547}" INTERVAL="${2:-1}" -watch -n "$INTERVAL" "curl -s -X POST $ENDPOINT \ - -H 'Content-Type: application/json' \ - -d '{\"jsonrpc\":\"2.0\",\"method\":\"admin_peerScores\",\"params\":[],\"id\":1}' \ - | python3 -c ' +watch -n "$INTERVAL" 'curl -s -X POST '"$ENDPOINT"' \ + -H "Content-Type: application/json" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"admin_peerScores\",\"params\":[],\"id\":1}" \ + | python3 -c " import json, sys try: d = json.load(sys.stdin)[\"result\"] s = d[\"summary\"] - print(f\"Peers: {s[\"total_peers\"]} Eligible: {s[\"eligible_peers\"]} Avg Score: {s[\"average_score\"]} Inflight: {s[\"total_inflight_requests\"]}\") + print(\"Peers: {} Eligible: {} Avg Score: {} Inflight: {}\".format( + s[\"total_peers\"], s[\"eligible_peers\"], s[\"average_score\"], s[\"total_inflight_requests\"])) print() - print(f\"{\"Peer ID\":>14} {\"Score\":>6} {\"Reqs\":>5} {\"Elig\":>5} {\"Caps\":>12} {\"Dir\":>8} {\"Client\":>30}\") + print(\"{:>14} {:>6} {:>5} {:>5} {:>12} {:>8} {:>30}\".format( + \"Peer ID\", \"Score\", \"Reqs\", \"Elig\", \"Caps\", \"Dir\", \"Client\")) print(\"-\" * 86) for p in sorted(d[\"peers\"], key=lambda x: x[\"score\"], 
reverse=True): pid = p[\"peer_id\"][:6] + \"..\" + p[\"peer_id\"][-4:] - caps = \",\".join(p[\"capabilities\"]) + caps = \",\".join(p[\"capabilities\"])[:12] client = p[\"client_version\"][:30] d2 = p[\"connection_direction\"][:3] - print(f\"{pid:>14} {p[\"score\"]:>6} {p[\"inflight_requests\"]:>5} {str(p[\"eligible\"]):>5} {caps:>12} {d2:>8} {client:>30}\") + print(\"{:>14} {:>6} {:>5} {:>5} {:>12} {:>8} {:>30}\".format( + pid, p[\"score\"], p[\"inflight_requests\"], p[\"eligible\"], caps, d2, client)) except Exception as e: - print(f\"Error: {e}\") -'" + print(\"Error: {}\".format(e)) +"' From 68c9d9b901b20e8430fc6c56131886f6837b01f7 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 08:31:31 -0300 Subject: [PATCH 11/31] =?UTF-8?q?chore(l1):=20remove=20redundant=20peer=5F?= =?UTF-8?q?top.sh=20=E2=80=94=20peer=5Ftop.py=20is=20the=20canonical=20too?= =?UTF-8?q?l?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tooling/sync/peer_top.sh | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 tooling/sync/peer_top.sh diff --git a/tooling/sync/peer_top.sh b/tooling/sync/peer_top.sh deleted file mode 100644 index e6cef0ae100..00000000000 --- a/tooling/sync/peer_top.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -# Lightweight peer viewer (no 'requests' dependency) — uses watch + curl. 
-# For the full TUI, use: python3 peer_top.py [endpoint] -# -# Usage: ./peer_top.sh [endpoint] [interval] -# Example: ./peer_top.sh http://localhost:18547 1 - -ENDPOINT="${1:-http://localhost:18547}" -INTERVAL="${2:-1}" - -watch -n "$INTERVAL" 'curl -s -X POST '"$ENDPOINT"' \ - -H "Content-Type: application/json" \ - -d "{\"jsonrpc\":\"2.0\",\"method\":\"admin_peerScores\",\"params\":[],\"id\":1}" \ - | python3 -c " -import json, sys -try: - d = json.load(sys.stdin)[\"result\"] - s = d[\"summary\"] - print(\"Peers: {} Eligible: {} Avg Score: {} Inflight: {}\".format( - s[\"total_peers\"], s[\"eligible_peers\"], s[\"average_score\"], s[\"total_inflight_requests\"])) - print() - print(\"{:>14} {:>6} {:>5} {:>5} {:>12} {:>8} {:>30}\".format( - \"Peer ID\", \"Score\", \"Reqs\", \"Elig\", \"Caps\", \"Dir\", \"Client\")) - print(\"-\" * 86) - for p in sorted(d[\"peers\"], key=lambda x: x[\"score\"], reverse=True): - pid = p[\"peer_id\"][:6] + \"..\" + p[\"peer_id\"][-4:] - caps = \",\".join(p[\"capabilities\"])[:12] - client = p[\"client_version\"][:30] - d2 = p[\"connection_direction\"][:3] - print(\"{:>14} {:>6} {:>5} {:>5} {:>12} {:>8} {:>30}\".format( - pid, p[\"score\"], p[\"inflight_requests\"], p[\"eligible\"], caps, d2, client)) -except Exception as e: - print(\"Error: {}\".format(e)) -"' From 14b22c5da227be20f392c41308dfc298e316290f Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 08:45:15 -0300 Subject: [PATCH 12/31] refactor(l1): make watched phases configurable in monitor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename hardcoded "healing" trigger to configurable WATCHED_PHASES set. Phases in this set get TRACE logging and fast polling — useful for investigating specific sync stages without editing the script. Default: healing (current investigation target). Override via --watched-phases flag, e.g. 
--watched-phases "healing,storage_insertion" --- tooling/sync/docker_monitor.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tooling/sync/docker_monitor.py b/tooling/sync/docker_monitor.py index dc879449bba..6d66e86dc7d 100644 --- a/tooling/sync/docker_monitor.py +++ b/tooling/sync/docker_monitor.py @@ -69,6 +69,7 @@ DEGRADATION_ELIGIBLE_PEERS_THRESHOLD = 5 # trigger if eligible peers below this DEGRADATION_STALENESS_RATIO = 0.8 # trigger if pivot age > 80% of threshold DEGRADATION_RECOVERY_TIMEOUT = 60 # seconds of health before leaving degraded mode +WATCHED_PHASES: set[str] = {"healing"} # phases that warrant closer monitoring (TRACE + fast polling) LOG_LEVEL_NORMAL = "info,ethrex_p2p::sync=debug" LOG_LEVEL_DEGRADED = "info,ethrex_p2p=trace" @@ -157,8 +158,8 @@ def _check_alert_conditions(self, inst, snapshot: dict) -> None: reasons.append(f"staleness_ratio={ratio:.2f}") # Healing phase is high-risk for pivot failures — increase polling - if phase == "healing": - reasons.append("healing_phase") + if phase in WATCHED_PHASES: + reasons.append(f"watched_phase:{phase}") if reasons: if not self.degraded[name]: @@ -911,8 +912,14 @@ def main(): help="Docker image tag to build") p.add_argument("--ethrex-dir", default=os.environ.get("ETHREX_DIR", "../.."), help="Path to ethrex repository root") + p.add_argument("--watched-phases", default=",".join(WATCHED_PHASES), + help="Comma-separated sync phases that trigger TRACE logging and fast polling (default: healing)") args = p.parse_args() + # Apply CLI override in place: a 'global' statement after the name was already read above is a SyntaxError + WATCHED_PHASES.clear() + WATCHED_PHASES.update(p.strip() for p in args.watched_phases.split(",") if p.strip()) + # Resolve ethrex directory to absolute path ethrex_dir = os.path.abspath(args.ethrex_dir) From e217075985f5bbd8ec896fbaa360488997df6d19 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 08:47:35 -0300 Subject: [PATCH 13/31] =?UTF-8?q?fix(l1):=20default=20watched=20phases=20t?=
=?UTF-8?q?o=20empty=20=E2=80=94=20opt-in,=20not=20opt-out?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tooling/sync/docker_monitor.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tooling/sync/docker_monitor.py b/tooling/sync/docker_monitor.py index 6d66e86dc7d..49be8bc56f7 100644 --- a/tooling/sync/docker_monitor.py +++ b/tooling/sync/docker_monitor.py @@ -69,7 +69,16 @@ DEGRADATION_ELIGIBLE_PEERS_THRESHOLD = 5 # trigger if eligible peers below this DEGRADATION_STALENESS_RATIO = 0.8 # trigger if pivot age > 80% of threshold DEGRADATION_RECOVERY_TIMEOUT = 60 # seconds of health before leaving degraded mode -WATCHED_PHASES: set[str] = {"healing"} # phases that warrant closer monitoring (TRACE + fast polling) +# Watched phases: sync phases that warrant closer monitoring. +# When the node enters a watched phase, the monitor bumps the log level to +# TRACE (via admin_setLogLevel) and switches to fast polling (5s intervals). +# This is useful for investigating specific sync stages — e.g. "healing" is +# where pivot-update failures tend to occur. +# +# Default: empty (no phases watched). 
Set via --watched-phases CLI flag: +# --watched-phases "healing" +# --watched-phases "healing,storage_insertion" +WATCHED_PHASES: set[str] = set() LOG_LEVEL_NORMAL = "info,ethrex_p2p::sync=debug" LOG_LEVEL_DEGRADED = "info,ethrex_p2p=trace" From fc87aa6d5a6d6cd2974dd16f2960148c5c64b935 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 10:04:01 -0300 Subject: [PATCH 14/31] feat(l1): add MULTISYNC_WATCHED_PHASES to Makefile for opt-in TRACE --- tooling/sync/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tooling/sync/Makefile b/tooling/sync/Makefile index 0343373b30d..1f2426bad9c 100644 --- a/tooling/sync/Makefile +++ b/tooling/sync/Makefile @@ -251,6 +251,9 @@ MULTISYNC_BUILD_PROFILE ?= release-with-debug-assertions MULTISYNC_LOCAL_IMAGE ?= ethrex-local:multisync # Branch to track for auto-update mode (defaults to current branch if not set) MULTISYNC_BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD) +# Sync phases that trigger TRACE logging and fast polling in the monitor. +# Empty by default (no extra tracing). Example: MULTISYNC_WATCHED_PHASES=healing +MULTISYNC_WATCHED_PHASES ?= multisync-up: ## Start all networks specified in MULTISYNC_NETWORKS via Docker Compose. 
$(MULTISYNC_COMPOSE) up -d $(MULTISYNC_SERVICES) @@ -341,4 +344,5 @@ multisync-loop-auto: ## Continuous loop with auto-update: pull latest, build, an --branch "$(MULTISYNC_BRANCH)" \ --build-profile "$(MULTISYNC_BUILD_PROFILE)" \ --image-tag "$(MULTISYNC_LOCAL_IMAGE)" \ - --ethrex-dir "$(ETHREX_DIR)" + --ethrex-dir "$(ETHREX_DIR)" \ + $(if $(MULTISYNC_WATCHED_PHASES),--watched-phases "$(MULTISYNC_WATCHED_PHASES)") From 572fa9c99bc00cf8eb0fb1c29756ec30064208d9 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 12:13:55 -0300 Subject: [PATCH 15/31] fix(l1): remove internal jargon from code comment --- crates/blockchain/metrics/api.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/blockchain/metrics/api.rs b/crates/blockchain/metrics/api.rs index a08d458585d..9c0237645b6 100644 --- a/crates/blockchain/metrics/api.rs +++ b/crates/blockchain/metrics/api.rs @@ -54,7 +54,7 @@ pub(crate) async fn get_metrics() -> String { Err(_) => tracing::error!("Failed to gather METRICS_P2P"), }; - // METRICS_SYNC uses the default Prometheus registry (Pattern B), + // METRICS_SYNC registers into the default Prometheus registry at init, // so its metrics are already included in gather_default_metrics() above. 
ret_string.push('\n'); From 11b38ff8bdb4859a3237c4bf040c7386eb048959 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 16:33:25 -0300 Subject: [PATCH 16/31] feat(l1): add snap sync progress metrics and Grafana dashboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Incorporate progress metrics from PR #6468 (Tomi/Esteve) into the observability PR, with improvements: - Add progress gauges: headers, accounts, storage, healing, bytecodes (downloaded/inserted/total) + stage + pivot_block - Push from METRICS atomics via push_sync_prometheus_metrics() in network.rs, called each polling cycle and on phase completion - Grafana dashboard with 7 rows: overview, peer health, headers, accounts, storage, healing, bytecodes — with progress gauges, rate panels (using Grafana rate() instead of app-computed rates), and ETA - All metrics use default Prometheus registry (register at init) - New peer-health row with eligible peers, pivot age, inflight requests, and pivot update outcomes — not present in the original PR Supersedes #6468. 
--- crates/blockchain/metrics/sync.rs | 124 +- crates/networking/p2p/network.rs | 64 + .../common_dashboards/snapsync_dashboard.json | 1317 +++++++++++++++++ 3 files changed, 1480 insertions(+), 25 deletions(-) create mode 100644 metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json diff --git a/crates/blockchain/metrics/sync.rs b/crates/blockchain/metrics/sync.rs index e0d14fc2743..36abb2b7717 100644 --- a/crates/blockchain/metrics/sync.rs +++ b/crates/blockchain/metrics/sync.rs @@ -8,17 +8,31 @@ pub static METRICS_SYNC: LazyLock = LazyLock::new(MetricsSync::defa #[derive(Debug, Clone)] pub struct MetricsSync { - // Gauges — current state - eligible_peers: IntGauge, - snap_peers: IntGauge, - inflight_requests: IntGauge, - pivot_age_seconds: IntGauge, - current_phase: IntGauge, - - // Counters — cumulative outcomes - pivot_updates: IntCounterVec, - storage_requests: IntCounterVec, - header_resolution: IntCounterVec, + // --- Current state (gauges) --- + pub stage: IntGauge, + pub pivot_block: IntGauge, + pub eligible_peers: IntGauge, + pub snap_peers: IntGauge, + pub inflight_requests: IntGauge, + pub pivot_age_seconds: IntGauge, + + // --- Progress counters (gauges set from METRICS atomics) --- + // Use rate() in Grafana to derive throughput. 
+ pub headers_downloaded: IntGauge, + pub headers_total: IntGauge, + pub accounts_downloaded: IntGauge, + pub accounts_inserted: IntGauge, + pub storage_downloaded: IntGauge, + pub storage_inserted: IntGauge, + pub state_leaves_healed: IntGauge, + pub storage_leaves_healed: IntGauge, + pub bytecodes_downloaded: IntGauge, + pub bytecodes_total: IntGauge, + + // --- Outcome counters (counter vecs) --- + pub pivot_updates: IntCounterVec, + pub storage_requests: IntCounterVec, + pub header_resolution: IntCounterVec, } impl Default for MetricsSync { @@ -30,53 +44,113 @@ impl Default for MetricsSync { impl MetricsSync { pub fn new() -> Self { MetricsSync { + // Current state + stage: register_int_gauge!( + "ethrex_sync_stage", + "Current snap sync stage (0=idle, 1=headers, 2=account_ranges, 3=account_insertion, 4=storage_ranges, 5=storage_insertion, 6=state_healing, 7=storage_healing, 8=bytecodes)" + ) + .expect("Failed to create ethrex_sync_stage"), + pivot_block: register_int_gauge!( + "ethrex_sync_pivot_block", + "Current pivot block number" + ) + .expect("Failed to create ethrex_sync_pivot_block"), eligible_peers: register_int_gauge!( "ethrex_sync_eligible_peers", - "Number of peers eligible for requests (passing can_try_more_requests)" + "Number of peers eligible for requests" ) - .expect("Failed to create eligible_peers metric"), + .expect("Failed to create ethrex_sync_eligible_peers"), snap_peers: register_int_gauge!( "ethrex_sync_snap_peers", "Number of connected peers supporting the snap protocol" ) - .expect("Failed to create snap_peers metric"), + .expect("Failed to create ethrex_sync_snap_peers"), inflight_requests: register_int_gauge!( "ethrex_sync_inflight_requests", "Total inflight requests across all peers" ) - .expect("Failed to create inflight_requests metric"), + .expect("Failed to create ethrex_sync_inflight_requests"), pivot_age_seconds: register_int_gauge!( "ethrex_sync_pivot_age_seconds", "Age of the current pivot block in seconds" ) - 
.expect("Failed to create pivot_age_seconds metric"), - current_phase: register_int_gauge!( - "ethrex_sync_current_phase", - "Current snap sync phase (0=idle, 1=headers, 2=account_ranges, 3=account_insertion, 4=storage_ranges, 5=storage_insertion, 6=healing, 7=bytecodes)" + .expect("Failed to create ethrex_sync_pivot_age_seconds"), + + // Progress (set periodically from METRICS atomics) + headers_downloaded: register_int_gauge!( + "ethrex_sync_headers_downloaded", + "Headers downloaded so far" + ) + .expect("Failed to create ethrex_sync_headers_downloaded"), + headers_total: register_int_gauge!( + "ethrex_sync_headers_total", + "Total headers to download (pivot block number)" + ) + .expect("Failed to create ethrex_sync_headers_total"), + accounts_downloaded: register_int_gauge!( + "ethrex_sync_accounts_downloaded", + "Account ranges downloaded from peers" + ) + .expect("Failed to create ethrex_sync_accounts_downloaded"), + accounts_inserted: register_int_gauge!( + "ethrex_sync_accounts_inserted", + "Accounts inserted into storage" + ) + .expect("Failed to create ethrex_sync_accounts_inserted"), + storage_downloaded: register_int_gauge!( + "ethrex_sync_storage_downloaded", + "Storage leaves downloaded from peers" ) - .expect("Failed to create current_phase metric"), + .expect("Failed to create ethrex_sync_storage_downloaded"), + storage_inserted: register_int_gauge!( + "ethrex_sync_storage_inserted", + "Storage leaves inserted into storage" + ) + .expect("Failed to create ethrex_sync_storage_inserted"), + state_leaves_healed: register_int_gauge!( + "ethrex_sync_state_leaves_healed", + "State trie leaves healed" + ) + .expect("Failed to create ethrex_sync_state_leaves_healed"), + storage_leaves_healed: register_int_gauge!( + "ethrex_sync_storage_leaves_healed", + "Storage trie leaves healed" + ) + .expect("Failed to create ethrex_sync_storage_leaves_healed"), + bytecodes_downloaded: register_int_gauge!( + "ethrex_sync_bytecodes_downloaded", + "Bytecodes downloaded so 
far" + ) + .expect("Failed to create ethrex_sync_bytecodes_downloaded"), + bytecodes_total: register_int_gauge!( + "ethrex_sync_bytecodes_total", + "Total bytecodes to download" + ) + .expect("Failed to create ethrex_sync_bytecodes_total"), + + // Outcome counters pivot_updates: register_int_counter_vec!( "ethrex_sync_pivot_updates_total", "Total pivot update attempts by outcome", &["outcome"] ) - .expect("Failed to create pivot_updates metric"), + .expect("Failed to create ethrex_sync_pivot_updates_total"), storage_requests: register_int_counter_vec!( "ethrex_sync_storage_requests_total", "Total storage range requests by outcome", &["outcome"] ) - .expect("Failed to create storage_requests metric"), + .expect("Failed to create ethrex_sync_storage_requests_total"), header_resolution: register_int_counter_vec!( "ethrex_sync_header_resolution_total", "Total header resolution attempts by outcome", &["outcome"] ) - .expect("Failed to create header_resolution metric"), + .expect("Failed to create ethrex_sync_header_resolution_total"), } } - // --- Gauge setters --- + // --- Gauge setters (used by p2p sync code directly) --- pub fn set_eligible_peers(&self, count: i64) { self.eligible_peers.set(count); @@ -95,7 +169,7 @@ impl MetricsSync { } pub fn set_current_phase(&self, phase: i64) { - self.current_phase.set(phase); + self.stage.set(phase); } // --- Counter incrementers --- diff --git a/crates/networking/p2p/network.rs b/crates/networking/p2p/network.rs index 7d224c7cac3..1e988e10e45 100644 --- a/crates/networking/p2p/network.rs +++ b/crates/networking/p2p/network.rs @@ -390,6 +390,10 @@ pub async fn periodically_show_peer_stats_during_syncing( phase_elapsed_str, &phase_metrics(previous_step, &phase_start).await, ); + + // Emit final metrics for completed phase + #[cfg(feature = "metrics")] + push_sync_prometheus_metrics(previous_step); } // Start new phase @@ -416,6 +420,10 @@ pub async fn periodically_show_peer_stats_during_syncing( ) .await; + // Push progress to 
Prometheus + #[cfg(feature = "metrics")] + push_sync_prometheus_metrics(current_step); + // Update previous interval counters for next rate calculation prev_interval = PhaseCounters::capture_current(); @@ -684,6 +692,62 @@ async fn log_phase_progress( } } +/// Push snap sync progress to Prometheus gauges (from METRICS atomics). +/// Called each polling cycle. Rates are NOT computed here — use rate() in Grafana. +#[cfg(feature = "metrics")] +fn push_sync_prometheus_metrics(step: CurrentStepValue) { + use ethrex_metrics::sync::METRICS_SYNC; + use std::sync::atomic::Ordering::Relaxed; + + let (phase_num, _) = phase_info(step); + METRICS_SYNC.stage.set(phase_num as i64); + METRICS_SYNC + .pivot_block + .set(METRICS.sync_head_block.load(Relaxed) as i64); + + match step { + CurrentStepValue::DownloadingHeaders => { + let total = METRICS.sync_head_block.load(Relaxed); + let downloaded = u64::min(METRICS.downloaded_headers.get(), total); + METRICS_SYNC.headers_downloaded.set(downloaded as i64); + METRICS_SYNC.headers_total.set(total as i64); + } + CurrentStepValue::RequestingAccountRanges => { + let downloaded = METRICS.downloaded_account_tries.load(Relaxed); + METRICS_SYNC.accounts_downloaded.set(downloaded as i64); + } + CurrentStepValue::InsertingAccountRanges | CurrentStepValue::InsertingAccountRangesNoDb => { + let total = METRICS.downloaded_account_tries.load(Relaxed); + let inserted = METRICS.account_tries_inserted.load(Relaxed); + METRICS_SYNC.accounts_downloaded.set(total as i64); + METRICS_SYNC.accounts_inserted.set(inserted as i64); + } + CurrentStepValue::RequestingStorageRanges => { + let downloaded = METRICS.storage_leaves_downloaded.get(); + METRICS_SYNC.storage_downloaded.set(downloaded as i64); + } + CurrentStepValue::InsertingStorageRanges => { + let inserted = METRICS.storage_leaves_inserted.get(); + METRICS_SYNC.storage_inserted.set(inserted as i64); + } + CurrentStepValue::HealingState => { + let healed = 
METRICS.global_state_trie_leafs_healed.load(Relaxed); + METRICS_SYNC.state_leaves_healed.set(healed as i64); + } + CurrentStepValue::HealingStorage => { + let healed = METRICS.global_storage_tries_leafs_healed.load(Relaxed); + METRICS_SYNC.storage_leaves_healed.set(healed as i64); + } + CurrentStepValue::RequestingBytecodes => { + let total = METRICS.bytecodes_to_download.load(Relaxed); + let downloaded = METRICS.downloaded_bytecodes.load(Relaxed); + METRICS_SYNC.bytecodes_downloaded.set(downloaded as i64); + METRICS_SYNC.bytecodes_total.set(total as i64); + } + CurrentStepValue::None => {} + } +} + fn progress_bar(percentage: f64, width: usize) -> String { let clamped_percentage = percentage.clamp(0.0, 100.0); let filled = ((clamped_percentage / 100.0) * width as f64) as usize; diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json new file mode 100644 index 00000000000..3d85d9b6673 --- /dev/null +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json @@ -0,0 +1,1317 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "title": "Sync Overview", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [ + { "options": { "0": { "text": "Idle", "color": "text", "index": 0 } }, "type": "value" }, + { "options": { "1": { "text": "Headers", "color": "blue", "index": 1 } }, "type": 
"value" }, + { "options": { "2": { "text": "Account Ranges", "color": "orange", "index": 2 } }, "type": "value" }, + { "options": { "3": { "text": "Account Insertion", "color": "yellow", "index": 3 } }, "type": "value" }, + { "options": { "4": { "text": "Storage Ranges", "color": "purple", "index": 4 } }, "type": "value" }, + { "options": { "5": { "text": "Storage Insertion", "color": "light-purple", "index": 5 } }, "type": "value" }, + { "options": { "6": { "text": "State Healing", "color": "green", "index": 6 } }, "type": "value" }, + { "options": { "7": { "text": "Storage Healing", "color": "light-green", "index": 7 } }, "type": "value" }, + { "options": { "8": { "text": "Bytecodes", "color": "super-light-blue", "index": 8 } }, "type": "value" } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "text", "value": null }, + { "color": "blue", "value": 1 }, + { "color": "orange", "value": 2 }, + { "color": "yellow", "value": 3 }, + { "color": "purple", "value": 4 }, + { "color": "light-purple", "value": 5 }, + { "color": "green", "value": 6 }, + { "color": "light-green", "value": 7 }, + { "color": "super-light-blue", "value": 8 } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_stage{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Sync Stage", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + 
"mode": "absolute", + "steps": [ + { "color": "blue", "value": null } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_pivot_block{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Pivot Block", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 5 }, + { "color": "green", "value": 20 } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_p2p_peer_count{instance=~\"$instance\"}", + "legendFormat": "peers", + "range": true, + "refId": "A" + } + ], + "title": "Peers", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 0.1 
}, + { "color": "green", "value": 0.5 } + ] + }, + "unit": "Ggas/s" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "gigagas{instance=~\"$instance\"}", + "legendFormat": "Ggas/s", + "range": true, + "refId": "A" + } + ], + "title": "Throughput Ggas/s", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, + "id": 101, + "title": "Peer Health", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 5 }, + { "color": "green", "value": 20 } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 6, "x": 0, "y": 6 }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_eligible_peers{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "eligible", + "range": false, + "refId": "A" + } + ], + "title": "Eligible Peers", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 600 }, + { "color": "red", "value": 1200 } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 6, "x": 6, "y": 6 }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_pivot_age_seconds{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "age", + "range": false, + "refId": "A" + } + ], + "title": "Pivot Age", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 50 }, + { "color": "red", "value": 100 } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 6 }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_inflight_requests{instance=~\"$instance\"}", + "legendFormat": "inflight", + "range": true, + "refId": "A" + } + ], + "title": "Inflight Requests", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": 
[ + { "color": "green", "value": null } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 6 }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "sum(ethrex_sync_pivot_updates_total{instance=~\"$instance\"})", + "instant": true, + "legendFormat": "total", + "range": false, + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "sum(ethrex_sync_pivot_updates_total{instance=~\"$instance\", outcome=\"success\"})", + "instant": true, + "legendFormat": "success", + "range": false, + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "sum(ethrex_sync_pivot_updates_total{instance=~\"$instance\", outcome=\"failure\"})", + "instant": true, + "legendFormat": "failure", + "range": false, + "refId": "C" + } + ], + "title": "Pivot Updates", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 10 }, + "id": 102, + "title": "Header Download", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 50 }, + { "color": "green", "value": 90 } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 6, "x": 0, "y": 11 }, + "id": 9, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { 
"calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_headers_downloaded{instance=~\"$instance\"} / clamp_min(ethrex_sync_headers_total{instance=~\"$instance\"}, 1) * 100", + "instant": true, + "legendFormat": "progress", + "range": false, + "refId": "A" + } + ], + "title": "Header Progress", + "type": "gauge" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "max": 10000, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 100 }, + { "color": "green", "value": 1000 } + ] + }, + "unit": "headers/s" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 6, "x": 6, "y": 11 }, + "id": 10, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m])", + "instant": true, + "legendFormat": "rate", + "range": false, + "refId": "A" + } + ], + "title": "Headers per Second", + "type": "gauge" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 3600 }, + { "color": "red", "value": 86400 } 
+ ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 6, "x": 12, "y": 11 }, + "id": 11, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "(ethrex_sync_headers_total{instance=~\"$instance\"} - ethrex_sync_headers_downloaded{instance=~\"$instance\"}) / clamp_min(rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m]), 0.001) and rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m]) > 0", + "instant": true, + "legendFormat": "ETA", + "range": false, + "refId": "A" + } + ], + "title": "Header ETA", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "blue", "value": null } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 6, "x": 18, "y": 11 }, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_headers_downloaded{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "downloaded", + "range": false, + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_headers_total{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "total", + "range": false, + 
"refId": "B" + } + ], + "title": "Header Counts", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 18 }, + "id": 103, + "title": "Account Ranges", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 50 }, + { "color": "green", "value": 90 } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 6, "x": 0, "y": 19 }, + "id": 13, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_accounts_inserted{instance=~\"$instance\"} / clamp_min(ethrex_sync_accounts_downloaded{instance=~\"$instance\"}, 1) * 100", + "instant": true, + "legendFormat": "progress", + "range": false, + "refId": "A" + } + ], + "title": "Account Progress", + "type": "gauge" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "max": 10000, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 100 }, + { "color": "green", "value": 1000 } + ] + }, + "unit": "accounts/s" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 6, "x": 6, "y": 19 }, + "id": 14, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", 
"values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m])", + "instant": true, + "legendFormat": "rate", + "range": false, + "refId": "A" + } + ], + "title": "Accounts per Second", + "type": "gauge" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 3600 }, + { "color": "red", "value": 86400 } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 6, "x": 12, "y": 19 }, + "id": 15, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "(ethrex_sync_accounts_downloaded{instance=~\"$instance\"} - ethrex_sync_accounts_inserted{instance=~\"$instance\"}) / clamp_min(rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m]), 0.001) and rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m]) > 0", + "instant": true, + "legendFormat": "ETA", + "range": false, + "refId": "A" + } + ], + "title": "Account ETA", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "blue", "value": null } + ] + } + }, + "overrides": [] + }, + "gridPos": { 
"h": 7, "w": 6, "x": 18, "y": 19 }, + "id": 16, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_accounts_downloaded{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "downloaded", + "range": false, + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_accounts_inserted{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "inserted", + "range": false, + "refId": "B" + } + ], + "title": "Account Counts", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 26 }, + "id": 104, + "title": "Storage Ranges", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "blue", "value": null } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 27 }, + "id": 17, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_storage_downloaded{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "downloaded", + "range": false, + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + 
"expr": "ethrex_sync_storage_inserted{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "inserted", + "range": false, + "refId": "B" + } + ], + "title": "Storage Leaves", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "max": 50000, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 500 }, + { "color": "green", "value": 5000 } + ] + }, + "unit": "leaves/s" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 27 }, + "id": 18, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "rate(ethrex_sync_storage_inserted{instance=~\"$instance\"}[5m])", + "instant": true, + "legendFormat": "rate", + "range": false, + "refId": "A" + } + ], + "title": "Storage per Second", + "type": "gauge" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + 
"thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null } + ] + }, + "unit": "leaves/s" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 27 }, + "id": 19, + "options": { + "legend": { "calcs": ["mean", "lastNotNull"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "rate(ethrex_sync_storage_downloaded{instance=~\"$instance\"}[5m])", + "legendFormat": "downloaded/s", + "range": true, + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "rate(ethrex_sync_storage_inserted{instance=~\"$instance\"}[5m])", + "legendFormat": "inserted/s", + "range": true, + "refId": "B" + } + ], + "title": "Storage Rate Over Time", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 34 }, + "id": 105, + "title": "Healing", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "blue", "value": null } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 35 }, + "id": 20, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_state_leaves_healed{instance=~\"$instance\"}", + "instant": 
true, + "legendFormat": "state", + "range": false, + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_storage_leaves_healed{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "storage", + "range": false, + "refId": "B" + } + ], + "title": "Leaves Healed", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "max": 10000, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 100 }, + { "color": "green", "value": 1000 } + ] + }, + "unit": "leaves/s" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 35 }, + "id": 21, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "rate(ethrex_sync_state_leaves_healed{instance=~\"$instance\"}[5m]) + rate(ethrex_sync_storage_leaves_healed{instance=~\"$instance\"}[5m])", + "instant": true, + "legendFormat": "rate", + "range": false, + "refId": "A" + } + ], + "title": "Healing per Second", + "type": "gauge" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + 
"insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null } + ] + }, + "unit": "leaves/s" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 16, "y": 35 }, + "id": 22, + "options": { + "legend": { "calcs": ["mean", "lastNotNull"], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "rate(ethrex_sync_state_leaves_healed{instance=~\"$instance\"}[5m])", + "legendFormat": "state/s", + "range": true, + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "rate(ethrex_sync_storage_leaves_healed{instance=~\"$instance\"}[5m])", + "legendFormat": "storage/s", + "range": true, + "refId": "B" + } + ], + "title": "Healing Rate Over Time", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 42 }, + "id": 106, + "title": "Bytecodes", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 50 }, + { "color": "green", "value": 90 } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 0, "y": 43 }, + "id": 23, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + 
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"} / clamp_min(ethrex_sync_bytecodes_total{instance=~\"$instance\"}, 1) * 100", + "instant": true, + "legendFormat": "progress", + "range": false, + "refId": "A" + } + ], + "title": "Bytecode Progress", + "type": "gauge" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "max": 5000, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 50 }, + { "color": "green", "value": 500 } + ] + }, + "unit": "codes/s" + }, + "overrides": [] + }, + "gridPos": { "h": 7, "w": 8, "x": 8, "y": 43 }, + "id": 24, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "rate(ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}[5m])", + "instant": true, + "legendFormat": "rate", + "range": false, + "refId": "A" + } + ], + "title": "Bytecodes per Second", + "type": "gauge" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "blue", "value": null } + ] + } + }, + "overrides": [] + }, + "gridPos": { "h": 
7, "w": 8, "x": 16, "y": 43 }, + "id": 25, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value_and_name" + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "downloaded", + "range": false, + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "editorMode": "code", + "expr": "ethrex_sync_bytecodes_total{instance=~\"$instance\"}", + "instant": true, + "legendFormat": "total", + "range": false, + "refId": "B" + } + ], + "title": "Bytecode Counts", + "type": "stat" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": ["ethrex", "snapsync"], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "${DS_PROMETHEUS}" + }, + "hide": 2, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "type": "datasource" + }, + { + "current": { + "text": "localhost:3701", + "value": "localhost:3701" + }, + "definition": "label_values(ethrex_sync_stage, instance)", + "includeAll": false, + "multi": false, + "name": "instance", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(ethrex_sync_stage, instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "ethrex Snap Sync", + "uid": "ethrex-snapsync", + "version": 1 +} From 36e9d3d03a7a09a131530bd035151bac78f994f5 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 16:52:41 -0300 Subject: [PATCH 17/31] feat(l1): enable metrics in multisync containers, 
expose mainnet port 3701 --- tooling/sync/docker-compose.multisync.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tooling/sync/docker-compose.multisync.yaml b/tooling/sync/docker-compose.multisync.yaml index f5ab6888b7d..860ba1025e7 100644 --- a/tooling/sync/docker-compose.multisync.yaml +++ b/tooling/sync/docker-compose.multisync.yaml @@ -33,6 +33,8 @@ x-ethrex-common: &ethrex-common ulimits: nofile: 1000000 restart: unless-stopped + # Metrics are exposed via --metrics in each container's command. + # Only mainnet maps to host port 3701 (scraped by centralized Prometheus). x-consensus-common: &consensus-common image: sigp/lighthouse:v8.0.1 @@ -82,6 +84,7 @@ services: --authrpc.jwtsecret /secrets/jwt.hex --syncmode snap --datadir /data + --metrics --metrics.addr 0.0.0.0 --metrics.port 3701 depends_on: setup-jwt-hoodi: condition: service_completed_successfully @@ -129,6 +132,7 @@ services: --authrpc.jwtsecret /secrets/jwt.hex --syncmode snap --datadir /data + --metrics --metrics.addr 0.0.0.0 --metrics.port 3701 depends_on: setup-jwt-sepolia: condition: service_completed_successfully @@ -166,6 +170,7 @@ services: container_name: ethrex-mainnet ports: - "8547:8545" # RPC on different host port + - "3701:3701" # Metrics (scraped by centralized Prometheus) volumes: - secrets-mainnet:/secrets - ethrex-mainnet:/data @@ -176,6 +181,7 @@ services: --authrpc.jwtsecret /secrets/jwt.hex --syncmode snap --datadir /data + --metrics --metrics.addr 0.0.0.0 --metrics.port 3701 depends_on: setup-jwt-mainnet: condition: service_completed_successfully @@ -223,6 +229,7 @@ services: --authrpc.jwtsecret /secrets/jwt.hex --syncmode snap --datadir /data + --metrics --metrics.addr 0.0.0.0 --metrics.port 3701 depends_on: setup-jwt-hoodi-2: condition: service_completed_successfully From ef39564539945e00f35a96c0e62db9b8745ff3e4 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 16:53:57 -0300 Subject: [PATCH 18/31] fix(l1): fix global declaration order in
docker_monitor.py --- tooling/sync/docker_monitor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tooling/sync/docker_monitor.py b/tooling/sync/docker_monitor.py index 49be8bc56f7..196fe40dee0 100644 --- a/tooling/sync/docker_monitor.py +++ b/tooling/sync/docker_monitor.py @@ -903,6 +903,7 @@ def update_instance(inst: Instance, timeout_min: int) -> bool: def main(): + global WATCHED_PHASES p = argparse.ArgumentParser(description="Monitor Docker snapsync instances") p.add_argument("--networks", default="hoodi,sepolia,mainnet") p.add_argument("--timeout", type=int, default=SYNC_TIMEOUT) @@ -926,8 +927,7 @@ def main(): args = p.parse_args() # Apply CLI override for watched phases - global WATCHED_PHASES - WATCHED_PHASES = {p.strip() for p in args.watched_phases.split(",") if p.strip()} + WATCHED_PHASES = {ph.strip() for ph in args.watched_phases.split(",") if ph.strip()} # Resolve ethrex directory to absolute path ethrex_dir = os.path.abspath(args.ethrex_dir) From 9fef0cbe5eacbc2549bebd32f1f7e2e4d40cd637 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 17:18:24 -0300 Subject: [PATCH 19/31] =?UTF-8?q?fix(l1):=20expose=20pivot=5Ftimestamp=20g?= =?UTF-8?q?auge=20=E2=80=94=20Grafana=20computes=20age=20as=20time()=20-?= =?UTF-8?q?=20timestamp?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/blockchain/metrics/sync.rs | 6 + crates/networking/p2p/metrics.rs | 2 + crates/networking/p2p/network.rs | 6 + crates/networking/p2p/sync/snap_sync.rs | 6 + .../common_dashboards/snapsync_dashboard.json | 1292 ++++++++++++++--- 5 files changed, 1087 insertions(+), 225 deletions(-) diff --git a/crates/blockchain/metrics/sync.rs b/crates/blockchain/metrics/sync.rs index 36abb2b7717..dd94ea41504 100644 --- a/crates/blockchain/metrics/sync.rs +++ b/crates/blockchain/metrics/sync.rs @@ -15,6 +15,7 @@ pub struct MetricsSync { pub snap_peers: IntGauge, pub inflight_requests: IntGauge, pub 
pivot_age_seconds: IntGauge, + pub pivot_timestamp: IntGauge, // --- Progress counters (gauges set from METRICS atomics) --- // Use rate() in Grafana to derive throughput. @@ -75,6 +76,11 @@ impl MetricsSync { "Age of the current pivot block in seconds" ) .expect("Failed to create ethrex_sync_pivot_age_seconds"), + pivot_timestamp: register_int_gauge!( + "ethrex_sync_pivot_timestamp", + "Unix timestamp of the current pivot block (use time() - this for age in Grafana)" + ) + .expect("Failed to create ethrex_sync_pivot_timestamp"), // Progress (set periodically from METRICS atomics) headers_downloaded: register_int_gauge!( diff --git a/crates/networking/p2p/metrics.rs b/crates/networking/p2p/metrics.rs index e525aed8d06..35950ea4e21 100644 --- a/crates/networking/p2p/metrics.rs +++ b/crates/networking/p2p/metrics.rs @@ -57,6 +57,7 @@ pub struct Metrics { /* Snap Sync */ // Common pub sync_head_block: AtomicU64, + pub pivot_timestamp: AtomicU64, pub sync_head_hash: Arc<Mutex<H256>>, pub current_step: Arc<CurrentStep>, @@ -708,6 +709,7 @@ impl Default for Metrics { /* Snap Sync */ // Common sync_head_block: AtomicU64::new(0), + pivot_timestamp: AtomicU64::new(0), sync_head_hash: Arc::new(Mutex::new(H256::default())), current_step: Arc::new(CurrentStep(AtomicU8::new(0))), diff --git a/crates/networking/p2p/network.rs b/crates/networking/p2p/network.rs index 1e988e10e45..1d9749e03a2 100644 --- a/crates/networking/p2p/network.rs +++ b/crates/networking/p2p/network.rs @@ -705,6 +705,12 @@ fn push_sync_prometheus_metrics(step: CurrentStepValue) { .pivot_block .set(METRICS.sync_head_block.load(Relaxed) as i64); + // Push raw pivot timestamp — Grafana computes age as time() - timestamp + let pivot_ts = METRICS.pivot_timestamp.load(Relaxed); + if pivot_ts > 0 { + METRICS_SYNC.pivot_timestamp.set(pivot_ts as i64); + } + match step { CurrentStepValue::DownloadingHeaders => { let total = METRICS.sync_head_block.load(Relaxed); diff --git a/crates/networking/p2p/sync/snap_sync.rs
b/crates/networking/p2p/sync/snap_sync.rs index c5edd21e61b..1ed7b4b4d7f 100644 --- a/crates/networking/p2p/sync/snap_sync.rs +++ b/crates/networking/p2p/sync/snap_sync.rs @@ -308,6 +308,9 @@ pub async fn snap_sync( diag.pivot_age_seconds = Some(pivot_age); diag.staleness_threshold_seconds = (SNAP_LIMIT as u64) * SECONDS_PER_BLOCK; diag.sync_mode = "snap".to_string(); + METRICS + .pivot_timestamp + .store(pivot_header.timestamp, std::sync::atomic::Ordering::Relaxed); #[cfg(feature = "metrics")] ethrex_metrics::sync::METRICS_SYNC.set_pivot_age_seconds(pivot_age as i64); } @@ -827,6 +830,9 @@ pub async fn update_pivot( diag.pivot_timestamp = Some(pivot.timestamp); let pivot_age = current_unix_time().saturating_sub(pivot.timestamp); diag.pivot_age_seconds = Some(pivot_age); + METRICS + .pivot_timestamp + .store(pivot.timestamp, std::sync::atomic::Ordering::Relaxed); #[cfg(feature = "metrics")] ethrex_metrics::sync::METRICS_SYNC.set_pivot_age_seconds(pivot_age as i64); } diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json index 3d85d9b6673..28d654c0138 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json @@ -23,58 +23,190 @@ "panels": [ { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, "id": 100, "title": "Sync Overview", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [ - { "options": { "0": { "text": "Idle", "color": "text", "index": 0 } }, "type": "value" }, - { "options": { "1": { "text": "Headers", 
"color": "blue", "index": 1 } }, "type": "value" }, - { "options": { "2": { "text": "Account Ranges", "color": "orange", "index": 2 } }, "type": "value" }, - { "options": { "3": { "text": "Account Insertion", "color": "yellow", "index": 3 } }, "type": "value" }, - { "options": { "4": { "text": "Storage Ranges", "color": "purple", "index": 4 } }, "type": "value" }, - { "options": { "5": { "text": "Storage Insertion", "color": "light-purple", "index": 5 } }, "type": "value" }, - { "options": { "6": { "text": "State Healing", "color": "green", "index": 6 } }, "type": "value" }, - { "options": { "7": { "text": "Storage Healing", "color": "light-green", "index": 7 } }, "type": "value" }, - { "options": { "8": { "text": "Bytecodes", "color": "super-light-blue", "index": 8 } }, "type": "value" } + { + "options": { + "0": { + "text": "Idle", + "color": "text", + "index": 0 + } + }, + "type": "value" + }, + { + "options": { + "1": { + "text": "Headers", + "color": "blue", + "index": 1 + } + }, + "type": "value" + }, + { + "options": { + "2": { + "text": "Account Ranges", + "color": "orange", + "index": 2 + } + }, + "type": "value" + }, + { + "options": { + "3": { + "text": "Account Insertion", + "color": "yellow", + "index": 3 + } + }, + "type": "value" + }, + { + "options": { + "4": { + "text": "Storage Ranges", + "color": "purple", + "index": 4 + } + }, + "type": "value" + }, + { + "options": { + "5": { + "text": "Storage Insertion", + "color": "light-purple", + "index": 5 + } + }, + "type": "value" + }, + { + "options": { + "6": { + "text": "State Healing", + "color": "green", + "index": 6 + } + }, + "type": "value" + }, + { + "options": { + "7": { + "text": "Storage Healing", + "color": "light-green", + "index": 7 + } + }, + "type": "value" + }, + { + "options": { + "8": { + "text": "Bytecodes", + "color": "super-light-blue", + "index": 8 + } + }, + "type": "value" + } ], "thresholds": { "mode": "absolute", "steps": [ - { "color": "text", "value": null }, - { "color": 
"blue", "value": 1 }, - { "color": "orange", "value": 2 }, - { "color": "yellow", "value": 3 }, - { "color": "purple", "value": 4 }, - { "color": "light-purple", "value": 5 }, - { "color": "green", "value": 6 }, - { "color": "light-green", "value": 7 }, - { "color": "super-light-blue", "value": 8 } + { + "color": "text", + "value": null + }, + { + "color": "blue", + "value": 1 + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "yellow", + "value": 3 + }, + { + "color": "purple", + "value": 4 + }, + { + "color": "light-purple", + "value": 5 + }, + { + "color": "green", + "value": 6 + }, + { + "color": "light-green", + "value": 7 + }, + { + "color": "super-light-blue", + "value": 8 + } ] } }, "overrides": [] }, - "gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 1 + }, "id": 1, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_stage{instance=~\"$instance\"}", "instant": true, @@ -87,35 +219,57 @@ "type": "stat" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "blue", "value": null } + { + "color": "blue", + "value": null + } ] }, "unit": "none" }, "overrides": [] }, - "gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + 
"y": 1 + }, "id": 2, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_pivot_block{instance=~\"$instance\"}", "instant": true, @@ -128,36 +282,64 @@ "type": "stat" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "yellow", "value": 5 }, - { "color": "green", "value": 20 } + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "green", + "value": 20 + } ] } }, "overrides": [] }, - "gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 1 + }, "id": 3, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_p2p_peer_count{instance=~\"$instance\"}", "legendFormat": "peers", @@ -169,38 +351,66 @@ "type": "stat" }, { - "datasource": { 
"type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "decimals": 2, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "yellow", "value": 0.1 }, - { "color": "green", "value": 0.5 } + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 0.1 + }, + { + "color": "green", + "value": 0.5 + } ] }, "unit": "Ggas/s" }, "overrides": [] }, - "gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 1 + }, "id": 4, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "gigagas{instance=~\"$instance\"}", "legendFormat": "Ggas/s", @@ -213,42 +423,75 @@ }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, "id": 101, "title": "Peer Health", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "yellow", "value": 5 }, - { "color": "green", "value": 20 } + { + "color": "red", + "value": null + }, + { + "color": 
"yellow", + "value": 5 + }, + { + "color": "green", + "value": 20 + } ] } }, "overrides": [] }, - "gridPos": { "h": 4, "w": 6, "x": 0, "y": 6 }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 6 + }, "id": 5, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_eligible_peers{instance=~\"$instance\"}", "instant": true, @@ -261,39 +504,67 @@ "type": "stat" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "yellow", "value": 600 }, - { "color": "red", "value": 1200 } + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 600 + }, + { + "color": "red", + "value": 1200 + } ] }, "unit": "s" }, "overrides": [] }, - "gridPos": { "h": 4, "w": 6, "x": 6, "y": 6 }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 6 + }, "id": 6, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": 
"prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", - "expr": "ethrex_sync_pivot_age_seconds{instance=~\"$instance\"}", + "expr": "time() - ethrex_sync_pivot_timestamp{instance=~\"$instance\"} and ethrex_sync_pivot_timestamp{instance=~\"$instance\"} > 0", "instant": true, "legendFormat": "age", "range": false, @@ -304,36 +575,64 @@ "type": "stat" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "yellow", "value": 50 }, - { "color": "red", "value": 100 } + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "red", + "value": 100 + } ] } }, "overrides": [] }, - "gridPos": { "h": 4, "w": 6, "x": 12, "y": 6 }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 6 + }, "id": 7, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_inflight_requests{instance=~\"$instance\"}", "legendFormat": "inflight", @@ -345,34 +644,56 @@ "type": "stat" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": 
{ "mode": "absolute", "steps": [ - { "color": "green", "value": null } + { + "color": "green", + "value": null + } ] } }, "overrides": [] }, - "gridPos": { "h": 4, "w": 6, "x": 18, "y": 6 }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 6 + }, "id": 8, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "value_and_name" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "sum(ethrex_sync_pivot_updates_total{instance=~\"$instance\"})", "instant": true, @@ -381,7 +702,10 @@ "refId": "A" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "sum(ethrex_sync_pivot_updates_total{instance=~\"$instance\", outcome=\"success\"})", "instant": true, @@ -390,7 +714,10 @@ "refId": "B" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "sum(ethrex_sync_pivot_updates_total{instance=~\"$instance\", outcome=\"failure\"})", "instant": true, @@ -404,38 +731,68 @@ }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 10 }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, "id": 102, "title": "Header Download", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "max": 100, 
"min": 0, "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "yellow", "value": 50 }, - { "color": "green", "value": 90 } + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "green", + "value": 90 + } ] }, "unit": "percent" }, "overrides": [] }, - "gridPos": { "h": 7, "w": 6, "x": 0, "y": 11 }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 11 + }, "id": 9, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" @@ -443,7 +800,10 @@ "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_headers_downloaded{instance=~\"$instance\"} / clamp_min(ethrex_sync_headers_total{instance=~\"$instance\"}, 1) * 100", "instant": true, @@ -456,32 +816,57 @@ "type": "gauge" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "max": 10000, "min": 0, "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "yellow", "value": 100 }, - { "color": "green", "value": 1000 } + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 100 + }, + { + "color": "green", + "value": 1000 + } ] }, "unit": "headers/s" }, "overrides": [] }, - "gridPos": { "h": 7, "w": 6, "x": 6, "y": 11 }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 11 + }, "id": 10, "options": { "minVizHeight": 
75, "minVizWidth": 75, "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" @@ -489,7 +874,10 @@ "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m])", "instant": true, @@ -502,37 +890,65 @@ "type": "gauge" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "yellow", "value": 3600 }, - { "color": "red", "value": 86400 } + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 3600 + }, + { + "color": "red", + "value": 86400 + } ] }, "unit": "s" }, "overrides": [] }, - "gridPos": { "h": 7, "w": 6, "x": 12, "y": 11 }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 11 + }, "id": 11, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "(ethrex_sync_headers_total{instance=~\"$instance\"} - 
ethrex_sync_headers_downloaded{instance=~\"$instance\"}) / clamp_min(rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m]), 0.001) and rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m]) > 0", "instant": true, @@ -545,34 +961,56 @@ "type": "stat" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "blue", "value": null } + { + "color": "blue", + "value": null + } ] } }, "overrides": [] }, - "gridPos": { "h": 7, "w": 6, "x": 18, "y": 11 }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 11 + }, "id": 12, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "value_and_name" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_headers_downloaded{instance=~\"$instance\"}", "instant": true, @@ -581,7 +1019,10 @@ "refId": "A" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_headers_total{instance=~\"$instance\"}", "instant": true, @@ -595,38 +1036,68 @@ }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 18 }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, "id": 103, "title": "Account Ranges", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + 
"datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "yellow", "value": 50 }, - { "color": "green", "value": 90 } + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "green", + "value": 90 + } ] }, "unit": "percent" }, "overrides": [] }, - "gridPos": { "h": 7, "w": 6, "x": 0, "y": 19 }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 19 + }, "id": 13, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" @@ -634,7 +1105,10 @@ "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_accounts_inserted{instance=~\"$instance\"} / clamp_min(ethrex_sync_accounts_downloaded{instance=~\"$instance\"}, 1) * 100", "instant": true, @@ -647,32 +1121,57 @@ "type": "gauge" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "max": 10000, "min": 0, "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "yellow", "value": 100 }, - { "color": "green", "value": 1000 } + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 100 + }, + { + "color": "green", 
+ "value": 1000 + } ] }, "unit": "accounts/s" }, "overrides": [] }, - "gridPos": { "h": 7, "w": 6, "x": 6, "y": 19 }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 19 + }, "id": 14, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" @@ -680,7 +1179,10 @@ "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m])", "instant": true, @@ -693,37 +1195,65 @@ "type": "gauge" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "yellow", "value": 3600 }, - { "color": "red", "value": 86400 } + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 3600 + }, + { + "color": "red", + "value": 86400 + } ] }, "unit": "s" }, "overrides": [] }, - "gridPos": { "h": 7, "w": 6, "x": 12, "y": 19 }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 19 + }, "id": 15, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "auto" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": 
"${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "(ethrex_sync_accounts_downloaded{instance=~\"$instance\"} - ethrex_sync_accounts_inserted{instance=~\"$instance\"}) / clamp_min(rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m]), 0.001) and rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m]) > 0", "instant": true, @@ -736,34 +1266,56 @@ "type": "stat" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "blue", "value": null } + { + "color": "blue", + "value": null + } ] } }, "overrides": [] }, - "gridPos": { "h": 7, "w": 6, "x": 18, "y": 19 }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 19 + }, "id": 16, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "value_and_name" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_accounts_downloaded{instance=~\"$instance\"}", "instant": true, @@ -772,7 +1324,10 @@ "refId": "A" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_accounts_inserted{instance=~\"$instance\"}", "instant": true, @@ -786,40 +1341,67 @@ }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 26 }, + 
"gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, "id": 104, "title": "Storage Ranges", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "blue", "value": null } + { + "color": "blue", + "value": null + } ] } }, "overrides": [] }, - "gridPos": { "h": 7, "w": 8, "x": 0, "y": 27 }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 27 + }, "id": 17, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "value_and_name" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_storage_downloaded{instance=~\"$instance\"}", "instant": true, @@ -828,7 +1410,10 @@ "refId": "A" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_storage_inserted{instance=~\"$instance\"}", "instant": true, @@ -841,32 +1426,57 @@ "type": "stat" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "max": 50000, "min": 0, "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "yellow", "value": 
500 }, - { "color": "green", "value": 5000 } + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 500 + }, + { + "color": "green", + "value": 5000 + } ] }, "unit": "leaves/s" }, "overrides": [] }, - "gridPos": { "h": 7, "w": 8, "x": 8, "y": 27 }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 27 + }, "id": 18, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" @@ -874,7 +1484,10 @@ "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "rate(ethrex_sync_storage_inserted{instance=~\"$instance\"}[5m])", "instant": true, @@ -887,10 +1500,15 @@ "type": "gauge" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "palette-classic" }, + "color": { + "mode": "palette-classic" + }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, @@ -901,38 +1519,71 @@ "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", - "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, - "scaleDistribution": { "type": "linear" }, + "scaleDistribution": { + "type": "linear" + }, "showPoints": "never", "spanNulls": false, - "stacking": { "group": "A", "mode": "none" }, - "thresholdsStyle": { "mode": "off" } + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, 
"mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null } + { + "color": "green", + "value": null + } ] }, "unit": "leaves/s" }, "overrides": [] }, - "gridPos": { "h": 7, "w": 8, "x": 16, "y": 27 }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 27 + }, "id": 19, "options": { - "legend": { "calcs": ["mean", "lastNotNull"], "displayMode": "list", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "multi", "sort": "desc" } + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "rate(ethrex_sync_storage_downloaded{instance=~\"$instance\"}[5m])", "legendFormat": "downloaded/s", @@ -940,7 +1591,10 @@ "refId": "A" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "rate(ethrex_sync_storage_inserted{instance=~\"$instance\"}[5m])", "legendFormat": "inserted/s", @@ -953,40 +1607,67 @@ }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 34 }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, "id": 105, "title": "Healing", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "blue", "value": null } + { + "color": "blue", + "value": null + } ] } }, "overrides": [] }, - "gridPos": { "h": 7, "w": 8, "x": 0, "y": 35 }, + 
"gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 35 + }, "id": 20, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "value_and_name" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_state_leaves_healed{instance=~\"$instance\"}", "instant": true, @@ -995,7 +1676,10 @@ "refId": "A" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_storage_leaves_healed{instance=~\"$instance\"}", "instant": true, @@ -1008,32 +1692,57 @@ "type": "stat" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "max": 10000, "min": 0, "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "yellow", "value": 100 }, - { "color": "green", "value": 1000 } + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 100 + }, + { + "color": "green", + "value": 1000 + } ] }, "unit": "leaves/s" }, "overrides": [] }, - "gridPos": { "h": 7, "w": 8, "x": 8, "y": 35 }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 35 + }, "id": 21, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": 
false + }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" @@ -1041,7 +1750,10 @@ "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "rate(ethrex_sync_state_leaves_healed{instance=~\"$instance\"}[5m]) + rate(ethrex_sync_storage_leaves_healed{instance=~\"$instance\"}[5m])", "instant": true, @@ -1054,10 +1766,15 @@ "type": "gauge" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "palette-classic" }, + "color": { + "mode": "palette-classic" + }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, @@ -1068,38 +1785,71 @@ "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", - "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, - "scaleDistribution": { "type": "linear" }, + "scaleDistribution": { + "type": "linear" + }, "showPoints": "never", "spanNulls": false, - "stacking": { "group": "A", "mode": "none" }, - "thresholdsStyle": { "mode": "off" } + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null } + { + "color": "green", + "value": null + } ] }, "unit": "leaves/s" }, "overrides": [] }, - "gridPos": { "h": 7, "w": 8, "x": 16, "y": 35 }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 35 + }, "id": 22, "options": { - "legend": { "calcs": ["mean", "lastNotNull"], "displayMode": "list", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "multi", "sort": "desc" } + "legend": 
{ + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "rate(ethrex_sync_state_leaves_healed{instance=~\"$instance\"}[5m])", "legendFormat": "state/s", @@ -1107,7 +1857,10 @@ "refId": "A" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "rate(ethrex_sync_storage_leaves_healed{instance=~\"$instance\"}[5m])", "legendFormat": "storage/s", @@ -1120,38 +1873,68 @@ }, { "collapsed": false, - "gridPos": { "h": 1, "w": 24, "x": 0, "y": 42 }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 42 + }, "id": 106, "title": "Bytecodes", "type": "row" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "yellow", "value": 50 }, - { "color": "green", "value": 90 } + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "green", + "value": 90 + } ] }, "unit": "percent" }, "overrides": [] }, - "gridPos": { "h": 7, "w": 8, "x": 0, "y": 43 }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 43 + }, "id": 23, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + 
"values": false + }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" @@ -1159,7 +1942,10 @@ "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"} / clamp_min(ethrex_sync_bytecodes_total{instance=~\"$instance\"}, 1) * 100", "instant": true, @@ -1172,32 +1958,57 @@ "type": "gauge" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "max": 5000, "min": 0, "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "yellow", "value": 50 }, - { "color": "green", "value": 500 } + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "green", + "value": 500 + } ] }, "unit": "codes/s" }, "overrides": [] }, - "gridPos": { "h": 7, "w": 8, "x": 8, "y": 43 }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 43 + }, "id": 24, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" @@ -1205,7 +2016,10 @@ "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "rate(ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}[5m])", "instant": true, @@ -1218,34 +2032,56 @@ "type": "gauge" }, { - "datasource": { 
"type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { + "mode": "thresholds" + }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "blue", "value": null } + { + "color": "blue", + "value": null + } ] } }, "overrides": [] }, - "gridPos": { "h": 7, "w": 8, "x": 16, "y": 43 }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 43 + }, "id": 25, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, "textMode": "value_and_name" }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}", "instant": true, @@ -1254,7 +2090,10 @@ "refId": "A" }, { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "editorMode": "code", "expr": "ethrex_sync_bytecodes_total{instance=~\"$instance\"}", "instant": true, @@ -1269,7 +2108,10 @@ ], "refresh": "10s", "schemaVersion": 39, - "tags": ["ethrex", "snapsync"], + "tags": [ + "ethrex", + "snapsync" + ], "templating": { "list": [ { From 8551e296ae19ff35f309673bdd5082eb51c0224e Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 17:33:35 -0300 Subject: [PATCH 20/31] feat(l1): add per-phase elapsed + ETA panels to Grafana dashboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add ethrex_sync_phase_start_timestamp{phase} labeled gauge — set on each phase transition, 
persists for completed phases - Grafana computes elapsed as time() - timestamp, per phase - Pivot Age now uses time() - ethrex_sync_pivot_timestamp (live) - pivot_age_seconds also updated each push cycle for RPC/peer_top - Added ETA panels for headers, accounts, bytecodes (remaining/rate) - Added elapsed panels for all phases in their respective rows - Overview row: replaced Throughput (N/A during sync) with Phase Elapsed showing all active/completed phase timings --- crates/blockchain/metrics/sync.rs | 13 +- crates/networking/p2p/network.rs | 26 +- .../common_dashboards/snapsync_dashboard.json | 315 +++++++++++++++--- 3 files changed, 308 insertions(+), 46 deletions(-) diff --git a/crates/blockchain/metrics/sync.rs b/crates/blockchain/metrics/sync.rs index dd94ea41504..4323d7bf6d7 100644 --- a/crates/blockchain/metrics/sync.rs +++ b/crates/blockchain/metrics/sync.rs @@ -1,4 +1,7 @@ -use prometheus::{IntCounterVec, IntGauge, register_int_counter_vec, register_int_gauge}; +use prometheus::{ + IntCounterVec, IntGauge, IntGaugeVec, register_int_counter_vec, register_int_gauge, + register_int_gauge_vec, +}; use std::sync::LazyLock; // Metrics defined in this module register into the Prometheus default registry. @@ -16,6 +19,7 @@ pub struct MetricsSync { pub inflight_requests: IntGauge, pub pivot_age_seconds: IntGauge, pub pivot_timestamp: IntGauge, + pub phase_start_timestamp: IntGaugeVec, // --- Progress counters (gauges set from METRICS atomics) --- // Use rate() in Grafana to derive throughput. 
@@ -82,6 +86,13 @@ impl MetricsSync { ) .expect("Failed to create ethrex_sync_pivot_timestamp"), + phase_start_timestamp: register_int_gauge_vec!( + "ethrex_sync_phase_start_timestamp", + "Unix timestamp when each phase began (use time() - this for elapsed in Grafana)", + &["phase"] + ) + .expect("Failed to create ethrex_sync_phase_start_timestamp"), + // Progress (set periodically from METRICS atomics) headers_downloaded: register_int_gauge!( "ethrex_sync_headers_downloaded", diff --git a/crates/networking/p2p/network.rs b/crates/networking/p2p/network.rs index 1d9749e03a2..1a616353eae 100644 --- a/crates/networking/p2p/network.rs +++ b/crates/networking/p2p/network.rs @@ -399,6 +399,20 @@ pub async fn periodically_show_peer_stats_during_syncing( // Start new phase phase_start_time = std::time::Instant::now(); + // Record phase start timestamp for Grafana elapsed panels + #[cfg(feature = "metrics")] + { + let (_, phase_name) = phase_info(current_step); + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + ethrex_metrics::sync::METRICS_SYNC + .phase_start_timestamp + .with_label_values(&[phase_name]) + .set(now as i64); + } + // Capture metrics at phase start phase_start = PhaseCounters::capture_current(); prev_interval = phase_start; @@ -705,11 +719,21 @@ fn push_sync_prometheus_metrics(step: CurrentStepValue) { .pivot_block .set(METRICS.sync_head_block.load(Relaxed) as i64); - // Push raw pivot timestamp — Grafana computes age as time() - timestamp + // Push raw pivot timestamp — Grafana computes age via time() - timestamp let pivot_ts = METRICS.pivot_timestamp.load(Relaxed); if pivot_ts > 0 { METRICS_SYNC.pivot_timestamp.set(pivot_ts as i64); } + // Also update pivot_age_seconds for RPC/peer_top consumers + if pivot_ts > 0 { + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + METRICS_SYNC + .pivot_age_seconds + 
.set(now.saturating_sub(pivot_ts) as i64); + } match step { CurrentStepValue::DownloadingHeaders => { diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json index 28d654c0138..28390da8a02 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json @@ -357,31 +357,9 @@ }, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "yellow", - "value": 0.1 - }, - { - "color": "green", - "value": 0.5 - } - ] - }, - "unit": "Ggas/s" - }, - "overrides": [] + "unit": "s", + "decimals": 0 + } }, "gridPos": { "h": 4, @@ -391,34 +369,24 @@ }, "id": 4, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "gigagas{instance=~\"$instance\"}", - "legendFormat": "Ggas/s", - "range": true, - "refId": "A" + "expr": "time() - ethrex_sync_phase_start_timestamp{phase=~\".+\",instance=~\"$instance\"} and ethrex_sync_phase_start_timestamp{phase=~\".+\",instance=~\"$instance\"} > 0", + "legendFormat": "{{phase}}" } ], - "title": "Throughput Ggas/s", + "title": "Phase Elapsed", "type": "stat" }, { @@ -889,6 +857,80 @@ "title": "Headers per Second", "type": "gauge" }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + 
"defaults": { + "unit": "s", + "decimals": 0 + } + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 11 + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value_and_name", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "targets": [ + { + "expr": "(ethrex_sync_headers_total{instance=~\"$instance\"} - ethrex_sync_headers_downloaded{instance=~\"$instance\"}) / clamp_min(rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m]), 0.001) and rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m]) > 0", + "legendFormat": "ETA" + } + ], + "title": "Header ETA", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "s", + "decimals": 0 + } + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 11 + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value_and_name", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "targets": [ + { + "expr": "time() - ethrex_sync_phase_start_timestamp{phase=\"BLOCK HEADERS\",instance=~\"$instance\"} and ethrex_sync_phase_start_timestamp{phase=\"BLOCK HEADERS\",instance=~\"$instance\"} > 0", + "legendFormat": "Elapsed" + } + ], + "title": "Header Elapsed", + "type": "stat" + }, { "datasource": { "type": "prometheus", @@ -1194,6 +1236,80 @@ "title": "Accounts per Second", "type": "gauge" }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "s", + "decimals": 0 + } + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 19 + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value_and_name", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "targets": [ + { + "expr": "(ethrex_sync_accounts_downloaded{instance=~\"$instance\"} - 
ethrex_sync_accounts_inserted{instance=~\"$instance\"}) / clamp_min(rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m]), 0.001) and rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m]) > 0", + "legendFormat": "ETA" + } + ], + "title": "Account ETA", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "s", + "decimals": 0 + } + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 19 + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value_and_name", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "targets": [ + { + "expr": "time() - ethrex_sync_phase_start_timestamp{phase=\"ACCOUNT RANGES\",instance=~\"$instance\"} and ethrex_sync_phase_start_timestamp{phase=\"ACCOUNT RANGES\",instance=~\"$instance\"} > 0", + "legendFormat": "Elapsed" + } + ], + "title": "Account Elapsed", + "type": "stat" + }, { "datasource": { "type": "prometheus", @@ -1765,6 +1881,43 @@ "title": "Healing per Second", "type": "gauge" }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "s", + "decimals": 0 + } + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 35 + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value_and_name", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "targets": [ + { + "expr": "time() - ethrex_sync_phase_start_timestamp{phase=\"STATE HEALING\",instance=~\"$instance\"} and ethrex_sync_phase_start_timestamp{phase=\"STATE HEALING\",instance=~\"$instance\"} > 0", + "legendFormat": "Elapsed" + } + ], + "title": "Healing Elapsed", + "type": "stat" + }, { "datasource": { "type": "prometheus", @@ -2031,6 +2184,80 @@ "title": "Bytecodes per Second", "type": "gauge" }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "s", + "decimals": 0 + } + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 43 + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value_and_name", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "targets": [ + { + "expr": "(ethrex_sync_bytecodes_total{instance=~\"$instance\"} - ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}) / clamp_min(rate(ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}[5m]), 0.001) and rate(ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}[5m]) > 0", + "legendFormat": "ETA" + } + ], + "title": "Bytecode ETA", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "s", + "decimals": 0 + } + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 43 + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value_and_name", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "targets": [ + { + "expr": "time() - ethrex_sync_phase_start_timestamp{phase=\"BYTECODES\",instance=~\"$instance\"} and ethrex_sync_phase_start_timestamp{phase=\"BYTECODES\",instance=~\"$instance\"} > 0", + "legendFormat": "Elapsed" + } + ], + "title": "Bytecode Elapsed", + "type": "stat" + }, { "datasource": { "type": "prometheus", From 05a659ffa88b850e8021c381a39092d22d78b1bd Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 17:41:32 -0300 Subject: [PATCH 21/31] fix(l1): push peer health metrics (eligible, snap, inflight) during sync, not only after --- crates/networking/p2p/network.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/crates/networking/p2p/network.rs b/crates/networking/p2p/network.rs index 1a616353eae..91e4938b6af 100644 --- a/crates/networking/p2p/network.rs +++ 
b/crates/networking/p2p/network.rs @@ -434,9 +434,21 @@ pub async fn periodically_show_peer_stats_during_syncing( ) .await; - // Push progress to Prometheus + // Push progress + peer health to Prometheus #[cfg(feature = "metrics")] - push_sync_prometheus_metrics(current_step); + { + push_sync_prometheus_metrics(current_step); + let diag = peer_table.get_peer_diagnostics().await.unwrap_or_default(); + let snap_peers = diag + .iter() + .filter(|p| p.capabilities.iter().any(|c| c.starts_with("snap/"))) + .count(); + let eligible = diag.iter().filter(|p| p.eligible).count(); + let inflight: i64 = diag.iter().map(|p| p.inflight_requests).sum(); + ethrex_metrics::sync::METRICS_SYNC.set_snap_peers(snap_peers as i64); + ethrex_metrics::sync::METRICS_SYNC.set_eligible_peers(eligible as i64); + ethrex_metrics::sync::METRICS_SYNC.set_inflight_requests(inflight); + } // Update previous interval counters for next rate calculation prev_interval = PhaseCounters::capture_current(); From 8ff1321ce8a0dc8231c0ed7761741c9155059582 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 17:43:18 -0300 Subject: [PATCH 22/31] =?UTF-8?q?refactor(l1):=20remove=20duplicate=20peer?= =?UTF-8?q?=20health=20push=20from=20after=5Fsync=20=E2=80=94=20during=5Fs?= =?UTF-8?q?yncing=20covers=20it?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/networking/p2p/network.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/crates/networking/p2p/network.rs b/crates/networking/p2p/network.rs index 91e4938b6af..21faaf3d972 100644 --- a/crates/networking/p2p/network.rs +++ b/crates/networking/p2p/network.rs @@ -831,16 +831,6 @@ pub async fn periodically_show_peer_stats_after_sync(peer_table: &PeerTable) { }) .count(); info!("Snap Peers: {snap_active_peers} / Total Peers: {active_peers}"); - #[cfg(feature = "metrics")] - { - ethrex_metrics::sync::METRICS_SYNC.set_snap_peers(snap_active_peers as i64); - // Compute eligible 
peers via diagnostics (which calls can_try_more_requests) - let diag = peer_table.get_peer_diagnostics().await.unwrap_or_default(); - let eligible = diag.iter().filter(|p| p.eligible).count(); - let inflight: i64 = diag.iter().map(|p| p.inflight_requests).sum(); - ethrex_metrics::sync::METRICS_SYNC.set_eligible_peers(eligible as i64); - ethrex_metrics::sync::METRICS_SYNC.set_inflight_requests(inflight); - } interval.tick().await; } } From f7f86703cb47012fb3d098d88dc72690c96cc9dc Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 17:52:21 -0300 Subject: [PATCH 23/31] =?UTF-8?q?refactor(l1):=20remove=20redundant=20prom?= =?UTF-8?q?etheus=20pushes=20from=20snap=5Fsync=20=E2=80=94=20push=20funct?= =?UTF-8?q?ion=20in=20network.rs=20covers=20them?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/networking/p2p/sync/snap_sync.rs | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/crates/networking/p2p/sync/snap_sync.rs b/crates/networking/p2p/sync/snap_sync.rs index 1ed7b4b4d7f..90aee7c8054 100644 --- a/crates/networking/p2p/sync/snap_sync.rs +++ b/crates/networking/p2p/sync/snap_sync.rs @@ -124,8 +124,6 @@ pub async fn sync_cycle_snap( diag.current_phase = "headers".to_string(); diag.sync_mode = "snap".to_string(); } - #[cfg(feature = "metrics")] - ethrex_metrics::sync::METRICS_SYNC.set_current_phase(1); info!( "Syncing from current head {:?} to sync_head {:?}", current_head, sync_head @@ -311,8 +309,6 @@ pub async fn snap_sync( METRICS .pivot_timestamp .store(pivot_header.timestamp, std::sync::atomic::Ordering::Relaxed); - #[cfg(feature = "metrics")] - ethrex_metrics::sync::METRICS_SYNC.set_pivot_age_seconds(pivot_age as i64); } let state_root = pivot_header.state_root; @@ -337,8 +333,6 @@ pub async fn snap_sync( // account_state_snapshots_dir diagnostics.write().await.current_phase = "account_ranges".to_string(); - #[cfg(feature = "metrics")] - 
ethrex_metrics::sync::METRICS_SYNC.set_current_phase(2); info!("Starting to download account ranges from peers"); request_account_range( peers, @@ -362,8 +356,6 @@ pub async fn snap_sync( .load(std::sync::atomic::Ordering::Relaxed), ); } - #[cfg(feature = "metrics")] - ethrex_metrics::sync::METRICS_SYNC.set_current_phase(3); *METRICS.account_tries_insert_start_time.lock().await = Some(SystemTime::now()); METRICS .current_step @@ -391,8 +383,6 @@ pub async fn snap_sync( info!("Computed state root after request_account_rages: {computed_state_root:?}"); diagnostics.write().await.current_phase = "storage_ranges".to_string(); - #[cfg(feature = "metrics")] - ethrex_metrics::sync::METRICS_SYNC.set_current_phase(4); *METRICS.storage_tries_download_start_time.lock().await = Some(SystemTime::now()); // We start downloading the storage leafs. To do so, we need to be sure that the storage root // is correct. To do so, we always heal the state trie before requesting storage rates @@ -483,8 +473,6 @@ pub async fn snap_sync( *METRICS.storage_tries_download_end_time.lock().await = Some(SystemTime::now()); diagnostics.write().await.current_phase = "storage_insertion".to_string(); - #[cfg(feature = "metrics")] - ethrex_metrics::sync::METRICS_SYNC.set_current_phase(5); *METRICS.storage_tries_insert_start_time.lock().await = Some(SystemTime::now()); METRICS .current_step @@ -505,8 +493,6 @@ pub async fn snap_sync( } diagnostics.write().await.current_phase = "healing".to_string(); - #[cfg(feature = "metrics")] - ethrex_metrics::sync::METRICS_SYNC.set_current_phase(6); *METRICS.heal_start_time.lock().await = Some(SystemTime::now()); info!("Starting Healing Process"); let mut global_state_leafs_healed: u64 = 0; @@ -567,8 +553,6 @@ pub async fn snap_sync( let mut code_hashes_to_download = Vec::new(); diagnostics.write().await.current_phase = "bytecodes".to_string(); - #[cfg(feature = "metrics")] - ethrex_metrics::sync::METRICS_SYNC.set_current_phase(7); info!("Starting download code 
hashes from peers"); for entry in std::fs::read_dir(&code_hashes_dir) .map_err(|_| SyncError::CodeHashesSnapshotsDirNotFound)? @@ -833,8 +817,6 @@ pub async fn update_pivot( METRICS .pivot_timestamp .store(pivot.timestamp, std::sync::atomic::Ordering::Relaxed); - #[cfg(feature = "metrics")] - ethrex_metrics::sync::METRICS_SYNC.set_pivot_age_seconds(pivot_age as i64); } let block_headers = peers .request_block_headers(block_number + 1, pivot.hash()) From 0b860adcaaf6b190fc112acad116b8f85fb36dff Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 17:55:55 -0300 Subject: [PATCH 24/31] feat(l1): expose metrics ports for all three multisync chains (3701/3702/3703) --- tooling/sync/docker-compose.multisync.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tooling/sync/docker-compose.multisync.yaml b/tooling/sync/docker-compose.multisync.yaml index 860ba1025e7..99b9205d832 100644 --- a/tooling/sync/docker-compose.multisync.yaml +++ b/tooling/sync/docker-compose.multisync.yaml @@ -74,6 +74,7 @@ services: container_name: ethrex-hoodi ports: - "8545:8545" # RPC + - "3702:3701" # Metrics volumes: - secrets-hoodi:/secrets - ethrex-hoodi:/data @@ -122,6 +123,7 @@ services: container_name: ethrex-sepolia ports: - "8546:8545" # RPC on different host port + - "3703:3701" # Metrics volumes: - secrets-sepolia:/secrets - ethrex-sepolia:/data From 0507f5967d9b729027f28c86fc90724e96cf0e66 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 18:05:15 -0300 Subject: [PATCH 25/31] fix(l1): deduplicate dashboard panels, consistent 4-panel rows --- .../common_dashboards/snapsync_dashboard.json | 427 ++---------------- 1 file changed, 50 insertions(+), 377 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json index 28390da8a02..7f3742f33eb 100644 --- 
a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json @@ -931,151 +931,6 @@ "title": "Header Elapsed", "type": "stat" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 3600 - }, - { - "color": "red", - "value": 86400 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 11 - }, - "id": 11, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "12.2.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "(ethrex_sync_headers_total{instance=~\"$instance\"} - ethrex_sync_headers_downloaded{instance=~\"$instance\"}) / clamp_min(rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m]), 0.001) and rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m]) > 0", - "instant": true, - "legendFormat": "ETA", - "range": false, - "refId": "A" - } - ], - "title": "Header ETA", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 11 - }, - "id": 12, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - 
"orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value_and_name" - }, - "pluginVersion": "12.2.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "ethrex_sync_headers_downloaded{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "downloaded", - "range": false, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "ethrex_sync_headers_total{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "total", - "range": false, - "refId": "B" - } - ], - "title": "Header Counts", - "type": "stat" - }, { "collapsed": false, "gridPos": { @@ -1310,151 +1165,6 @@ "title": "Account Elapsed", "type": "stat" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 3600 - }, - { - "color": "red", - "value": 86400 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 19 - }, - "id": 15, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "12.2.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "(ethrex_sync_accounts_downloaded{instance=~\"$instance\"} - ethrex_sync_accounts_inserted{instance=~\"$instance\"}) / clamp_min(rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m]), 0.001) and 
rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m]) > 0", - "instant": true, - "legendFormat": "ETA", - "range": false, - "refId": "A" - } - ], - "title": "Account ETA", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 19 - }, - "id": 16, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value_and_name" - }, - "pluginVersion": "12.2.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "ethrex_sync_accounts_downloaded{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "downloaded", - "range": false, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "ethrex_sync_accounts_inserted{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "inserted", - "range": false, - "refId": "B" - } - ], - "title": "Account Counts", - "type": "stat" - }, { "collapsed": false, "gridPos": { @@ -1492,7 +1202,7 @@ }, "gridPos": { "h": 7, - "w": 8, + "w": 6, "x": 0, "y": 27 }, @@ -1577,8 +1287,8 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 8, + "w": 6, + "x": 6, "y": 27 }, "id": 18, @@ -1673,8 +1383,8 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 16, + "w": 6, + "x": 12, "y": 27 }, "id": 19, @@ -1721,6 +1431,43 @@ "title": "Storage Rate Over Time", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unit": "s", + 
"decimals": 0 + } + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 27 + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value_and_name", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "targets": [ + { + "expr": "time() - ethrex_sync_phase_start_timestamp{phase=\"STORAGE RANGES\",instance=~\"$instance\"} and ethrex_sync_phase_start_timestamp{phase=\"STORAGE RANGES\",instance=~\"$instance\"} > 0", + "legendFormat": "Elapsed" + } + ], + "title": "Storage Elapsed", + "type": "stat" + }, { "collapsed": false, "gridPos": { @@ -1758,7 +1505,7 @@ }, "gridPos": { "h": 7, - "w": 8, + "w": 6, "x": 0, "y": 35 }, @@ -1843,8 +1590,8 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 8, + "w": 6, + "x": 6, "y": 35 }, "id": 21, @@ -1976,8 +1723,8 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 16, + "w": 6, + "x": 18, "y": 35 }, "id": 22, @@ -2072,7 +1819,7 @@ }, "gridPos": { "h": 7, - "w": 8, + "w": 6, "x": 0, "y": 43 }, @@ -2146,8 +1893,8 @@ }, "gridPos": { "h": 7, - "w": 8, - "x": 8, + "w": 6, + "x": 6, "y": 43 }, "id": 24, @@ -2257,80 +2004,6 @@ ], "title": "Bytecode Elapsed", "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 43 - }, - "id": 25, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value_and_name" - }, - "pluginVersion": "12.2.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": 
"ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "downloaded", - "range": false, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "ethrex_sync_bytecodes_total{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "total", - "range": false, - "refId": "B" - } - ], - "title": "Bytecode Counts", - "type": "stat" } ], "refresh": "10s", From 964be80766c389e9aa400f5e5ae052c37b8917d1 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 18:38:23 -0300 Subject: [PATCH 26/31] =?UTF-8?q?fix(l1):=20consistent=20dashboard=20layou?= =?UTF-8?q?t=20=E2=80=94=20Progress+Counts,=20Rate,=20ETA,=20Elapsed=20per?= =?UTF-8?q?=20phase=20+=20full-width=20timeseries?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../common_dashboards/snapsync_dashboard.json | 1957 ++++++++--------- 1 file changed, 876 insertions(+), 1081 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json index 7f3742f33eb..187ffd17289 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json @@ -29,7 +29,6 @@ "x": 0, "y": 0 }, - "id": 100, "title": "Sync Overview", "type": "row" }, @@ -38,336 +37,182 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 1 + }, + "title": "Sync Stage", "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, "mappings": [ { "options": { "0": { - "text": "Idle", - "color": "text", - "index": 0 - } - }, - "type": "value" - }, - { - "options": { + "text": "Idle" + }, "1": { - "text": "Headers", - "color": "blue", - "index": 1 - } - }, - 
"type": "value" - }, - { - "options": { + "text": "Headers" + }, "2": { - "text": "Account Ranges", - "color": "orange", - "index": 2 - } - }, - "type": "value" - }, - { - "options": { + "text": "Account Ranges" + }, "3": { - "text": "Account Insertion", - "color": "yellow", - "index": 3 - } - }, - "type": "value" - }, - { - "options": { + "text": "Account Insertion" + }, "4": { - "text": "Storage Ranges", - "color": "purple", - "index": 4 - } - }, - "type": "value" - }, - { - "options": { + "text": "Storage Ranges" + }, "5": { - "text": "Storage Insertion", - "color": "light-purple", - "index": 5 - } - }, - "type": "value" - }, - { - "options": { + "text": "Storage Insertion" + }, "6": { - "text": "State Healing", - "color": "green", - "index": 6 - } - }, - "type": "value" - }, - { - "options": { + "text": "State Healing" + }, "7": { - "text": "Storage Healing", - "color": "light-green", - "index": 7 - } - }, - "type": "value" - }, - { - "options": { + "text": "Storage Healing" + }, "8": { - "text": "Bytecodes", - "color": "super-light-blue", - "index": 8 + "text": "Bytecodes" } }, "type": "value" } ], "thresholds": { - "mode": "absolute", "steps": [ { - "color": "text", + "color": "green", "value": null }, { - "color": "blue", + "color": "yellow", "value": 1 }, { "color": "orange", - "value": 2 - }, - { - "color": "yellow", "value": 3 }, { - "color": "purple", - "value": 4 - }, - { - "color": "light-purple", - "value": 5 - }, - { - "color": "green", + "color": "blue", "value": 6 - }, - { - "color": "light-green", - "value": 7 - }, - { - "color": "super-light-blue", - "value": 8 } ] + }, + "color": { + "mode": "thresholds" } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 1 + } }, - "id": 1, "options": { - "colorMode": "value", + "colorMode": "background", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", + "justifyMode": "center", + "textMode": "value", "reduceOptions": { "calcs": [ "lastNotNull" - ], - 
"fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "ethrex_sync_stage{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" + "legendFormat": "Stage" } - ], - "title": "Sync Stage", - "type": "stat" + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, + "type": "stat", + "title": "Pivot Block", "gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 }, - "id": 2, + "fieldConfig": { + "defaults": { + "decimals": 0 + } + }, "options": { "colorMode": "value", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", + "justifyMode": "center", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "ethrex_sync_pivot_block{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" + "legendFormat": "" } - ], - "title": "Pivot Block", - "type": "stat" + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "yellow", - "value": 5 - }, - { - "color": "green", - "value": 20 - } - ] - } - }, - "overrides": [] - }, + "type": "stat", + "title": "Peers", "gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 }, - "id": 3, + 
"fieldConfig": { + "defaults": { + "decimals": 0 + } + }, "options": { "colorMode": "value", "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", + "justifyMode": "center", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "ethrex_p2p_peer_count{instance=~\"$instance\"}", - "legendFormat": "peers", - "range": true, - "refId": "A" + "legendFormat": "" } - ], - "title": "Peers", - "type": "stat" + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "fieldConfig": { - "defaults": { - "unit": "s", - "decimals": 0 - } - }, + "type": "stat", + "title": "Phase Elapsed", "gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 }, - "id": 4, + "fieldConfig": { + "defaults": { + "unit": "s", + "decimals": 0 + } + }, "options": { "colorMode": "none", "graphMode": "none", @@ -379,15 +224,12 @@ ] } }, - "pluginVersion": "12.2.1", "targets": [ { "expr": "time() - ethrex_sync_phase_start_timestamp{phase=~\".+\",instance=~\"$instance\"} and ethrex_sync_phase_start_timestamp{phase=~\".+\",instance=~\"$instance\"} > 0", "legendFormat": "{{phase}}" } - ], - "title": "Phase Elapsed", - "type": "stat" + ] }, { "collapsed": false, @@ -395,9 +237,8 @@ "h": 1, "w": 24, "x": 0, - "y": 5 + "y": 6 }, - "id": 101, "title": "Peer Health", "type": "row" }, @@ -406,14 +247,17 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "title": "Eligible Peers", + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 7 + }, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], "thresholds": { - "mode": "absolute", "steps": [ { "color": "red", @@ -428,62 +272,46 @@ "value": 20 } ] + }, + "color": { + "mode": "thresholds" } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 6 + 
} }, - "id": 5, "options": { "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", + "graphMode": "area", + "justifyMode": "center", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "ethrex_sync_eligible_peers{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "eligible", - "range": false, - "refId": "A" - } - ], - "title": "Eligible Peers", - "type": "stat" + "expr": "ethrex_sync_eligible_peers{instance=~\"$instance\"}" + } + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "title": "Pivot Age", + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 7 + }, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "unit": "s", + "decimals": 0, "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -499,203 +327,94 @@ } ] }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 6 + "color": { + "mode": "thresholds" + } + } }, - "id": 6, "options": { "colorMode": "value", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", + "justifyMode": "center", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "time() - ethrex_sync_pivot_timestamp{instance=~\"$instance\"} and ethrex_sync_pivot_timestamp{instance=~\"$instance\"} > 0", - "instant": true, - "legendFormat": "age", - "range": false, - "refId": "A" - } - ], - "title": "Pivot Age", - "type": "stat" + "expr": "time() - ethrex_sync_pivot_timestamp{instance=~\"$instance\"} and 
ethrex_sync_pivot_timestamp{instance=~\"$instance\"} > 0" + } + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 50 - }, - { - "color": "red", - "value": 100 - } - ] - } - }, - "overrides": [] - }, + "type": "stat", + "title": "Inflight Requests", "gridPos": { "h": 4, "w": 6, "x": 12, - "y": 6 + "y": 7 + }, + "fieldConfig": { + "defaults": { + "decimals": 0 + } }, - "id": 7, "options": { "colorMode": "value", "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", + "justifyMode": "center", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "ethrex_sync_inflight_requests{instance=~\"$instance\"}", - "legendFormat": "inflight", - "range": true, - "refId": "A" + "expr": "ethrex_sync_inflight_requests{instance=~\"$instance\"}" } - ], - "title": "Inflight Requests", - "type": "stat" + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, + "type": "stat", + "title": "Pivot Updates", "gridPos": { "h": 4, "w": 6, "x": 18, - "y": 6 + "y": 7 + }, + "fieldConfig": { + "defaults": { + "decimals": 0 + } }, - "id": 8, "options": { "colorMode": "value", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", + "justifyMode": "center", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": 
"value_and_name" + ] + } }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(ethrex_sync_pivot_updates_total{instance=~\"$instance\"})", - "instant": true, - "legendFormat": "total", - "range": false, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(ethrex_sync_pivot_updates_total{instance=~\"$instance\", outcome=\"success\"})", - "instant": true, - "legendFormat": "success", - "range": false, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(ethrex_sync_pivot_updates_total{instance=~\"$instance\", outcome=\"failure\"})", - "instant": true, - "legendFormat": "failure", - "range": false, - "refId": "C" - } - ], - "title": "Pivot Updates", - "type": "stat" + "expr": "sum(ethrex_sync_pivot_updates_total{instance=~\"$instance\"})" + } + ] }, { "collapsed": false, @@ -703,10 +422,9 @@ "h": 1, "w": 24, "x": 0, - "y": 10 + "y": 12 }, - "id": 102, - "title": "Header Download", + "title": "Headers", "type": "row" }, { @@ -714,16 +432,21 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "gauge", + "title": "Headers Progress", + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 13 + }, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 100, "min": 0, + "max": 100, + "unit": "percent", + "decimals": 1, "thresholds": { - "mode": "absolute", "steps": [ { "color": "red", @@ -731,154 +454,166 @@ }, { "color": "yellow", - "value": 50 + "value": 33 }, { "color": "green", - "value": 90 + "value": 66 } ] }, - "unit": "percent" + "color": { + "mode": "thresholds" + } + } + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] }, - "overrides": [] + "showThresholdLabels": false, + "showThresholdMarkers": true, + "orientation": 
"horizontal" }, + "targets": [ + { + "expr": "ethrex_sync_headers_downloaded{instance=~\"$instance\"} / clamp_min(ethrex_sync_headers_total{instance=~\"$instance\"}, 1) * 100", + "legendFormat": "progress" + }, + { + "expr": "ethrex_sync_headers_downloaded{instance=~\"$instance\"}", + "legendFormat": "downloaded", + "hide": true + }, + { + "expr": "ethrex_sync_headers_total{instance=~\"$instance\"}", + "legendFormat": "total", + "hide": true + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "type": "stat", + "title": "", "gridPos": { - "h": 7, + "h": 2, "w": 6, "x": 0, - "y": 11 + "y": 17 + }, + "fieldConfig": { + "defaults": { + "decimals": 0 + } }, - "id": 9, "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" + "orientation": "horizontal" }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "ethrex_sync_headers_downloaded{instance=~\"$instance\"} / clamp_min(ethrex_sync_headers_total{instance=~\"$instance\"}, 1) * 100", - "instant": true, - "legendFormat": "progress", - "range": false, - "refId": "A" - } - ], - "title": "Header Progress", - "type": "gauge" + "expr": "ethrex_sync_headers_downloaded{instance=~\"$instance\"}", + "legendFormat": "downloaded" + }, + { + "expr": "ethrex_sync_headers_total{instance=~\"$instance\"}", + "legendFormat": "total" + } + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "gauge", + "title": "Headers Rate", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 13 + }, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - 
"mappings": [], - "max": 10000, "min": 0, + "unit": "none", + "decimals": 0, "thresholds": { - "mode": "absolute", "steps": [ { "color": "red", "value": null }, { - "color": "yellow", - "value": 100 + "color": "orange", + "value": 500 }, { "color": "green", - "value": 1000 + "value": 1500 } ] }, - "unit": "headers/s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 11 + "color": { + "mode": "thresholds" + } + } }, - "id": 10, "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, "showThresholdLabels": false, "showThresholdMarkers": true, - "sizing": "auto" + "orientation": "horizontal" }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m])", - "instant": true, - "legendFormat": "rate", - "range": false, - "refId": "A" - } - ], - "title": "Headers per Second", - "type": "gauge" + "legendFormat": "/s" + } + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "title": "Headers ETA", + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 13 + }, "fieldConfig": { "defaults": { "unit": "s", "decimals": 0 } }, - "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 11 - }, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "center", - "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" @@ -890,32 +625,31 @@ "expr": "(ethrex_sync_headers_total{instance=~\"$instance\"} - ethrex_sync_headers_downloaded{instance=~\"$instance\"}) / clamp_min(rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m]), 0.001) and rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m]) > 0", "legendFormat": "ETA" } - ], - "title": "Header ETA", - "type": "stat" + ] }, { "datasource": { "type": 
"prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "title": "Headers Elapsed", + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 13 + }, "fieldConfig": { "defaults": { "unit": "s", "decimals": 0 } }, - "gridPos": { - "h": 5, - "w": 6, - "x": 18, - "y": 11 - }, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "center", - "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" @@ -927,9 +661,41 @@ "expr": "time() - ethrex_sync_phase_start_timestamp{phase=\"BLOCK HEADERS\",instance=~\"$instance\"} and ethrex_sync_phase_start_timestamp{phase=\"BLOCK HEADERS\",instance=~\"$instance\"} > 0", "legendFormat": "Elapsed" } - ], - "title": "Header Elapsed", - "type": "stat" + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "type": "timeseries", + "title": "Headers Rate Over Time", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 19 + }, + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "drawStyle": "line", + "fillOpacity": 10 + } + } + }, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m])", + "legendFormat": "headers/s" + } + ] }, { "collapsed": false, @@ -937,10 +703,9 @@ "h": 1, "w": 24, "x": 0, - "y": 18 + "y": 24 }, - "id": 103, - "title": "Account Ranges", + "title": "Accounts", "type": "row" }, { @@ -948,16 +713,21 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "gauge", + "title": "Accounts Progress", + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 25 + }, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 100, "min": 0, + "max": 100, + "unit": "percent", + "decimals": 1, "thresholds": { - "mode": "absolute", "steps": [ { "color": "red", @@ -965,154 +735,166 @@ }, { "color": "yellow", - "value": 50 + "value": 33 }, { "color": "green", - "value": 90 + "value": 66 } ] }, - "unit": 
"percent" + "color": { + "mode": "thresholds" + } + } + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] }, - "overrides": [] + "showThresholdLabels": false, + "showThresholdMarkers": true, + "orientation": "horizontal" }, + "targets": [ + { + "expr": "ethrex_sync_accounts_inserted{instance=~\"$instance\"} / clamp_min(ethrex_sync_accounts_downloaded{instance=~\"$instance\"}, 1) * 100", + "legendFormat": "progress" + }, + { + "expr": "ethrex_sync_accounts_downloaded{instance=~\"$instance\"}", + "legendFormat": "downloaded", + "hide": true + }, + { + "expr": "ethrex_sync_accounts_inserted{instance=~\"$instance\"}", + "legendFormat": "inserted", + "hide": true + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "type": "stat", + "title": "", "gridPos": { - "h": 7, + "h": 2, "w": 6, "x": 0, - "y": 19 + "y": 29 + }, + "fieldConfig": { + "defaults": { + "decimals": 0 + } }, - "id": 13, "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" + "orientation": "horizontal" }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "ethrex_sync_accounts_inserted{instance=~\"$instance\"} / clamp_min(ethrex_sync_accounts_downloaded{instance=~\"$instance\"}, 1) * 100", - "instant": true, - "legendFormat": "progress", - "range": false, - "refId": "A" - } - ], - "title": "Account Progress", - "type": "gauge" + "expr": "ethrex_sync_accounts_downloaded{instance=~\"$instance\"}", + "legendFormat": "downloaded" + }, + { + "expr": "ethrex_sync_accounts_inserted{instance=~\"$instance\"}", + "legendFormat": "inserted" + } + ] }, { 
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "gauge", + "title": "Accounts Rate", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 25 + }, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 10000, "min": 0, + "unit": "none", + "decimals": 0, "thresholds": { - "mode": "absolute", "steps": [ { "color": "red", "value": null }, { - "color": "yellow", + "color": "orange", "value": 100 }, { "color": "green", - "value": 1000 + "value": 500 } ] }, - "unit": "accounts/s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 19 + "color": { + "mode": "thresholds" + } + } }, - "id": 14, "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, "showThresholdLabels": false, "showThresholdMarkers": true, - "sizing": "auto" + "orientation": "horizontal" }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m])", - "instant": true, - "legendFormat": "rate", - "range": false, - "refId": "A" - } - ], - "title": "Accounts per Second", - "type": "gauge" + "legendFormat": "/s" + } + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "title": "Accounts ETA", + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 25 + }, "fieldConfig": { "defaults": { "unit": "s", "decimals": 0 } }, - "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 19 - }, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "center", - "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" @@ -1124,32 +906,31 @@ "expr": "(ethrex_sync_accounts_downloaded{instance=~\"$instance\"} - ethrex_sync_accounts_inserted{instance=~\"$instance\"}) / 
clamp_min(rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m]), 0.001) and rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m]) > 0", "legendFormat": "ETA" } - ], - "title": "Account ETA", - "type": "stat" + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "title": "Accounts Elapsed", + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 25 + }, "fieldConfig": { "defaults": { "unit": "s", "decimals": 0 } }, - "gridPos": { - "h": 5, - "w": 6, - "x": 18, - "y": 19 - }, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "center", - "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" @@ -1161,9 +942,45 @@ "expr": "time() - ethrex_sync_phase_start_timestamp{phase=\"ACCOUNT RANGES\",instance=~\"$instance\"} and ethrex_sync_phase_start_timestamp{phase=\"ACCOUNT RANGES\",instance=~\"$instance\"} > 0", "legendFormat": "Elapsed" } - ], - "title": "Account Elapsed", - "type": "stat" + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "type": "timeseries", + "title": "Accounts Rate Over Time", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 31 + }, + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "drawStyle": "line", + "fillOpacity": 10 + } + } + }, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "rate(ethrex_sync_accounts_downloaded{instance=~\"$instance\"}[5m])", + "legendFormat": "download/s" + }, + { + "expr": "rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m])", + "legendFormat": "insert/s" + } + ] }, { "collapsed": false, @@ -1171,10 +988,9 @@ "h": 1, "w": 24, "x": 0, - "y": 26 + "y": 36 }, - "id": 104, - "title": "Storage Ranges", + "title": "Storage", "type": "row" }, { @@ -1182,277 +998,224 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "gauge", + "title": "Storage Progress", + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 37 
+ }, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "min": 0, + "max": 100, + "unit": "percent", + "decimals": 1, "thresholds": { - "mode": "absolute", "steps": [ { - "color": "blue", + "color": "red", "value": null + }, + { + "color": "yellow", + "value": 33 + }, + { + "color": "green", + "value": 66 } ] + }, + "color": { + "mode": "thresholds" } + } + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "orientation": "horizontal" + }, + "targets": [ + { + "expr": "ethrex_sync_storage_inserted{instance=~\"$instance\"} / clamp_min(ethrex_sync_storage_downloaded{instance=~\"$instance\"}, 1) * 100", + "legendFormat": "progress" }, - "overrides": [] + { + "expr": "ethrex_sync_storage_downloaded{instance=~\"$instance\"}", + "legendFormat": "downloaded", + "hide": true + }, + { + "expr": "ethrex_sync_storage_inserted{instance=~\"$instance\"}", + "legendFormat": "inserted", + "hide": true + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "title": "", "gridPos": { - "h": 7, + "h": 2, "w": 6, "x": 0, - "y": 27 + "y": 41 + }, + "fieldConfig": { + "defaults": { + "decimals": 0 + } }, - "id": 17, "options": { - "colorMode": "value", + "colorMode": "none", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", + "justifyMode": "center", + "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, - "textMode": "value_and_name" + "orientation": "horizontal" }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "ethrex_sync_storage_downloaded{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "downloaded", - "range": false, - "refId": "A" + "legendFormat": "downloaded" }, { - "datasource": { - 
"type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "ethrex_sync_storage_inserted{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "inserted", - "range": false, - "refId": "B" + "legendFormat": "inserted" } - ], - "title": "Storage Leaves", - "type": "stat" + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "gauge", + "title": "Storage Rate", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 37 + }, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 50000, "min": 0, + "unit": "none", + "decimals": 0, "thresholds": { - "mode": "absolute", "steps": [ { "color": "red", "value": null }, { - "color": "yellow", + "color": "orange", "value": 500 }, { "color": "green", - "value": 5000 + "value": 2000 } ] }, - "unit": "leaves/s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 27 + "color": { + "mode": "thresholds" + } + } }, - "id": 18, "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, "showThresholdLabels": false, "showThresholdMarkers": true, - "sizing": "auto" + "orientation": "horizontal" }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "rate(ethrex_sync_storage_inserted{instance=~\"$instance\"}[5m])", - "instant": true, - "legendFormat": "rate", - "range": false, - "refId": "A" - } - ], - "title": "Storage per Second", - "type": "gauge" + "legendFormat": "/s" + } + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - 
"drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "leaves/s" - }, - "overrides": [] - }, + "type": "stat", + "title": "Storage ETA", "gridPos": { - "h": 7, + "h": 6, "w": 6, "x": 12, - "y": 27 + "y": 37 + }, + "fieldConfig": { + "defaults": { + "unit": "s", + "decimals": 0 + } }, - "id": 19, "options": { - "legend": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { "calcs": [ - "mean", "lastNotNull" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" + ] } }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "rate(ethrex_sync_storage_downloaded{instance=~\"$instance\"}[5m])", - "legendFormat": "downloaded/s", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "rate(ethrex_sync_storage_inserted{instance=~\"$instance\"}[5m])", - "legendFormat": "inserted/s", - "range": true, - "refId": "B" + "expr": "(ethrex_sync_storage_downloaded{instance=~\"$instance\"} - ethrex_sync_storage_inserted{instance=~\"$instance\"}) / clamp_min(rate(ethrex_sync_storage_inserted{instance=~\"$instance\"}[5m]), 0.001) and rate(ethrex_sync_storage_inserted{instance=~\"$instance\"}[5m]) > 0", + "legendFormat": "ETA" } - ], - "title": "Storage Rate Over Time", - 
"type": "timeseries" + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "title": "Storage Elapsed", + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 37 + }, "fieldConfig": { "defaults": { "unit": "s", "decimals": 0 } }, - "gridPos": { - "h": 5, - "w": 6, - "x": 18, - "y": 27 - }, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "center", - "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" @@ -1464,9 +1227,45 @@ "expr": "time() - ethrex_sync_phase_start_timestamp{phase=\"STORAGE RANGES\",instance=~\"$instance\"} and ethrex_sync_phase_start_timestamp{phase=\"STORAGE RANGES\",instance=~\"$instance\"} > 0", "legendFormat": "Elapsed" } - ], - "title": "Storage Elapsed", - "type": "stat" + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "type": "timeseries", + "title": "Storage Rate Over Time", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 43 + }, + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "drawStyle": "line", + "fillOpacity": 10 + } + } + }, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "rate(ethrex_sync_storage_downloaded{instance=~\"$instance\"}[5m])", + "legendFormat": "download/s" + }, + { + "expr": "rate(ethrex_sync_storage_inserted{instance=~\"$instance\"}[5m])", + "legendFormat": "insert/s" + } + ] }, { "collapsed": false, @@ -1474,182 +1273,201 @@ "h": 1, "w": 24, "x": 0, - "y": 34 + "y": 48 + }, + "title": "Healing", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "type": "stat", + "title": "Leaves Healed", + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 49 + }, + "fieldConfig": { + "defaults": { + "decimals": 0 + } + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } }, - "id": 105, - "title": 
"Healing", - "type": "row" + "targets": [ + { + "expr": "ethrex_sync_state_leaves_healed{instance=~\"$instance\"} + ethrex_sync_storage_leaves_healed{instance=~\"$instance\"}", + "legendFormat": "total" + } + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - } - }, - "overrides": [] - }, + "type": "stat", + "title": "", "gridPos": { - "h": 7, + "h": 2, "w": 6, "x": 0, - "y": 35 + "y": 53 + }, + "fieldConfig": { + "defaults": { + "decimals": 0 + } }, - "id": 20, "options": { - "colorMode": "value", + "colorMode": "none", "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", + "justifyMode": "center", + "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, - "textMode": "value_and_name" + "orientation": "horizontal" }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "ethrex_sync_state_leaves_healed{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "state", - "range": false, - "refId": "A" + "legendFormat": "state" }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "ethrex_sync_storage_leaves_healed{instance=~\"$instance\"}", - "instant": true, - "legendFormat": "storage", - "range": false, - "refId": "B" + "legendFormat": "storage" } - ], - "title": "Leaves Healed", - "type": "stat" + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "gauge", + "title": "Healing Rate", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 49 + }, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 10000, "min": 0, + "unit": "none", + 
"decimals": 0, "thresholds": { - "mode": "absolute", "steps": [ { "color": "red", "value": null }, { - "color": "yellow", + "color": "orange", "value": 100 }, { "color": "green", - "value": 1000 + "value": 500 } ] }, - "unit": "leaves/s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 35 + "color": { + "mode": "thresholds" + } + } }, - "id": 21, "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, "showThresholdLabels": false, "showThresholdMarkers": true, - "sizing": "auto" + "orientation": "horizontal" }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "rate(ethrex_sync_state_leaves_healed{instance=~\"$instance\"}[5m]) + rate(ethrex_sync_storage_leaves_healed{instance=~\"$instance\"}[5m])", - "instant": true, - "legendFormat": "rate", - "range": false, - "refId": "A" - } - ], - "title": "Healing per Second", - "type": "gauge" + "legendFormat": "/s" + } + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "title": "Healing Rate (avg)", + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 49 + }, "fieldConfig": { "defaults": { - "unit": "s", "decimals": 0 } }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "targets": [ + { + "expr": "rate(ethrex_sync_state_leaves_healed{instance=~\"$instance\"}[5m]) + rate(ethrex_sync_storage_leaves_healed{instance=~\"$instance\"}[5m])", + "legendFormat": "heals/s" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "type": "stat", + "title": "Healing Elapsed", "gridPos": { - "h": 5, + "h": 6, "w": 6, - "x": 12, - "y": 35 + "x": 18, + "y": 49 + }, + "fieldConfig": { + "defaults": { + "unit": 
"s", + "decimals": 0 + } }, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "center", - "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" @@ -1661,115 +1479,45 @@ "expr": "time() - ethrex_sync_phase_start_timestamp{phase=\"STATE HEALING\",instance=~\"$instance\"} and ethrex_sync_phase_start_timestamp{phase=\"STATE HEALING\",instance=~\"$instance\"} > 0", "legendFormat": "Elapsed" } - ], - "title": "Healing Elapsed", - "type": "stat" + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "timeseries", + "title": "Healing Rate Over Time", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 55 + }, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, + "unit": "none", "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "leaves/s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 35 + "fillOpacity": 10 + } + } }, - "id": 22, "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "desc" + "mode": "multi" } }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": 
"code", "expr": "rate(ethrex_sync_state_leaves_healed{instance=~\"$instance\"}[5m])", - "legendFormat": "state/s", - "range": true, - "refId": "A" + "legendFormat": "state heals/s" }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "rate(ethrex_sync_storage_leaves_healed{instance=~\"$instance\"}[5m])", - "legendFormat": "storage/s", - "range": true, - "refId": "B" + "legendFormat": "storage heals/s" } - ], - "title": "Healing Rate Over Time", - "type": "timeseries" + ] }, { "collapsed": false, @@ -1777,9 +1525,8 @@ "h": 1, "w": 24, "x": 0, - "y": 42 + "y": 60 }, - "id": 106, "title": "Bytecodes", "type": "row" }, @@ -1788,16 +1535,21 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "gauge", + "title": "Bytecodes Progress", + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 61 + }, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 100, "min": 0, + "max": 100, + "unit": "percent", + "decimals": 1, "thresholds": { - "mode": "absolute", "steps": [ { "color": "red", @@ -1805,154 +1557,166 @@ }, { "color": "yellow", - "value": 50 + "value": 33 }, { "color": "green", - "value": 90 + "value": 66 } ] }, - "unit": "percent" + "color": { + "mode": "thresholds" + } + } + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "orientation": "horizontal" + }, + "targets": [ + { + "expr": "ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"} / clamp_min(ethrex_sync_bytecodes_total{instance=~\"$instance\"}, 1) * 100", + "legendFormat": "progress" + }, + { + "expr": "ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}", + "legendFormat": "downloaded", + "hide": true }, - "overrides": [] + { + "expr": "ethrex_sync_bytecodes_total{instance=~\"$instance\"}", + "legendFormat": "total", + "hide": true + } + ] + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "title": "", "gridPos": { - "h": 7, + "h": 2, "w": 6, "x": 0, - "y": 43 + "y": 65 + }, + "fieldConfig": { + "defaults": { + "decimals": 0 + } }, - "id": 23, "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" + "orientation": "horizontal" }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"} / clamp_min(ethrex_sync_bytecodes_total{instance=~\"$instance\"}, 1) * 100", - "instant": true, - "legendFormat": "progress", - "range": false, - "refId": "A" - } - ], - "title": "Bytecode Progress", - "type": "gauge" + "expr": "ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}", + "legendFormat": "downloaded" + }, + { + "expr": "ethrex_sync_bytecodes_total{instance=~\"$instance\"}", + "legendFormat": "total" + } + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "gauge", + "title": "Bytecodes Rate", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 61 + }, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 5000, "min": 0, + "unit": "none", + "decimals": 0, "thresholds": { - "mode": "absolute", "steps": [ { "color": "red", "value": null }, { - "color": "yellow", + "color": "orange", "value": 50 }, { "color": "green", - "value": 500 + "value": 200 } ] }, - "unit": "codes/s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 43 + "color": { + "mode": "thresholds" + } + } }, - "id": 24, "options": { - "minVizHeight": 75, - "minVizWidth": 75, - 
"orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, "showThresholdLabels": false, "showThresholdMarkers": true, - "sizing": "auto" + "orientation": "horizontal" }, - "pluginVersion": "12.2.1", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", "expr": "rate(ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}[5m])", - "instant": true, - "legendFormat": "rate", - "range": false, - "refId": "A" - } - ], - "title": "Bytecodes per Second", - "type": "gauge" + "legendFormat": "/s" + } + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "title": "Bytecodes ETA", + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 61 + }, "fieldConfig": { "defaults": { "unit": "s", "decimals": 0 } }, - "gridPos": { - "h": 5, - "w": 6, - "x": 12, - "y": 43 - }, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "center", - "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" @@ -1964,32 +1728,31 @@ "expr": "(ethrex_sync_bytecodes_total{instance=~\"$instance\"} - ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}) / clamp_min(rate(ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}[5m]), 0.001) and rate(ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}[5m]) > 0", "legendFormat": "ETA" } - ], - "title": "Bytecode ETA", - "type": "stat" + ] }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "type": "stat", + "title": "Bytecodes Elapsed", + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 61 + }, "fieldConfig": { "defaults": { "unit": "s", "decimals": 0 } }, - "gridPos": { - "h": 5, - "w": 6, - "x": 18, - "y": 43 - }, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "center", - "textMode": "value_and_name", "reduceOptions": { "calcs": [ "lastNotNull" @@ -2001,9 +1764,41 @@ "expr": "time() - 
ethrex_sync_phase_start_timestamp{phase=\"BYTECODES\",instance=~\"$instance\"} and ethrex_sync_phase_start_timestamp{phase=\"BYTECODES\",instance=~\"$instance\"} > 0", "legendFormat": "Elapsed" } - ], - "title": "Bytecode Elapsed", - "type": "stat" + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "type": "timeseries", + "title": "Bytecodes Rate Over Time", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 67 + }, + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "drawStyle": "line", + "fillOpacity": 10 + } + } + }, + "options": { + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "rate(ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}[5m])", + "legendFormat": "codes/s" + } + ] } ], "refresh": "10s", From 4d29538b4b297297f5fbafc6fd9a8957becab314 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 20:37:10 -0300 Subject: [PATCH 27/31] feat(l1): add composite Sync Rates Overview chart to dashboard top section --- .../common_dashboards/snapsync_dashboard.json | 153 +++++++++++++----- 1 file changed, 113 insertions(+), 40 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json index 187ffd17289..431fac3f60d 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json @@ -231,13 +231,86 @@ } ] }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "type": "timeseries", + "title": "Sync Rates Overview", + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 5 + }, + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 2, + "showPoints": "never" + } + } + }, + "options": { + "tooltip": { + "mode": "multi", + 
"sort": "desc" + }, + "legend": { + "displayMode": "table", + "placement": "right", + "calcs": [ + "lastNotNull", + "max" + ] + } + }, + "targets": [ + { + "expr": "rate(ethrex_sync_headers_downloaded{instance=~\"$instance\"}[5m])", + "legendFormat": "headers/s" + }, + { + "expr": "rate(ethrex_sync_accounts_downloaded{instance=~\"$instance\"}[5m])", + "legendFormat": "accounts dl/s" + }, + { + "expr": "rate(ethrex_sync_accounts_inserted{instance=~\"$instance\"}[5m])", + "legendFormat": "accounts ins/s" + }, + { + "expr": "rate(ethrex_sync_storage_downloaded{instance=~\"$instance\"}[5m])", + "legendFormat": "storage dl/s" + }, + { + "expr": "rate(ethrex_sync_storage_inserted{instance=~\"$instance\"}[5m])", + "legendFormat": "storage ins/s" + }, + { + "expr": "rate(ethrex_sync_state_leaves_healed{instance=~\"$instance\"}[5m])", + "legendFormat": "state heals/s" + }, + { + "expr": "rate(ethrex_sync_storage_leaves_healed{instance=~\"$instance\"}[5m])", + "legendFormat": "storage heals/s" + }, + { + "expr": "rate(ethrex_sync_bytecodes_downloaded{instance=~\"$instance\"}[5m])", + "legendFormat": "bytecodes/s" + } + ] + }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 6 + "y": 12 }, "title": "Peer Health", "type": "row" @@ -253,7 +326,7 @@ "h": 4, "w": 6, "x": 0, - "y": 7 + "y": 13 }, "fieldConfig": { "defaults": { @@ -305,7 +378,7 @@ "h": 4, "w": 6, "x": 6, - "y": 7 + "y": 13 }, "fieldConfig": { "defaults": { @@ -359,7 +432,7 @@ "h": 4, "w": 6, "x": 12, - "y": 7 + "y": 13 }, "fieldConfig": { "defaults": { @@ -393,7 +466,7 @@ "h": 4, "w": 6, "x": 18, - "y": 7 + "y": 13 }, "fieldConfig": { "defaults": { @@ -422,7 +495,7 @@ "h": 1, "w": 24, "x": 0, - "y": 12 + "y": 18 }, "title": "Headers", "type": "row" @@ -438,7 +511,7 @@ "h": 4, "w": 6, "x": 0, - "y": 13 + "y": 19 }, "fieldConfig": { "defaults": { @@ -505,7 +578,7 @@ "h": 2, "w": 6, "x": 0, - "y": 17 + "y": 23 }, "fieldConfig": { "defaults": { @@ -546,7 +619,7 @@ "h": 6, "w": 6, "x": 6, - "y": 13 + 
"y": 19 }, "fieldConfig": { "defaults": { @@ -602,7 +675,7 @@ "h": 6, "w": 6, "x": 12, - "y": 13 + "y": 19 }, "fieldConfig": { "defaults": { @@ -638,7 +711,7 @@ "h": 6, "w": 6, "x": 18, - "y": 13 + "y": 19 }, "fieldConfig": { "defaults": { @@ -674,7 +747,7 @@ "h": 5, "w": 24, "x": 0, - "y": 19 + "y": 25 }, "fieldConfig": { "defaults": { @@ -703,7 +776,7 @@ "h": 1, "w": 24, "x": 0, - "y": 24 + "y": 30 }, "title": "Accounts", "type": "row" @@ -719,7 +792,7 @@ "h": 4, "w": 6, "x": 0, - "y": 25 + "y": 31 }, "fieldConfig": { "defaults": { @@ -786,7 +859,7 @@ "h": 2, "w": 6, "x": 0, - "y": 29 + "y": 35 }, "fieldConfig": { "defaults": { @@ -827,7 +900,7 @@ "h": 6, "w": 6, "x": 6, - "y": 25 + "y": 31 }, "fieldConfig": { "defaults": { @@ -883,7 +956,7 @@ "h": 6, "w": 6, "x": 12, - "y": 25 + "y": 31 }, "fieldConfig": { "defaults": { @@ -919,7 +992,7 @@ "h": 6, "w": 6, "x": 18, - "y": 25 + "y": 31 }, "fieldConfig": { "defaults": { @@ -955,7 +1028,7 @@ "h": 5, "w": 24, "x": 0, - "y": 31 + "y": 37 }, "fieldConfig": { "defaults": { @@ -988,7 +1061,7 @@ "h": 1, "w": 24, "x": 0, - "y": 36 + "y": 42 }, "title": "Storage", "type": "row" @@ -1004,7 +1077,7 @@ "h": 4, "w": 6, "x": 0, - "y": 37 + "y": 43 }, "fieldConfig": { "defaults": { @@ -1071,7 +1144,7 @@ "h": 2, "w": 6, "x": 0, - "y": 41 + "y": 47 }, "fieldConfig": { "defaults": { @@ -1112,7 +1185,7 @@ "h": 6, "w": 6, "x": 6, - "y": 37 + "y": 43 }, "fieldConfig": { "defaults": { @@ -1168,7 +1241,7 @@ "h": 6, "w": 6, "x": 12, - "y": 37 + "y": 43 }, "fieldConfig": { "defaults": { @@ -1204,7 +1277,7 @@ "h": 6, "w": 6, "x": 18, - "y": 37 + "y": 43 }, "fieldConfig": { "defaults": { @@ -1240,7 +1313,7 @@ "h": 5, "w": 24, "x": 0, - "y": 43 + "y": 49 }, "fieldConfig": { "defaults": { @@ -1273,7 +1346,7 @@ "h": 1, "w": 24, "x": 0, - "y": 48 + "y": 54 }, "title": "Healing", "type": "row" @@ -1289,7 +1362,7 @@ "h": 4, "w": 6, "x": 0, - "y": 49 + "y": 55 }, "fieldConfig": { "defaults": { @@ -1324,7 +1397,7 @@ "h": 2, "w": 6, "x": 0, - "y": 53 
+ "y": 59 }, "fieldConfig": { "defaults": { @@ -1365,7 +1438,7 @@ "h": 6, "w": 6, "x": 6, - "y": 49 + "y": 55 }, "fieldConfig": { "defaults": { @@ -1421,7 +1494,7 @@ "h": 6, "w": 6, "x": 12, - "y": 49 + "y": 55 }, "fieldConfig": { "defaults": { @@ -1456,7 +1529,7 @@ "h": 6, "w": 6, "x": 18, - "y": 49 + "y": 55 }, "fieldConfig": { "defaults": { @@ -1492,7 +1565,7 @@ "h": 5, "w": 24, "x": 0, - "y": 55 + "y": 61 }, "fieldConfig": { "defaults": { @@ -1525,7 +1598,7 @@ "h": 1, "w": 24, "x": 0, - "y": 60 + "y": 66 }, "title": "Bytecodes", "type": "row" @@ -1541,7 +1614,7 @@ "h": 4, "w": 6, "x": 0, - "y": 61 + "y": 67 }, "fieldConfig": { "defaults": { @@ -1608,7 +1681,7 @@ "h": 2, "w": 6, "x": 0, - "y": 65 + "y": 71 }, "fieldConfig": { "defaults": { @@ -1649,7 +1722,7 @@ "h": 6, "w": 6, "x": 6, - "y": 61 + "y": 67 }, "fieldConfig": { "defaults": { @@ -1705,7 +1778,7 @@ "h": 6, "w": 6, "x": 12, - "y": 61 + "y": 67 }, "fieldConfig": { "defaults": { @@ -1741,7 +1814,7 @@ "h": 6, "w": 6, "x": 18, - "y": 61 + "y": 67 }, "fieldConfig": { "defaults": { @@ -1777,7 +1850,7 @@ "h": 5, "w": 24, "x": 0, - "y": 67 + "y": 73 }, "fieldConfig": { "defaults": { From 54db019c934c9e38fd63eef6e21b852da4d2660f Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Tue, 14 Apr 2026 20:39:15 -0300 Subject: [PATCH 28/31] fix(l1): move composite chart legend to bottom for time axis alignment --- .../dashboards/common_dashboards/snapsync_dashboard.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json index 431fac3f60d..e0bda1ceb78 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/snapsync_dashboard.json @@ -261,8 +261,8 @@ "sort": "desc" }, "legend": { - "displayMode": "table", - "placement": "right", 
+ "displayMode": "list", + "placement": "bottom", "calcs": [ "lastNotNull", "max" From 866ea60fafa85b378bca8dede45be9ac9b5b234f Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Wed, 15 Apr 2026 11:17:54 -0300 Subject: [PATCH 29/31] fix(l1): distinguish watched-phase tracing from real degradation in monitor wording --- tooling/sync/docker_monitor.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tooling/sync/docker_monitor.py b/tooling/sync/docker_monitor.py index 196fe40dee0..9172baf8538 100644 --- a/tooling/sync/docker_monitor.py +++ b/tooling/sync/docker_monitor.py @@ -175,22 +175,27 @@ def _check_alert_conditions(self, inst, snapshot: dict) -> None: self.degraded[name] = True self.degraded_since[name] = now self.healthy_since[name] = 0 + # Distinguish intentional tracing (watched phase) from real issues + only_watched = all(r.startswith("watched_phase:") for r in reasons) event = { "timestamp": datetime.utcnow().isoformat() + "Z", "network": name, - "event_type": "degradation_start", + "event_type": "watched_phase_start" if only_watched else "degradation_start", "reasons": reasons, "eligible_peers": snapshot.get("peer_scores", {}).get("summary", {}).get("eligible_peers"), "phase": snapshot.get("sync_status", {}).get("current_phase"), } self.events.append(event) - print(f"⚠️ [{name}] Degradation detected: {', '.join(reasons)} — increasing poll frequency") + if only_watched: + print(f"👁️ [{name}] Watched phase active: {', '.join(reasons)} — increasing poll frequency") + else: + print(f"⚠️ [{name}] Degradation detected: {', '.join(reasons)} — increasing poll frequency") # Bump log level to TRACE for detailed peer comms if rpc_set_log_level(inst.rpc_url, LOG_LEVEL_DEGRADED): print(f"🔍 [{name}] Log level bumped to TRACE for peer diagnostics") else: print(f"⚠️ [{name}] Failed to bump log level") - # Dump snapshots on degradation + # Dump snapshots on degradation / watched phase self._dump_snapshots(name) else: # 
Healthy — check if we can exit degraded mode @@ -203,10 +208,10 @@ def _check_alert_conditions(self, inst, snapshot: dict) -> None: event = { "timestamp": datetime.utcnow().isoformat() + "Z", "network": name, - "event_type": "degradation_end", + "event_type": "monitoring_normal", } self.events.append(event) - print(f"✅ [{name}] Degradation resolved — resuming normal poll frequency") + print(f"✅ [{name}] Monitoring back to normal — resuming default poll frequency") # Restore log level to normal if rpc_set_log_level(inst.rpc_url, LOG_LEVEL_NORMAL): print(f"📝 [{name}] Log level restored to DEBUG") @@ -256,10 +261,10 @@ def _dump_snapshots(self, name: str, force: bool = False) -> None: print(f"⚠️ [{name}] Failed to dump snapshots: {e}") def format_degradation_events(self) -> str: - """Format degradation events for the summary.txt.""" + """Format monitor events for the summary.txt.""" if not self.events: return "" - lines = ["\n Degradation Events:"] + lines = ["\n Monitor Events:"] for ev in self.events: ts = ev["timestamp"] net = ev.get("network", "?") From 905e368a4dc611548328bb9f52e8613add1e4795 Mon Sep 17 00:00:00 2001 From: Esteban Dimitroff Hodi Date: Wed, 15 Apr 2026 11:33:10 -0300 Subject: [PATCH 30/31] fix(l1): log reason changes even when already in alert state (monitor visibility) --- tooling/sync/docker_monitor.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tooling/sync/docker_monitor.py b/tooling/sync/docker_monitor.py index 9172baf8538..586afe73605 100644 --- a/tooling/sync/docker_monitor.py +++ b/tooling/sync/docker_monitor.py @@ -96,6 +96,7 @@ def __init__(self, instances: list): self.events: list[dict] = [] # degradation events across all networks self.dumped_for_run: dict[str, bool] = {inst.name: False for inst in instances} self._last_progress: dict[str, Optional[str]] = {inst.name: None for inst in instances} + self.last_reasons: dict[str, frozenset] = {inst.name: frozenset() for inst in instances} def 
poll_interval(self, name: str) -> float: return DIAGNOSTICS_DEGRADED_INTERVAL if self.degraded[name] else DIAGNOSTICS_NORMAL_INTERVAL @@ -170,8 +171,11 @@ def _check_alert_conditions(self, inst, snapshot: dict) -> None: if phase in WATCHED_PHASES: reasons.append(f"watched_phase:{phase}") + reasons_set = frozenset(reasons) if reasons: - if not self.degraded[name]: + newly_degraded = not self.degraded[name] + reasons_changed = reasons_set != self.last_reasons.get(name, frozenset()) + if newly_degraded: self.degraded[name] = True self.degraded_since[name] = now self.healthy_since[name] = 0 @@ -195,6 +199,17 @@ def _check_alert_conditions(self, inst, snapshot: dict) -> None: print(f"🔍 [{name}] Log level bumped to TRACE for peer diagnostics") else: print(f"⚠️ [{name}] Failed to bump log level") + elif reasons_changed: + # Already degraded but reasons changed — record and log + event = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "network": name, + "event_type": "reasons_changed", + "reasons": reasons, + "phase": snapshot.get("sync_status", {}).get("current_phase"), + } + self.events.append(event) + print(f"🔄 [{name}] Monitor reasons changed: {', '.join(reasons)}") # Dump snapshots on degradation / watched phase self._dump_snapshots(name) else: @@ -217,6 +232,7 @@ def _check_alert_conditions(self, inst, snapshot: dict) -> None: print(f"📝 [{name}] Log level restored to DEBUG") else: print(f"⚠️ [{name}] Failed to restore log level") + self.last_reasons[name] = reasons_set def on_failure(self, inst, name: str) -> None: """Called when a network fails — do a final poll and dump snapshots.""" @@ -284,6 +300,7 @@ def reset(self) -> None: self.last_poll[name] = 0 self.dumped_for_run[name] = False self._last_progress[name] = None + self.last_reasons[name] = frozenset() self.events = [] From 5cf4d623e74ede035aa131f950ee3dbee27bafa7 Mon Sep 17 00:00:00 2001 From: avilagaston9 Date: Mon, 20 Apr 2026 12:09:17 -0300 Subject: [PATCH 31/31] Use char-based truncation in REPL 
table formatter and combine diagnostics write locks into a single acquisition on sync error path. The byte-index slice in formatter.rs panics on multi-byte UTF-8 (e.g. emoji in peer client names). Switching to chars().take() avoids this. The two separate write-lock calls in sync.rs allowed an RPC reader to observe phase=idle without the corresponding error event. A single lock scope makes the phase reset and error push atomic. --- crates/networking/p2p/sync.rs | 23 +++++++++++++---------- tooling/repl/src/formatter.rs | 3 ++- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 2d0fca5cbf3..6170a7e9849 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -161,22 +161,25 @@ impl Syncer { // If the error is irrecoverable, we exit ethrex Err(error) => { let recoverable = error.is_recoverable(); - self.diagnostics.write().await.current_phase = "idle".to_string(); debug!( error_type = %error, recoverable = recoverable, action = if recoverable { "retry" } else { "exit" }, "Sync cycle error classification" ); - self.diagnostics.write().await.push_error(SyncErrorEvent { - timestamp: std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_secs(), - error_type: format!("{:?}", std::mem::discriminant(&error)), - error_message: error.to_string(), - recoverable, - }); + { + let mut diag = self.diagnostics.write().await; + diag.current_phase = "idle".to_string(); + diag.push_error(SyncErrorEvent { + timestamp: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(), + error_type: format!("{:?}", std::mem::discriminant(&error)), + error_message: error.to_string(), + recoverable, + }); + } match recoverable { false => { // We exit the node, as we can't recover this error diff --git a/tooling/repl/src/formatter.rs b/tooling/repl/src/formatter.rs index e343a27be7e..db274706b3c 100644 
--- a/tooling/repl/src/formatter.rs +++ b/tooling/repl/src/formatter.rs @@ -220,7 +220,8 @@ fn format_object_array_table(arr: &[Value]) -> String { .zip(&col_widths) .map(|(val, w)| { let truncated = if val.len() > *w { - format!("{}…", &val[..*w - 1]) + let s: String = val.chars().take(*w - 1).collect(); + format!("{s}…") } else { val.clone() };