src/chain_sync/chain_follower.rs (4 additions, 2 deletions)
```diff
@@ -234,12 +234,14 @@ pub async fn chain_follower<DB: Blockstore + Sync + Send + 'static>(
 
     // Update the sync states
     {
-        let mut status_report_guard = sync_status.write();
-        status_report_guard.update(
+        let old_status_report = sync_status.read().clone();
+        let new_status_report = old_status_report.update(
             &state_manager,
             current_active_forks,
             stateless_mode,
         );
+
+        sync_status.write().clone_from(&new_status_report);
     }
 
     for task in task_vec {
```
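This change narrows how long the sync-status lock is held: instead of keeping the write lock for the full duration of `update()`, the new code snapshots the report under a short-lived read lock, computes the update with no lock held, and publishes the result with a second short write. A minimal sketch of that pattern, assuming a parking_lot-style `RwLock` (suggested by the guard-returning `read()`/`write()` calls) and stand-in types in place of `SyncStatusReport`:

```rust
use std::sync::Arc;

use parking_lot::RwLock;

// Stand-ins for SyncStatusReport and its update(); only the lock
// choreography is the point here.
#[derive(Clone, Default)]
struct Report {
    epoch: i64,
}

fn compute_update(old: &Report) -> Report {
    // Potentially slow work runs with NO lock held, so concurrent readers
    // of `shared` are never blocked behind it.
    Report { epoch: old.epoch + 1 }
}

fn refresh(shared: &Arc<RwLock<Report>>) {
    // Short read lock: snapshot the current value, then release.
    let old = shared.read().clone();
    let new = compute_update(&old);
    // Short write lock: publish the finished value.
    shared.write().clone_from(&new);
}

fn main() {
    let shared = Arc::new(RwLock::new(Report::default()));
    refresh(&shared);
    assert_eq!(shared.read().epoch, 1);
}
```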
src/chain_sync/sync_status.rs (24 additions, 17 deletions)
```diff
@@ -138,36 +138,36 @@ impl SyncStatusReport {
         }
     }
 
+    /// Updates the sync status report based on the current state of the node and network.
+    /// This does not modify the existing report but returns a new one with updated information.
     pub(crate) fn update<DB: Blockstore + Sync + Send + 'static>(
-        &mut self,
+        &self,
         state_manager: &StateManager<DB>,
-        current_active_forks: Vec<ForkSyncInfo>,
+        active_forks: Vec<ForkSyncInfo>,
         stateless_mode: bool,
-    ) {
+    ) -> Self {
         let heaviest = state_manager.chain_store().heaviest_tipset();
-        let current_chain_head_epoch = heaviest.epoch();
-        self.current_head_key = Some(heaviest.key().clone());
-        self.current_head_epoch = current_chain_head_epoch;
+        let current_head_epoch = heaviest.epoch();
+        let current_head_key = Some(heaviest.key().clone());
 
-        let now = Utc::now();
-        let now_ts = now.timestamp() as u64;
+        let last_updated = Utc::now();
+        let last_updated_ts = last_updated.timestamp() as u64;
         let seconds_per_epoch = state_manager.chain_config().block_delay_secs;
         let network_head_epoch = calculate_expected_epoch(
-            now_ts,
+            last_updated_ts,
             state_manager.chain_store().genesis_block_header().timestamp,
             seconds_per_epoch,
         );
 
-        self.network_head_epoch = network_head_epoch;
-        self.epochs_behind = network_head_epoch.saturating_sub(current_chain_head_epoch);
+        let epochs_behind = network_head_epoch.saturating_sub(current_head_epoch);
         log::trace!(
             "Sync status report: current head epoch: {}, network head epoch: {}, epochs behind: {}",
-            current_chain_head_epoch,
+            current_head_epoch,
             network_head_epoch,
-            self.epochs_behind
+            epochs_behind
         );
 
-        let time_diff = now_ts.saturating_sub(heaviest.min_timestamp());
+        let time_diff = last_updated_ts.saturating_sub(heaviest.min_timestamp());
         let status = match stateless_mode {
             true => NodeSyncStatus::Offline,
             false => {
@@ -179,9 +179,16 @@ impl SyncStatusReport {
             }
         };
 
-        self.status = status;
-        self.active_forks = current_active_forks;
-        self.last_updated = now;
+        Self {
+            node_start_time: self.node_start_time,
+            current_head_epoch,
+            current_head_key,
+            network_head_epoch,
+            epochs_behind,
+            status,
+            active_forks,
+            last_updated,
+        }
     }
 
     pub(crate) fn is_synced(&self) -> bool {
```
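With this refactor, `update` is a pure function: it borrows `&self`, derives everything locally, and returns a fresh `Self`, carrying only `node_start_time` over from the old report. The network head it compares against comes from `calculate_expected_epoch`. A hypothetical re-derivation of that arithmetic, inferred purely from the call site above, under the assumption that it counts whole `block_delay_secs` intervals since genesis (the function name, signature, and constants below are illustrative, not Forest's actual code):

```rust
// Assumed shape of the expected-epoch calculation: with one epoch every
// `seconds_per_epoch` seconds since genesis, the expected network head
// is the number of whole epochs elapsed so far.
fn expected_epoch(now_ts: u64, genesis_ts: u64, seconds_per_epoch: u32) -> u64 {
    now_ts.saturating_sub(genesis_ts) / u64::from(seconds_per_epoch)
}

fn main() {
    // Filecoin mainnet figures, stated here as assumptions for the
    // example: genesis at 2020-08-24 22:00:00 UTC, 30-second epochs.
    let genesis_ts = 1_598_306_400;
    let now_ts = genesis_ts + 95; // 95 seconds after genesis
    assert_eq!(expected_epoch(now_ts, genesis_ts, 30), 3); // 95 / 30 = 3
}
```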
src/health/endpoints.rs (18 additions, 4 deletions)
```diff
@@ -13,6 +13,11 @@ use crate::rpc::f3::F3IsRunning;
 /// Query parameter for verbose responses
 const VERBOSE_PARAM: &str = "verbose";
 
+/// Maximum duration to wait for a request to complete. This value is kind of arbitrary, we need
+/// the health probes to be snappy and unless running on a very slow machine, 2 seconds should be
+/// plenty of time for a localhost check.
+const MAX_REQ_DURATION_SECS: u64 = 2;
+
 /// Liveness probes determine whether or not an application running in a container is in a healthy state. The idea behind a liveness probe is that it fails for prolonged period of time, then the application should be restarted.
 /// In our case, we require:
 /// - The node is not in an error state (i.e., boot-looping)
@@ -136,9 +141,12 @@ async fn check_rpc_server_running(state: &ForestState, acc: &mut MessageAccumulator
     if !state.config.client.enable_rpc {
         acc.push_ok("rpc server disabled");
         true
-    } else if tokio::net::TcpStream::connect(state.config.client.rpc_address)
-        .await
-        .is_ok()
+    } else if tokio::time::timeout(
+        std::time::Duration::from_secs(MAX_REQ_DURATION_SECS),
+        tokio::net::TcpStream::connect(state.config.client.rpc_address),
+    )
+    .await
+    .is_ok_and(|connected| connected.is_ok())
     {
         acc.push_ok("rpc server running");
         true
@@ -163,7 +171,13 @@ async fn check_f3_running(state: &ForestState, acc: &mut MessageAccumulator) -> bool {
     if !crate::f3::is_sidecar_ffi_enabled(&state.chain_config) {
         acc.push_ok("f3 disabled");
         true
-    } else if F3IsRunning::is_f3_running().await.unwrap_or_default() {
+    } else if tokio::time::timeout(
+        std::time::Duration::from_secs(MAX_REQ_DURATION_SECS),
+        F3IsRunning::is_f3_running(),
+    )
+    .await
+    .is_ok_and(|is_running| is_running.unwrap_or_default())
+    {
         acc.push_ok("f3 running");
         true
     } else if crate::f3::get_f3_sidecar_params(&state.chain_config).bootstrap_epoch
```
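Both probes now bound their checks with `tokio::time::timeout`, which wraps the inner future's output in a `Result<_, Elapsed>`. Since the TCP connect itself yields an `io::Result<TcpStream>`, the probe ends up with a nested result, and `is_ok_and` collapses it to a single bool that is false on either a timeout or a failed check. A self-contained sketch of the same pattern (the address is a placeholder, not Forest's RPC default; assumes tokio with its "full" feature set):

```rust
use std::time::Duration;

#[tokio::main]
async fn main() {
    // timeout(...) yields Result<io::Result<TcpStream>, Elapsed>:
    // the outer Err means the 2-second budget elapsed, the inner Err
    // means the connection itself failed. Both count as unhealthy.
    let reachable = tokio::time::timeout(
        Duration::from_secs(2),
        tokio::net::TcpStream::connect("127.0.0.1:12345"),
    )
    .await
    .is_ok_and(|connected| connected.is_ok());

    println!("rpc reachable: {reachable}");
}
```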
src/libp2p/peer_manager.rs (8 additions, 6 deletions)
```diff
@@ -168,13 +168,15 @@ impl PeerManager {
     /// duration.
     pub fn log_failure(&self, peer: &PeerId, dur: Duration) {
         trace!("logging failure for {peer}");
-        let mut peers = self.peers.write();
-        if !peers.bad_peers.contains(peer) {
-            metrics::PEER_FAILURE_TOTAL.inc();
-            let peer_stats = peers.full_peers.entry(*peer).or_default();
-            peer_stats.failures += 1;
-            log_time(peer_stats, dur);
+        if self.peers.read().bad_peers.contains(peer) {
+            return;
         }
+
+        metrics::PEER_FAILURE_TOTAL.inc();
+        let mut peers = self.peers.write();
+        let peer_stats = peers.full_peers.entry(*peer).or_default();
+        peer_stats.failures += 1;
+        log_time(peer_stats, dur);
     }
 
     /// Removes a peer from the set and returns true if the value was present
```