Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@
### Added

- [#6057](https://github.com/ChainSafe/forest/issues/6057) Added `--no-progress-timeout` to `forest-cli f3 ready` subcommand to exit when F3 is stuck for the given timeout.
- [#6000](https://github.com/ChainSafe/forest/pull/6000) Add support for the `Filecoin.StateDecodeParams` API methods to enable decoding actors method params.

- [#6000](https://github.com/ChainSafe/forest/pull/6000) Added support for the `Filecoin.StateDecodeParams` API methods to enable decoding actors method params.

- [#6079](https://github.com/ChainSafe/forest/pull/6079) Added Prometheus metrics `network_height` and `network_version`.

### Changed

Expand Down
36 changes: 31 additions & 5 deletions src/daemon/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ use crate::cli_shared::{
chain_path,
cli::{CliOpts, Config},
};
use crate::daemon::context::{AppContext, DbType};
use crate::daemon::db_util::import_chain_as_forest_car;
use crate::daemon::{
context::{AppContext, DbType},
db_util::import_chain_as_forest_car,
};
use crate::db::gc::SnapshotGarbageCollector;
use crate::db::ttl::EthMappingCollector;
use crate::libp2p::{Libp2pService, PeerManager};
Expand Down Expand Up @@ -202,10 +204,33 @@ async fn maybe_start_metrics_service(
);
let db_directory = crate::db::db_engine::db_root(&chain_path(config))?;
let db = ctx.db.writer().clone();
// Use `Weak` to avoid deadlocking GC.
let chain_store = Arc::downgrade(ctx.state_manager.chain_store());
let get_chain_head_height = move || {
chain_store
.upgrade()
.map(|cs| cs.heaviest_tipset().epoch())
.unwrap_or_default()
};
// Use `Weak` to avoid deadlocking GC.
let chain_store = Arc::downgrade(ctx.state_manager.chain_store());
let get_chain_head_network_version = move || {
if let Some(cs) = chain_store.upgrade() {
let epoch = cs.heaviest_tipset().epoch();
cs.chain_config.network_version(epoch)
} else {
NetworkVersion::V0
}
};
services.spawn(async {
crate::metrics::init_prometheus(prometheus_listener, db_directory, db)
.await
.context("Failed to initiate prometheus server")
crate::metrics::init_prometheus(
prometheus_listener,
db_directory,
db,
get_chain_head_network_version,
)
.await
.context("Failed to initiate prometheus server")
});

crate::metrics::default_registry().register_collector(Box::new(
Expand All @@ -215,6 +240,7 @@ async fn maybe_start_metrics_service(
.chain_store()
.genesis_block_header()
.timestamp,
get_chain_head_height,
),
));
}
Expand Down
6 changes: 5 additions & 1 deletion src/metrics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

pub mod db;

use crate::db::DBStatistics;
use crate::{db::DBStatistics, shim::version::NetworkVersion};
use axum::{Router, http::StatusCode, response::IntoResponse, routing::get};
use parking_lot::{RwLock, RwLockWriteGuard};
use prometheus_client::{
Expand Down Expand Up @@ -69,6 +69,7 @@ pub async fn init_prometheus<DB>(
prometheus_listener: TcpListener,
db_directory: PathBuf,
db: Arc<DB>,
get_chain_head_network_version: impl Fn() -> NetworkVersion + Send + Sync + 'static,
) -> anyhow::Result<()>
where
DB: DBStatistics + Send + Sync + 'static,
Expand All @@ -86,6 +87,9 @@ where
DEFAULT_REGISTRY
.write()
.register_collector(Box::new(crate::metrics::db::DBCollector::new(db_directory)));
DEFAULT_REGISTRY.write().register_collector(Box::new(
crate::networks::metrics::NetworkVersionCollector::new(get_chain_head_network_version),
));

// Create and configure the HTTP server
let app = Router::new()
Expand Down
110 changes: 90 additions & 20 deletions src/networks/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,47 +1,117 @@
// Copyright 2019-2025 ChainSafe Systems
// SPDX-License-Identifier: Apache-2.0, MIT

use prometheus_client::{collector::Collector, encoding::EncodeMetric, metrics::gauge::Gauge};
use educe::Educe;
use prometheus_client::{
collector::Collector,
encoding::{DescriptorEncoder, EncodeMetric},
metrics::gauge::Gauge,
};

use super::calculate_expected_epoch;
use crate::shim::{clock::ChainEpoch, version::NetworkVersion};

#[derive(Debug)]
pub struct NetworkHeightCollector {
#[derive(Educe)]
#[educe(Debug)]
pub struct NetworkHeightCollector<F>
where
F: Fn() -> ChainEpoch,
{
block_delay_secs: u32,
genesis_timestamp: u64,
network_height: Gauge,
#[educe(Debug(ignore))]
get_chain_head_height: F,
}

impl NetworkHeightCollector {
pub fn new(block_delay_secs: u32, genesis_timestamp: u64) -> Self {
impl<F> NetworkHeightCollector<F>
where
F: Fn() -> ChainEpoch,
{
pub fn new(block_delay_secs: u32, genesis_timestamp: u64, get_chain_head_height: F) -> Self {
Self {
block_delay_secs,
genesis_timestamp,
network_height: Gauge::default(),
get_chain_head_height,
}
}
}

impl Collector for NetworkHeightCollector {
impl<F> Collector for NetworkHeightCollector<F>
where
F: Fn() -> ChainEpoch + Send + Sync + 'static,
{
fn encode(
&self,
mut encoder: prometheus_client::encoding::DescriptorEncoder,
) -> Result<(), std::fmt::Error> {
{
let network_height: Gauge = Default::default();
let epoch = (self.get_chain_head_height)();
network_height.set(epoch);
let metric_encoder = encoder.encode_descriptor(
"network_height",
"The current network height",
None,
network_height.metric_type(),
)?;
network_height.encode(metric_encoder)?;
}
{
let expected_network_height: Gauge = Default::default();
let expected_epoch = calculate_expected_epoch(
chrono::Utc::now().timestamp() as u64,
self.genesis_timestamp,
self.block_delay_secs,
);
expected_network_height.set(expected_epoch);
let metric_encoder = encoder.encode_descriptor(
"expected_network_height",
"The expected network height based on the current time and the genesis block time",
None,
expected_network_height.metric_type(),
)?;
expected_network_height.encode(metric_encoder)?;
}
Ok(())
}
}

/// Collector that reports a [`NetworkVersion`] obtained from a caller-supplied closure.
///
/// The closure is stored as-is and is generic so that any `Fn() -> NetworkVersion` can be used.
#[derive(Educe)]
#[educe(Debug)]
pub struct NetworkVersionCollector<F>
where
F: Fn() -> NetworkVersion,
{
/// Closure that yields the network version to report; skipped by the derived `Debug` output.
#[educe(Debug(ignore))]
get_chain_head_network_version: F,
}

impl<F> NetworkVersionCollector<F>
where
F: Fn() -> NetworkVersion,
{
/// Creates a collector that stores `get_chain_head_network_version` for later use.
///
/// The closure is not invoked here; it is only moved into the returned value.
pub fn new(get_chain_head_network_version: F) -> Self {
Self {
get_chain_head_network_version,
}
}
}

impl<F> Collector for NetworkVersionCollector<F>
where
F: Fn() -> NetworkVersion + Send + Sync + 'static,
{
fn encode(&self, mut encoder: DescriptorEncoder) -> Result<(), std::fmt::Error> {
let network_version = (self.get_chain_head_network_version)();
let nv_gauge: Gauge = Default::default();
nv_gauge.set(u32::from(network_version) as _);
let metric_encoder = encoder.encode_descriptor(
"expected_network_height",
"The expected network height based on the current time and the genesis block time",
"network_version",
"Network version of the current chain head",
None,
self.network_height.metric_type(),
nv_gauge.metric_type(),
)?;

let expected_epoch = calculate_expected_epoch(
chrono::Utc::now().timestamp() as u64,
self.genesis_timestamp,
self.block_delay_secs,
);
self.network_height.set(expected_epoch);
self.network_height.encode(metric_encoder)?;

nv_gauge.encode(metric_encoder)?;
Ok(())
Comment on lines +122 to 160

@coderabbitai coderabbitai Bot Sep 16, 2025

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

🧩 Analysis chain

Operational note: avoid heavy work in actor_version closure

If get_chain_head_actor_version traverses state on each scrape, consider caching per-head and updating on head changes to avoid scrape latency spikes.


🏁 Script executed:

#!/bin/bash
# Verify where the closure is defined and whether it re-walks state each time
rg -nP 'get_chain_head_actor_version|get_actor_bundle_metadata|NetworkVersionCollector::new' -C3

Length of output: 4398


Avoid rebuilding the state on each Prometheus scrape — cache actor version and update on head changes.
get_chain_head_actor_version in src/daemon/mod.rs (lines 219–229) constructs a StateTree and reads actor bundle metadata on every invocation; replace with a cached value (e.g., AtomicU64/Arc) updated on chain-head changes so the metrics closure returns immediately and avoids scrape latency spikes.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Loading a state tree is not expensive

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hanabi1224, thank you for the clarification! I appreciate you sharing that domain knowledge about StateTree loading performance in the Forest codebase.

    (\   /)
   ( ^.^ )
  o_(")(")
   Learning!

✏️ Learnings added
Learnt from: hanabi1224
PR: ChainSafe/forest#6079
File: src/networks/metrics.rs:122-160
Timestamp: 2025-09-16T12:56:31.565Z
Learning: In the Forest codebase, loading a StateTree is not expensive and does not cause performance concerns for Prometheus metrics collection.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


🧠 Learnings used
Learnt from: hanabi1224
PR: ChainSafe/forest#5930
File: build.rs:64-77
Timestamp: 2025-08-13T09:43:20.301Z
Learning: hanabi1224 prefers hard compile-time errors in build scripts rather than runtime safeguards or collision detection, believing it's better to fail fast and fix root causes of issues like malformed snapshot names.

}
}
6 changes: 6 additions & 0 deletions src/shim/version.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ impl From<u32> for NetworkVersion {
}
}

/// Converts the shim [`NetworkVersion`] into its numeric `u32` form by delegating to the
/// conversion provided by the wrapped inner version type.
impl From<NetworkVersion> for u32 {
    fn from(value: NetworkVersion) -> Self {
        u32::from(value.0)
    }
}

impl From<NetworkVersion_v2> for NetworkVersion {
fn from(value: NetworkVersion_v2) -> Self {
NetworkVersion((value as u32).into())
Expand Down
Loading