-
Notifications
You must be signed in to change notification settings - Fork 196
feat: add prometheus metrics network_version and actor_version
#6079
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
ed05df0
d1c8129
7eda899
3537f0d
c49a095
4a5eb4f
3b5bd97
1bea7fe
2be478f
41a80d7
9965521
821be7d
5e3c574
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,47 +1,162 @@ | ||
| // Copyright 2019-2025 ChainSafe Systems | ||
| // SPDX-License-Identifier: Apache-2.0, MIT | ||
|
|
||
| use prometheus_client::{collector::Collector, encoding::EncodeMetric, metrics::gauge::Gauge}; | ||
| use std::sync::Arc; | ||
|
|
||
| use educe::Educe; | ||
| use prometheus_client::{ | ||
| collector::Collector, | ||
| encoding::{DescriptorEncoder, EncodeMetric}, | ||
| metrics::gauge::Gauge, | ||
| }; | ||
|
|
||
| use super::calculate_expected_epoch; | ||
| use crate::{networks::ChainConfig, shim::clock::ChainEpoch}; | ||
|
|
||
| #[derive(Debug)] | ||
| pub struct NetworkHeightCollector { | ||
| #[derive(Educe)] | ||
| #[educe(Debug)] | ||
| pub struct NetworkHeightCollector<F> | ||
| where | ||
| F: Fn() -> ChainEpoch, | ||
| { | ||
| block_delay_secs: u32, | ||
| genesis_timestamp: u64, | ||
| network_height: Gauge, | ||
| #[educe(Debug(ignore))] | ||
| get_chain_head_height: Arc<F>, | ||
| } | ||
|
|
||
| impl NetworkHeightCollector { | ||
| pub fn new(block_delay_secs: u32, genesis_timestamp: u64) -> Self { | ||
| impl<F> NetworkHeightCollector<F> | ||
| where | ||
| F: Fn() -> ChainEpoch, | ||
| { | ||
| pub fn new( | ||
| block_delay_secs: u32, | ||
| genesis_timestamp: u64, | ||
| get_chain_head_height: Arc<F>, | ||
| ) -> Self { | ||
| Self { | ||
| block_delay_secs, | ||
| genesis_timestamp, | ||
| network_height: Gauge::default(), | ||
| get_chain_head_height, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| impl Collector for NetworkHeightCollector { | ||
| impl<F> Collector for NetworkHeightCollector<F> | ||
| where | ||
| F: Fn() -> ChainEpoch + Send + Sync + 'static, | ||
| { | ||
| fn encode( | ||
| &self, | ||
| mut encoder: prometheus_client::encoding::DescriptorEncoder, | ||
| ) -> Result<(), std::fmt::Error> { | ||
| let metric_encoder = encoder.encode_descriptor( | ||
| "expected_network_height", | ||
| "The expected network height based on the current time and the genesis block time", | ||
| None, | ||
| self.network_height.metric_type(), | ||
| )?; | ||
|
|
||
| let expected_epoch = calculate_expected_epoch( | ||
| chrono::Utc::now().timestamp() as u64, | ||
| self.genesis_timestamp, | ||
| self.block_delay_secs, | ||
| ); | ||
| self.network_height.set(expected_epoch); | ||
| self.network_height.encode(metric_encoder)?; | ||
| { | ||
| let network_height: Gauge = Default::default(); | ||
| let epoch = (self.get_chain_head_height)(); | ||
| network_height.set(epoch); | ||
| let metric_encoder = encoder.encode_descriptor( | ||
| "network_height", | ||
| "The current network height", | ||
| None, | ||
| network_height.metric_type(), | ||
| )?; | ||
| network_height.encode(metric_encoder)?; | ||
| } | ||
| { | ||
| let expected_network_height: Gauge = Default::default(); | ||
| let expected_epoch = calculate_expected_epoch( | ||
| chrono::Utc::now().timestamp() as u64, | ||
| self.genesis_timestamp, | ||
| self.block_delay_secs, | ||
| ); | ||
| expected_network_height.set(expected_epoch); | ||
| let metric_encoder = encoder.encode_descriptor( | ||
| "expected_network_height", | ||
| "The expected network height based on the current time and the genesis block time", | ||
| None, | ||
| expected_network_height.metric_type(), | ||
| )?; | ||
| expected_network_height.encode(metric_encoder)?; | ||
| } | ||
| Ok(()) | ||
| } | ||
| } | ||
|
|
||
| #[derive(Educe)] | ||
| #[educe(Debug)] | ||
| pub struct NetworkVersionCollector<F1, F2> | ||
| where | ||
| F1: Fn() -> ChainEpoch, | ||
| F2: Fn() -> u64, | ||
| { | ||
| chain_config: Arc<ChainConfig>, | ||
| #[educe(Debug(ignore))] | ||
| get_chain_head_height: Arc<F1>, | ||
| #[educe(Debug(ignore))] | ||
| get_chain_head_actor_version: Arc<F2>, | ||
| } | ||
|
|
||
| impl<F1, F2> NetworkVersionCollector<F1, F2> | ||
| where | ||
| F1: Fn() -> ChainEpoch, | ||
| F2: Fn() -> u64, | ||
| { | ||
| pub fn new( | ||
| chain_config: Arc<ChainConfig>, | ||
| get_chain_head_height: Arc<F1>, | ||
| get_chain_head_actor_version: Arc<F2>, | ||
| ) -> Self { | ||
| Self { | ||
| chain_config, | ||
| get_chain_head_height, | ||
| get_chain_head_actor_version, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| impl<F1, F2> Collector for NetworkVersionCollector<F1, F2> | ||
| where | ||
| F1: Fn() -> ChainEpoch + Send + Sync + 'static, | ||
| F2: Fn() -> u64 + Send + Sync + 'static, | ||
| { | ||
| fn encode(&self, mut encoder: DescriptorEncoder) -> Result<(), std::fmt::Error> { | ||
| let epoch = (self.get_chain_head_height)(); | ||
| { | ||
| let network_version = self.chain_config.network_version(epoch); | ||
| let nv_gauge: Gauge = Default::default(); | ||
| nv_gauge.set(u32::from(network_version) as _); | ||
| let metric_encoder = encoder.encode_descriptor( | ||
| "network_version", | ||
| "Network version of the current chain head", | ||
| None, | ||
| nv_gauge.metric_type(), | ||
| )?; | ||
| nv_gauge.encode(metric_encoder)?; | ||
| } | ||
| { | ||
| let network_version_revision = self.chain_config.network_version_revision(epoch); | ||
| let nv_gauge: Gauge = Default::default(); | ||
| nv_gauge.set(network_version_revision); | ||
| let metric_encoder = encoder.encode_descriptor( | ||
| "network_version_revision", | ||
| "Network version revision of the current chain head", | ||
| None, | ||
| nv_gauge.metric_type(), | ||
| )?; | ||
| nv_gauge.encode(metric_encoder)?; | ||
| } | ||
| { | ||
| let actor_version = (self.get_chain_head_actor_version)(); | ||
| let av_gauge: Gauge = Default::default(); | ||
| av_gauge.set(actor_version as _); | ||
| let metric_encoder = encoder.encode_descriptor( | ||
| "actor_version", | ||
| "Actor version of the current chain head", | ||
| None, | ||
| av_gauge.metric_type(), | ||
| )?; | ||
| av_gauge.encode(metric_encoder)?; | ||
| } | ||
| Ok(()) | ||
|
Comment on lines
+122
to
160
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
Operational note: avoid heavy work in the `actor_version` closure. If `get_chain_head_actor_version` traverses state on each scrape, consider caching per-head and updating on head changes to avoid scrape latency spikes.
🏁 Script executed:
#!/bin/bash
# Verify where the closure is defined and whether it re-walks state each time
rg -nP 'get_chain_head_actor_version|get_actor_bundle_metadata|NetworkVersionCollector::new' -C3
Length of output: 4398
Avoid rebuilding the state on each Prometheus scrape — cache actor version and update on head changes.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Loading a state tree is not expensive.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
@hanabi1224, thank you for the clarification! I appreciate you sharing that domain knowledge about `StateTree` loading performance in the Forest codebase.
✏️ Learnings added
🧠 Learnings used |
||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It'd be good to explain the difference between those metrics in the docs (dedicated section).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just realized `network_height` is a duplicate of `head_epoch`, removed to avoid confusion.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@hanabi1224 It's not exactly a duplicate, is it? I mean, the values differ, and from what I saw, `head_epoch` is not available just when the node starts.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@LesnyRumcajs The metrics are the same, and the difference is due to how the value is updated. I can update `head_epoch` to use the collector approach, which is more reliable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.