-
Notifications
You must be signed in to change notification settings - Fork 1.2k
misc/metrics: Track # connected nodes supporting specific protocol #2734
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7bb55ff
9260aa2
6af02ac
d3c8155
cb07db9
06d8ef6
d8d1c19
01a0a1d
c308868
a156ba4
06b1109
4264061
70076f4
5cbd457
cab8fb4
f3adea7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,12 +18,18 @@ | |
| // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
| // DEALINGS IN THE SOFTWARE. | ||
|
|
||
| use libp2p_core::PeerId; | ||
| use prometheus_client::encoding::text::{EncodeMetric, Encoder}; | ||
| use prometheus_client::metrics::counter::Counter; | ||
| use prometheus_client::metrics::histogram::{exponential_buckets, Histogram}; | ||
| use prometheus_client::metrics::MetricType; | ||
| use prometheus_client::registry::Registry; | ||
| use std::collections::HashMap; | ||
| use std::iter; | ||
| use std::sync::{Arc, Mutex}; | ||
|
|
||
| pub struct Metrics { | ||
| protocols: Protocols, | ||
| error: Counter, | ||
| pushed: Counter, | ||
| received: Counter, | ||
|
|
@@ -36,6 +42,15 @@ impl Metrics { | |
| pub fn new(registry: &mut Registry) -> Self { | ||
| let sub_registry = registry.sub_registry_with_prefix("identify"); | ||
|
|
||
| let protocols = Protocols::default(); | ||
| sub_registry.register( | ||
| "protocols", | ||
| "Number of connected nodes supporting a specific protocol, with \ | ||
| \"unrecognized\" for each peer supporting one or more unrecognized \ | ||
| protocols", | ||
| Box::new(protocols.clone()), | ||
| ); | ||
|
|
||
| let error = Counter::default(); | ||
| sub_registry.register( | ||
| "errors", | ||
|
|
@@ -86,6 +101,7 @@ impl Metrics { | |
| ); | ||
|
|
||
| Self { | ||
| protocols, | ||
| error, | ||
| pushed, | ||
| received, | ||
|
|
@@ -96,27 +112,136 @@ impl Metrics { | |
| } | ||
| } | ||
|
|
||
| impl super::Recorder<libp2p_identify::IdentifyEvent> for super::Metrics { | ||
| impl super::Recorder<libp2p_identify::IdentifyEvent> for Metrics { | ||
| fn record(&self, event: &libp2p_identify::IdentifyEvent) { | ||
| match event { | ||
| libp2p_identify::IdentifyEvent::Error { .. } => { | ||
| self.identify.error.inc(); | ||
| self.error.inc(); | ||
| } | ||
| libp2p_identify::IdentifyEvent::Pushed { .. } => { | ||
| self.identify.pushed.inc(); | ||
| self.pushed.inc(); | ||
| } | ||
| libp2p_identify::IdentifyEvent::Received { info, .. } => { | ||
| self.identify.received.inc(); | ||
| self.identify | ||
| .received_info_protocols | ||
| libp2p_identify::IdentifyEvent::Received { peer_id, info, .. } => { | ||
| { | ||
| let mut protocols: Vec<String> = info | ||
| .protocols | ||
| .iter() | ||
| .filter(|p| { | ||
| let allowed_protocols: &[&[u8]] = &[ | ||
| #[cfg(feature = "dcutr")] | ||
| libp2p_dcutr::PROTOCOL_NAME, | ||
| // #[cfg(feature = "gossipsub")] | ||
| // #[cfg(not(target_os = "unknown"))] | ||
| // TODO: Add Gossipsub protocol name | ||
| libp2p_identify::PROTOCOL_NAME, | ||
| libp2p_identify::PUSH_PROTOCOL_NAME, | ||
| #[cfg(feature = "kad")] | ||
| libp2p_kad::protocol::DEFAULT_PROTO_NAME, | ||
| #[cfg(feature = "ping")] | ||
| libp2p_ping::PROTOCOL_NAME, | ||
| #[cfg(feature = "relay")] | ||
| libp2p_relay::v2::STOP_PROTOCOL_NAME, | ||
| #[cfg(feature = "relay")] | ||
| libp2p_relay::v2::HOP_PROTOCOL_NAME, | ||
| ]; | ||
|
|
||
| allowed_protocols.contains(&p.as_bytes()) | ||
| }) | ||
| .cloned() | ||
| .collect(); | ||
|
|
||
| // Signal via an additional label value that one or more | ||
| // protocols of the remote peer have not been recognized. | ||
| if protocols.len() < info.protocols.len() { | ||
| protocols.push("unrecognized".to_string()); | ||
| } | ||
|
|
||
| protocols.sort_unstable(); | ||
| protocols.dedup(); | ||
|
Comment on lines
+153
to
+160
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will flatten all Would it make sense to increment a separate counter for the number of unrecognized protocols?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We would then need to track the number of unrecognized protocols per peer. On each new identify message, we would have to subtract the previous amount and add the new amount to the counter. I am not saying this is impossible, just adds a bit of complexity which I am not sure is worth the benefit. In other words, is it relevant for a user to know how many, potentially duplicate, unrecognized protocols all connected peers offer as a sum? I was hoping for the addition to the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think this would only apply if we would track it as a gauge? A counter always increases anyway and you need to use functions like A spike in the unrecognized protocols rate could e.g. hint at a spam attack by a peer. I don't feel strongly about it though, happy to go either way :)
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The rate on a counter would only tell how often you saw a unrecognized protocol, not how many unrecognized protocols connected peers offer. One could correlate this with the interval
True, though I think we would want to prevent such attack before it happens. In addition, given that this attack would potentially bring down our Prometheus instance, who is going to alert us of the spam attack?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Assuming a stable set of peers, the rate of unrecognised protocols should be stable too? Identify pushes might throw a little spanner in here and there but overall, I'd expect it to be meaningful.
We are preventing the attack by only increasing a counter instead of creating a label per protocol. Surely prometheus can't be brought down just because a counter increases at a massive rate? All I am saying is that increasing the counter by the number of unrecognised protocols should allow us to observe the attempt of an attack assuming that a non-malicious node will have a stable number of unrecognised protocols whereas an attack would likely max out the size limit of the incoming message with as many protocols as possible and thus increase the counter by more than ordinary behaviour. It is not massively useful outside of this I think so happy to go either way.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Oh error in reasoning on my end here. You are right. Sorry about that @thomaseizinger.
I will go without it. |
||
|
|
||
| self.protocols.add(*peer_id, protocols); | ||
| } | ||
|
|
||
| self.received.inc(); | ||
| self.received_info_protocols | ||
| .observe(info.protocols.len() as f64); | ||
| self.identify | ||
| .received_info_listen_addrs | ||
| self.received_info_listen_addrs | ||
| .observe(info.listen_addrs.len() as f64); | ||
| } | ||
| libp2p_identify::IdentifyEvent::Sent { .. } => { | ||
| self.identify.sent.inc(); | ||
| self.sent.inc(); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| impl<TBvEv, THandleErr> super::Recorder<libp2p_swarm::SwarmEvent<TBvEv, THandleErr>> for Metrics { | ||
| fn record(&self, event: &libp2p_swarm::SwarmEvent<TBvEv, THandleErr>) { | ||
| if let libp2p_swarm::SwarmEvent::ConnectionClosed { | ||
| peer_id, | ||
| num_established, | ||
| .. | ||
| } = event | ||
| { | ||
| if *num_established == 0 { | ||
| self.protocols.remove(*peer_id) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #[derive(Default, Clone)] | ||
| struct Protocols { | ||
| peers: Arc<Mutex<HashMap<PeerId, Vec<String>>>>, | ||
| } | ||
|
|
||
| impl Protocols { | ||
| fn add(&self, peer: PeerId, protocols: Vec<String>) { | ||
| self.peers | ||
| .lock() | ||
| .expect("Lock not to be poisoned") | ||
| .insert(peer, protocols); | ||
| } | ||
|
|
||
| fn remove(&self, peer: PeerId) { | ||
| self.peers | ||
| .lock() | ||
| .expect("Lock not to be poisoned") | ||
| .remove(&peer); | ||
| } | ||
| } | ||
|
|
||
| impl EncodeMetric for Protocols { | ||
| fn encode(&self, mut encoder: Encoder) -> Result<(), std::io::Error> { | ||
| let count_by_protocol = self | ||
| .peers | ||
| .lock() | ||
| .expect("Lock not to be poisoned") | ||
| .iter() | ||
| .fold( | ||
| HashMap::<String, u64>::default(), | ||
| |mut acc, (_, protocols)| { | ||
| for protocol in protocols { | ||
| let count = acc.entry(protocol.to_string()).or_default(); | ||
| *count = *count + 1; | ||
| } | ||
| acc | ||
| }, | ||
| ); | ||
|
|
||
| for (protocol, count) in count_by_protocol { | ||
| encoder | ||
| .with_label_set(&("protocol", protocol)) | ||
| .no_suffix()? | ||
| .no_bucket()? | ||
| .encode_value(count)? | ||
| .no_exemplar()?; | ||
| } | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| fn metric_type(&self) -> MetricType { | ||
| MetricType::Gauge | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.