Skip to content

Commit df2a886

Browse files
committed
Revert libp2p metrics (#4870) (#5265)
Squashed commit of the following: commit 28fe6be Author: Jimmy Chen <[email protected]> Date: Tue Feb 20 10:04:56 2024 +1100 Revert "improve libp2p connected peer metrics (#4870)" This reverts commit 0c3fef5.
1 parent 1b2de0d commit df2a886

File tree

7 files changed

+100
-65
lines changed

7 files changed

+100
-65
lines changed

beacon_node/http_api/src/lib.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ use std::path::PathBuf;
6868
use std::pin::Pin;
6969
use std::sync::Arc;
7070
use sysinfo::{System, SystemExt};
71-
use system_health::{observe_nat, observe_system_health_bn};
71+
use system_health::observe_system_health_bn;
7272
use task_spawner::{Priority, TaskSpawner};
7373
use tokio::sync::{
7474
mpsc::{Sender, UnboundedSender},
@@ -3965,7 +3965,13 @@ pub fn serve<T: BeaconChainTypes>(
39653965
.and(warp::path::end())
39663966
.then(|task_spawner: TaskSpawner<T::EthSpec>| {
39673967
task_spawner.blocking_json_task(Priority::P1, move || {
3968-
Ok(api_types::GenericResponse::from(observe_nat()))
3968+
Ok(api_types::GenericResponse::from(
3969+
lighthouse_network::metrics::NAT_OPEN
3970+
.as_ref()
3971+
.map(|v| v.get())
3972+
.unwrap_or(0)
3973+
!= 0,
3974+
))
39693975
})
39703976
});
39713977

beacon_node/lighthouse_network/src/discovery/mod.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,13 +1003,11 @@ impl<TSpec: EthSpec> NetworkBehaviour for Discovery<TSpec> {
10031003
}
10041004
discv5::Event::SocketUpdated(socket_addr) => {
10051005
info!(self.log, "Address updated"; "ip" => %socket_addr.ip(), "udp_port" => %socket_addr.port());
1006-
// We have SOCKET_UPDATED messages. This occurs when discovery has a majority of
1007-
// users reporting an external port and our ENR gets updated.
1008-
// Which means we are able to do NAT traversal.
1009-
metrics::set_gauge_vec(&metrics::NAT_OPEN, &["discv5"], 1);
1010-
1006+
metrics::inc_counter(&metrics::ADDRESS_UPDATE_COUNT);
1007+
metrics::check_nat();
10111008
// Discv5 will have updated our local ENR. We save the updated version
10121009
// to disk.
1010+
10131011
if (self.update_ports.tcp4 && socket_addr.is_ipv4())
10141012
|| (self.update_ports.tcp6 && socket_addr.is_ipv6())
10151013
{

beacon_node/lighthouse_network/src/metrics.rs

Lines changed: 57 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,29 @@
11
pub use lighthouse_metrics::*;
22

33
lazy_static! {
4-
pub static ref NAT_OPEN: Result<IntGaugeVec> = try_create_int_gauge_vec(
4+
pub static ref NAT_OPEN: Result<IntCounter> = try_create_int_counter(
55
"nat_open",
6-
"An estimate indicating if the local node is reachable from external nodes",
7-
&["protocol"]
6+
"An estimate indicating if the local node is exposed to the internet."
87
);
98
pub static ref ADDRESS_UPDATE_COUNT: Result<IntCounter> = try_create_int_counter(
109
"libp2p_address_update_total",
1110
"Count of libp2p socked updated events (when our view of our IP address has changed)"
1211
);
13-
pub static ref PEERS_CONNECTED: Result<IntGaugeVec> =
14-
try_create_int_gauge_vec("libp2p_peers", "Count of libp2p peers currently connected", &["direction", "transport"]);
12+
pub static ref PEERS_CONNECTED: Result<IntGauge> = try_create_int_gauge(
13+
"libp2p_peers",
14+
"Count of libp2p peers currently connected"
15+
);
16+
17+
pub static ref TCP_PEERS_CONNECTED: Result<IntGauge> = try_create_int_gauge(
18+
"libp2p_tcp_peers",
19+
"Count of libp2p peers currently connected via TCP"
20+
);
21+
22+
pub static ref QUIC_PEERS_CONNECTED: Result<IntGauge> = try_create_int_gauge(
23+
"libp2p_quic_peers",
24+
"Count of libp2p peers currently connected via QUIC"
25+
);
26+
1527
pub static ref PEER_CONNECT_EVENT_COUNT: Result<IntCounter> = try_create_int_counter(
1628
"libp2p_peer_connect_event_total",
1729
"Count of libp2p peer connect events (not the current number of connected peers)"
@@ -20,10 +32,13 @@ lazy_static! {
2032
"libp2p_peer_disconnect_event_total",
2133
"Count of libp2p peer disconnect events"
2234
);
23-
pub static ref DISCOVERY_BYTES: Result<IntGaugeVec> = try_create_int_gauge_vec(
24-
"discovery_bytes",
25-
"The number of bytes sent and received in discovery",
26-
&["direction"]
35+
pub static ref DISCOVERY_SENT_BYTES: Result<IntGauge> = try_create_int_gauge(
36+
"discovery_sent_bytes",
37+
"The number of bytes sent in discovery"
38+
);
39+
pub static ref DISCOVERY_RECV_BYTES: Result<IntGauge> = try_create_int_gauge(
40+
"discovery_recv_bytes",
41+
"The number of bytes received in discovery"
2742
);
2843
pub static ref DISCOVERY_QUEUE: Result<IntGauge> = try_create_int_gauge(
2944
"discovery_queue_size",
@@ -120,6 +135,17 @@ lazy_static! {
120135
&["type"]
121136
);
122137

138+
/*
139+
* Inbound/Outbound peers
140+
*/
141+
/// The number of peers that dialed us.
142+
pub static ref NETWORK_INBOUND_PEERS: Result<IntGauge> =
143+
try_create_int_gauge("network_inbound_peers","The number of peers that are currently connected that have dialed us.");
144+
145+
/// The number of peers that we dialed us.
146+
pub static ref NETWORK_OUTBOUND_PEERS: Result<IntGauge> =
147+
try_create_int_gauge("network_outbound_peers","The number of peers that are currently connected that we dialed.");
148+
123149
/*
124150
* Peer Reporting
125151
*/
@@ -130,11 +156,31 @@ lazy_static! {
130156
);
131157
}
132158

159+
/// Checks if we consider the NAT open.
160+
///
161+
/// Conditions for an open NAT:
162+
/// 1. We have 1 or more SOCKET_UPDATED messages. This occurs when discovery has a majority of
163+
/// users reporting an external port and our ENR gets updated.
164+
/// 2. We have 0 SOCKET_UPDATED messages (can be true if the port was correct on boot), then we
165+
/// rely on whether we have any inbound messages. If we have no socket update messages, but
166+
/// manage to get at least one inbound peer, we are exposed correctly.
167+
pub fn check_nat() {
168+
// NAT is already deemed open.
169+
if NAT_OPEN.as_ref().map(|v| v.get()).unwrap_or(0) != 0 {
170+
return;
171+
}
172+
if ADDRESS_UPDATE_COUNT.as_ref().map(|v| v.get()).unwrap_or(0) != 0
173+
|| NETWORK_INBOUND_PEERS.as_ref().map(|v| v.get()).unwrap_or(0) != 0_i64
174+
{
175+
inc_counter(&NAT_OPEN);
176+
}
177+
}
178+
133179
pub fn scrape_discovery_metrics() {
134180
let metrics =
135181
discv5::metrics::Metrics::from(discv5::Discv5::<discv5::DefaultProtocolId>::raw_metrics());
136182
set_float_gauge(&DISCOVERY_REQS, metrics.unsolicited_requests_per_second);
137183
set_gauge(&DISCOVERY_SESSIONS, metrics.active_sessions as i64);
138-
set_gauge_vec(&DISCOVERY_BYTES, &["inbound"], metrics.bytes_recv as i64);
139-
set_gauge_vec(&DISCOVERY_BYTES, &["outbound"], metrics.bytes_sent as i64);
184+
set_gauge(&DISCOVERY_SENT_BYTES, metrics.bytes_sent as i64);
185+
set_gauge(&DISCOVERY_RECV_BYTES, metrics.bytes_recv as i64);
140186
}

beacon_node/lighthouse_network/src/peer_manager/mod.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,14 +726,29 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
726726
return;
727727
}
728728

729+
let mut connected_peer_count = 0;
730+
let mut inbound_connected_peers = 0;
731+
let mut outbound_connected_peers = 0;
729732
let mut clients_per_peer = HashMap::new();
730733

731734
for (_peer, peer_info) in self.network_globals.peers.read().connected_peers() {
735+
connected_peer_count += 1;
736+
if let PeerConnectionStatus::Connected { n_in, .. } = peer_info.connection_status() {
737+
if *n_in > 0 {
738+
inbound_connected_peers += 1;
739+
} else {
740+
outbound_connected_peers += 1;
741+
}
742+
}
732743
*clients_per_peer
733744
.entry(peer_info.client().kind.to_string())
734745
.or_default() += 1;
735746
}
736747

748+
metrics::set_gauge(&metrics::PEERS_CONNECTED, connected_peer_count);
749+
metrics::set_gauge(&metrics::NETWORK_INBOUND_PEERS, inbound_connected_peers);
750+
metrics::set_gauge(&metrics::NETWORK_OUTBOUND_PEERS, outbound_connected_peers);
751+
737752
for client_kind in ClientKind::iter() {
738753
let value = clients_per_peer.get(&client_kind.to_string()).unwrap_or(&0);
739754
metrics::set_gauge_vec(
@@ -838,8 +853,11 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
838853
// start a ping and status timer for the peer
839854
self.status_peers.insert(*peer_id);
840855

856+
let connected_peers = self.network_globals.connected_peers() as i64;
857+
841858
// increment prometheus metrics
842859
metrics::inc_counter(&metrics::PEER_CONNECT_EVENT_COUNT);
860+
metrics::set_gauge(&metrics::PEERS_CONNECTED, connected_peers);
843861

844862
true
845863
}

beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,6 @@ impl<TSpec: EthSpec> NetworkBehaviour for PeerManager<TSpec> {
154154
self.on_dial_failure(peer_id);
155155
}
156156
FromSwarm::ExternalAddrConfirmed(_) => {
157-
// We have an external address confirmed, means we are able to do NAT traversal.
158-
metrics::set_gauge_vec(&metrics::NAT_OPEN, &["libp2p"], 1);
159157
// TODO: we likely want to check this against our assumed external tcp
160158
// address
161159
}
@@ -245,25 +243,25 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
245243
self.events.push(PeerManagerEvent::MetaData(peer_id));
246244
}
247245

246+
// Check NAT if metrics are enabled
247+
if self.network_globals.local_enr.read().udp4().is_some() {
248+
metrics::check_nat();
249+
}
250+
248251
// increment prometheus metrics
249252
if self.metrics_enabled {
250253
let remote_addr = endpoint.get_remote_address();
251-
let direction = if endpoint.is_dialer() {
252-
"outbound"
253-
} else {
254-
"inbound"
255-
};
256254
match remote_addr.iter().find(|proto| {
257255
matches!(
258256
proto,
259257
multiaddr::Protocol::QuicV1 | multiaddr::Protocol::Tcp(_)
260258
)
261259
}) {
262260
Some(multiaddr::Protocol::QuicV1) => {
263-
metrics::inc_gauge_vec(&metrics::PEERS_CONNECTED, &[direction, "quic"]);
261+
metrics::inc_gauge(&metrics::QUIC_PEERS_CONNECTED);
264262
}
265263
Some(multiaddr::Protocol::Tcp(_)) => {
266-
metrics::inc_gauge_vec(&metrics::PEERS_CONNECTED, &[direction, "tcp"]);
264+
metrics::inc_gauge(&metrics::TCP_PEERS_CONNECTED);
267265
}
268266
Some(_) => unreachable!(),
269267
None => {
@@ -341,23 +339,17 @@ impl<TSpec: EthSpec> PeerManager<TSpec> {
341339
let remote_addr = endpoint.get_remote_address();
342340
// Update the prometheus metrics
343341
if self.metrics_enabled {
344-
let direction = if endpoint.is_dialer() {
345-
"outbound"
346-
} else {
347-
"inbound"
348-
};
349-
350342
match remote_addr.iter().find(|proto| {
351343
matches!(
352344
proto,
353345
multiaddr::Protocol::QuicV1 | multiaddr::Protocol::Tcp(_)
354346
)
355347
}) {
356348
Some(multiaddr::Protocol::QuicV1) => {
357-
metrics::dec_gauge_vec(&metrics::PEERS_CONNECTED, &[direction, "quic"]);
349+
metrics::dec_gauge(&metrics::QUIC_PEERS_CONNECTED);
358350
}
359351
Some(multiaddr::Protocol::Tcp(_)) => {
360-
metrics::dec_gauge_vec(&metrics::PEERS_CONNECTED, &[direction, "tcp"]);
352+
metrics::dec_gauge(&metrics::TCP_PEERS_CONNECTED);
361353
}
362354
// If it's an unknown protocol we already logged when connection was established.
363355
_ => {}

common/lighthouse_metrics/src/lib.rs

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -244,16 +244,6 @@ pub fn inc_counter_vec(int_counter_vec: &Result<IntCounterVec>, name: &[&str]) {
244244
}
245245
}
246246

247-
/// Sets the `int_counter_vec` with the given `name` to the `amount`,
248-
/// should only be called with `ammount`s equal or above the current value
249-
/// as per Prometheus spec, the `counter` type should only go up.
250-
pub fn set_counter_vec_by(int_counter_vec: &Result<IntCounterVec>, name: &[&str], amount: u64) {
251-
if let Some(counter) = get_int_counter(int_counter_vec, name) {
252-
counter.reset();
253-
counter.inc_by(amount);
254-
}
255-
}
256-
257247
pub fn inc_counter_vec_by(int_counter_vec: &Result<IntCounterVec>, name: &[&str], amount: u64) {
258248
if let Some(counter) = get_int_counter(int_counter_vec, name) {
259249
counter.inc_by(amount);

common/system_health/src/lib.rs

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -198,25 +198,6 @@ pub fn observe_system_health_vc(
198198
}
199199
}
200200

201-
/// Observes if NAT traversal is possible.
202-
pub fn observe_nat() -> bool {
203-
let discv5_nat = lighthouse_network::metrics::get_int_gauge(
204-
&lighthouse_network::metrics::NAT_OPEN,
205-
&["discv5"],
206-
)
207-
.map(|g| g.get() == 1)
208-
.unwrap_or_default();
209-
210-
let libp2p_nat = lighthouse_network::metrics::get_int_gauge(
211-
&lighthouse_network::metrics::NAT_OPEN,
212-
&["libp2p"],
213-
)
214-
.map(|g| g.get() == 1)
215-
.unwrap_or_default();
216-
217-
discv5_nat && libp2p_nat
218-
}
219-
220201
/// Observes the Beacon Node system health.
221202
pub fn observe_system_health_bn<TSpec: EthSpec>(
222203
sysinfo: Arc<RwLock<System>>,
@@ -242,7 +223,11 @@ pub fn observe_system_health_bn<TSpec: EthSpec>(
242223
.unwrap_or_else(|| (String::from("None"), 0, 0));
243224

244225
// Determine if the NAT is open or not.
245-
let nat_open = observe_nat();
226+
let nat_open = lighthouse_network::metrics::NAT_OPEN
227+
.as_ref()
228+
.map(|v| v.get())
229+
.unwrap_or(0)
230+
!= 0;
246231

247232
SystemHealthBN {
248233
system_health,

0 commit comments

Comments
 (0)