From abc09c557e832770fa77dd141e53d0c3cccba92a Mon Sep 17 00:00:00 2001 From: Steven Jin Xuan Date: Wed, 3 Sep 2025 11:27:03 -0400 Subject: [PATCH 1/4] Add open file metrics --- src/app.rs | 6 +++ src/metrics.rs | 1 + src/metrics/process.rs | 88 ++++++++++++++++++++++++++++++++++++++++++ tests/direct.rs | 72 +++++++++++++++++++++++----------- 4 files changed, 145 insertions(+), 22 deletions(-) create mode 100644 src/metrics/process.rs diff --git a/src/app.rs b/src/app.rs index 482cedd03c..01e214de21 100644 --- a/src/app.rs +++ b/src/app.rs @@ -77,6 +77,7 @@ pub async fn build_with_cert( // Register metrics. let mut registry = Registry::default(); + register_process_metrics(&mut registry); let istio_registry = metrics::sub_registry(&mut registry); let _ = metrics::meta::Metrics::new(istio_registry); let xds_metrics = xds::Metrics::new(istio_registry); @@ -250,6 +251,11 @@ pub async fn build_with_cert( }) } +fn register_process_metrics(registry: &mut Registry) { + #[cfg(unix)] + registry.register_collector(Box::new(metrics::process::ProcessMetrics::new())); +} + struct DataPlaneTask { block_shutdown: bool, fut: Pin> + Send + Sync + 'static>>, diff --git a/src/metrics.rs b/src/metrics.rs index d528ccd31f..96d76b8bd4 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -25,6 +25,7 @@ use crate::identity::Identity; pub mod meta; pub mod server; +pub mod process; use crate::strng::{RichStrng, Strng}; pub use server::*; diff --git a/src/metrics/process.rs b/src/metrics/process.rs new file mode 100644 index 0000000000..305dbdde9a --- /dev/null +++ b/src/metrics/process.rs @@ -0,0 +1,88 @@ +// Copyright Istio Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use nix::sys::resource::{Resource, getrlimit}; +use prometheus_client::collector::Collector; +use prometheus_client::encoding::{DescriptorEncoder, EncodeMetric}; +use prometheus_client::metrics; +use tracing::error; + +// Track open fds +#[derive(Debug)] +pub struct ProcessMetrics {} +const FD_PATH: &str = "/proc/self/fd"; + +impl ProcessMetrics { + pub fn new() -> Self { + Self {} + } + + fn encode_open_fds(&self, encoder: &mut DescriptorEncoder) -> Result<(), std::fmt::Error> { + // Count open fds by listing /proc/self/fd + let open_fds = match std::fs::read_dir(FD_PATH) { + Ok(entries) => entries.count() as u64, + Err(e) => { + error!("Failed to read {}: {}", FD_PATH, e); + 0 + } + }; + let gauge = metrics::gauge::ConstGauge::new(open_fds); + let metric_encoder = encoder.encode_descriptor( + "process_open_fds", + "Number of open file descriptors", + None, + gauge.metric_type(), + )?; + gauge.encode(metric_encoder)?; + Ok(()) + } + + fn encode_max_fds(&self, encoder: &mut DescriptorEncoder) -> Result<(), std::fmt::Error> { + let fds = match getrlimit(Resource::RLIMIT_NOFILE) { + Ok((soft_limit, _)) => soft_limit, + Err(e) => { + error!("Failed to get rlimit: {}", e); + return Ok(()); + } + }; + let gauge = metrics::gauge::ConstGauge::new(fds); + let metric_encoder = encoder.encode_descriptor( + "process_max_fds", + "Maximum number of file descriptors", + None, + gauge.metric_type(), + )?; + gauge.encode(metric_encoder)?; + Ok(()) + } +} + +impl Collector for ProcessMetrics { + fn encode(&self, mut encoder: DescriptorEncoder) -> Result<(), std::fmt::Error> { + match self.encode_open_fds(&mut encoder) { + Ok(_) => {} + Err(e) => { + error!("Failed to encode open fds: {}", e); + return Ok(()); + } + } + match self.encode_max_fds(&mut encoder) { + Ok(_) => {} + Err(e) => { + error!("Failed to encode max fds: {}", e); + } + } + Ok(()) + } +} diff --git a/tests/direct.rs b/tests/direct.rs index 4e9874ddeb..e1b7af3fdc 100644 --- a/tests/direct.rs +++ b/tests/direct.rs @@ -198,8 +198,36 @@ async fn test_quit_lifecycle() { .expect("app exits without error"); } +fn process_metrics_assertions(metrics: &ParsedMetrics) { + for metric in ["process_open_fds", "process_max_fds"] { + let metric = &(metric); + let m = metrics.query(metric, &Default::default()); + assert!(m.is_some(), "expected metric {metric}"); + assert!( + m.to_owned().unwrap().len() == 1, + "expected metric {metric} to have len(1)" + ); + let value = m.unwrap()[0].value.clone(); + match value { + prometheus_parse::Value::Gauge(v) => { + assert!( + v > 0.0, + "expected metric {metric} to be positive, was {value:?}", + ); + } + _ => { + panic!("unexpected metric type"); + } + } + } +} + +fn base_metrics_assertions(metrics: ParsedMetrics) { + process_metrics_assertions(&metrics); +} + async fn run_request_test(target: &str, node: &str) { - run_requests_test(target, node, 1, None, false).await + run_requests_test(target, node, 1, Some(base_metrics_assertions), false).await } async fn run_requests_test( @@ -264,27 +292,25 @@ async fn test_vip_request() { } fn on_demand_dns_assertions(metrics: ParsedMetrics) { - { - let metric = &("istio_on_demand_dns_total"); - let m = metrics.query(metric, &Default::default()); - assert!(m.is_some(), "expected metric {metric}"); - // expecting one cache hit and one cache miss - assert!( - m.to_owned().unwrap().len() == 1, - "expected metric {metric} to have len(1)" - ); - let value = m.unwrap()[0].value.clone(); - let expected = match *metric { - "istio_on_demand_dns_total" => prometheus_parse::Value::Untyped(2.0), - &_ => { - panic!("dev error; unexpected metric"); - } - }; - assert!( - value == expected, - "expected metric {metric} to be 1, was {value:?}", - ); - } + let metric = &("istio_on_demand_dns_total"); + let m = metrics.query(metric, &Default::default()); + assert!(m.is_some(), "expected metric {metric}"); + // expecting one cache hit and one cache miss + assert!( + m.to_owned().unwrap().len() == 1, + "expected metric {metric} to have len(1)" + ); + let value = m.unwrap()[0].value.clone(); + let expected = match *metric { + "istio_on_demand_dns_total" => prometheus_parse::Value::Untyped(2.0), + &_ => { + panic!("dev error; unexpected metric"); + } + }; + assert!( + value == expected, + "expected metric {metric} to be 1, was {value:?}", + ); } #[tokio::test] @@ -356,6 +382,8 @@ async fn test_stats_exist() { "istio_tcp_connections_closed", "istio_tcp_received_bytes", "istio_tcp_sent_bytes", + "process_max_fds", + "process_open_fds", ]); { for (name, doc) in metric_info { From 9e7bd20f16ce7bab33096a4acf2271d5071dae20 Mon Sep 17 00:00:00 2001 From: Steven Jin Xuan Date: Wed, 3 Sep 2025 11:36:35 -0400 Subject: [PATCH 2/4] Lint --- src/metrics/process.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/metrics/process.rs b/src/metrics/process.rs index 305dbdde9a..4c3e0bff23 100644 --- a/src/metrics/process.rs +++ b/src/metrics/process.rs @@ -68,6 +68,12 @@ impl ProcessMetrics { } } +impl Default for ProcessMetrics { + fn default() -> Self { + Self::new() + } +} + impl Collector for ProcessMetrics { fn encode(&self, mut encoder: DescriptorEncoder) -> Result<(), std::fmt::Error> { match self.encode_open_fds(&mut encoder) { From 482b2ecbdde77746731a6e6a2f56e739b3cf6f1d Mon Sep 17 00:00:00 2001 From: Steven Jin Xuan Date: Wed, 3 Sep 2025 13:40:24 -0400 Subject: [PATCH 3/4] use /dev/fd and subtract one from fd count --- src/metrics/process.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/metrics/process.rs b/src/metrics/process.rs index 4c3e0bff23..0e7c857484 100644 --- a/src/metrics/process.rs +++ b/src/metrics/process.rs @@ -21,7 +21,7 @@ use tracing::error; // Track open fds #[derive(Debug)] pub struct ProcessMetrics {} -const FD_PATH: &str = "/proc/self/fd"; +const FD_PATH: &str = "/dev/fd"; impl ProcessMetrics { pub fn new() -> Self { @@ -37,7 +37,8 @@ impl ProcessMetrics { 0 } }; - let gauge = metrics::gauge::ConstGauge::new(open_fds); + // exclude the fd used to read the directory + let gauge = metrics::gauge::ConstGauge::new(open_fds - 1); let metric_encoder = encoder.encode_descriptor( "process_open_fds", "Number of open file descriptors", From 4d0548f9bc62d4f5dbca9cc92c2fac4f716a9376 Mon Sep 17 00:00:00 2001 From: Steven Jin Xuan Date: Wed, 3 Sep 2025 16:51:59 -0400 Subject: [PATCH 4/4] import ordering --- src/metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/metrics.rs b/src/metrics.rs index 96d76b8bd4..f6d63c6e1f 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -24,8 +24,8 @@ use tracing_core::field::Value; use crate::identity::Identity; pub mod meta; -pub mod server; pub mod process; +pub mod server; use crate::strng::{RichStrng, Strng}; pub use server::*;