From 5a9aeeea5e7ab28b2966ab75a9004378c4496727 Mon Sep 17 00:00:00 2001 From: katelyn martin Date: Thu, 5 Sep 2024 00:00:00 +0000 Subject: [PATCH] feat(rt): Expose tokio runtime metrics Tokio has an unstable feature supporting runtime metrics. This allows service operators to export metrics to systems like Prometheus, so that they can observe how their workloads are performing on the tokio runtime. This exposes information like the number of worker threads, queue depth, the number of tasks polled, and so on. `linkerd2-proxy` should expose these metrics. This uses the `kubert-prometheus-tokio` crate to register a `Runtime` metrics worker, and spawn a task to probe these metrics at a fixed, regular interval. see: if the `tokio_unstable` feature is not enabled, this will emit a debug event and do nothing. Signed-off-by: katelyn martin --- Cargo.lock | 37 +++++++++++++++++++++++++++++++++++++ linkerd2-proxy/Cargo.toml | 3 ++- linkerd2-proxy/src/main.rs | 10 ++++++++-- linkerd2-proxy/src/rt.rs | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5918f2a557..c640ad40a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -653,6 +653,17 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.30" @@ -674,6 +685,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -1020,6 +1032,18 @@ dependencies = [ "libc", ] +[[package]] +name = "kubert-prometheus-tokio" +version = "0.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a101fa3df488c89130664aaf4652986da49e204fb1725d089122f75b22ff6cbb" +dependencies = [ + "prometheus-client", + "tokio", + "tokio-metrics", + "tracing", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -2296,6 +2320,7 @@ version = "0.1.0" dependencies = [ "futures", "jemallocator", + "kubert-prometheus-tokio", "linkerd-app", "linkerd-meshtls", "linkerd-metrics", @@ -3378,6 +3403,18 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-metrics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eace09241d62c98b7eeb1107d4c5c64ca3bd7da92e8c218c153ab3a78f9be112" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", + "tokio-stream", +] + [[package]] name = "tokio-rustls" version = "0.24.1" diff --git a/linkerd2-proxy/Cargo.toml b/linkerd2-proxy/Cargo.toml index 91f87c9b81..4276e62aba 100644 --- a/linkerd2-proxy/Cargo.toml +++ b/linkerd2-proxy/Cargo.toml @@ -18,13 +18,14 @@ pprof = ["linkerd-app/pprof"] [dependencies] futures = { version = "0.3", default-features = false } -num_cpus = { version = "1", optional = true } +kubert-prometheus-tokio = { version = "0.1", features = ["rt"] } linkerd-app = { path = "../linkerd/app" } linkerd-metrics = { path = "../linkerd/metrics" } # We don't actually use code from this crate in `main`; it's here only so we can # control its feature flags. 
linkerd-meshtls = { path = "../linkerd/meshtls" } linkerd-signal = { path = "../linkerd/signal" } +num_cpus = { version = "1", optional = true } tokio = { version = "1", features = ["rt", "time", "net"] } tracing = "0.1" diff --git a/linkerd2-proxy/src/main.rs b/linkerd2-proxy/src/main.rs index b3a6370336..9d7bd68c4f 100644 --- a/linkerd2-proxy/src/main.rs +++ b/linkerd2-proxy/src/main.rs @@ -42,7 +42,7 @@ fn main() { vendor = BUILD_INFO.vendor, ); - let metrics = linkerd_metrics::prom::Registry::default(); + let mut metrics = linkerd_metrics::prom::Registry::default(); // Load configuration from the environment without binding ports. let config = match Config::try_from_env() { @@ -56,7 +56,13 @@ fn main() { // Builds a runtime with the appropriate number of cores: // `LINKERD2_PROXY_CORES` env or the number of available CPUs (as provided // by cgroups, when possible). - rt::build().block_on(async move { + let runtime = rt::build(); + + // Spawn a task to run in the background, exporting runtime metrics at a regular interval. + rt::spawn_metrics_exporter_onto(&mut metrics, runtime.handle().to_owned()); + + // Start the runtime, providing its entrypoint. + runtime.block_on(async move { let (shutdown_tx, mut shutdown_rx) = mpsc::unbounded_channel(); let shutdown_grace_period = config.shutdown_grace_period; diff --git a/linkerd2-proxy/src/rt.rs b/linkerd2-proxy/src/rt.rs index b88eb17cce..24caab53bb 100644 --- a/linkerd2-proxy/src/rt.rs +++ b/linkerd2-proxy/src/rt.rs @@ -64,3 +64,39 @@ pub(crate) fn build() -> Runtime { .build() .expect("failed to build basic runtime!") } + +/// Spawns a task to scrape metrics for the given runtime at a regular interval. +/// +/// Note that this module requires unstable tokio functionality that must be +/// enabled via the `tokio_unstable` feature. When it is not enabled, no metrics +/// will be registered. +/// +/// `RUSTFLAGS="--cfg tokio_unstable"` must be set at build-time to use this feature.
+pub fn spawn_metrics_exporter_onto(
+    registry: &mut linkerd_metrics::prom::Registry,
+    runtime: tokio::runtime::Handle,
+) {
+    #[cfg(tokio_unstable)]
+    {
+        use {std::time::Duration, tracing::Instrument};
+
+        /// The fixed interval at which tokio runtime metrics are updated.
+        //
+        // TODO(kate): perhaps this could be configurable eventually. for now, it's hard-coded.
+        const INTERVAL: Duration = Duration::from_secs(1);
+
+        let registry = registry.sub_registry_with_prefix("tokio_rt");
+        let metrics = kubert_prometheus_tokio::Runtime::register(registry, runtime.clone());
+        // Create the timer inside the task: `tokio::time::interval` panics outside of a runtime
+        // context, and this may be called before the runtime is entered (as `main` does).
+        let export = async move {
+            let mut interval = tokio::time::interval(INTERVAL);
+            metrics.updated(&mut interval).await
+        };
+        runtime.spawn(export.instrument(tracing::info_span!("kubert-prom-tokio-rt")));
+    }
+    #[cfg(not(tokio_unstable))]
+    {
+        tracing::debug!("Tokio runtime metrics cannot be monitored without the tokio_unstable cfg");
+    }
+}