Skip to content

Commit

Permalink
feat(rt): expose tokio runtime metrics
Browse files Browse the repository at this point in the history
Tokio has an unstable feature supporting runtime metrics.

This allows service operators to export metrics to systems like
Prometheus, so that they can observe how their workloads are performing
on the tokio runtime. This exposes information like the number of worker
threads, queue depth, the number of tasks polled, and so on.

`linkerd2-proxy` should expose these metrics.

This uses the `kubert-prometheus-tokio` crate to register a `Runtime`
metrics worker, and spawn a task to probe these metrics at a fixed,
regular interval.

see: <https://github.com/olix0r/kubert/tree/main/kubert-prometheus-tokio>

If the `tokio_unstable` cfg is not enabled at build time, this will emit a
debug event and do nothing.
  • Loading branch information
cratelyn committed Sep 5, 2024
1 parent 6802c00 commit 662c146
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 3 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2275,6 +2275,7 @@ name = "linkerd2-proxy"
version = "0.1.0"
dependencies = [
"futures",
"kubert-prometheus-tokio",
"linkerd-app",
"linkerd-meshtls",
"linkerd-metrics",
Expand Down
3 changes: 2 additions & 1 deletion linkerd2-proxy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ pprof = ["linkerd-app/pprof"]

[dependencies]
futures = { version = "0.3", default-features = false }
num_cpus = { version = "1", optional = true }
kubert-prometheus-tokio = { version = "0.1", features = ["rt"] }
linkerd-app = { path = "../linkerd/app" }
linkerd-metrics = { path = "../linkerd/metrics" }
# We don't actually use code from this crate in `main`; it's here only so we can
# control its feature flags.
linkerd-meshtls = { path = "../linkerd/meshtls" }
linkerd-signal = { path = "../linkerd/signal" }
num_cpus = { version = "1", optional = true }
tokio = { version = "1", features = ["rt", "time", "net"] }
tracing = "0.1"

Expand Down
10 changes: 8 additions & 2 deletions linkerd2-proxy/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ fn main() {
vendor = BUILD_INFO.vendor,
);

let metrics = linkerd_metrics::prom::Registry::default();
let mut metrics = linkerd_metrics::prom::Registry::default();

// Load configuration from the environment without binding ports.
let config = match Config::try_from_env() {
Expand All @@ -57,7 +57,13 @@ fn main() {
// Builds a runtime with the appropriate number of cores:
// `LINKERD2_PROXY_CORES` env or the number of available CPUs (as provided
// by cgroups, when possible).
rt::build().block_on(async move {
let runtime = rt::build();

// Spawn a task to run in the background, exporting runtime metrics at a regular interval.
rt::spawn_metrics_exporter_onto(&mut metrics, runtime.handle().to_owned());

// Start the runtime, providing its entrypoint.
runtime.block_on(async move {
let (shutdown_tx, mut shutdown_rx) = mpsc::unbounded_channel();
let shutdown_grace_period = config.shutdown_grace_period;

Expand Down
36 changes: 36 additions & 0 deletions linkerd2-proxy/src/rt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,39 @@ pub(crate) fn build() -> Runtime {
.build()
.expect("failed to build basic runtime!")
}

/// Spawns a task to scrape metrics for the given runtime at a regular interval.
///
/// Metrics are registered in `registry` under the `tokio_rt` prefix, and the
/// task that periodically refreshes them is spawned onto `runtime`. This may
/// be called before the runtime has been entered (i.e. before `block_on`).
///
/// Note that this requires unstable tokio functionality that must be enabled
/// via the `tokio_unstable` cfg. When it is not enabled, no metrics will be
/// registered and a debug event is emitted instead.
///
/// `RUSTFLAGS="--cfg tokio_unstable"` must be set at build-time to use this feature.
pub fn spawn_metrics_exporter_onto(
    registry: &mut linkerd_metrics::prom::Registry,
    runtime: tokio::runtime::Handle,
) {
    #[cfg(tokio_unstable)]
    {
        use {std::time::Duration, tracing::Instrument};

        /// The fixed interval at which tokio runtime metrics are updated.
        //
        // TODO(kate): perhaps this could be configurable eventually. for now, it's hard-coded.
        const INTERVAL: Duration = Duration::from_secs(1);

        let registry = registry.sub_registry_with_prefix("tokio_rt");
        let metrics = kubert_prometheus_tokio::Runtime::register(registry, runtime.clone());

        runtime.spawn(
            async move {
                // Timers can only be created from within a runtime context. This
                // function is invoked before the runtime is entered, so the
                // interval is constructed here, inside the spawned task, rather
                // than eagerly in the caller's (runtime-less) context.
                let mut interval = tokio::time::interval(INTERVAL);
                metrics.updated(&mut interval).await
            }
            .instrument(tracing::info_span!("kubert-prom-tokio-rt")),
        );
    }
    #[cfg(not(tokio_unstable))]
    {
        // Explicitly consume the arguments so that builds without the
        // `tokio_unstable` cfg do not warn about unused parameters.
        let _ = (registry, runtime);
        tracing::debug!("Tokio runtime metrics cannot be monitored without the tokio_unstable cfg");
    }
}

0 comments on commit 662c146

Please sign in to comment.