Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .changesets/feat_zelda_jemalloc_metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
### jemalloc metrics ([PR #7735](https://github.com/apollographql/router/pull/7735))

This PR adds the following new metrics when running the router on Linux with its default `global-allocator` feature enabled (they are not emitted when the `dhat-heap` feature is enabled):

- [apollo_router_jemalloc_active](https://jemalloc.net/jemalloc.3.html#stats.active): Total number of bytes in active pages allocated by the application.
- [apollo_router_jemalloc_allocated](https://jemalloc.net/jemalloc.3.html#stats.allocated): Total number of bytes allocated by the application.
- [apollo_router_jemalloc_mapped](https://jemalloc.net/jemalloc.3.html#stats.mapped): Total number of bytes in active extents mapped by the allocator.
- [apollo_router_jemalloc_metadata](https://jemalloc.net/jemalloc.3.html#stats.metadata): Total number of bytes dedicated to metadata, which comprise base allocations used for bootstrap-sensitive allocator metadata structures and internal allocations.
- [apollo_router_jemalloc_resident](https://jemalloc.net/jemalloc.3.html#stats.resident): Maximum number of bytes in physically resident data pages mapped by the allocator, comprising all pages dedicated to allocator metadata, pages backing active allocations, and unused dirty pages.
- [apollo_router_jemalloc_retained](https://jemalloc.net/jemalloc.3.html#stats.retained): Total number of bytes in virtual memory mappings that were retained rather than being returned to the operating system via e.g. `munmap(2)` or similar.

By [@Velfi](https://github.com/Velfi) in https://github.com/apollographql/router/pull/7735
14 changes: 13 additions & 1 deletion Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ dependencies = [
"tempfile",
"test-log",
"thiserror 1.0.63",
"tikv-jemalloc-ctl",
"tikv-jemallocator",
"time",
"tokio",
Expand Down Expand Up @@ -3350,7 +3351,7 @@ dependencies = [
"itoa",
"pin-project-lite",
"smallvec",
"socket2 0.4.10",
"socket2 0.5.7",
"tokio",
"tower-service",
"tracing",
Expand Down Expand Up @@ -6603,6 +6604,17 @@ dependencies = [
"tower 0.4.13",
]

[[package]]
name = "tikv-jemalloc-ctl"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f21f216790c8df74ce3ab25b534e0718da5a1916719771d3fec23315c99e468b"
dependencies = [
"libc",
"paste",
"tikv-jemalloc-sys",
]

[[package]]
name = "tikv-jemalloc-sys"
version = "0.6.0+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
Expand Down
5 changes: 3 additions & 2 deletions apollo-router/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ default = ["global-allocator"]
# [dependencies]
# apollo-router = {version = "1.20", default-features = false}
# ```
global-allocator = []
global-allocator = ["dep:tikv-jemallocator", "tikv-jemalloc-ctl/stats"]

# if you are doing heap profiling
dhat-heap = ["dhat"]
Expand Down Expand Up @@ -277,7 +277,8 @@ hyperlocal = { version = "0.8.0", default-features = false, features = [
] }

[target.'cfg(target_os = "linux")'.dependencies]
tikv-jemallocator = "0.6.0"
tikv-jemallocator = { version = "0.6.0", optional = true }
tikv-jemalloc-ctl = { version = "0.6.0", features = ["stats"], optional = true }

[dev-dependencies]
axum = { version = "0.6.20", features = [
Expand Down
39 changes: 37 additions & 2 deletions apollo-router/src/axum_factory/axum_http_server_factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,27 @@ fn session_count_instrument() -> ObservableGauge<u64> {
.init()
}

/// Sets up everything needed to report jemalloc statistics as metrics.
///
/// Returns a pair made of:
/// - the handle of the task spawned by `jemalloc::start_epoch_advance_loop`, and
/// - one observable gauge per exported statistic, in the order
///   active, allocated, metadata, mapped, resident, retained.
///
/// Only compiled on Linux with the `global-allocator` feature enabled and the
/// `dhat-heap` feature disabled.
#[cfg(all(
    feature = "global-allocator",
    not(feature = "dhat-heap"),
    target_os = "linux"
))]
fn jemalloc_metrics_instruments() -> (tokio::task::JoinHandle<()>, Vec<ObservableGauge<u64>>) {
    use crate::axum_factory::metrics::jemalloc;

    // The epoch loop is started before any gauge is registered.
    let epoch_advance_loop = jemalloc::start_epoch_advance_loop();

    // One constructor per statistic; they are invoked in declaration order.
    let gauge_constructors: [fn() -> ObservableGauge<u64>; 6] = [
        jemalloc::create_active_gauge,
        jemalloc::create_allocated_gauge,
        jemalloc::create_metadata_gauge,
        jemalloc::create_mapped_gauge,
        jemalloc::create_resident_gauge,
        jemalloc::create_retained_gauge,
    ];
    let gauges = gauge_constructors
        .iter()
        .map(|create_gauge| create_gauge())
        .collect();

    (epoch_advance_loop, gauges)
}

struct ActiveSessionCountGuard;

impl ActiveSessionCountGuard {
Expand Down Expand Up @@ -644,9 +665,23 @@ where

// Tie the lifetime of the session count instrument to the lifetime of the router
// by moving it into a no-op layer.
let instrument = session_count_instrument();
let session_count_instrument = session_count_instrument();
#[cfg(all(
feature = "global-allocator",
not(feature = "dhat-heap"),
target_os = "linux"
))]
let (_epoch_advance_loop, jemalloc_instrument) = jemalloc_metrics_instruments();
// Tie the lifetime of the various instruments to the lifetime of the router
// by referencing them in a no-op layer.
router = router.layer(layer_fn(move |service| {
let _ = &instrument;
let _session_count_instrument = &session_count_instrument;
#[cfg(all(
feature = "global-allocator",
not(feature = "dhat-heap"),
target_os = "linux"
))]
let _jemalloc_instrument = &jemalloc_instrument;
service
}));

Expand Down
65 changes: 65 additions & 0 deletions apollo-router/src/axum_factory/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/// jemalloc allocator statistics exposed as `apollo.router.jemalloc.*` gauges.
///
/// Only compiled on Linux with the `global-allocator` feature enabled and the
/// `dhat-heap` feature disabled.
#[cfg(all(
    feature = "global-allocator",
    not(feature = "dhat-heap"),
    target_os = "linux"
))]
pub(crate) mod jemalloc {
    use std::time::Duration;

    use opentelemetry::metrics::MeterProvider;
    use opentelemetry::metrics::ObservableGauge;

    use crate::metrics::meter_provider;

    /// Delay between two consecutive jemalloc epoch advances.
    const EPOCH_ADVANCE_INTERVAL: Duration = Duration::from_millis(500);

    /// Spawns a tokio task that advances the jemalloc epoch every
    /// [`EPOCH_ADVANCE_INTERVAL`], forever.
    ///
    /// jemalloc caches its statistics and only refreshes them when the epoch
    /// is advanced, so without this loop the gauges below would keep
    /// reporting stale values.
    ///
    /// A failed advance is logged as a warning and retried on the next tick.
    ///
    /// Must be called from within a tokio runtime. Dropping the returned
    /// handle detaches the task rather than stopping it; call `abort()` on
    /// the handle to stop the loop.
    pub(crate) fn start_epoch_advance_loop() -> tokio::task::JoinHandle<()> {
        tokio::spawn(async move {
            loop {
                if let Err(e) = tikv_jemalloc_ctl::epoch::advance() {
                    tracing::warn!("Failed to advance jemalloc epoch: {}", e);
                }
                tokio::time::sleep(EPOCH_ADVANCE_INTERVAL).await;
            }
        })
    }

    /// Builds a `u64` observable gauge named `apollo.router.jemalloc.<name>`
    /// whose callback reads `tikv_jemalloc_ctl::stats::<name>`.
    ///
    /// When the read fails nothing is observed for that collection and a
    /// warning carrying the underlying error is logged, mirroring what the
    /// epoch loop does for its own failures.
    macro_rules! create_jemalloc_gauge {
        ($name:ident, $description:expr) => {
            meter_provider()
                .meter("apollo/router")
                .u64_observable_gauge(concat!("apollo.router.jemalloc.", stringify!($name)))
                .with_description($description)
                .with_callback(|gauge| match tikv_jemalloc_ctl::stats::$name::read() {
                    Ok(value) => {
                        gauge.observe(value as u64, &[]);
                    }
                    Err(e) => {
                        tracing::warn!(
                            "Failed to read jemalloc {} stats: {}",
                            stringify!($name),
                            e
                        );
                    }
                })
                .init()
        };
    }

    /// Gauge reporting jemalloc's `stats.active`.
    pub(crate) fn create_active_gauge() -> ObservableGauge<u64> {
        create_jemalloc_gauge!(active, "Total active bytes in jemalloc")
    }

    /// Gauge reporting jemalloc's `stats.allocated`.
    pub(crate) fn create_allocated_gauge() -> ObservableGauge<u64> {
        create_jemalloc_gauge!(allocated, "Total bytes allocated by jemalloc")
    }

    /// Gauge reporting jemalloc's `stats.metadata`.
    pub(crate) fn create_metadata_gauge() -> ObservableGauge<u64> {
        create_jemalloc_gauge!(metadata, "Total metadata bytes in jemalloc")
    }

    /// Gauge reporting jemalloc's `stats.mapped`.
    pub(crate) fn create_mapped_gauge() -> ObservableGauge<u64> {
        create_jemalloc_gauge!(mapped, "Total mapped bytes in jemalloc")
    }

    /// Gauge reporting jemalloc's `stats.resident`.
    pub(crate) fn create_resident_gauge() -> ObservableGauge<u64> {
        create_jemalloc_gauge!(resident, "Total resident bytes in jemalloc")
    }

    /// Gauge reporting jemalloc's `stats.retained`.
    pub(crate) fn create_retained_gauge() -> ObservableGauge<u64> {
        create_jemalloc_gauge!(retained, "Total retained bytes in jemalloc")
    }
}
1 change: 1 addition & 0 deletions apollo-router/src/axum_factory/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod axum_http_server_factory;
pub(crate) mod compression;
pub(crate) mod connection_handle;
mod listeners;
pub(crate) mod metrics;
#[cfg(test)]
pub(crate) mod tests;
pub(crate) mod utils;
Expand Down
37 changes: 37 additions & 0 deletions apollo-router/tests/integration/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/// Starts a router with the Prometheus fixture, runs one query, and checks
/// that every jemalloc gauge shows up in the exported metrics.
#[cfg(all(
    feature = "global-allocator",
    not(feature = "dhat-heap"),
    target_os = "linux"
))]
#[tokio::test(flavor = "multi_thread")]
async fn test_jemalloc_metrics_are_emitted() {
    use super::common::IntegrationTest;

    // Prometheus names of the gauges, checked one after the other in this order.
    const EXPECTED_METRICS: [&str; 6] = [
        r#"apollo_router_jemalloc_active"#,
        r#"apollo_router_jemalloc_allocated"#,
        r#"apollo_router_jemalloc_mapped"#,
        r#"apollo_router_jemalloc_metadata"#,
        r#"apollo_router_jemalloc_resident"#,
        r#"apollo_router_jemalloc_retained"#,
    ];

    let mut router = IntegrationTest::builder()
        .config(include_str!("fixtures/prometheus.router.yaml"))
        .build()
        .await;

    router.start().await;
    router.assert_started().await;
    router.execute_default_query().await;

    for &metric_name in &EXPECTED_METRICS {
        router.assert_metrics_contains(metric_name, None).await;
    }
}
1 change: 1 addition & 0 deletions apollo-router/tests/integration/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ mod docs;
mod file_upload;
mod introspection;
mod lifecycle;
mod metrics;
mod operation_limits;
mod operation_name;
mod query_planner;
Expand Down