diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index e629da5ba6..d3f5815051 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -34,6 +34,13 @@ in a previous payload. By [@Geal](https://github.com/geal) in https://github.com/apollographql/router/pull/2184 +## 🐛 Fixes + +### wait for opentelemetry tracer provider to shut down ([PR #2191](https://github.com/apollographql/router/pull/2191)) + +When we drop Telemetry, we spawn a thread to perform the global opentelemetry tracer provider shutdown. The documentation of this function indicates that "This will invoke the shutdown method on all span processors. span processors should export remaining spans before return". We should give that process some time to complete (5 seconds currently) before returning from `drop`. This will provide more opportunity for spans to be exported. + +By [@garypen](https://github.com/garypen) in https://github.com/apollographql/router/pull/2191 ## 🛠 Maintenance diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 070b775515..040728e435 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -6,6 +6,7 @@ use std::error::Error as Errors; use std::fmt; use std::sync::atomic::AtomicU8; use std::sync::atomic::Ordering; +use std::sync::mpsc; use std::sync::Arc; use std::time::Duration; use std::time::Instant; @@ -177,14 +178,34 @@ fn setup_metrics_exporter( Ok(builder) } +fn run_with_timeout<F, T>(f: F, timeout: Duration) -> Result<T, mpsc::RecvTimeoutError> +where + F: FnOnce() -> T + Send + 'static, + T: Send + 'static, +{ + let (tx, rx) = mpsc::channel(); + std::thread::spawn(move || tx.send(f())); + + rx.recv_timeout(timeout) +} + +const TRACER_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5); + impl Drop for Telemetry { fn drop(&mut self) { ::tracing::debug!("dropping telemetry..."); let count = TELEMETRY_REFCOUNT.fetch_sub(1, Ordering::Relaxed); if count < 2 { - std::thread::spawn(|| { - 
opentelemetry::global::shutdown_tracer_provider(); - }); + // Block this drop until the tracer provider shutdown completes so + // remaining spans can be exported, but never wait forever: give up + // after TRACER_SHUTDOWN_TIMEOUT. + // A timeout (or any other receive error) is logged as a warning. + if let Err(e) = run_with_timeout( + opentelemetry::global::shutdown_tracer_provider, + TRACER_SHUTDOWN_TIMEOUT, + ) { + ::tracing::warn!("tracer shutdown failed: {:?}", e); + } } } }