Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changesets/fix_bryn_up_down_counters.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
### Fix OTLP metrics export to prevent UpDown counter drift ([PR #8174](https://github.com/apollographql/router/pull/8174))

Previously, when using OTLP metrics export with delta temporality configured, UpDown counters could exhibit drift issues where the counter values would become inaccurate over time. This happened because UpDown counters were incorrectly exported as deltas instead of cumulative values.

UpDownCounters will now always be exported as aggregate values as per the otel spec.

By [@BrynCooke](https://github.com/BrynCooke) in https://github.com/apollographql/router/pull/8174
126 changes: 124 additions & 2 deletions apollo-router/src/plugins/telemetry/otlp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,16 @@ pub(crate) struct CustomTemporalitySelector(
);

impl TemporalitySelector for CustomTemporalitySelector {
fn temporality(&self, _kind: InstrumentKind) -> opentelemetry_sdk::metrics::data::Temporality {
self.0
fn temporality(&self, kind: InstrumentKind) -> opentelemetry_sdk::metrics::data::Temporality {
// Up/down counters should always use cumulative temporality to ensure they are sent as aggregates
// rather than deltas, which prevents drift issues.
// See https://github.com/open-telemetry/opentelemetry-specification/blob/a1c13d59bb7d0fb086df2b3e1eaec9df9efef6cc/specification/metrics/sdk_exporters/otlp.md#additional-configuration for mor information
match kind {
InstrumentKind::UpDownCounter | InstrumentKind::ObservableUpDownCounter => {
opentelemetry_sdk::metrics::data::Temporality::Cumulative
}
_ => self.0,
}
}
}

Expand All @@ -314,8 +322,122 @@ impl From<&Temporality> for Box<dyn TemporalitySelector> {

#[cfg(test)]
mod tests {
use opentelemetry_sdk::metrics::data::Temporality as SdkTemporality;

use super::*;

#[test]
fn test_updown_counter_temporality_override() {
// Test that up/down counters always get cumulative temporality regardless of configuration
let delta_selector = CustomTemporalitySelector(SdkTemporality::Delta);
let cumulative_selector = CustomTemporalitySelector(SdkTemporality::Cumulative);

// UpDownCounter should always be cumulative
assert_eq!(
delta_selector.temporality(InstrumentKind::UpDownCounter),
SdkTemporality::Cumulative,
"UpDownCounter should always use cumulative temporality even with delta config"
);
assert_eq!(
cumulative_selector.temporality(InstrumentKind::UpDownCounter),
SdkTemporality::Cumulative,
"UpDownCounter should use cumulative temporality with cumulative config"
);

// ObservableUpDownCounter should always be cumulative
assert_eq!(
delta_selector.temporality(InstrumentKind::ObservableUpDownCounter),
SdkTemporality::Cumulative,
"ObservableUpDownCounter should always use cumulative temporality even with delta config"
);
assert_eq!(
cumulative_selector.temporality(InstrumentKind::ObservableUpDownCounter),
SdkTemporality::Cumulative,
"ObservableUpDownCounter should use cumulative temporality with cumulative config"
);
}

#[test]
fn test_counter_temporality_respects_config() {
// Test that regular counters respect the configured temporality
let delta_selector = CustomTemporalitySelector(SdkTemporality::Delta);
let cumulative_selector = CustomTemporalitySelector(SdkTemporality::Cumulative);

// Counter should respect configuration
assert_eq!(
delta_selector.temporality(InstrumentKind::Counter),
SdkTemporality::Delta,
"Counter should use delta temporality with delta config"
);
assert_eq!(
cumulative_selector.temporality(InstrumentKind::Counter),
SdkTemporality::Cumulative,
"Counter should use cumulative temporality with cumulative config"
);

// ObservableCounter should respect configuration
assert_eq!(
delta_selector.temporality(InstrumentKind::ObservableCounter),
SdkTemporality::Delta,
"ObservableCounter should use delta temporality with delta config"
);
assert_eq!(
cumulative_selector.temporality(InstrumentKind::ObservableCounter),
SdkTemporality::Cumulative,
"ObservableCounter should use cumulative temporality with cumulative config"
);
}

#[test]
fn test_gauge_temporality_respects_config() {
// Test that gauges respect the configured temporality (gauges are not forced to cumulative)
let delta_selector = CustomTemporalitySelector(SdkTemporality::Delta);
let cumulative_selector = CustomTemporalitySelector(SdkTemporality::Cumulative);

// Gauge should respect configuration
assert_eq!(
delta_selector.temporality(InstrumentKind::Gauge),
SdkTemporality::Delta,
"Gauge should use delta temporality with delta config"
);
assert_eq!(
cumulative_selector.temporality(InstrumentKind::Gauge),
SdkTemporality::Cumulative,
"Gauge should use cumulative temporality with cumulative config"
);

// ObservableGauge should respect configuration
assert_eq!(
delta_selector.temporality(InstrumentKind::ObservableGauge),
SdkTemporality::Delta,
"ObservableGauge should use delta temporality with delta config"
);
assert_eq!(
cumulative_selector.temporality(InstrumentKind::ObservableGauge),
SdkTemporality::Cumulative,
"ObservableGauge should use cumulative temporality with cumulative config"
);
}

#[test]
fn test_histogram_temporality_respects_config() {
// Test that histograms respect the configured temporality
let delta_selector = CustomTemporalitySelector(SdkTemporality::Delta);
let cumulative_selector = CustomTemporalitySelector(SdkTemporality::Cumulative);

// Histogram should respect configuration
assert_eq!(
delta_selector.temporality(InstrumentKind::Histogram),
SdkTemporality::Delta,
"Histogram should use delta temporality with delta config"
);
assert_eq!(
cumulative_selector.temporality(InstrumentKind::Histogram),
SdkTemporality::Cumulative,
"Histogram should use cumulative temporality with cumulative config"
);
}

#[test]
fn endpoint_grpc_defaulting_no_scheme() {
let url = Url::parse("api.apm.com:433").unwrap();
Expand Down