Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changesets/fix_bryn_datadog_exporter_span_kind.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
### Datadog `span.kind` now populated ([PR #5609](https://github.com/apollographql/router/pull/5609))

Datadog traces use `span.kind` to differentiate between different types of spans.
This change ensures that `span.kind` is correctly populated from the OpenTelemetry span kind, which has a one-to-one mapping to the kinds defined in [dd-trace](https://github.com/DataDog/dd-trace-go/blob/main/ddtrace/ext/span_kind.go).

By [@BrynCooke](https://github.com/BrynCooke) in https://github.com/apollographql/router/pull/5609
32 changes: 32 additions & 0 deletions .changesets/fix_bryn_datadog_exporter_span_metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
### Datadog span metrics are now supported ([PR #5609](https://github.com/apollographql/router/pull/5609))

When using the APM view in Datadog, span metrics are displayed for any span that is a top-level span or has the `_dd.measured` flag set.

Apollo Router now sets the `_dd.measured` flag by default for the following spans:

* `request`
* `router`
* `supergraph`
* `subgraph`
* `subgraph_request`
* `http_request`
* `query_planning`
* `execution`
* `parse_query`

You can override this behaviour to enable or disable span metrics for any span by setting the `span_metrics` configuration in the Datadog exporter configuration.

```yaml
telemetry:
exporters:
tracing:
datadog:
enabled: true
span_metrics:
# Disable span metrics for supergraph
supergraph: false
# Enable span metrics for my_custom_span
my_custom_span: true
```

By [@BrynCooke](https://github.com/BrynCooke) in https://github.com/apollographql/router/pull/5609
4 changes: 4 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ name = "apollo-router"
version = "1.51.0"
dependencies = [
"access-json",
"ahash",
"anyhow",
"apollo-compiler",
"apollo-federation",
Expand Down Expand Up @@ -526,6 +527,7 @@ dependencies = [
"indexmap 2.2.6",
"insta",
"itertools 0.12.1",
"itoa",
"jsonpath-rust",
"jsonpath_lib",
"jsonschema",
Expand Down Expand Up @@ -572,12 +574,14 @@ dependencies = [
"regex",
"reqwest",
"rhai",
"rmp",
"router-bridge",
"rstack",
"rust-embed",
"rustls",
"rustls-native-certs",
"rustls-pemfile",
"ryu",
"schemars",
"semver 1.0.23",
"serde",
Expand Down
10 changes: 9 additions & 1 deletion apollo-router/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,11 @@ opentelemetry_sdk = { version = "0.20.0", default-features = false, features = [
] }
opentelemetry_api = "0.20.0"
opentelemetry-aws = "0.8.0"
opentelemetry-datadog = { version = "0.8.0", features = ["reqwest-client"] }
# START TEMP DATADOG Temporarily remove until we upgrade otel to the latest version
# This means including the rmp library
# opentelemetry-datadog = { version = "0.8.0", features = ["reqwest-client"] }
rmp = "0.8"
# END TEMP DATADOG
opentelemetry-http = "0.9.0"
opentelemetry-jaeger = { version = "0.19.0", features = [
"collector_client",
Expand Down Expand Up @@ -268,6 +272,9 @@ time = { version = "0.3.36", features = ["serde"] }
similar = { version = "2.5.0", features = ["inline"] }
console = "0.15.8"
bytesize = { version = "1.3.0", features = ["serde"] }
ahash = "0.8.11"
itoa = "1.0.9"
ryu = "1.0.15"

[target.'cfg(macos)'.dependencies]
uname = "0.1.1"
Expand Down Expand Up @@ -305,6 +312,7 @@ opentelemetry-proto = { version = "0.5.0", features = [
"gen-tonic-messages",
"with-serde",
] }
opentelemetry-datadog = { version = "0.8.0", features = ["reqwest-client"] }
p256 = "0.13.2"
rand_core = "0.6.4"
reqwest = { version = "0.11.27", default-features = false, features = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1468,6 +1468,24 @@ expression: "&schema"
"default": {},
"description": "Custom mapping to be used as the resource field in spans, defaults to: router -> http.route supergraph -> graphql.operation.name query_planning -> graphql.operation.name subgraph -> subgraph.name subgraph_request -> subgraph.name http_request -> http.route",
"type": "object"
},
"span_metrics": {
"additionalProperties": {
"type": "boolean"
},
"default": {
"execution": true,
"http_request": true,
"parse_query": true,
"query_planning": true,
"request": true,
"router": true,
"subgraph": true,
"subgraph_request": true,
"supergraph": true
},
"description": "Which spans will be eligible for span stats to be collected for viewing in the APM view. Defaults to true for `request`, `router`, `query_parsing`, `supergraph`, `execution`, `query_planning`, `subgraph`, `subgraph_request` and `http_request`.",
"type": "object"
}
},
"required": [
Expand Down
4 changes: 3 additions & 1 deletion apollo-router/src/plugins/telemetry/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ pub(crate) const REQUEST_SPAN_NAME: &str = "request";
pub(crate) const QUERY_PLANNING_SPAN_NAME: &str = "query_planning";
pub(crate) const HTTP_REQUEST_SPAN_NAME: &str = "http_request";
pub(crate) const SUBGRAPH_REQUEST_SPAN_NAME: &str = "subgraph_request";
pub(crate) const QUERY_PARSING_SPAN_NAME: &str = "parse_query";

pub(crate) const BUILT_IN_SPAN_NAMES: [&str; 8] = [
pub(crate) const BUILT_IN_SPAN_NAMES: [&str; 9] = [
REQUEST_SPAN_NAME,
ROUTER_SPAN_NAME,
SUPERGRAPH_SPAN_NAME,
Expand All @@ -28,4 +29,5 @@ pub(crate) const BUILT_IN_SPAN_NAMES: [&str; 8] = [
HTTP_REQUEST_SPAN_NAME,
QUERY_PLANNING_SPAN_NAME,
EXECUTION_SPAN_NAME,
QUERY_PARSING_SPAN_NAME,
];
2 changes: 1 addition & 1 deletion apollo-router/src/plugins/telemetry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -870,7 +870,7 @@ impl Telemetry {
propagators.push(Box::<opentelemetry_zipkin::Propagator>::default());
}
if propagation.datadog || tracing.datadog.enabled() {
propagators.push(Box::<opentelemetry_datadog::DatadogPropagator>::default());
propagators.push(Box::<tracing::datadog_exporter::DatadogPropagator>::default());
}
if propagation.aws_xray {
propagators.push(Box::<opentelemetry_aws::XrayPropagator>::default());
Expand Down
117 changes: 111 additions & 6 deletions apollo-router/src/plugins/telemetry/tracing/datadog.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
//! Configuration for datadog tracing.

use std::collections::HashMap;
use std::fmt::Debug;
use std::fmt::Formatter;

use ahash::HashMap;
use ahash::HashMapExt;
use futures::future::BoxFuture;
use http::Uri;
use opentelemetry::sdk;
use opentelemetry::sdk::trace::BatchSpanProcessor;
use opentelemetry::sdk::trace::Builder;
use opentelemetry::Value;
use opentelemetry_api::trace::SpanContext;
use opentelemetry_api::trace::SpanKind;
use opentelemetry_api::Key;
use opentelemetry_api::KeyValue;
use opentelemetry_sdk::export::trace::ExportResult;
use opentelemetry_sdk::export::trace::SpanData;
use opentelemetry_sdk::export::trace::SpanExporter;
use opentelemetry_semantic_conventions::resource::SERVICE_NAME;
use opentelemetry_semantic_conventions::resource::SERVICE_VERSION;
use schemars::JsonSchema;
Expand All @@ -27,6 +37,9 @@ use crate::plugins::telemetry::consts::SUBGRAPH_REQUEST_SPAN_NAME;
use crate::plugins::telemetry::consts::SUBGRAPH_SPAN_NAME;
use crate::plugins::telemetry::consts::SUPERGRAPH_SPAN_NAME;
use crate::plugins::telemetry::endpoint::UriEndpoint;
use crate::plugins::telemetry::tracing::datadog_exporter;
use crate::plugins::telemetry::tracing::datadog_exporter::propagator::TRACE_STATE_MEASURE;
use crate::plugins::telemetry::tracing::datadog_exporter::propagator::TRACE_STATE_TRUE_VALUE;
use crate::plugins::telemetry::tracing::BatchProcessorConfig;
use crate::plugins::telemetry::tracing::SpanProcessorExt;
use crate::plugins::telemetry::tracing::TracingConfigurator;
Expand Down Expand Up @@ -79,6 +92,19 @@ pub(crate) struct Config {
/// http_request -> http.route
#[serde(default)]
resource_mapping: HashMap<String, String>,

/// Which spans will be eligible for span stats to be collected for viewing in the APM view.
/// Defaults to true for `request`, `router`, `query_parsing`, `supergraph`, `execution`, `query_planning`, `subgraph`, `subgraph_request` and `http_request`.
#[serde(default = "default_span_metrics")]
span_metrics: HashMap<String, bool>,
}

fn default_span_metrics() -> HashMap<String, bool> {
let mut map = HashMap::with_capacity(BUILT_IN_SPAN_NAMES.len());
for name in BUILT_IN_SPAN_NAMES {
map.insert(name.to_string(), true);
}
map
}

fn default_true() -> bool {
Expand Down Expand Up @@ -111,7 +137,7 @@ impl TracingConfigurator for Config {

let fixed_span_names = self.fixed_span_names;

let exporter = opentelemetry_datadog::new_pipeline()
let exporter = datadog_exporter::new_pipeline()
.with(
&self.endpoint.to_uri(&Uri::from_static(DEFAULT_ENDPOINT)),
|builder, e| builder.with_agent_endpoint(e.to_string().trim_end_matches('/')),
Expand Down Expand Up @@ -170,13 +196,92 @@ impl TracingConfigurator for Config {
.expect("cargo version is set as a resource default;qed")
.to_string(),
)
.with_http_client(reqwest::Client::builder().build()?)
.with_trace_config(common)
.build_exporter()?;

// Use the default span metrics and override with the ones from the config
let mut span_metrics = default_span_metrics();
span_metrics.extend(self.span_metrics.clone());

Ok(builder.with_span_processor(
BatchSpanProcessor::builder(exporter, opentelemetry::runtime::Tokio)
.with_batch_config(self.batch_processor.clone().into())
.build()
.filtered(),
BatchSpanProcessor::builder(
ExporterWrapper {
delegate: exporter,
span_metrics,
},
opentelemetry::runtime::Tokio,
)
.with_batch_config(self.batch_processor.clone().into())
.build()
.filtered(),
))
}
}

/// Span exporter that wraps the Datadog exporter so that spans can be adjusted
/// before they are handed over for export.
struct ExporterWrapper {
/// The wrapped Datadog exporter; export, shutdown and flush are forwarded to it.
delegate: datadog_exporter::DatadogExporter,
/// Per-span-name switch consulted during export: a `true` entry causes the
/// measure entry to be added to that span's trace state.
span_metrics: HashMap<String, bool>,
}

impl Debug for ExporterWrapper {
    /// Formats the wrapper exactly as the wrapped exporter formats itself;
    /// the wrapper adds nothing of its own to the output.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { delegate, .. } = self;
        Debug::fmt(delegate, f)
    }
}

impl SpanExporter for ExporterWrapper {
    /// Adjusts every span in `batch`, then forwards the batch to the wrapped exporter.
    ///
    /// Two adjustments are applied to each span:
    /// * when `span_metrics` maps the span's name to `true`, the measure entry is added
    ///   to the span's trace state;
    /// * a `span.kind` attribute is written, derived from the OpenTelemetry span kind.
    fn export(&mut self, mut batch: Vec<SpanData>) -> BoxFuture<'static, ExportResult> {
        let original_name_key = Key::from_static_str(OTEL_ORIGINAL_NAME);

        for span in batch.iter_mut() {
            // Look the span up by the name stored under `OTEL_ORIGINAL_NAME` when that
            // attribute is present, otherwise by the span's own name. Everything here is
            // borrowed so that the lookup does not allocate.
            let recorded_name = span
                .attributes
                .get(&original_name_key)
                .map(|value| value.as_str());
            let lookup_name: &str = match recorded_name.as_deref() {
                Some(name) => name,
                None => span.name.as_ref(),
            };
            let measured = self.span_metrics.get(lookup_name).copied().unwrap_or(false);

            if measured {
                // Trace state is immutable, so build an updated copy and recreate the
                // span context around it.
                let context = &span.span_context;
                let measured_state = context
                    .trace_state()
                    .insert(TRACE_STATE_MEASURE, TRACE_STATE_TRUE_VALUE)
                    .expect("valid trace state");
                let rebuilt = SpanContext::new(
                    context.trace_id(),
                    context.span_id(),
                    context.trace_flags(),
                    context.is_remote(),
                    measured_state,
                );
                span.span_context = rebuilt;
            }

            // Span kind values follow
            // https://github.com/DataDog/dd-trace-go/blob/main/ddtrace/ext/span_kind.go
            let kind_label = match span.span_kind {
                SpanKind::Client => "client",
                SpanKind::Server => "server",
                SpanKind::Producer => "producer",
                SpanKind::Consumer => "consumer",
                SpanKind::Internal => "internal",
            };
            span.attributes
                .insert(KeyValue::new("span.kind", kind_label));

            // Note we do NOT set span.type as it isn't a good fit for otel.
        }

        self.delegate.export(batch)
    }

    /// Forwards shutdown to the wrapped exporter.
    fn shutdown(&mut self) {
        self.delegate.shutdown()
    }

    /// Forwards the flush request to the wrapped exporter.
    fn force_flush(&mut self) -> BoxFuture<'static, ExportResult> {
        self.delegate.force_flush()
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
This is a temporary in-tree copy of the Datadog exporter, kept until we update OpenTelemetry.
The newest version of the upstream exporter does support setting span metrics, but we
can't use it until we upgrade OpenTelemetry.

Once OpenTelemetry is upgraded, we can remove this code and use the upstream exporter directly.
Loading