Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changesets/fix_rreg_fix_entities_errors_missing_service.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
### Fix _entities Apollo Error Metrics Missing Service Attribute ([PR #8153](https://github.com/apollographql/router/pull/8153))

The error counting changes in https://github.com/apollographql/router/pull/7712 introduced a bug where `_entities` errors from a subgraph fetch no longer reported a service (subgraph or connector) attribute. As a result, these errors were erroneously categorized in the Studio UI as originating from the Router rather than from their originating service.

The attribute has been re-added, fixing this issue.

By [@rregitsky](https://github.com/rregitsky) in https://github.com/apollographql/router/pull/8153
12 changes: 12 additions & 0 deletions apollo-router/src/services/subgraph_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ use opentelemetry::Key;
use opentelemetry::KeyValue;
use rustls::RootCertStore;
use serde::Serialize;
use serde_json_bytes::Entry;
use serde_json_bytes::json;
use tokio::sync::oneshot;
use tokio_tungstenite::connect_async;
use tokio_tungstenite::connect_async_tls_with_config;
Expand Down Expand Up @@ -785,6 +787,14 @@ fn http_response_to_graphql_response(
}
};

// Any errors directly parsed from the response likely won't yet have the service name set,
// but we need it for telemetry error counting
for err in &mut graphql_response.errors {
if let Entry::Vacant(v) = err.extensions.entry("service") {
v.insert(json!(service_name));
}
}

// Add an error for response codes that are not 2xx
if !parts.status.is_success() {
let status = parts.status;
Expand Down Expand Up @@ -3261,6 +3271,7 @@ mod tests {
let error = graphql::Error::builder()
.message("error was encountered for test")
.extension_code("SOME_EXTENSION")
.extension("service", "test_service")
.build();
let mut json = serde_json::json!({
"data": {
Expand Down Expand Up @@ -3295,6 +3306,7 @@ mod tests {
let error = graphql::Error::builder()
.message("error was encountered for test")
.extension_code("SOME_EXTENSION")
.extension("service", "test_service")
.build();
let mut json = serde_json::json!({
"data": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,83 @@ async fn test_subgraph_layer_error_emits_metric() {
router.graceful_shutdown().await;
}

#[tokio::test(flavor = "multi_thread")]
async fn test_subgraph_layer_entities_error_emits_metric() {
if !graph_os_enabled() {
return;
}
let expected_service = "products";
let expected_error_code = "SUBGRAPH_CODE";
let expected_client_name = "CLIENT_NAME";
let expected_client_version = "v0.14";
let expected_path = "/_entities/0/name";

let mut router = IntegrationTest::builder()
.telemetry(Telemetry::Otlp { endpoint: None })
.config(
r#"
telemetry:
apollo:
experimental_otlp_metrics_protocol: http
batch_processor:
scheduled_delay: 10ms
errors:
preview_extended_error_metrics: enabled
"#,
)
.responder(
ResponseTemplate::new(200).set_body_json(
graphql::Response::builder()
.data(json!({"data": {"_entities": [{"name": null}]}}))
.errors(vec![
graphql::Error::builder()
.message("error in subgraph layer")
// Explicitly exclude setting service as it should get populated by subgraph_service
.extension_code(expected_error_code)
// Path must not have leading slash to match expected
.path("_entities/0/name")
.build(),
])
.build(),
),
)
.build()
.await;

router.start().await;
router.assert_started().await;

router
.execute_query(
Query::builder()
.header("apollographql-client-name", expected_client_name)
.header("apollographql-client-version", expected_client_version)
.build(),
)
.await;

let metrics = router
.wait_for_emitted_otel_metrics(Duration::from_millis(20))
.await;

assert!(!metrics.is_empty());
assert_metrics_contain(
&metrics,
Metric::builder()
.name("apollo.router.operations.error".to_string())
.attribute("graphql.operation.name", "ExampleQuery")
.attribute("graphql.operation.type", "query")
.attribute("apollo.client.name", expected_client_name)
.attribute("apollo.client.version", expected_client_version)
.attribute("graphql.error.extensions.code", expected_error_code)
.attribute("apollo.router.error.service", expected_service)
.attribute("graphql.error.path", expected_path)
.value(1)
.build(),
);
router.graceful_shutdown().await;
}

#[tokio::test(flavor = "multi_thread")]
async fn test_include_subgraph_error_disabled_does_not_redact_error_metrics() {
if !graph_os_enabled() {
Expand Down