From bce1c2292c1cfd83d6fcb44445caf2eeebda75ec Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Tue, 8 Apr 2025 13:46:34 -0400 Subject: [PATCH 01/46] WIP error counting pluging --- .../src/plugins/error_counting_metrics.rs | 228 ++++++++++++++++++ apollo-router/src/plugins/mod.rs | 1 + 2 files changed, 229 insertions(+) create mode 100644 apollo-router/src/plugins/error_counting_metrics.rs diff --git a/apollo-router/src/plugins/error_counting_metrics.rs b/apollo-router/src/plugins/error_counting_metrics.rs new file mode 100644 index 0000000000..8b6e66b2cc --- /dev/null +++ b/apollo-router/src/plugins/error_counting_metrics.rs @@ -0,0 +1,228 @@ +use std::sync::Arc; +use std::collections::HashMap; + +use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; +use crate::Context; +use crate::apollo_studio_interop::UsageReporting; +use crate::context::OPERATION_KIND; +use crate::context::OPERATION_NAME; +use crate::plugins::telemetry::CLIENT_NAME; +use crate::plugins::telemetry::CLIENT_VERSION; +use crate::query_planner::APOLLO_OPERATION_ID; +use crate::spec::GRAPHQL_PARSE_FAILURE_ERROR_KEY; +use crate::spec::GRAPHQL_UNKNOWN_OPERATION_NAME_ERROR_KEY; +use crate::spec::GRAPHQL_VALIDATION_FAILURE_ERROR_KEY; +use schemars::JsonSchema; +use serde::Deserialize; +use tower::BoxError; +use crate::query_planner::stats_report_key_hash; +use tower::ServiceExt; + +use crate::plugin::Plugin; +use crate::plugin::PluginInit; +use crate::register_plugin; +use crate::services::SubgraphResponse; +use crate::services::subgraph; +use crate::graphql; + + +use super::telemetry::apollo::ErrorsConfiguration; + +static REDACTED_ERROR_MESSAGE: &str = "Subgraph errors redacted"; + +register_plugin!("apollo", "error_counting_metrics", ErrorCountingMetrics); + +/// Configuration for exposing errors that originate from subgraphs +#[derive(Clone, Debug, JsonSchema, Default, Deserialize)] +#[serde(rename_all = "snake_case", deny_unknown_fields, default)] +struct Config { + // TODO +} + 
+struct ErrorCountingMetrics { + config: Config, +} + +#[async_trait::async_trait] +impl Plugin for ErrorCountingMetrics { + type Config = Config; + + async fn new(init: PluginInit) -> Result { + Ok(ErrorCountingMetrics { + config: init.config, + }) + } + + fn subgraph_service(&self, name: &str, service: subgraph::BoxService) -> subgraph::BoxService { + // Search for subgraph in our configured subgraph map. If we can't find it, use the "all" value + + service + .map_response(move |mut response: SubgraphResponse| { + let errors = &mut response.response.body_mut().errors; + if !errors.is_empty() { + count_operation_errors( + &errors, + &response.context, + &self.apollo_telemetry_config.errors, + ); + } + // TODO value completion errors? + + // TODO count_operation_error_codes() invalid accept header case? May be impossible + // due to needing to remake the if/elseif or at minimum duplicating logic + + // We don't need to bother with `count_graphql_error()` call for free + // tier rate limiting b/c it doesn't emit a metric with context + // It will be called by `count_operation_errors()` though + response + }) // TODO use map_err? + .boxed() + } + +// TODO execution_service for connectors errors? 
+} + +fn count_operation_error_codes( + codes: &[&str], + context: &Context, + errors_config: &ErrorsConfiguration, +) { + let errors: Vec = codes + .iter() + .map(|c| { + graphql::Error::builder() + .message("") + .extension_code(*c) + .build() + }) + .collect(); + + count_operation_errors(&errors, context, errors_config); +} + +fn count_operation_errors( + errors: &[graphql::Error], + context: &Context, + errors_config: &ErrorsConfiguration, +) { + let unwrap_context_string = |context_key: &str| -> String { + context + .get::<_, String>(context_key) + .unwrap_or_default() + .unwrap_or_default() + }; + + let mut operation_id = unwrap_context_string(APOLLO_OPERATION_ID); + let mut operation_name = unwrap_context_string(OPERATION_NAME); + let operation_kind = unwrap_context_string(OPERATION_KIND); + let client_name = unwrap_context_string(CLIENT_NAME); + let client_version = unwrap_context_string(CLIENT_VERSION); + + // Try to get operation ID from the stats report key if it's not in context (e.g. on parse/validation error) + if operation_id.is_empty() { + let maybe_stats_report_key = context.extensions().with_lock(|lock| { + lock.get::>() + .map(|u| u.stats_report_key.clone()) + }); + if let Some(stats_report_key) = maybe_stats_report_key { + operation_id = stats_report_key_hash(stats_report_key.as_str()); + + // If the operation name is empty, it's possible it's an error and we can populate the name by skipping the + // first character of the stats report key ("#") and the last newline character. E.g. + // "## GraphQLParseFailure\n" will turn into "# GraphQLParseFailure". 
+ if operation_name.is_empty() { + operation_name = match stats_report_key.as_str() { + GRAPHQL_PARSE_FAILURE_ERROR_KEY + | GRAPHQL_UNKNOWN_OPERATION_NAME_ERROR_KEY + | GRAPHQL_VALIDATION_FAILURE_ERROR_KEY => stats_report_key + .chars() + .skip(1) + .take(stats_report_key.len() - 2) + .collect(), + _ => "".to_string(), + } + } + } + } + + let mut map = HashMap::new(); + for error in errors { + let code = error.extensions.get("code").and_then(|c| c.as_str()); + let service = error + .extensions + .get("service") + .and_then(|s| s.as_str()) + .unwrap_or_default() + .to_string(); + let severity = error.extensions.get("severity").and_then(|s| s.as_str()); + let path = match &error.path { + None => "".into(), + Some(path) => path.to_string(), + }; + let entry = map.entry(code).or_insert(0u64); + *entry += 1; + + let send_otlp_errors = if service.is_empty() { + matches!( + errors_config.preview_extended_error_metrics, + ExtendedErrorMetricsMode::Enabled + ) + } else { + let subgraph_error_config = errors_config.subgraph.get_error_config(&service); + subgraph_error_config.send + && matches!( + errors_config.preview_extended_error_metrics, + ExtendedErrorMetricsMode::Enabled + ) + }; + + if send_otlp_errors { + let code_str = code.unwrap_or_default().to_string(); + let severity_str = severity + .unwrap_or(tracing::Level::ERROR.as_str()) + .to_string(); + u64_counter!( + "apollo.router.operations.error", + "Number of errors returned by operation", + 1, + "apollo.operation.id" = operation_id.clone(), + "graphql.operation.name" = operation_name.clone(), + "graphql.operation.type" = operation_kind.clone(), + "apollo.client.name" = client_name.clone(), + "apollo.client.version" = client_version.clone(), + "graphql.error.extensions.code" = code_str, + "graphql.error.extensions.severity" = severity_str, + "graphql.error.path" = path, + "apollo.router.error.service" = service + ); + } + } + + for (code, count) in map { + count_graphql_error(count, code); + } +} + +/// Shared 
counter for `apollo.router.graphql_error` for consistency +fn count_graphql_error(count: u64, code: Option<&str>) { + match code { + None => { + u64_counter!( + "apollo.router.graphql_error", + "Number of GraphQL error responses returned by the router", + count + ); + } + Some(code) => { + u64_counter!( + "apollo.router.graphql_error", + "Number of GraphQL error responses returned by the router", + count, + code = code.to_string() + ); + } + } +} + +#[cfg(test)] +mod test {} diff --git a/apollo-router/src/plugins/mod.rs b/apollo-router/src/plugins/mod.rs index 2648540590..5220a00e6f 100644 --- a/apollo-router/src/plugins/mod.rs +++ b/apollo-router/src/plugins/mod.rs @@ -46,3 +46,4 @@ pub(crate) mod telemetry; #[cfg(test)] pub(crate) mod test; pub(crate) mod traffic_shaping; +pub(crate) mod error_counting_metrics; From c73714f294d4f4277a00c813075eb59c325527c3 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Thu, 24 Apr 2025 12:55:55 -0400 Subject: [PATCH 02/46] switch to submodule for telemetry plugin --- .../src/plugins/error_counting_metrics.rs | 228 ------------------ .../src/plugins/telemetry/error_counter.rs | 212 ++++++++++++++++ apollo-router/src/plugins/telemetry/mod.rs | 8 + 3 files changed, 220 insertions(+), 228 deletions(-) delete mode 100644 apollo-router/src/plugins/error_counting_metrics.rs create mode 100644 apollo-router/src/plugins/telemetry/error_counter.rs diff --git a/apollo-router/src/plugins/error_counting_metrics.rs b/apollo-router/src/plugins/error_counting_metrics.rs deleted file mode 100644 index 8b6e66b2cc..0000000000 --- a/apollo-router/src/plugins/error_counting_metrics.rs +++ /dev/null @@ -1,228 +0,0 @@ -use std::sync::Arc; -use std::collections::HashMap; - -use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; -use crate::Context; -use crate::apollo_studio_interop::UsageReporting; -use crate::context::OPERATION_KIND; -use crate::context::OPERATION_NAME; -use crate::plugins::telemetry::CLIENT_NAME; -use 
crate::plugins::telemetry::CLIENT_VERSION; -use crate::query_planner::APOLLO_OPERATION_ID; -use crate::spec::GRAPHQL_PARSE_FAILURE_ERROR_KEY; -use crate::spec::GRAPHQL_UNKNOWN_OPERATION_NAME_ERROR_KEY; -use crate::spec::GRAPHQL_VALIDATION_FAILURE_ERROR_KEY; -use schemars::JsonSchema; -use serde::Deserialize; -use tower::BoxError; -use crate::query_planner::stats_report_key_hash; -use tower::ServiceExt; - -use crate::plugin::Plugin; -use crate::plugin::PluginInit; -use crate::register_plugin; -use crate::services::SubgraphResponse; -use crate::services::subgraph; -use crate::graphql; - - -use super::telemetry::apollo::ErrorsConfiguration; - -static REDACTED_ERROR_MESSAGE: &str = "Subgraph errors redacted"; - -register_plugin!("apollo", "error_counting_metrics", ErrorCountingMetrics); - -/// Configuration for exposing errors that originate from subgraphs -#[derive(Clone, Debug, JsonSchema, Default, Deserialize)] -#[serde(rename_all = "snake_case", deny_unknown_fields, default)] -struct Config { - // TODO -} - -struct ErrorCountingMetrics { - config: Config, -} - -#[async_trait::async_trait] -impl Plugin for ErrorCountingMetrics { - type Config = Config; - - async fn new(init: PluginInit) -> Result { - Ok(ErrorCountingMetrics { - config: init.config, - }) - } - - fn subgraph_service(&self, name: &str, service: subgraph::BoxService) -> subgraph::BoxService { - // Search for subgraph in our configured subgraph map. If we can't find it, use the "all" value - - service - .map_response(move |mut response: SubgraphResponse| { - let errors = &mut response.response.body_mut().errors; - if !errors.is_empty() { - count_operation_errors( - &errors, - &response.context, - &self.apollo_telemetry_config.errors, - ); - } - // TODO value completion errors? - - // TODO count_operation_error_codes() invalid accept header case? 
May be impossible - // due to needing to remake the if/elseif or at minimum duplicating logic - - // We don't need to bother with `count_graphql_error()` call for free - // tier rate limiting b/c it doesn't emit a metric with context - // It will be called by `count_operation_errors()` though - response - }) // TODO use map_err? - .boxed() - } - -// TODO execution_service for connectors errors? -} - -fn count_operation_error_codes( - codes: &[&str], - context: &Context, - errors_config: &ErrorsConfiguration, -) { - let errors: Vec = codes - .iter() - .map(|c| { - graphql::Error::builder() - .message("") - .extension_code(*c) - .build() - }) - .collect(); - - count_operation_errors(&errors, context, errors_config); -} - -fn count_operation_errors( - errors: &[graphql::Error], - context: &Context, - errors_config: &ErrorsConfiguration, -) { - let unwrap_context_string = |context_key: &str| -> String { - context - .get::<_, String>(context_key) - .unwrap_or_default() - .unwrap_or_default() - }; - - let mut operation_id = unwrap_context_string(APOLLO_OPERATION_ID); - let mut operation_name = unwrap_context_string(OPERATION_NAME); - let operation_kind = unwrap_context_string(OPERATION_KIND); - let client_name = unwrap_context_string(CLIENT_NAME); - let client_version = unwrap_context_string(CLIENT_VERSION); - - // Try to get operation ID from the stats report key if it's not in context (e.g. on parse/validation error) - if operation_id.is_empty() { - let maybe_stats_report_key = context.extensions().with_lock(|lock| { - lock.get::>() - .map(|u| u.stats_report_key.clone()) - }); - if let Some(stats_report_key) = maybe_stats_report_key { - operation_id = stats_report_key_hash(stats_report_key.as_str()); - - // If the operation name is empty, it's possible it's an error and we can populate the name by skipping the - // first character of the stats report key ("#") and the last newline character. E.g. - // "## GraphQLParseFailure\n" will turn into "# GraphQLParseFailure". 
- if operation_name.is_empty() { - operation_name = match stats_report_key.as_str() { - GRAPHQL_PARSE_FAILURE_ERROR_KEY - | GRAPHQL_UNKNOWN_OPERATION_NAME_ERROR_KEY - | GRAPHQL_VALIDATION_FAILURE_ERROR_KEY => stats_report_key - .chars() - .skip(1) - .take(stats_report_key.len() - 2) - .collect(), - _ => "".to_string(), - } - } - } - } - - let mut map = HashMap::new(); - for error in errors { - let code = error.extensions.get("code").and_then(|c| c.as_str()); - let service = error - .extensions - .get("service") - .and_then(|s| s.as_str()) - .unwrap_or_default() - .to_string(); - let severity = error.extensions.get("severity").and_then(|s| s.as_str()); - let path = match &error.path { - None => "".into(), - Some(path) => path.to_string(), - }; - let entry = map.entry(code).or_insert(0u64); - *entry += 1; - - let send_otlp_errors = if service.is_empty() { - matches!( - errors_config.preview_extended_error_metrics, - ExtendedErrorMetricsMode::Enabled - ) - } else { - let subgraph_error_config = errors_config.subgraph.get_error_config(&service); - subgraph_error_config.send - && matches!( - errors_config.preview_extended_error_metrics, - ExtendedErrorMetricsMode::Enabled - ) - }; - - if send_otlp_errors { - let code_str = code.unwrap_or_default().to_string(); - let severity_str = severity - .unwrap_or(tracing::Level::ERROR.as_str()) - .to_string(); - u64_counter!( - "apollo.router.operations.error", - "Number of errors returned by operation", - 1, - "apollo.operation.id" = operation_id.clone(), - "graphql.operation.name" = operation_name.clone(), - "graphql.operation.type" = operation_kind.clone(), - "apollo.client.name" = client_name.clone(), - "apollo.client.version" = client_version.clone(), - "graphql.error.extensions.code" = code_str, - "graphql.error.extensions.severity" = severity_str, - "graphql.error.path" = path, - "apollo.router.error.service" = service - ); - } - } - - for (code, count) in map { - count_graphql_error(count, code); - } -} - -/// Shared 
counter for `apollo.router.graphql_error` for consistency -fn count_graphql_error(count: u64, code: Option<&str>) { - match code { - None => { - u64_counter!( - "apollo.router.graphql_error", - "Number of GraphQL error responses returned by the router", - count - ); - } - Some(code) => { - u64_counter!( - "apollo.router.graphql_error", - "Number of GraphQL error responses returned by the router", - count, - code = code.to_string() - ); - } - } -} - -#[cfg(test)] -mod test {} diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs new file mode 100644 index 0000000000..ba8c6737a6 --- /dev/null +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -0,0 +1,212 @@ +use std::collections::HashMap; +use std::sync::Arc; +use serde_json_bytes::Value; +use crate::apollo_studio_interop::UsageReporting; +use crate::context::{OPERATION_KIND, OPERATION_NAME}; +use crate::{graphql, Context}; +use crate::plugins::telemetry::apollo::{ErrorsConfiguration, ExtendedErrorMetricsMode}; +use crate::plugins::telemetry::{CLIENT_NAME, CLIENT_VERSION}; +use crate::query_planner::APOLLO_OPERATION_ID; +use crate::services::{SupergraphResponse}; +use crate::services::router::ClientRequestAccepts; +use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; + +// TODO migrate subgraph and extended errors config +pub(crate) async fn count_errors(mut response: SupergraphResponse, errors_config: &ErrorsConfiguration) { + let context = response.context.clone(); + // TODO do we really need this? 
+ let ClientRequestAccepts { + wildcard: accepts_wildcard, + json: accepts_json, + multipart_defer: accepts_multipart_defer, + multipart_subscription: accepts_multipart_subscription, + } = context + .extensions() + .with_lock(|lock| lock.get().cloned()) + .unwrap_or_default(); + + + if let Some(gql_response) = response.next_response().await { + if !gql_response.has_next.unwrap_or(false) + && !gql_response.subscribed.unwrap_or(false) + && (accepts_json || accepts_wildcard) + { + if !gql_response.errors.is_empty() { + count_operation_errors(&gql_response.errors, &context, &errors_config); + } + if let Some(value_completion) = gql_response.extensions.get(EXTENSIONS_VALUE_COMPLETION_KEY) { + // TODO inline this func? + count_value_completion_errors( + value_completion, + &context, + &errors_config, + ); + } + } else if accepts_multipart_defer || accepts_multipart_subscription { + // TODO can we combine this with above? + if !gql_response.errors.is_empty() { + count_operation_errors(&gql_response.errors, &context, &errors_config); + } + } else { + // TODO supposedly this is unreachable in router service. Will we be able to pick this up in a router service plugin callback instead? + // TODO I'm guessing no b/c at the plugin layer, we'd have to parse the response as json. + // TODO As is, this feels really bad b/c the error will be defined _AFTER_ we count it in router/service.rs + count_operation_error_codes( + &["INVALID_ACCEPT_HEADER"], + &context, + &errors_config, + ); + } + } + + // TODO router service plugin fn to capture SERVICE_UNAVAILABLE or INVALID_ACCEPT_HEADER? 
Would need to parse json response +} + +fn count_operation_error_codes( + codes: &[&str], + context: &Context, + errors_config: &ErrorsConfiguration, +) { + let errors: Vec = codes + .iter() + .map(|c| { + graphql::Error::builder() + .message("") + .extension_code(*c) + .build() + }) + .collect(); + + count_operation_errors(&errors, context, errors_config); +} + +fn count_value_completion_errors( + value_completion: &Value, + context: &Context, + errors_config: &ErrorsConfiguration, +) { + if let Some(vc_array) = value_completion.as_array() { + let errors: Vec = vc_array + .iter() + .filter_map(graphql::Error::from_value_completion_value) + .collect(); + crate::metrics::count_operation_errors(&errors, context, errors_config); + } +} + +fn count_operation_errors( + errors: &[graphql::Error], + context: &Context, + errors_config: &ErrorsConfiguration, +) { + let unwrap_context_string = |context_key: &str| -> String { + context + .get::<_, String>(context_key) + .unwrap_or_default() + .unwrap_or_default() + }; + + let mut operation_id = unwrap_context_string(APOLLO_OPERATION_ID); + let mut operation_name = unwrap_context_string(OPERATION_NAME); + let operation_kind = unwrap_context_string(OPERATION_KIND); + let client_name = unwrap_context_string(CLIENT_NAME); + let client_version = unwrap_context_string(CLIENT_VERSION); + + let maybe_usage_reporting = context + .extensions() + .with_lock(|lock| lock.get::>().cloned()); + + if let Some(usage_reporting) = maybe_usage_reporting { + // Try to get operation ID from usage reporting if it's not in context (e.g. 
on parse/validation error) + if operation_id.is_empty() { + operation_id = usage_reporting.get_operation_id(); + } + + // Also try to get operation name from usage reporting if it's not in context + if operation_name.is_empty() { + operation_name = usage_reporting.get_operation_name(); + } + } + + let mut map = HashMap::new(); + for error in errors { + let code = error.extensions.get("code").and_then(|c| match c { + Value::String(s) => Some(s.as_str().to_owned()), + Value::Bool(b) => Some(format!("{b}")), + Value::Number(n) => Some(n.to_string()), + Value::Null | Value::Array(_) | Value::Object(_) => None, + }); + let service = error + .extensions + .get("service") + .and_then(|s| s.as_str()) + .unwrap_or_default() + .to_string(); + let severity = error.extensions.get("severity").and_then(|s| s.as_str()); + let path = match &error.path { + None => "".into(), + Some(path) => path.to_string(), + }; + let entry = map.entry(code.clone()).or_insert(0u64); + *entry += 1; + + let send_otlp_errors = if service.is_empty() { + matches!( + errors_config.preview_extended_error_metrics, + ExtendedErrorMetricsMode::Enabled + ) + } else { + let subgraph_error_config = errors_config.subgraph.get_error_config(&service); + subgraph_error_config.send + && matches!( + errors_config.preview_extended_error_metrics, + ExtendedErrorMetricsMode::Enabled + ) + }; + + if send_otlp_errors { + let severity_str = severity + .unwrap_or(tracing::Level::ERROR.as_str()) + .to_string(); + u64_counter!( + "apollo.router.operations.error", + "Number of errors returned by operation", + 1, + "apollo.operation.id" = operation_id.clone(), + "graphql.operation.name" = operation_name.clone(), + "graphql.operation.type" = operation_kind.clone(), + "apollo.client.name" = client_name.clone(), + "apollo.client.version" = client_version.clone(), + "graphql.error.extensions.code" = code.unwrap_or_default(), + "graphql.error.extensions.severity" = severity_str, + "graphql.error.path" = path, + 
"apollo.router.error.service" = service + ); + } + } + + for (code, count) in map { + count_graphql_error(count, code.as_deref()); + } +} + +/// Shared counter for `apollo.router.graphql_error` for consistency +fn count_graphql_error(count: u64, code: Option<&str>) { + match code { + None => { + u64_counter!( + "apollo.router.graphql_error", + "Number of GraphQL error responses returned by the router", + count + ); + } + Some(code) => { + u64_counter!( + "apollo.router.graphql_error", + "Number of GraphQL error responses returned by the router", + count, + code = code.to_string() + ); + } + } +} \ No newline at end of file diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 10295418c9..8d4a6a9930 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -113,6 +113,7 @@ use crate::plugins::telemetry::consts::OTEL_STATUS_CODE_OK; use crate::plugins::telemetry::consts::REQUEST_SPAN_NAME; use crate::plugins::telemetry::consts::ROUTER_SPAN_NAME; use crate::plugins::telemetry::dynamic_attribute::SpanDynAttribute; +use crate::plugins::telemetry::error_counter::count_errors; use crate::plugins::telemetry::fmt_layer::create_fmt_layer; use crate::plugins::telemetry::metrics::MetricsBuilder; use crate::plugins::telemetry::metrics::MetricsConfigurator; @@ -154,6 +155,7 @@ pub(crate) mod config_new; pub(crate) mod consts; pub(crate) mod dynamic_attribute; mod endpoint; +mod error_counter; mod error_handler; mod fmt_layer; pub(crate) mod formatters; @@ -642,6 +644,12 @@ impl PluginPrivate for Telemetry { resp }) + .map_response(move |mut resp: SupergraphResponse| { + // TODO make sure this doesn't override the above map_response + count_errors(resp, config); + + resp + }) .map_future_with_request_data( move |req: &SupergraphRequest| { let custom_attributes = config From a61fcf7675df3062c195eadd504955ca031522de Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Thu, 24 Apr 
2025 13:51:48 -0400 Subject: [PATCH 03/46] fix err config passthrough --- apollo-router/src/plugins/telemetry/error_counter.rs | 9 +++++---- apollo-router/src/plugins/telemetry/mod.rs | 6 ++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index ba8c6737a6..7c84de63b1 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -12,9 +12,9 @@ use crate::services::router::ClientRequestAccepts; use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; // TODO migrate subgraph and extended errors config -pub(crate) async fn count_errors(mut response: SupergraphResponse, errors_config: &ErrorsConfiguration) { +pub(crate) async fn count_errors(mut response: SupergraphResponse, errors_config: &ErrorsConfiguration) -> SupergraphResponse { let context = response.context.clone(); - // TODO do we really need this? + // TODO do we really need this? let ClientRequestAccepts { wildcard: accepts_wildcard, json: accepts_json, @@ -58,10 +58,11 @@ pub(crate) async fn count_errors(mut response: SupergraphResponse, errors_config ); } } - - // TODO router service plugin fn to capture SERVICE_UNAVAILABLE or INVALID_ACCEPT_HEADER? Would need to parse json response + response } +// TODO router service plugin fn to capture SERVICE_UNAVAILABLE or INVALID_ACCEPT_HEADER? 
Would need to parse json response + fn count_operation_error_codes( codes: &[&str], context: &Context, diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 8d4a6a9930..f85d199ef4 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -644,11 +644,9 @@ impl PluginPrivate for Telemetry { resp }) - .map_response(move |mut resp: SupergraphResponse| { + .map_response(async move |resp: SupergraphResponse| { // TODO make sure this doesn't override the above map_response - count_errors(resp, config); - - resp + count_errors(resp, &config.apollo.errors).await; }) .map_future_with_request_data( move |req: &SupergraphRequest| { From 6e69ef06f1dcc4caf57f5932be7eec0d21b0d075 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Thu, 24 Apr 2025 14:09:23 -0400 Subject: [PATCH 04/46] remove old fns and refs. Move unit tests --- apollo-router/src/metrics/mod.rs | 497 ------------------ .../src/plugins/license_enforcement/mod.rs | 6 +- apollo-router/src/plugins/mod.rs | 1 - .../src/plugins/telemetry/error_counter.rs | 398 +++++++++++++- apollo-router/src/services/router/service.rs | 47 -- 5 files changed, 396 insertions(+), 553 deletions(-) diff --git a/apollo-router/src/metrics/mod.rs b/apollo-router/src/metrics/mod.rs index b84549ee89..f61fd07e51 100644 --- a/apollo-router/src/metrics/mod.rs +++ b/apollo-router/src/metrics/mod.rs @@ -1338,120 +1338,6 @@ macro_rules! 
assert_histogram_not_exists { }; } -pub(crate) fn count_operation_error_codes( - codes: &[&str], - context: &Context, - errors_config: &ErrorsConfiguration, -) { - let errors: Vec = codes - .iter() - .map(|c| { - graphql::Error::builder() - .message("") - .extension_code(*c) - .build() - }) - .collect(); - - count_operation_errors(&errors, context, errors_config); -} - -pub(crate) fn count_operation_errors( - errors: &[graphql::Error], - context: &Context, - errors_config: &ErrorsConfiguration, -) { - let unwrap_context_string = |context_key: &str| -> String { - context - .get::<_, String>(context_key) - .unwrap_or_default() - .unwrap_or_default() - }; - - let mut operation_id = unwrap_context_string(APOLLO_OPERATION_ID); - let mut operation_name = unwrap_context_string(OPERATION_NAME); - let operation_kind = unwrap_context_string(OPERATION_KIND); - let client_name = unwrap_context_string(CLIENT_NAME); - let client_version = unwrap_context_string(CLIENT_VERSION); - - let maybe_usage_reporting = context - .extensions() - .with_lock(|lock| lock.get::>().cloned()); - - if let Some(usage_reporting) = maybe_usage_reporting { - // Try to get operation ID from usage reporting if it's not in context (e.g. 
on parse/validation error) - if operation_id.is_empty() { - operation_id = usage_reporting.get_operation_id(); - } - - // Also try to get operation name from usage reporting if it's not in context - if operation_name.is_empty() { - operation_name = usage_reporting.get_operation_name(); - } - } - - let mut map = HashMap::new(); - for error in errors { - let code = error.extensions.get("code").and_then(|c| match c { - Value::String(s) => Some(s.as_str().to_owned()), - Value::Bool(b) => Some(format!("{b}")), - Value::Number(n) => Some(n.to_string()), - Value::Null | Value::Array(_) | Value::Object(_) => None, - }); - let service = error - .extensions - .get("service") - .and_then(|s| s.as_str()) - .unwrap_or_default() - .to_string(); - let severity = error.extensions.get("severity").and_then(|s| s.as_str()); - let path = match &error.path { - None => "".into(), - Some(path) => path.to_string(), - }; - let entry = map.entry(code.clone()).or_insert(0u64); - *entry += 1; - - let send_otlp_errors = if service.is_empty() { - matches!( - errors_config.preview_extended_error_metrics, - ExtendedErrorMetricsMode::Enabled - ) - } else { - let subgraph_error_config = errors_config.subgraph.get_error_config(&service); - subgraph_error_config.send - && matches!( - errors_config.preview_extended_error_metrics, - ExtendedErrorMetricsMode::Enabled - ) - }; - - if send_otlp_errors { - let severity_str = severity - .unwrap_or(tracing::Level::ERROR.as_str()) - .to_string(); - u64_counter!( - "apollo.router.operations.error", - "Number of errors returned by operation", - 1, - "apollo.operation.id" = operation_id.clone(), - "graphql.operation.name" = operation_name.clone(), - "graphql.operation.type" = operation_kind.clone(), - "apollo.client.name" = client_name.clone(), - "apollo.client.version" = client_version.clone(), - "graphql.error.extensions.code" = code.unwrap_or_default(), - "graphql.error.extensions.severity" = severity_str, - "graphql.error.path" = path, - 
"apollo.router.error.service" = service - ); - } - } - - for (code, count) in map { - count_graphql_error(count, code.as_deref()); - } -} - /// Shared counter for `apollo.router.graphql_error` for consistency pub(crate) fn count_graphql_error(count: u64, code: Option<&str>) { match code { @@ -1556,25 +1442,10 @@ impl FutureMetricsExt for T where T: Future {} mod test { use opentelemetry::KeyValue; use opentelemetry::metrics::MeterProvider; - use serde_json_bytes::Value; - use serde_json_bytes::json; - - use crate::Context; - use crate::context::OPERATION_KIND; - use crate::context::OPERATION_NAME; - use crate::graphql; - use crate::json_ext::Path; use crate::metrics::FutureMetricsExt; use crate::metrics::aggregation::MeterProviderType; - use crate::metrics::count_operation_error_codes; - use crate::metrics::count_operation_errors; use crate::metrics::meter_provider; use crate::metrics::meter_provider_internal; - use crate::plugins::telemetry::CLIENT_NAME; - use crate::plugins::telemetry::CLIENT_VERSION; - use crate::plugins::telemetry::apollo::ErrorsConfiguration; - use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; - use crate::query_planner::APOLLO_OPERATION_ID; fn assert_unit(name: &str, unit: &str) { let collected_metrics = crate::metrics::collect_metrics(); @@ -1900,372 +1771,4 @@ mod test { .await; } - #[tokio::test] - async fn test_count_operation_error_codes_with_extended_config_enabled() { - async { - let config = ErrorsConfiguration { - preview_extended_error_metrics: ExtendedErrorMetricsMode::Enabled, - ..Default::default() - }; - - let context = Context::default(); - let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); - let _ = context.insert(OPERATION_NAME, "SomeOperation".to_string()); - let _ = context.insert(OPERATION_KIND, "query".to_string()); - let _ = context.insert(CLIENT_NAME, "client-1".to_string()); - let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); - - count_operation_error_codes( - 
&["GRAPHQL_VALIDATION_FAILED", "MY_CUSTOM_ERROR", "400"], - &context, - &config, - ); - - assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); - assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "MY_CUSTOM_ERROR", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); - - assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "400", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); - - assert_counter!( - "apollo.router.graphql_error", - 1, - code = "GRAPHQL_VALIDATION_FAILED" - ); - assert_counter!("apollo.router.graphql_error", 1, code = "MY_CUSTOM_ERROR"); - assert_counter!("apollo.router.graphql_error", 1, code = "400"); - } - .with_metrics() - .await; - } - - #[tokio::test] - async fn test_count_operation_error_codes_with_extended_config_disabled() { - async { - let config = ErrorsConfiguration { - preview_extended_error_metrics: ExtendedErrorMetricsMode::Disabled, - ..Default::default() - }; - - let context = Context::default(); - count_operation_error_codes( - 
&["GRAPHQL_VALIDATION_FAILED", "MY_CUSTOM_ERROR", "400"], - &context, - &config, - ); - - assert_counter_not_exists!( - "apollo.router.operations.error", - u64, - "apollo.operation.id" = "", - "graphql.operation.name" = "", - "graphql.operation.type" = "", - "apollo.client.name" = "", - "apollo.client.version" = "", - "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); - assert_counter_not_exists!( - "apollo.router.operations.error", - u64, - "apollo.operation.id" = "", - "graphql.operation.name" = "", - "graphql.operation.type" = "", - "apollo.client.name" = "", - "apollo.client.version" = "", - "graphql.error.extensions.code" = "MY_CUSTOM_ERROR", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); - assert_counter_not_exists!( - "apollo.router.operations.error", - u64, - "apollo.operation.id" = "", - "graphql.operation.name" = "", - "graphql.operation.type" = "", - "apollo.client.name" = "", - "apollo.client.version" = "", - "graphql.error.extensions.code" = "400", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); - - assert_counter!( - "apollo.router.graphql_error", - 1, - code = "GRAPHQL_VALIDATION_FAILED" - ); - assert_counter!("apollo.router.graphql_error", 1, code = "MY_CUSTOM_ERROR"); - assert_counter!("apollo.router.graphql_error", 1, code = "400"); - } - .with_metrics() - .await; - } - - #[tokio::test] - async fn test_count_operation_errors_with_extended_config_enabled() { - async { - let config = ErrorsConfiguration { - preview_extended_error_metrics: ExtendedErrorMetricsMode::Enabled, - ..Default::default() - }; - - let context = Context::default(); - let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); - let _ = context.insert(OPERATION_NAME, "SomeOperation".to_string()); - 
let _ = context.insert(OPERATION_KIND, "query".to_string()); - let _ = context.insert(CLIENT_NAME, "client-1".to_string()); - let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); - - let error = graphql::Error::builder() - .message("some error") - .extension_code("SOME_ERROR_CODE") - .extension("service", "mySubgraph") - .path(Path::from("obj/field")) - .build(); - - count_operation_errors(&[error], &context, &config); - - assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "SOME_ERROR_CODE", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); - - assert_counter!("apollo.router.graphql_error", 1, code = "SOME_ERROR_CODE"); - } - .with_metrics() - .await; - } - - #[tokio::test] - async fn test_count_operation_errors_with_all_json_types_and_extended_config_enabled() { - async { - let config = ErrorsConfiguration { - preview_extended_error_metrics: ExtendedErrorMetricsMode::Enabled, - ..Default::default() - }; - - let context = Context::default(); - let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); - let _ = context.insert(OPERATION_NAME, "SomeOperation".to_string()); - let _ = context.insert(OPERATION_KIND, "query".to_string()); - let _ = context.insert(CLIENT_NAME, "client-1".to_string()); - let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); - - let codes = [ - json!("VALID_ERROR_CODE"), - json!(400), - json!(true), - Value::Null, - json!(["code1", "code2"]), - json!({"inner": "myCode"}), - ]; - - let errors = codes.map(|code| { - graphql::Error::from_value(json!( - { - "message": "error occurred", - "extensions": { - "code": code, - "service": "mySubgraph" - }, - "path": ["obj", "field"] - } - )) - 
.unwrap() - }); - - count_operation_errors(&errors, &context, &config); - - assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "VALID_ERROR_CODE", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); - - assert_counter!("apollo.router.graphql_error", 1, code = "VALID_ERROR_CODE"); - - assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "400", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); - - assert_counter!("apollo.router.graphql_error", 1, code = "400"); - - // Code is ignored for null, arrays, and objects - - assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "true", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); - - assert_counter!("apollo.router.graphql_error", 1, code = "true"); - - assert_counter!( - "apollo.router.operations.error", - 3, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "", - 
"graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); - - assert_counter!("apollo.router.graphql_error", 3); - } - .with_metrics() - .await; - } - - #[tokio::test] - async fn test_count_operation_errors_with_duplicate_errors_and_extended_config_enabled() { - async { - let config = ErrorsConfiguration { - preview_extended_error_metrics: ExtendedErrorMetricsMode::Enabled, - ..Default::default() - }; - - let context = Context::default(); - let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); - let _ = context.insert(OPERATION_NAME, "SomeOperation".to_string()); - let _ = context.insert(OPERATION_KIND, "query".to_string()); - let _ = context.insert(CLIENT_NAME, "client-1".to_string()); - let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); - - let codes = [ - json!("VALID_ERROR_CODE"), - Value::Null, - json!("VALID_ERROR_CODE"), - Value::Null, - ]; - - let errors = codes.map(|code| { - graphql::Error::from_value(json!( - { - "message": "error occurred", - "extensions": { - "code": code, - "service": "mySubgraph" - }, - "path": ["obj", "field"] - } - )) - .unwrap() - }); - - count_operation_errors(&errors, &context, &config); - - assert_counter!( - "apollo.router.operations.error", - 2, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "VALID_ERROR_CODE", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); - - assert_counter!("apollo.router.graphql_error", 2, code = "VALID_ERROR_CODE"); - - assert_counter!( - "apollo.router.operations.error", - 2, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = 
"client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); - - assert_counter!("apollo.router.graphql_error", 2); - } - .with_metrics() - .await; - } } diff --git a/apollo-router/src/plugins/license_enforcement/mod.rs b/apollo-router/src/plugins/license_enforcement/mod.rs index 52a9c6cd24..e884277f05 100644 --- a/apollo-router/src/plugins/license_enforcement/mod.rs +++ b/apollo-router/src/plugins/license_enforcement/mod.rs @@ -18,7 +18,6 @@ use tower::load_shed::error::Overloaded; use crate::graphql; use crate::layers::ServiceBuilderExt; -use crate::metrics::count_graphql_error; use crate::plugin::PluginInit; use crate::plugin::PluginPrivate; use crate::services::RouterResponse; @@ -73,12 +72,9 @@ impl PluginPrivate for LicenseEnforcement { match response { Ok(ok) => Ok(ok), Err(err) if err.is::() => { - let extension_code = "ROUTER_FREE_PLAN_RATE_LIMIT_REACHED"; - count_graphql_error(1u64, Some(extension_code)); - let error = graphql::Error::builder() .message("Your request has been rate limited. You've reached the limits for the Free plan. 
Consider upgrading to a higher plan for increased limits.") - .extension_code(extension_code) + .extension_code("ROUTER_FREE_PLAN_RATE_LIMIT_REACHED") .build(); Ok(RouterResponse::error_builder() .status_code(StatusCode::SERVICE_UNAVAILABLE) diff --git a/apollo-router/src/plugins/mod.rs b/apollo-router/src/plugins/mod.rs index 7a52e61c3b..dbb6672b37 100644 --- a/apollo-router/src/plugins/mod.rs +++ b/apollo-router/src/plugins/mod.rs @@ -47,4 +47,3 @@ pub(crate) mod telemetry; #[cfg(test)] pub(crate) mod test; pub(crate) mod traffic_shaping; -pub(crate) mod error_counting_metrics; diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 7c84de63b1..98d235ea29 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; use std::sync::Arc; -use serde_json_bytes::Value; +use serde_json_bytes::{Value}; use crate::apollo_studio_interop::UsageReporting; use crate::context::{OPERATION_KIND, OPERATION_NAME}; use crate::{graphql, Context}; @@ -11,7 +11,7 @@ use crate::services::{SupergraphResponse}; use crate::services::router::ClientRequestAccepts; use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; -// TODO migrate subgraph and extended errors config +// TODO call this for subgraph service (pre redaction), supergraph service, and _MAYBE_ router service (service unavail and invalid headers) pub(crate) async fn count_errors(mut response: SupergraphResponse, errors_config: &ErrorsConfiguration) -> SupergraphResponse { let context = response.context.clone(); // TODO do we really need this? 
@@ -27,10 +27,13 @@ pub(crate) async fn count_errors(mut response: SupergraphResponse, errors_config if let Some(gql_response) = response.next_response().await { + // TODO make mapping to add to response context to avoid double counting + if !gql_response.has_next.unwrap_or(false) && !gql_response.subscribed.unwrap_or(false) && (accepts_json || accepts_wildcard) { + // TODO ensure free plan is captured if !gql_response.errors.is_empty() { count_operation_errors(&gql_response.errors, &context, &errors_config); } @@ -91,7 +94,7 @@ fn count_value_completion_errors( .iter() .filter_map(graphql::Error::from_value_completion_value) .collect(); - crate::metrics::count_operation_errors(&errors, context, errors_config); + count_operation_errors(&errors, context, errors_config); } } @@ -210,4 +213,393 @@ fn count_graphql_error(count: u64, code: Option<&str>) { ); } } +} + + +#[cfg(test)] +mod test { + use serde_json_bytes::Value; + use serde_json_bytes::json; + + use crate::Context; + use crate::context::OPERATION_KIND; + use crate::context::OPERATION_NAME; + use crate::graphql; + use crate::json_ext::Path; + use crate::metrics::FutureMetricsExt; + use crate::plugins::telemetry::CLIENT_NAME; + use crate::plugins::telemetry::CLIENT_VERSION; + use crate::plugins::telemetry::apollo::ErrorsConfiguration; + use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; + use crate::plugins::telemetry::error_counter::{count_operation_error_codes, count_operation_errors}; + use crate::query_planner::APOLLO_OPERATION_ID; + + #[tokio::test] + async fn test_count_operation_error_codes_with_extended_config_enabled() { + async { + let config = ErrorsConfiguration { + preview_extended_error_metrics: ExtendedErrorMetricsMode::Enabled, + ..Default::default() + }; + + let context = Context::default(); + let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); + let _ = context.insert(OPERATION_NAME, "SomeOperation".to_string()); + let _ = context.insert(OPERATION_KIND, 
"query".to_string()); + let _ = context.insert(CLIENT_NAME, "client-1".to_string()); + let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); + + count_operation_error_codes( + &["GRAPHQL_VALIDATION_FAILED", "MY_CUSTOM_ERROR", "400"], + &context, + &config, + ); + + assert_counter!( + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); + assert_counter!( + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "MY_CUSTOM_ERROR", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); + + assert_counter!( + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "400", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); + + assert_counter!( + "apollo.router.graphql_error", + 1, + code = "GRAPHQL_VALIDATION_FAILED" + ); + assert_counter!("apollo.router.graphql_error", 1, code = "MY_CUSTOM_ERROR"); + assert_counter!("apollo.router.graphql_error", 1, code = "400"); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_count_operation_error_codes_with_extended_config_disabled() { + async { + let config = 
ErrorsConfiguration { + preview_extended_error_metrics: ExtendedErrorMetricsMode::Disabled, + ..Default::default() + }; + + let context = Context::default(); + count_operation_error_codes( + &["GRAPHQL_VALIDATION_FAILED", "MY_CUSTOM_ERROR", "400"], + &context, + &config, + ); + + assert_counter_not_exists!( + "apollo.router.operations.error", + u64, + "apollo.operation.id" = "", + "graphql.operation.name" = "", + "graphql.operation.type" = "", + "apollo.client.name" = "", + "apollo.client.version" = "", + "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); + assert_counter_not_exists!( + "apollo.router.operations.error", + u64, + "apollo.operation.id" = "", + "graphql.operation.name" = "", + "graphql.operation.type" = "", + "apollo.client.name" = "", + "apollo.client.version" = "", + "graphql.error.extensions.code" = "MY_CUSTOM_ERROR", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); + assert_counter_not_exists!( + "apollo.router.operations.error", + u64, + "apollo.operation.id" = "", + "graphql.operation.name" = "", + "graphql.operation.type" = "", + "apollo.client.name" = "", + "apollo.client.version" = "", + "graphql.error.extensions.code" = "400", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); + + assert_counter!( + "apollo.router.graphql_error", + 1, + code = "GRAPHQL_VALIDATION_FAILED" + ); + assert_counter!("apollo.router.graphql_error", 1, code = "MY_CUSTOM_ERROR"); + assert_counter!("apollo.router.graphql_error", 1, code = "400"); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_count_operation_errors_with_extended_config_enabled() { + async { + let config = ErrorsConfiguration { + preview_extended_error_metrics: ExtendedErrorMetricsMode::Enabled, + 
..Default::default() + }; + + let context = Context::default(); + let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); + let _ = context.insert(OPERATION_NAME, "SomeOperation".to_string()); + let _ = context.insert(OPERATION_KIND, "query".to_string()); + let _ = context.insert(CLIENT_NAME, "client-1".to_string()); + let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); + + let error = graphql::Error::builder() + .message("some error") + .extension_code("SOME_ERROR_CODE") + .extension("service", "mySubgraph") + .path(Path::from("obj/field")) + .build(); + + count_operation_errors(&[error], &context, &config); + + assert_counter!( + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "SOME_ERROR_CODE", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); + + assert_counter!("apollo.router.graphql_error", 1, code = "SOME_ERROR_CODE"); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_count_operation_errors_with_all_json_types_and_extended_config_enabled() { + async { + let config = ErrorsConfiguration { + preview_extended_error_metrics: ExtendedErrorMetricsMode::Enabled, + ..Default::default() + }; + + let context = Context::default(); + let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); + let _ = context.insert(OPERATION_NAME, "SomeOperation".to_string()); + let _ = context.insert(OPERATION_KIND, "query".to_string()); + let _ = context.insert(CLIENT_NAME, "client-1".to_string()); + let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); + + let codes = [ + json!("VALID_ERROR_CODE"), + json!(400), + json!(true), + Value::Null, + json!(["code1", "code2"]), + json!({"inner": "myCode"}), + ]; + + 
let errors = codes.map(|code| { + graphql::Error::from_value(json!( + { + "message": "error occurred", + "extensions": { + "code": code, + "service": "mySubgraph" + }, + "path": ["obj", "field"] + } + )) + .unwrap() + }); + + count_operation_errors(&errors, &context, &config); + + assert_counter!( + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "VALID_ERROR_CODE", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); + + assert_counter!("apollo.router.graphql_error", 1, code = "VALID_ERROR_CODE"); + + assert_counter!( + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "400", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); + + assert_counter!("apollo.router.graphql_error", 1, code = "400"); + + // Code is ignored for null, arrays, and objects + + assert_counter!( + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "true", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); + + assert_counter!("apollo.router.graphql_error", 1, code = "true"); + + assert_counter!( + "apollo.router.operations.error", + 3, + "apollo.operation.id" = "some-id", + 
"graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); + + assert_counter!("apollo.router.graphql_error", 3); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_count_operation_errors_with_duplicate_errors_and_extended_config_enabled() { + async { + let config = ErrorsConfiguration { + preview_extended_error_metrics: ExtendedErrorMetricsMode::Enabled, + ..Default::default() + }; + + let context = Context::default(); + let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); + let _ = context.insert(OPERATION_NAME, "SomeOperation".to_string()); + let _ = context.insert(OPERATION_KIND, "query".to_string()); + let _ = context.insert(CLIENT_NAME, "client-1".to_string()); + let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); + + let codes = [ + json!("VALID_ERROR_CODE"), + Value::Null, + json!("VALID_ERROR_CODE"), + Value::Null, + ]; + + let errors = codes.map(|code| { + graphql::Error::from_value(json!( + { + "message": "error occurred", + "extensions": { + "code": code, + "service": "mySubgraph" + }, + "path": ["obj", "field"] + } + )) + .unwrap() + }); + + count_operation_errors(&errors, &context, &config); + + assert_counter!( + "apollo.router.operations.error", + 2, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "VALID_ERROR_CODE", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); + + assert_counter!("apollo.router.graphql_error", 2, code = "VALID_ERROR_CODE"); + + 
assert_counter!( + "apollo.router.operations.error", + 2, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); + + assert_counter!("apollo.router.graphql_error", 2); + } + .with_metrics() + .await; + } } \ No newline at end of file diff --git a/apollo-router/src/services/router/service.rs b/apollo-router/src/services/router/service.rs index b9668cc76f..807a6d43d0 100644 --- a/apollo-router/src/services/router/service.rs +++ b/apollo-router/src/services/router/service.rs @@ -50,8 +50,6 @@ use crate::http_ext; use crate::json_ext::Value; use crate::layers::DEFAULT_BUFFER_SIZE; use crate::layers::ServiceBuilderExt; -use crate::metrics::count_operation_error_codes; -use crate::metrics::count_operation_errors; #[cfg(test)] use crate::plugin::test::MockSupergraphService; use crate::plugins::telemetry::apollo::Config as ApolloTelemetryConfig; @@ -340,23 +338,6 @@ impl RouterService { && !response.subscribed.unwrap_or(false) && (accepts_json || accepts_wildcard) { - if !response.errors.is_empty() { - count_operation_errors( - &response.errors, - &context, - &self.apollo_telemetry_config.errors, - ); - } - if let Some(value_completion) = - response.extensions.get(EXTENSIONS_VALUE_COMPLETION_KEY) - { - Self::count_value_completion_errors( - value_completion, - &context, - &self.apollo_telemetry_config.errors, - ); - } - parts .headers .insert(CONTENT_TYPE, APPLICATION_JSON_HEADER_VALUE.clone()); @@ -390,14 +371,6 @@ impl RouterService { ); } - if !response.errors.is_empty() { - count_operation_errors( - &response.errors, - &context, - &self.apollo_telemetry_config.errors, - ); - } - // Useful when you're using a proxy like nginx which enable 
proxy_buffering by default (http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_buffering) parts.headers.insert( ACCEL_BUFFERING_HEADER_NAME.clone(), @@ -422,12 +395,6 @@ impl RouterService { Ok(RouterResponse { response, context }) } else { - count_operation_error_codes( - &["INVALID_ACCEPT_HEADER"], - &context, - &self.apollo_telemetry_config.errors, - ); - // this should be unreachable due to a previous check, but just to be sure... Ok(router::Response::error_builder() .error( @@ -868,20 +835,6 @@ impl RouterService { }; Ok(graphql_requests) } - - fn count_value_completion_errors( - value_completion: &Value, - context: &Context, - errors_config: &ErrorsConfiguration, - ) { - if let Some(vc_array) = value_completion.as_array() { - let errors: Vec = vc_array - .iter() - .filter_map(graphql::Error::from_value_completion_value) - .collect(); - count_operation_errors(&errors, context, errors_config); - } - } } #[derive(Clone)] From cd6d85b95a83fbcc5e1eed4d81f4bb0736f17c03 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Thu, 24 Apr 2025 15:49:28 -0400 Subject: [PATCH 05/46] fix response borrowing and async --- .../src/plugins/telemetry/error_counter.rs | 38 +++++++++++++------ apollo-router/src/plugins/telemetry/mod.rs | 10 +++-- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 98d235ea29..94c936564c 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -1,5 +1,8 @@ use std::collections::HashMap; use std::sync::Arc; +use futures::future::ready; +use futures::stream::once; +use futures::StreamExt; use serde_json_bytes::{Value}; use crate::apollo_studio_interop::UsageReporting; use crate::context::{OPERATION_KIND, OPERATION_NAME}; @@ -12,8 +15,10 @@ use crate::services::router::ClientRequestAccepts; use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; 
// TODO call this for subgraph service (pre redaction), supergraph service, and _MAYBE_ router service (service unavail and invalid headers) -pub(crate) async fn count_errors(mut response: SupergraphResponse, errors_config: &ErrorsConfiguration) -> SupergraphResponse { +pub(crate) async fn count_errors(response: SupergraphResponse, errors_config: &ErrorsConfiguration) -> SupergraphResponse { let context = response.context.clone(); + let errors_config = errors_config.clone(); + // TODO do we really need this? let ClientRequestAccepts { wildcard: accepts_wildcard, @@ -25,19 +30,19 @@ pub(crate) async fn count_errors(mut response: SupergraphResponse, errors_config .with_lock(|lock| lock.get().cloned()) .unwrap_or_default(); - - if let Some(gql_response) = response.next_response().await { + let (parts, stream) = response.response.into_parts(); + let stream = stream.inspect(move |resp| { // TODO make mapping to add to response context to avoid double counting - if !gql_response.has_next.unwrap_or(false) - && !gql_response.subscribed.unwrap_or(false) + if !resp.has_next.unwrap_or(false) + && !resp.subscribed.unwrap_or(false) && (accepts_json || accepts_wildcard) { // TODO ensure free plan is captured - if !gql_response.errors.is_empty() { - count_operation_errors(&gql_response.errors, &context, &errors_config); + if !resp.errors.is_empty() { + count_operation_errors(&resp.errors, &context, &errors_config); } - if let Some(value_completion) = gql_response.extensions.get(EXTENSIONS_VALUE_COMPLETION_KEY) { + if let Some(value_completion) = resp.extensions.get(EXTENSIONS_VALUE_COMPLETION_KEY) { // TODO inline this func? count_value_completion_errors( value_completion, @@ -47,8 +52,8 @@ pub(crate) async fn count_errors(mut response: SupergraphResponse, errors_config } } else if accepts_multipart_defer || accepts_multipart_subscription { // TODO can we combine this with above? 
- if !gql_response.errors.is_empty() { - count_operation_errors(&gql_response.errors, &context, &errors_config); + if !resp.errors.is_empty() { + count_operation_errors(&resp.errors, &context, &errors_config); } } else { // TODO supposedly this is unreachable in router service. Will we be able to pick this up in a router service plugin callback instead? @@ -60,8 +65,17 @@ pub(crate) async fn count_errors(mut response: SupergraphResponse, errors_config &errors_config, ); } - } - response + }); + + let (first_response, rest) = StreamExt::into_future(stream).await; + let response = http::Response::from_parts( + parts, + once(ready(first_response.unwrap_or_default())) + .chain(rest) + .boxed(), + ); + + SupergraphResponse { context, response } } // TODO router service plugin fn to capture SERVICE_UNAVAILABLE or INVALID_ACCEPT_HEADER? Would need to parse json response diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index f85d199ef4..29e7b6cee7 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -644,10 +644,6 @@ impl PluginPrivate for Telemetry { resp }) - .map_response(async move |resp: SupergraphResponse| { - // TODO make sure this doesn't override the above map_response - count_errors(resp, &config.apollo.errors).await; - }) .map_future_with_request_data( move |req: &SupergraphRequest| { let custom_attributes = config @@ -701,6 +697,7 @@ impl PluginPrivate for Telemetry { async move { let span = Span::current(); let mut result: Result = fut.await; + add_query_attributes(&ctx, &mut custom_attributes); add_cost_attributes(&ctx, &mut custom_attributes); span.set_span_dyn_attributes(custom_attributes); @@ -732,6 +729,11 @@ impl PluginPrivate for Telemetry { custom_graphql_instruments.on_error(err, &ctx); } } + + if let Ok(resp) = result { + result = Ok(count_errors(resp, &config.apollo.errors).await); + } + result = Self::update_otel_metrics( config.clone(), 
ctx.clone(), From ca64aa2bf4630f3e9dec814f5ccbbca6d642c946 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Fri, 25 Apr 2025 10:46:48 -0400 Subject: [PATCH 06/46] fix context move --- .../src/plugins/telemetry/error_counter.rs | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 94c936564c..a3c88db89d 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -18,20 +18,21 @@ use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; pub(crate) async fn count_errors(response: SupergraphResponse, errors_config: &ErrorsConfiguration) -> SupergraphResponse { let context = response.context.clone(); let errors_config = errors_config.clone(); - - // TODO do we really need this? - let ClientRequestAccepts { - wildcard: accepts_wildcard, - json: accepts_json, - multipart_defer: accepts_multipart_defer, - multipart_subscription: accepts_multipart_subscription, - } = context - .extensions() - .with_lock(|lock| lock.get().cloned()) - .unwrap_or_default(); let (parts, stream) = response.response.into_parts(); + // Clone context again to avoid move issues let stream = stream.inspect(move |resp| { + // TODO do we really need this? 
+ let ClientRequestAccepts { + wildcard: accepts_wildcard, + json: accepts_json, + multipart_defer: accepts_multipart_defer, + multipart_subscription: accepts_multipart_subscription, + } = context + .extensions() + .with_lock(|lock| lock.get().cloned()) + .unwrap_or_default(); + // TODO make mapping to add to response context to avoid double counting if !resp.has_next.unwrap_or(false) From 3baf0e08b440ed43a04f50ea453427939b5c799e Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Fri, 25 Apr 2025 14:58:22 -0400 Subject: [PATCH 07/46] use context keeping track of prev counts --- apollo-router/src/context/mod.rs | 2 + .../src/plugins/telemetry/error_counter.rs | 91 +++++++++++++------ 2 files changed, 67 insertions(+), 26 deletions(-) diff --git a/apollo-router/src/context/mod.rs b/apollo-router/src/context/mod.rs index 2fed681e81..1b86d41637 100644 --- a/apollo-router/src/context/mod.rs +++ b/apollo-router/src/context/mod.rs @@ -74,6 +74,8 @@ pub(crate) const OPERATION_KIND: &str = "apollo::supergraph::operation_kind"; pub(crate) const DEPRECATED_OPERATION_KIND: &str = "operation_kind"; /// The key to know if the response body contains at least 1 GraphQL error pub(crate) const CONTAINS_GRAPHQL_ERROR: &str = "apollo::telemetry::contains_graphql_error"; +/// The key to a map of errors that were already counted in a previous layer +pub(crate) const COUNTED_ERRORS: &str = "apollo::telemetry::counted_errors"; /// Holds [`Context`] entries. 
pub(crate) type Entries = Arc>; diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index a3c88db89d..0354ec8ced 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -1,18 +1,20 @@ -use std::collections::HashMap; -use std::sync::Arc; -use futures::future::ready; -use futures::stream::once; -use futures::StreamExt; -use serde_json_bytes::{Value}; use crate::apollo_studio_interop::UsageReporting; -use crate::context::{OPERATION_KIND, OPERATION_NAME}; -use crate::{graphql, Context}; +use crate::context::{COUNTED_ERRORS, OPERATION_KIND, OPERATION_NAME}; +use crate::graphql::Error; use crate::plugins::telemetry::apollo::{ErrorsConfiguration, ExtendedErrorMetricsMode}; use crate::plugins::telemetry::{CLIENT_NAME, CLIENT_VERSION}; use crate::query_planner::APOLLO_OPERATION_ID; -use crate::services::{SupergraphResponse}; use crate::services::router::ClientRequestAccepts; +use crate::services::SupergraphResponse; use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; +use crate::{graphql, Context}; +use futures::future::ready; +use futures::stream::once; +use futures::StreamExt; +use serde::{Deserialize, Serialize}; +use serde_json_bytes::Value; +use std::collections::HashMap; +use std::sync::Arc; // TODO call this for subgraph service (pre redaction), supergraph service, and _MAYBE_ router service (service unavail and invalid headers) pub(crate) async fn count_errors(response: SupergraphResponse, errors_config: &ErrorsConfiguration) -> SupergraphResponse { @@ -32,8 +34,6 @@ pub(crate) async fn count_errors(response: SupergraphResponse, errors_config: &E .extensions() .with_lock(|lock| lock.get().cloned()) .unwrap_or_default(); - - // TODO make mapping to add to response context to avoid double counting if !resp.has_next.unwrap_or(false) && !resp.subscribed.unwrap_or(false) @@ -66,17 +66,34 @@ pub(crate) async fn 
count_errors(response: SupergraphResponse, errors_config: &E &errors_config, ); } + + context + .insert(COUNTED_ERRORS, to_map(resp.errors.clone())) + .expect("Unable to insert errors into context."); }); + let (first_response, rest) = StreamExt::into_future(stream).await; - let response = http::Response::from_parts( + let new_response = http::Response::from_parts( parts, once(ready(first_response.unwrap_or_default())) .chain(rest) .boxed(), ); - SupergraphResponse { context, response } + SupergraphResponse { context: response.context, response: new_response } +} + + +fn to_map(errors: Vec) -> HashMap, u64> { + let mut map: HashMap, u64> = HashMap::new(); + errors.into_iter().for_each(|error| { + map.entry(get_code(&error)) + .and_modify(|count| { *count += 1 }) + .or_insert(1); + }); + + map } // TODO router service plugin fn to capture SERVICE_UNAVAILABLE or INVALID_ACCEPT_HEADER? Would need to parse json response @@ -114,10 +131,16 @@ fn count_value_completion_errors( } fn count_operation_errors( - errors: &[graphql::Error], + errors: &[Error], context: &Context, errors_config: &ErrorsConfiguration, ) { + let previously_counted_errors_map: HashMap, u64> = context + .get(COUNTED_ERRORS) + .ok() + .flatten() + .unwrap_or(HashMap::new()); + let unwrap_context_string = |context_key: &str| -> String { context .get::<_, String>(context_key) @@ -147,14 +170,22 @@ fn count_operation_errors( } } - let mut map = HashMap::new(); + let mut map = previously_counted_errors_map.clone(); for error in errors { - let code = error.extensions.get("code").and_then(|c| match c { - Value::String(s) => Some(s.as_str().to_owned()), - Value::Bool(b) => Some(format!("{b}")), - Value::Number(n) => Some(n.to_string()), - Value::Null | Value::Array(_) | Value::Object(_) => None, - }); + let code = get_code(&error); + + // If we already counted this error in a previous layer, then skip counting it again + if let Some(count) = map.get_mut(&code) { + *count = count.saturating_sub(1); + if 
*count == 0 { + map.remove(&code); + } + continue; + } + + // If we haven't seen this error before, or we see more occurrences than we've counted + // before, then count the error + let service = error .extensions .get("service") @@ -209,7 +240,15 @@ fn count_operation_errors( } } -/// Shared counter for `apollo.router.graphql_error` for consistency +fn get_code(error: &Error) -> Option { + error.extensions.get("code").and_then(|c| match c { + Value::String(s) => Some(s.as_str().to_owned()), + Value::Bool(b) => Some(format!("{b}")), + Value::Number(n) => Some(n.to_string()), + Value::Null | Value::Array(_) | Value::Object(_) => None, + }) +} + fn count_graphql_error(count: u64, code: Option<&str>) { match code { None => { @@ -233,21 +272,21 @@ fn count_graphql_error(count: u64, code: Option<&str>) { #[cfg(test)] mod test { - use serde_json_bytes::Value; use serde_json_bytes::json; + use serde_json_bytes::Value; - use crate::Context; use crate::context::OPERATION_KIND; use crate::context::OPERATION_NAME; use crate::graphql; use crate::json_ext::Path; use crate::metrics::FutureMetricsExt; - use crate::plugins::telemetry::CLIENT_NAME; - use crate::plugins::telemetry::CLIENT_VERSION; use crate::plugins::telemetry::apollo::ErrorsConfiguration; use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; use crate::plugins::telemetry::error_counter::{count_operation_error_codes, count_operation_errors}; + use crate::plugins::telemetry::CLIENT_NAME; + use crate::plugins::telemetry::CLIENT_VERSION; use crate::query_planner::APOLLO_OPERATION_ID; + use crate::Context; #[tokio::test] async fn test_count_operation_error_codes_with_extended_config_enabled() { From a2637dbdebf093420c73228ec368da24dd253c59 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Fri, 25 Apr 2025 15:29:55 -0400 Subject: [PATCH 08/46] moved count_graphql_error. existing unit tests passing. 
--- .../src/plugins/telemetry/error_counter.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 0354ec8ced..49dc30761e 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -170,22 +170,21 @@ fn count_operation_errors( } } - let mut map = previously_counted_errors_map.clone(); + let mut diff_map = previously_counted_errors_map.clone(); for error in errors { let code = get_code(&error); // If we already counted this error in a previous layer, then skip counting it again - if let Some(count) = map.get_mut(&code) { + if let Some(count) = diff_map.get_mut(&code) { *count = count.saturating_sub(1); if *count == 0 { - map.remove(&code); + diff_map.remove(&code); } continue; } // If we haven't seen this error before, or we see more occurrences than we've counted // before, then count the error - let service = error .extensions .get("service") @@ -197,8 +196,6 @@ fn count_operation_errors( None => "".into(), Some(path) => path.to_string(), }; - let entry = map.entry(code.clone()).or_insert(0u64); - *entry += 1; let send_otlp_errors = if service.is_empty() { matches!( @@ -227,16 +224,13 @@ fn count_operation_errors( "graphql.operation.type" = operation_kind.clone(), "apollo.client.name" = client_name.clone(), "apollo.client.version" = client_version.clone(), - "graphql.error.extensions.code" = code.unwrap_or_default(), + "graphql.error.extensions.code" = code.clone().unwrap_or_default(), "graphql.error.extensions.severity" = severity_str, "graphql.error.path" = path, "apollo.router.error.service" = service ); } - } - - for (code, count) in map { - count_graphql_error(count, code.as_deref()); + count_graphql_error(1, code.as_deref()); } } From 189f356f18c308371de8670e169296f93076919d Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Mon, 28 Apr 2025 
13:01:47 -0400 Subject: [PATCH 09/46] working count_errors test --- .../src/plugins/telemetry/error_counter.rs | 144 ++++++++++++------ 1 file changed, 100 insertions(+), 44 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 49dc30761e..1d72f4679e 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -11,10 +11,10 @@ use crate::{graphql, Context}; use futures::future::ready; use futures::stream::once; use futures::StreamExt; -use serde::{Deserialize, Serialize}; use serde_json_bytes::Value; use std::collections::HashMap; use std::sync::Arc; +use serde::de::DeserializeOwned; // TODO call this for subgraph service (pre redaction), supergraph service, and _MAYBE_ router service (service unavail and invalid headers) pub(crate) async fn count_errors(response: SupergraphResponse, errors_config: &ErrorsConfiguration) -> SupergraphResponse { @@ -85,10 +85,10 @@ pub(crate) async fn count_errors(response: SupergraphResponse, errors_config: &E } -fn to_map(errors: Vec) -> HashMap, u64> { - let mut map: HashMap, u64> = HashMap::new(); +fn to_map(errors: Vec) -> HashMap { + let mut map: HashMap = HashMap::new(); errors.into_iter().for_each(|error| { - map.entry(get_code(&error)) + map.entry(get_code(&error).unwrap_or_default()) .and_modify(|count| { *count += 1 }) .or_insert(1); }); @@ -135,24 +135,13 @@ fn count_operation_errors( context: &Context, errors_config: &ErrorsConfiguration, ) { - let previously_counted_errors_map: HashMap, u64> = context - .get(COUNTED_ERRORS) - .ok() - .flatten() - .unwrap_or(HashMap::new()); + let previously_counted_errors_map: HashMap = unwrap_from_context(context, COUNTED_ERRORS); - let unwrap_context_string = |context_key: &str| -> String { - context - .get::<_, String>(context_key) - .unwrap_or_default() - .unwrap_or_default() - }; - - let mut operation_id = 
unwrap_context_string(APOLLO_OPERATION_ID); - let mut operation_name = unwrap_context_string(OPERATION_NAME); - let operation_kind = unwrap_context_string(OPERATION_KIND); - let client_name = unwrap_context_string(CLIENT_NAME); - let client_version = unwrap_context_string(CLIENT_VERSION); + let mut operation_id: String = unwrap_from_context(context, APOLLO_OPERATION_ID); + let mut operation_name: String = unwrap_from_context(context, OPERATION_NAME); + let operation_kind: String = unwrap_from_context(context, OPERATION_KIND); + let client_name: String = unwrap_from_context(context, CLIENT_NAME); + let client_version: String = unwrap_from_context(context, CLIENT_VERSION); let maybe_usage_reporting = context .extensions() @@ -172,7 +161,7 @@ fn count_operation_errors( let mut diff_map = previously_counted_errors_map.clone(); for error in errors { - let code = get_code(&error); + let code = get_code(&error).unwrap_or_default(); // If we already counted this error in a previous layer, then skip counting it again if let Some(count) = diff_map.get_mut(&code) { @@ -224,16 +213,24 @@ fn count_operation_errors( "graphql.operation.type" = operation_kind.clone(), "apollo.client.name" = client_name.clone(), "apollo.client.version" = client_version.clone(), - "graphql.error.extensions.code" = code.clone().unwrap_or_default(), + "graphql.error.extensions.code" = code.clone(), "graphql.error.extensions.severity" = severity_str, "graphql.error.path" = path, "apollo.router.error.service" = service ); } - count_graphql_error(1, code.as_deref()); + count_graphql_error(1, code); } } +fn unwrap_from_context(context: &Context, key: &str) -> V +{ + context + .get::<_, V>(key) // -> Option> + .unwrap_or_default() // -> Result (defaults to Ok(T::default())) + .unwrap_or_default() // -> T (defaults on Err) +} + fn get_code(error: &Error) -> Option { error.extensions.get("code").and_then(|c| match c { Value::String(s) => Some(s.as_str().to_owned()), @@ -243,44 +240,103 @@ fn get_code(error: 
&Error) -> Option { }) } -fn count_graphql_error(count: u64, code: Option<&str>) { - match code { - None => { - u64_counter!( - "apollo.router.graphql_error", - "Number of GraphQL error responses returned by the router", - count - ); - } - Some(code) => { - u64_counter!( - "apollo.router.graphql_error", - "Number of GraphQL error responses returned by the router", - count, - code = code.to_string() - ); - } - } +fn count_graphql_error(count: u64, code: String) { + // TODO ensure an empty string matches when we used a None optional before + u64_counter!( + "apollo.router.graphql_error", + "Number of GraphQL error responses returned by the router", + count, + code = code + ); } #[cfg(test)] mod test { + use http::StatusCode; use serde_json_bytes::json; use serde_json_bytes::Value; - use crate::context::OPERATION_KIND; + use crate::context::{COUNTED_ERRORS, OPERATION_KIND}; use crate::context::OPERATION_NAME; use crate::graphql; use crate::json_ext::Path; use crate::metrics::FutureMetricsExt; use crate::plugins::telemetry::apollo::ErrorsConfiguration; use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; - use crate::plugins::telemetry::error_counter::{count_operation_error_codes, count_operation_errors}; + use crate::plugins::telemetry::error_counter::{count_errors, count_operation_error_codes, count_operation_errors}; use crate::plugins::telemetry::CLIENT_NAME; use crate::plugins::telemetry::CLIENT_VERSION; use crate::query_planner::APOLLO_OPERATION_ID; + use crate::services::SupergraphResponse; use crate::Context; + use crate::services::router::ClientRequestAccepts; + + #[tokio::test] + async fn test_count_errors_with_no_previously_counted_errors() { + async { + let config = ErrorsConfiguration { + preview_extended_error_metrics: ExtendedErrorMetricsMode::Enabled, + ..Default::default() + }; + + let context = Context::default(); + + context.extensions() + .with_lock(|lock| lock.insert(ClientRequestAccepts{ + multipart_defer: false, + 
multipart_subscription: false, + json: true, + wildcard: false, + })); + + let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); + let _ = context.insert(OPERATION_NAME, "SomeOperation".to_string()); + let _ = context.insert(OPERATION_KIND, "query".to_string()); + let _ = context.insert(CLIENT_NAME, "client-1".to_string()); + let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); + + let new_response = count_errors( + SupergraphResponse::fake_builder() + .header("Accept", "application/json") + .context(context) + .status_code(StatusCode::BAD_REQUEST) + .errors(vec![ + graphql::Error::builder() + .message("You did a bad request.") + .extension_code("GRAPHQL_VALIDATION_FAILED") + .build(), + ]) + .build() + .unwrap(), + &config, + ).await; + + assert_counter!( + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); + + assert_counter!( + "apollo.router.graphql_error", + 1, + code = "GRAPHQL_VALIDATION_FAILED" + ); + + assert_eq!(new_response.context.get_json_value(COUNTED_ERRORS), Some(json!({"GRAPHQL_VALIDATION_FAILED": 1}))) + } + .with_metrics() + .await; + } #[tokio::test] async fn test_count_operation_error_codes_with_extended_config_enabled() { From a6f3c0053b97f5258c277f6f98f1dcedb3605cc3 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Mon, 28 Apr 2025 13:32:03 -0400 Subject: [PATCH 10/46] count_errors test with prev counted --- .../src/plugins/telemetry/error_counter.rs | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 
1d72f4679e..2d9ec76974 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -338,6 +338,99 @@ mod test { .await; } + #[tokio::test] + async fn test_count_errors_with_previously_counted_errors() { + async { + let config = ErrorsConfiguration { + preview_extended_error_metrics: ExtendedErrorMetricsMode::Enabled, + ..Default::default() + }; + + let context = Context::default(); + + context.extensions() + .with_lock(|lock| lock.insert(ClientRequestAccepts{ + multipart_defer: false, + multipart_subscription: false, + json: true, + wildcard: false, + })); + + let _ = context.insert(COUNTED_ERRORS, json!({"GRAPHQL_VALIDATION_FAILED": 1})); + + let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); + let _ = context.insert(OPERATION_NAME, "SomeOperation".to_string()); + let _ = context.insert(OPERATION_KIND, "query".to_string()); + let _ = context.insert(CLIENT_NAME, "client-1".to_string()); + let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); + + let new_response = count_errors( + SupergraphResponse::fake_builder() + .header("Accept", "application/json") + .context(context) + .status_code(StatusCode::BAD_REQUEST) + .error( + graphql::Error::builder() + .message("You did a bad request.") + .extension_code("GRAPHQL_VALIDATION_FAILED") + .build() + ) + .error( + graphql::Error::builder() + .message("Customer error text") + .extension_code("CUSTOM_ERROR") + .build() ) + .build() + .unwrap(), + &config, + ).await; + + assert_counter!( + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "CUSTOM_ERROR", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); + + assert_counter!( + 
"apollo.router.graphql_error", + 1, + code = "CUSTOM_ERROR" + ); + + assert_counter_not_exists!( + "apollo.router.operations.error", + u64, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); + + assert_counter_not_exists!( + "apollo.router.graphql_error", + u64, + code = "GRAPHQL_VALIDATION_FAILED" + ); + + assert_eq!(new_response.context.get_json_value(COUNTED_ERRORS), Some(json!({"GRAPHQL_VALIDATION_FAILED": 1, "CUSTOM_ERROR": 1}))) + } + .with_metrics() + .await; + } + #[tokio::test] async fn test_count_operation_error_codes_with_extended_config_enabled() { async { From a05630cde54a131a154e05fd4527dc4d57972e5f Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Mon, 28 Apr 2025 16:36:45 -0400 Subject: [PATCH 11/46] fn for each layer --- .../src/plugins/telemetry/error_counter.rs | 104 ++++++++++++++---- apollo-router/src/plugins/telemetry/mod.rs | 12 +- 2 files changed, 90 insertions(+), 26 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 2d9ec76974..2842f77cdf 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -5,25 +5,47 @@ use crate::plugins::telemetry::apollo::{ErrorsConfiguration, ExtendedErrorMetric use crate::plugins::telemetry::{CLIENT_NAME, CLIENT_VERSION}; use crate::query_planner::APOLLO_OPERATION_ID; use crate::services::router::ClientRequestAccepts; -use crate::services::SupergraphResponse; +use crate::services::{ExecutionResponse, RouterResponse, SubgraphResponse, SupergraphResponse}; use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; use 
crate::{graphql, Context}; use futures::future::ready; use futures::stream::once; use futures::StreamExt; +use serde::de::DeserializeOwned; use serde_json_bytes::Value; use std::collections::HashMap; use std::sync::Arc; -use serde::de::DeserializeOwned; -// TODO call this for subgraph service (pre redaction), supergraph service, and _MAYBE_ router service (service unavail and invalid headers) -pub(crate) async fn count_errors(response: SupergraphResponse, errors_config: &ErrorsConfiguration) -> SupergraphResponse { +pub(crate) async fn count_subgraph_errors(response: SubgraphResponse, errors_config: &ErrorsConfiguration) -> SubgraphResponse { + let context = response.context.clone(); + let errors_config = errors_config.clone(); + + let response_body = response.response.body(); + if !response_body.errors.is_empty() { + count_operation_errors(&response_body.errors, &context, &errors_config); + } + context + .insert(COUNTED_ERRORS, to_map(&response_body.errors)) + .expect("Unable to insert errors into context."); + + SubgraphResponse { + context: response.context, + subgraph_name: response.subgraph_name, + id: response.id, + response: response.response + } +} + +pub(crate) async fn count_supergraph_errors(response: SupergraphResponse, errors_config: &ErrorsConfiguration) -> SupergraphResponse { + // TODO streaming subscriptions? + // TODO multiple responses in the stream? + let context = response.context.clone(); let errors_config = errors_config.clone(); let (parts, stream) = response.response.into_parts(); // Clone context again to avoid move issues - let stream = stream.inspect(move |resp| { + let stream = stream.inspect(move |response_body| { // TODO do we really need this? 
let ClientRequestAccepts { wildcard: accepts_wildcard, @@ -35,15 +57,15 @@ pub(crate) async fn count_errors(response: SupergraphResponse, errors_config: &E .with_lock(|lock| lock.get().cloned()) .unwrap_or_default(); - if !resp.has_next.unwrap_or(false) - && !resp.subscribed.unwrap_or(false) + if !response_body.has_next.unwrap_or(false) + && !response_body.subscribed.unwrap_or(false) && (accepts_json || accepts_wildcard) { // TODO ensure free plan is captured - if !resp.errors.is_empty() { - count_operation_errors(&resp.errors, &context, &errors_config); + if !response_body.errors.is_empty() { + count_operation_errors(&response_body.errors, &context, &errors_config); } - if let Some(value_completion) = resp.extensions.get(EXTENSIONS_VALUE_COMPLETION_KEY) { + if let Some(value_completion) = response_body.extensions.get(EXTENSIONS_VALUE_COMPLETION_KEY) { // TODO inline this func? count_value_completion_errors( value_completion, @@ -53,8 +75,8 @@ pub(crate) async fn count_errors(response: SupergraphResponse, errors_config: &E } } else if accepts_multipart_defer || accepts_multipart_subscription { // TODO can we combine this with above? - if !resp.errors.is_empty() { - count_operation_errors(&resp.errors, &context, &errors_config); + if !response_body.errors.is_empty() { + count_operation_errors(&response_body.errors, &context, &errors_config); } } else { // TODO supposedly this is unreachable in router service. Will we be able to pick this up in a router service plugin callback instead? 
@@ -68,7 +90,37 @@ pub(crate) async fn count_errors(response: SupergraphResponse, errors_config: &E } context - .insert(COUNTED_ERRORS, to_map(resp.errors.clone())) + .insert(COUNTED_ERRORS, to_map(&response_body.errors)) + .expect("Unable to insert errors into context."); + }); + + + let (first_response, rest) = StreamExt::into_future(stream).await; + let new_response = http::Response::from_parts( + parts, + once(ready(first_response.unwrap_or_default())) + .chain(rest) + .boxed(), + ); + + SupergraphResponse { + context: response.context, + response: new_response + } +} + +pub(crate) async fn count_execution_errors(response: ExecutionResponse, errors_config: &ErrorsConfiguration) -> ExecutionResponse { + let context = response.context.clone(); + let errors_config = errors_config.clone(); + + let (parts, stream) = response.response.into_parts(); + // Clone context again to avoid move issues + let stream = stream.inspect(move |response_body| { + if !response_body.errors.is_empty() { + count_operation_errors(&response_body.errors, &context, &errors_config); + } + context + .insert(COUNTED_ERRORS, to_map(&response_body.errors)) .expect("Unable to insert errors into context."); }); @@ -81,11 +133,18 @@ pub(crate) async fn count_errors(response: SupergraphResponse, errors_config: &E .boxed(), ); - SupergraphResponse { context: response.context, response: new_response } + ExecutionResponse { + context: response.context, + response: new_response + } } +pub(crate) async fn count_router_errors(response: RouterResponse, errors_config: &ErrorsConfiguration) -> RouterResponse { + // TODO how do we parse the json response to capture SERVICE_UNAVAILABLE or INVALID_ACCEPT_HEADER? 
+ return response +} -fn to_map(errors: Vec) -> HashMap { +fn to_map(errors: &Vec) -> HashMap { let mut map: HashMap = HashMap::new(); errors.into_iter().for_each(|error| { map.entry(get_code(&error).unwrap_or_default()) @@ -96,8 +155,6 @@ fn to_map(errors: Vec) -> HashMap { map } -// TODO router service plugin fn to capture SERVICE_UNAVAILABLE or INVALID_ACCEPT_HEADER? Would need to parse json response - fn count_operation_error_codes( codes: &[&str], context: &Context, @@ -159,6 +216,7 @@ fn count_operation_errors( } } + // TODO how do we account for redacted errors when comparing? Likely skip them completely (they will have been counted with correct codes in subgraph layer) let mut diff_map = previously_counted_errors_map.clone(); for error in errors { let code = get_code(&error).unwrap_or_default(); @@ -257,20 +315,20 @@ mod test { use serde_json_bytes::json; use serde_json_bytes::Value; - use crate::context::{COUNTED_ERRORS, OPERATION_KIND}; use crate::context::OPERATION_NAME; + use crate::context::{COUNTED_ERRORS, OPERATION_KIND}; use crate::graphql; use crate::json_ext::Path; use crate::metrics::FutureMetricsExt; use crate::plugins::telemetry::apollo::ErrorsConfiguration; use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; - use crate::plugins::telemetry::error_counter::{count_errors, count_operation_error_codes, count_operation_errors}; + use crate::plugins::telemetry::error_counter::{count_operation_error_codes, count_operation_errors, count_supergraph_errors}; use crate::plugins::telemetry::CLIENT_NAME; use crate::plugins::telemetry::CLIENT_VERSION; use crate::query_planner::APOLLO_OPERATION_ID; + use crate::services::router::ClientRequestAccepts; use crate::services::SupergraphResponse; use crate::Context; - use crate::services::router::ClientRequestAccepts; #[tokio::test] async fn test_count_errors_with_no_previously_counted_errors() { @@ -296,7 +354,7 @@ mod test { let _ = context.insert(CLIENT_NAME, "client-1".to_string()); let _ = 
context.insert(CLIENT_VERSION, "version-1".to_string()); - let new_response = count_errors( + let new_response = count_supergraph_errors( SupergraphResponse::fake_builder() .header("Accept", "application/json") .context(context) @@ -364,7 +422,7 @@ mod test { let _ = context.insert(CLIENT_NAME, "client-1".to_string()); let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); - let new_response = count_errors( + let new_response = count_supergraph_errors( SupergraphResponse::fake_builder() .header("Accept", "application/json") .context(context) @@ -377,7 +435,7 @@ mod test { ) .error( graphql::Error::builder() - .message("Customer error text") + .message("Custom error text") .extension_code("CUSTOM_ERROR") .build() ) .build() diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 29e7b6cee7..0c013196a3 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -113,7 +113,7 @@ use crate::plugins::telemetry::consts::OTEL_STATUS_CODE_OK; use crate::plugins::telemetry::consts::REQUEST_SPAN_NAME; use crate::plugins::telemetry::consts::ROUTER_SPAN_NAME; use crate::plugins::telemetry::dynamic_attribute::SpanDynAttribute; -use crate::plugins::telemetry::error_counter::count_errors; +use crate::plugins::telemetry::error_counter::{count_subgraph_errors, count_supergraph_errors}; use crate::plugins::telemetry::fmt_layer::create_fmt_layer; use crate::plugins::telemetry::metrics::MetricsBuilder; use crate::plugins::telemetry::metrics::MetricsConfigurator; @@ -730,8 +730,9 @@ impl PluginPrivate for Telemetry { } } + // TODO should I just move this to the above ok? Or maybe we want to count even if we have an Err? 
if let Ok(resp) = result { - result = Ok(count_errors(resp, &config.apollo.errors).await); + result = Ok(count_supergraph_errors(resp, &config.apollo.errors).await); } result = Self::update_otel_metrics( @@ -850,7 +851,7 @@ impl PluginPrivate for Telemetry { async move { let span = Span::current(); span.set_span_dyn_attributes(custom_attributes); - let result: Result = f.await; + let mut result: Result = f.await; match &result { Ok(resp) => { @@ -886,6 +887,11 @@ impl PluginPrivate for Telemetry { } } + // TODO merge into above match? + if let Ok(resp) = result { + result = Ok(count_subgraph_errors(resp, &config.apollo.errors).await); + } + result } }, From 9574ae05960e435b79566d1b654ad1ab2545fea3 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Tue, 29 Apr 2025 14:18:14 -0400 Subject: [PATCH 12/46] call counter for execution service --- .../src/plugins/telemetry/error_counter.rs | 7 +++--- apollo-router/src/plugins/telemetry/mod.rs | 22 ++++++++++++++----- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 2842f77cdf..f6f5d256c0 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -44,7 +44,7 @@ pub(crate) async fn count_supergraph_errors(response: SupergraphResponse, errors let errors_config = errors_config.clone(); let (parts, stream) = response.response.into_parts(); - // Clone context again to avoid move issues + let stream = stream.inspect(move |response_body| { // TODO do we really need this? 
let ClientRequestAccepts { @@ -89,6 +89,7 @@ pub(crate) async fn count_supergraph_errors(response: SupergraphResponse, errors ); } + // Refresh context with the most up-to-date list of errors context .insert(COUNTED_ERRORS, to_map(&response_body.errors)) .expect("Unable to insert errors into context."); @@ -114,7 +115,7 @@ pub(crate) async fn count_execution_errors(response: ExecutionResponse, errors_c let errors_config = errors_config.clone(); let (parts, stream) = response.response.into_parts(); - // Clone context again to avoid move issues + let stream = stream.inspect(move |response_body| { if !response_body.errors.is_empty() { count_operation_errors(&response_body.errors, &context, &errors_config); @@ -141,7 +142,7 @@ pub(crate) async fn count_execution_errors(response: ExecutionResponse, errors_c pub(crate) async fn count_router_errors(response: RouterResponse, errors_config: &ErrorsConfiguration) -> RouterResponse { // TODO how do we parse the json response to capture SERVICE_UNAVAILABLE or INVALID_ACCEPT_HEADER? 
- return response + response } fn to_map(errors: &Vec) -> HashMap { diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 0c013196a3..3bb743f909 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -86,7 +86,7 @@ use crate::context::CONTAINS_GRAPHQL_ERROR; use crate::context::OPERATION_KIND; use crate::context::OPERATION_NAME; use crate::graphql::ResponseVisitor; -use crate::layers::ServiceBuilderExt; +use crate::layers::{ServiceBuilderExt}; use crate::layers::instrument::InstrumentLayer; use crate::metrics::aggregation::MeterProviderType; use crate::metrics::filter::FilterMeterProvider; @@ -113,7 +113,7 @@ use crate::plugins::telemetry::consts::OTEL_STATUS_CODE_OK; use crate::plugins::telemetry::consts::REQUEST_SPAN_NAME; use crate::plugins::telemetry::consts::ROUTER_SPAN_NAME; use crate::plugins::telemetry::dynamic_attribute::SpanDynAttribute; -use crate::plugins::telemetry::error_counter::{count_subgraph_errors, count_supergraph_errors}; +use crate::plugins::telemetry::error_counter::{count_execution_errors, count_subgraph_errors, count_supergraph_errors}; use crate::plugins::telemetry::fmt_layer::create_fmt_layer; use crate::plugins::telemetry::metrics::MetricsBuilder; use crate::plugins::telemetry::metrics::MetricsConfigurator; @@ -133,7 +133,7 @@ use crate::plugins::telemetry::tracing::apollo_telemetry::decode_ftv1_trace; use crate::query_planner::OperationKind; use crate::register_private_plugin; use crate::router_factory::Endpoint; -use crate::services::ExecutionRequest; +use crate::services::{ExecutionRequest, ExecutionResponse}; use crate::services::SubgraphRequest; use crate::services::SubgraphResponse; use crate::services::SupergraphRequest; @@ -731,6 +731,7 @@ impl PluginPrivate for Telemetry { } // TODO should I just move this to the above ok? Or maybe we want to count even if we have an Err? 
+ // TODO or move to an and_then() like execution service? if let Ok(resp) = result { result = Ok(count_supergraph_errors(resp, &config.apollo.errors).await); } @@ -760,6 +761,9 @@ impl PluginPrivate for Telemetry { } fn execution_service(&self, service: execution::BoxService) -> execution::BoxService { + let config = self.config.clone(); + let config_map_res_first = config.clone(); + ServiceBuilder::new() .instrument(move |req: &ExecutionRequest| { let operation_kind = req.query_plan.query.operation.kind(); @@ -779,6 +783,14 @@ impl PluginPrivate for Telemetry { ), } }) + .and_then(move |resp: ExecutionResponse| { + // TODO make sure this will still add to the context + let config = config_map_res_first.clone(); + async move { + let resp = count_execution_errors(resp, &config.apollo.errors).await; + Ok::<_, BoxError>(resp) + } + }) .service(service) .boxed() } @@ -887,9 +899,9 @@ impl PluginPrivate for Telemetry { } } - // TODO merge into above match? + // TODO merge into above match? Move into its own and_then()? if let Ok(resp) = result { - result = Ok(count_subgraph_errors(resp, &config.apollo.errors).await); + result = Ok(count_subgraph_errors(resp, &conf.apollo.errors).await); } result From 07e74a2fcc001ca05d83d92008306f82d86cf639 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 30 Apr 2025 10:30:15 -0400 Subject: [PATCH 13/46] lint fixes --- apollo-router/src/graphql/mod.rs | 7 + apollo-router/src/metrics/mod.rs | 36 +- .../src/plugins/telemetry/error_counter.rs | 642 +++++++++--------- apollo-router/src/plugins/telemetry/mod.rs | 9 +- apollo-router/src/services/router/service.rs | 9 - 5 files changed, 344 insertions(+), 359 deletions(-) diff --git a/apollo-router/src/graphql/mod.rs b/apollo-router/src/graphql/mod.rs index 28ff608d92..3f84cf525e 100644 --- a/apollo-router/src/graphql/mod.rs +++ b/apollo-router/src/graphql/mod.rs @@ -70,6 +70,13 @@ pub struct Error { /// The optional GraphQL extensions for this error. 
#[serde(default, skip_serializing_if = "Object::is_empty")] pub extensions: Object, + // TODO add attr to mark as counted, skip serialize? + // TODO would include_subgraph_errors or a customer's plugin change this? + // TODO Does serialization happen btwn layers (which would break this also)? + // TODO if customer's are the only cause then maybe we can warn that this could double count. + // TODO OR make an "apollo error ID" that is serialized. Use this as hash key in context + // TODO make on init, public getter + // TODO OR can we store a list of errors in context. Assumes that Eq is actually strict equality } // Implement getter and getter_mut to not use pub field directly diff --git a/apollo-router/src/metrics/mod.rs b/apollo-router/src/metrics/mod.rs index 269835c83f..711fd64d83 100644 --- a/apollo-router/src/metrics/mod.rs +++ b/apollo-router/src/metrics/mod.rs @@ -67,29 +67,16 @@ //! ); //! ``` -use std::collections::HashMap; #[cfg(test)] use std::future::Future; #[cfg(test)] use std::pin::Pin; -use std::sync::Arc; use std::sync::OnceLock; #[cfg(test)] use futures::FutureExt; -use serde_json_bytes::Value; -use crate::Context; -use crate::apollo_studio_interop::UsageReporting; -use crate::context::OPERATION_KIND; -use crate::context::OPERATION_NAME; -use crate::graphql; use crate::metrics::aggregation::AggregateMeterProvider; -use crate::plugins::telemetry::CLIENT_NAME; -use crate::plugins::telemetry::CLIENT_VERSION; -use crate::plugins::telemetry::apollo::ErrorsConfiguration; -use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; -use crate::query_planner::APOLLO_OPERATION_ID; pub(crate) mod aggregation; pub(crate) mod filter; @@ -1338,27 +1325,6 @@ macro_rules! 
assert_histogram_not_exists { }; } -/// Shared counter for `apollo.router.graphql_error` for consistency -pub(crate) fn count_graphql_error(count: u64, code: Option<&str>) { - match code { - None => { - u64_counter!( - "apollo.router.graphql_error", - "Number of GraphQL error responses returned by the router", - count - ); - } - Some(code) => { - u64_counter!( - "apollo.router.graphql_error", - "Number of GraphQL error responses returned by the router", - count, - code = code.to_string() - ); - } - } -} - /// Assert that all metrics match an [insta] snapshot. /// /// Consider using [assert_non_zero_metrics_snapshot] to produce more grokkable snapshots if @@ -1444,6 +1410,7 @@ impl FutureMetricsExt for T where T: Future {} mod test { use opentelemetry::KeyValue; use opentelemetry::metrics::MeterProvider; + use crate::metrics::FutureMetricsExt; use crate::metrics::aggregation::MeterProviderType; use crate::metrics::meter_provider; @@ -1772,5 +1739,4 @@ mod test { .with_metrics() .await; } - } diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index f6f5d256c0..8b808b7ba5 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -1,22 +1,34 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use futures::StreamExt; +use futures::future::ready; +use futures::stream::once; +use serde::de::DeserializeOwned; +use serde_json_bytes::Value; + +use crate::Context; use crate::apollo_studio_interop::UsageReporting; -use crate::context::{COUNTED_ERRORS, OPERATION_KIND, OPERATION_NAME}; +use crate::context::COUNTED_ERRORS; +use crate::context::OPERATION_KIND; +use crate::context::OPERATION_NAME; +use crate::graphql; use crate::graphql::Error; -use crate::plugins::telemetry::apollo::{ErrorsConfiguration, ExtendedErrorMetricsMode}; -use crate::plugins::telemetry::{CLIENT_NAME, CLIENT_VERSION}; +use crate::plugins::telemetry::CLIENT_NAME; 
+use crate::plugins::telemetry::CLIENT_VERSION; +use crate::plugins::telemetry::apollo::ErrorsConfiguration; +use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; use crate::query_planner::APOLLO_OPERATION_ID; +use crate::services::ExecutionResponse; +use crate::services::SubgraphResponse; +use crate::services::SupergraphResponse; use crate::services::router::ClientRequestAccepts; -use crate::services::{ExecutionResponse, RouterResponse, SubgraphResponse, SupergraphResponse}; use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; -use crate::{graphql, Context}; -use futures::future::ready; -use futures::stream::once; -use futures::StreamExt; -use serde::de::DeserializeOwned; -use serde_json_bytes::Value; -use std::collections::HashMap; -use std::sync::Arc; -pub(crate) async fn count_subgraph_errors(response: SubgraphResponse, errors_config: &ErrorsConfiguration) -> SubgraphResponse { +pub(crate) async fn count_subgraph_errors( + response: SubgraphResponse, + errors_config: &ErrorsConfiguration, +) -> SubgraphResponse { let context = response.context.clone(); let errors_config = errors_config.clone(); @@ -32,11 +44,14 @@ pub(crate) async fn count_subgraph_errors(response: SubgraphResponse, errors_con context: response.context, subgraph_name: response.subgraph_name, id: response.id, - response: response.response + response: response.response, } } -pub(crate) async fn count_supergraph_errors(response: SupergraphResponse, errors_config: &ErrorsConfiguration) -> SupergraphResponse { +pub(crate) async fn count_supergraph_errors( + response: SupergraphResponse, + errors_config: &ErrorsConfiguration, +) -> SupergraphResponse { // TODO streaming subscriptions? // TODO multiple responses in the stream? 
@@ -65,13 +80,12 @@ pub(crate) async fn count_supergraph_errors(response: SupergraphResponse, errors if !response_body.errors.is_empty() { count_operation_errors(&response_body.errors, &context, &errors_config); } - if let Some(value_completion) = response_body.extensions.get(EXTENSIONS_VALUE_COMPLETION_KEY) { + if let Some(value_completion) = response_body + .extensions + .get(EXTENSIONS_VALUE_COMPLETION_KEY) + { // TODO inline this func? - count_value_completion_errors( - value_completion, - &context, - &errors_config, - ); + count_value_completion_errors(value_completion, &context, &errors_config); } } else if accepts_multipart_defer || accepts_multipart_subscription { // TODO can we combine this with above? @@ -82,11 +96,7 @@ pub(crate) async fn count_supergraph_errors(response: SupergraphResponse, errors // TODO supposedly this is unreachable in router service. Will we be able to pick this up in a router service plugin callback instead? // TODO I'm guessing no b/c at the plugin layer, we'd have to parse the response as json. 
// TODO As is, this feels really bad b/c the error will be defined _AFTER_ we count it in router/service.rs - count_operation_error_codes( - &["INVALID_ACCEPT_HEADER"], - &context, - &errors_config, - ); + count_operation_error_codes(&["INVALID_ACCEPT_HEADER"], &context, &errors_config); } // Refresh context with the most up-to-date list of errors @@ -95,7 +105,6 @@ pub(crate) async fn count_supergraph_errors(response: SupergraphResponse, errors .expect("Unable to insert errors into context."); }); - let (first_response, rest) = StreamExt::into_future(stream).await; let new_response = http::Response::from_parts( parts, @@ -106,11 +115,14 @@ pub(crate) async fn count_supergraph_errors(response: SupergraphResponse, errors SupergraphResponse { context: response.context, - response: new_response + response: new_response, } } -pub(crate) async fn count_execution_errors(response: ExecutionResponse, errors_config: &ErrorsConfiguration) -> ExecutionResponse { +pub(crate) async fn count_execution_errors( + response: ExecutionResponse, + errors_config: &ErrorsConfiguration, +) -> ExecutionResponse { let context = response.context.clone(); let errors_config = errors_config.clone(); @@ -125,7 +137,6 @@ pub(crate) async fn count_execution_errors(response: ExecutionResponse, errors_c .expect("Unable to insert errors into context."); }); - let (first_response, rest) = StreamExt::into_future(stream).await; let new_response = http::Response::from_parts( parts, @@ -136,20 +147,18 @@ pub(crate) async fn count_execution_errors(response: ExecutionResponse, errors_c ExecutionResponse { context: response.context, - response: new_response + response: new_response, } } -pub(crate) async fn count_router_errors(response: RouterResponse, errors_config: &ErrorsConfiguration) -> RouterResponse { - // TODO how do we parse the json response to capture SERVICE_UNAVAILABLE or INVALID_ACCEPT_HEADER? 
- response -} +// TODO how do we parse the json response to capture SERVICE_UNAVAILABLE or INVALID_ACCEPT_HEADER in a count_router_errors()? -fn to_map(errors: &Vec) -> HashMap { +fn to_map(errors: &[Error]) -> HashMap { let mut map: HashMap = HashMap::new(); - errors.into_iter().for_each(|error| { - map.entry(get_code(&error).unwrap_or_default()) - .and_modify(|count| { *count += 1 }) + errors.iter().for_each(|error| { + // TODO hash the full error more uniquely + map.entry(get_code(error).unwrap_or_default()) + .and_modify(|count| *count += 1) .or_insert(1); }); @@ -193,7 +202,8 @@ fn count_operation_errors( context: &Context, errors_config: &ErrorsConfiguration, ) { - let previously_counted_errors_map: HashMap = unwrap_from_context(context, COUNTED_ERRORS); + let previously_counted_errors_map: HashMap = + unwrap_from_context(context, COUNTED_ERRORS); let mut operation_id: String = unwrap_from_context(context, APOLLO_OPERATION_ID); let mut operation_name: String = unwrap_from_context(context, OPERATION_NAME); @@ -220,7 +230,7 @@ fn count_operation_errors( // TODO how do we account for redacted errors when comparing? 
Likely skip them completely (they will have been counted with correct codes in subgraph layer) let mut diff_map = previously_counted_errors_map.clone(); for error in errors { - let code = get_code(&error).unwrap_or_default(); + let code = get_code(error).unwrap_or_default(); // If we already counted this error in a previous layer, then skip counting it again if let Some(count) = diff_map.get_mut(&code) { @@ -282,12 +292,11 @@ fn count_operation_errors( } } -fn unwrap_from_context(context: &Context, key: &str) -> V -{ +fn unwrap_from_context(context: &Context, key: &str) -> V { context - .get::<_, V>(key) // -> Option> - .unwrap_or_default() // -> Result (defaults to Ok(T::default())) - .unwrap_or_default() // -> T (defaults on Err) + .get::<_, V>(key) // -> Option> + .unwrap_or_default() // -> Result (defaults to Ok(T::default())) + .unwrap_or_default() // -> T (defaults on Err) } fn get_code(error: &Error) -> Option { @@ -309,27 +318,29 @@ fn count_graphql_error(count: u64, code: String) { ); } - #[cfg(test)] mod test { use http::StatusCode; - use serde_json_bytes::json; use serde_json_bytes::Value; + use serde_json_bytes::json; + use crate::Context; + use crate::context::COUNTED_ERRORS; + use crate::context::OPERATION_KIND; use crate::context::OPERATION_NAME; - use crate::context::{COUNTED_ERRORS, OPERATION_KIND}; use crate::graphql; use crate::json_ext::Path; use crate::metrics::FutureMetricsExt; - use crate::plugins::telemetry::apollo::ErrorsConfiguration; - use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; - use crate::plugins::telemetry::error_counter::{count_operation_error_codes, count_operation_errors, count_supergraph_errors}; use crate::plugins::telemetry::CLIENT_NAME; use crate::plugins::telemetry::CLIENT_VERSION; + use crate::plugins::telemetry::apollo::ErrorsConfiguration; + use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; + use crate::plugins::telemetry::error_counter::count_operation_error_codes; + use 
crate::plugins::telemetry::error_counter::count_operation_errors; + use crate::plugins::telemetry::error_counter::count_supergraph_errors; use crate::query_planner::APOLLO_OPERATION_ID; - use crate::services::router::ClientRequestAccepts; use crate::services::SupergraphResponse; - use crate::Context; + use crate::services::router::ClientRequestAccepts; #[tokio::test] async fn test_count_errors_with_no_previously_counted_errors() { @@ -341,13 +352,14 @@ mod test { let context = Context::default(); - context.extensions() - .with_lock(|lock| lock.insert(ClientRequestAccepts{ + context.extensions().with_lock(|lock| { + lock.insert(ClientRequestAccepts { multipart_defer: false, multipart_subscription: false, json: true, wildcard: false, - })); + }) + }); let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); let _ = context.insert(OPERATION_NAME, "SomeOperation".to_string()); @@ -369,29 +381,33 @@ mod test { .build() .unwrap(), &config, - ).await; + ) + .await; assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); assert_counter!( - "apollo.router.graphql_error", - 1, - code = "GRAPHQL_VALIDATION_FAILED" - ); + "apollo.router.graphql_error", + 1, + code = 
"GRAPHQL_VALIDATION_FAILED" + ); - assert_eq!(new_response.context.get_json_value(COUNTED_ERRORS), Some(json!({"GRAPHQL_VALIDATION_FAILED": 1}))) + assert_eq!( + new_response.context.get_json_value(COUNTED_ERRORS), + Some(json!({"GRAPHQL_VALIDATION_FAILED": 1})) + ) } .with_metrics() .await; @@ -407,13 +423,14 @@ mod test { let context = Context::default(); - context.extensions() - .with_lock(|lock| lock.insert(ClientRequestAccepts{ + context.extensions().with_lock(|lock| { + lock.insert(ClientRequestAccepts { multipart_defer: false, multipart_subscription: false, json: true, wildcard: false, - })); + }) + }); let _ = context.insert(COUNTED_ERRORS, json!({"GRAPHQL_VALIDATION_FAILED": 1})); @@ -432,62 +449,63 @@ mod test { graphql::Error::builder() .message("You did a bad request.") .extension_code("GRAPHQL_VALIDATION_FAILED") - .build() + .build(), ) .error( graphql::Error::builder() .message("Custom error text") .extension_code("CUSTOM_ERROR") - .build() ) + .build(), + ) .build() .unwrap(), &config, - ).await; + ) + .await; assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "CUSTOM_ERROR", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "CUSTOM_ERROR", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); - assert_counter!( - "apollo.router.graphql_error", - 1, - code = "CUSTOM_ERROR" - ); + 
assert_counter!("apollo.router.graphql_error", 1, code = "CUSTOM_ERROR"); assert_counter_not_exists!( - "apollo.router.operations.error", - u64, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); + "apollo.router.operations.error", + u64, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); assert_counter_not_exists!( - "apollo.router.graphql_error", - u64, - code = "GRAPHQL_VALIDATION_FAILED" - ); + "apollo.router.graphql_error", + u64, + code = "GRAPHQL_VALIDATION_FAILED" + ); - assert_eq!(new_response.context.get_json_value(COUNTED_ERRORS), Some(json!({"GRAPHQL_VALIDATION_FAILED": 1, "CUSTOM_ERROR": 1}))) + assert_eq!( + new_response.context.get_json_value(COUNTED_ERRORS), + Some(json!({"GRAPHQL_VALIDATION_FAILED": 1, "CUSTOM_ERROR": 1})) + ) } - .with_metrics() - .await; + .with_metrics() + .await; } #[tokio::test] @@ -512,56 +530,56 @@ mod test { ); assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); + 
"apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "MY_CUSTOM_ERROR", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "MY_CUSTOM_ERROR", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "400", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "400", + 
"graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); assert_counter!( - "apollo.router.graphql_error", - 1, - code = "GRAPHQL_VALIDATION_FAILED" - ); + "apollo.router.graphql_error", + 1, + code = "GRAPHQL_VALIDATION_FAILED" + ); assert_counter!("apollo.router.graphql_error", 1, code = "MY_CUSTOM_ERROR"); assert_counter!("apollo.router.graphql_error", 1, code = "400"); } - .with_metrics() - .await; + .with_metrics() + .await; } #[tokio::test] @@ -580,55 +598,55 @@ mod test { ); assert_counter_not_exists!( - "apollo.router.operations.error", - u64, - "apollo.operation.id" = "", - "graphql.operation.name" = "", - "graphql.operation.type" = "", - "apollo.client.name" = "", - "apollo.client.version" = "", - "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); + "apollo.router.operations.error", + u64, + "apollo.operation.id" = "", + "graphql.operation.name" = "", + "graphql.operation.type" = "", + "apollo.client.name" = "", + "apollo.client.version" = "", + "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); assert_counter_not_exists!( - "apollo.router.operations.error", - u64, - "apollo.operation.id" = "", - "graphql.operation.name" = "", - "graphql.operation.type" = "", - "apollo.client.name" = "", - "apollo.client.version" = "", - "graphql.error.extensions.code" = "MY_CUSTOM_ERROR", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); + "apollo.router.operations.error", + u64, + "apollo.operation.id" = "", + "graphql.operation.name" = "", + "graphql.operation.type" = "", + "apollo.client.name" = "", + "apollo.client.version" = "", + "graphql.error.extensions.code" = 
"MY_CUSTOM_ERROR", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); assert_counter_not_exists!( - "apollo.router.operations.error", - u64, - "apollo.operation.id" = "", - "graphql.operation.name" = "", - "graphql.operation.type" = "", - "apollo.client.name" = "", - "apollo.client.version" = "", - "graphql.error.extensions.code" = "400", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); + "apollo.router.operations.error", + u64, + "apollo.operation.id" = "", + "graphql.operation.name" = "", + "graphql.operation.type" = "", + "apollo.client.name" = "", + "apollo.client.version" = "", + "graphql.error.extensions.code" = "400", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "", + "apollo.router.error.service" = "" + ); assert_counter!( - "apollo.router.graphql_error", - 1, - code = "GRAPHQL_VALIDATION_FAILED" - ); + "apollo.router.graphql_error", + 1, + code = "GRAPHQL_VALIDATION_FAILED" + ); assert_counter!("apollo.router.graphql_error", 1, code = "MY_CUSTOM_ERROR"); assert_counter!("apollo.router.graphql_error", 1, code = "400"); } - .with_metrics() - .await; + .with_metrics() + .await; } #[tokio::test] @@ -656,23 +674,23 @@ mod test { count_operation_errors(&[error], &context, &config); assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "SOME_ERROR_CODE", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + 
"apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "SOME_ERROR_CODE", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); assert_counter!("apollo.router.graphql_error", 1, code = "SOME_ERROR_CODE"); } - .with_metrics() - .await; + .with_metrics() + .await; } #[tokio::test] @@ -701,88 +719,88 @@ mod test { let errors = codes.map(|code| { graphql::Error::from_value(json!( - { - "message": "error occurred", - "extensions": { - "code": code, - "service": "mySubgraph" - }, - "path": ["obj", "field"] - } - )) - .unwrap() + { + "message": "error occurred", + "extensions": { + "code": code, + "service": "mySubgraph" + }, + "path": ["obj", "field"] + } + )) + .unwrap() }); count_operation_errors(&errors, &context, &config); assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "VALID_ERROR_CODE", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "VALID_ERROR_CODE", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); assert_counter!("apollo.router.graphql_error", 1, code = "VALID_ERROR_CODE"); assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - 
"graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "400", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "400", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); assert_counter!("apollo.router.graphql_error", 1, code = "400"); // Code is ignored for null, arrays, and objects assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "true", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); + "apollo.router.operations.error", + 1, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "true", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); assert_counter!("apollo.router.graphql_error", 1, code = "true"); assert_counter!( - "apollo.router.operations.error", - 3, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - 
"apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); + "apollo.router.operations.error", + 3, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); assert_counter!("apollo.router.graphql_error", 3); } - .with_metrics() - .await; + .with_metrics() + .await; } #[tokio::test] @@ -809,53 +827,53 @@ mod test { let errors = codes.map(|code| { graphql::Error::from_value(json!( - { - "message": "error occurred", - "extensions": { - "code": code, - "service": "mySubgraph" - }, - "path": ["obj", "field"] - } - )) - .unwrap() + { + "message": "error occurred", + "extensions": { + "code": code, + "service": "mySubgraph" + }, + "path": ["obj", "field"] + } + )) + .unwrap() }); count_operation_errors(&errors, &context, &config); assert_counter!( - "apollo.router.operations.error", - 2, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "VALID_ERROR_CODE", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); + "apollo.router.operations.error", + 2, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "VALID_ERROR_CODE", + 
"graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); assert_counter!("apollo.router.graphql_error", 2, code = "VALID_ERROR_CODE"); assert_counter!( - "apollo.router.operations.error", - 2, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "/obj/field", - "apollo.router.error.service" = "mySubgraph" - ); + "apollo.router.operations.error", + 2, + "apollo.operation.id" = "some-id", + "graphql.operation.name" = "SomeOperation", + "graphql.operation.type" = "query", + "apollo.client.name" = "client-1", + "apollo.client.version" = "version-1", + "graphql.error.extensions.code" = "", + "graphql.error.extensions.severity" = "ERROR", + "graphql.error.path" = "/obj/field", + "apollo.router.error.service" = "mySubgraph" + ); assert_counter!("apollo.router.graphql_error", 2); } - .with_metrics() - .await; + .with_metrics() + .await; } -} \ No newline at end of file +} diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 3bb743f909..cca1b901d1 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -86,7 +86,7 @@ use crate::context::CONTAINS_GRAPHQL_ERROR; use crate::context::OPERATION_KIND; use crate::context::OPERATION_NAME; use crate::graphql::ResponseVisitor; -use crate::layers::{ServiceBuilderExt}; +use crate::layers::ServiceBuilderExt; use crate::layers::instrument::InstrumentLayer; use crate::metrics::aggregation::MeterProviderType; use crate::metrics::filter::FilterMeterProvider; @@ -113,7 +113,9 @@ use crate::plugins::telemetry::consts::OTEL_STATUS_CODE_OK; use crate::plugins::telemetry::consts::REQUEST_SPAN_NAME; use 
crate::plugins::telemetry::consts::ROUTER_SPAN_NAME; use crate::plugins::telemetry::dynamic_attribute::SpanDynAttribute; -use crate::plugins::telemetry::error_counter::{count_execution_errors, count_subgraph_errors, count_supergraph_errors}; +use crate::plugins::telemetry::error_counter::count_execution_errors; +use crate::plugins::telemetry::error_counter::count_subgraph_errors; +use crate::plugins::telemetry::error_counter::count_supergraph_errors; use crate::plugins::telemetry::fmt_layer::create_fmt_layer; use crate::plugins::telemetry::metrics::MetricsBuilder; use crate::plugins::telemetry::metrics::MetricsConfigurator; @@ -133,7 +135,8 @@ use crate::plugins::telemetry::tracing::apollo_telemetry::decode_ftv1_trace; use crate::query_planner::OperationKind; use crate::register_private_plugin; use crate::router_factory::Endpoint; -use crate::services::{ExecutionRequest, ExecutionResponse}; +use crate::services::ExecutionRequest; +use crate::services::ExecutionResponse; use crate::services::SubgraphRequest; use crate::services::SubgraphResponse; use crate::services::SupergraphRequest; diff --git a/apollo-router/src/services/router/service.rs b/apollo-router/src/services/router/service.rs index 807a6d43d0..c12bd96fb5 100644 --- a/apollo-router/src/services/router/service.rs +++ b/apollo-router/src/services/router/service.rs @@ -47,14 +47,10 @@ use crate::configuration::Batching; use crate::configuration::BatchingMode; use crate::graphql; use crate::http_ext; -use crate::json_ext::Value; use crate::layers::DEFAULT_BUFFER_SIZE; use crate::layers::ServiceBuilderExt; #[cfg(test)] use crate::plugin::test::MockSupergraphService; -use crate::plugins::telemetry::apollo::Config as ApolloTelemetryConfig; -use crate::plugins::telemetry::apollo::ErrorsConfiguration; -use crate::plugins::telemetry::config::Conf as TelemetryConfig; use crate::plugins::telemetry::config_new::attributes::HTTP_REQUEST_BODY; use crate::plugins::telemetry::config_new::attributes::HTTP_REQUEST_HEADERS; 
use crate::plugins::telemetry::config_new::attributes::HTTP_REQUEST_URI; @@ -90,7 +86,6 @@ use crate::services::router; use crate::services::router::pipeline_handle::PipelineHandle; use crate::services::router::pipeline_handle::PipelineRef; use crate::services::supergraph; -use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; pub(crate) static MULTIPART_DEFER_CONTENT_TYPE_HEADER_VALUE: HeaderValue = HeaderValue::from_static(MULTIPART_DEFER_CONTENT_TYPE); @@ -110,7 +105,6 @@ pub(crate) struct RouterService { // instance batching: Batching, supergraph_service: supergraph::BoxCloneService, - apollo_telemetry_config: ApolloTelemetryConfig, } impl RouterService { @@ -120,7 +114,6 @@ impl RouterService { persisted_query_layer: Arc, query_analysis_layer: QueryAnalysisLayer, batching: Batching, - apollo_telemetry_config: ApolloTelemetryConfig, ) -> Self { let supergraph_service: supergraph::BoxCloneService = ServiceBuilder::new().buffered().service(sgb).boxed_clone(); @@ -131,7 +124,6 @@ impl RouterService { query_analysis_layer: Arc::new(query_analysis_layer), batching, supergraph_service, - apollo_telemetry_config, } } } @@ -930,7 +922,6 @@ impl RouterCreator { persisted_query_layer, query_analysis_layer, configuration.batching.clone(), - TelemetryConfig::apollo(&configuration), )); // NOTE: This is the start of the router pipeline (router_service) From 101b50c842cf6d8b545e3a726d4623aff769f427 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 30 Apr 2025 11:36:36 -0400 Subject: [PATCH 14/46] add router service counter --- .../src/plugins/telemetry/error_counter.rs | 33 ++++++++++++++++++- apollo-router/src/plugins/telemetry/mod.rs | 11 +++++-- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 8b808b7ba5..d034898563 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ 
b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -19,7 +19,7 @@ use crate::plugins::telemetry::CLIENT_VERSION; use crate::plugins::telemetry::apollo::ErrorsConfiguration; use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; use crate::query_planner::APOLLO_OPERATION_ID; -use crate::services::ExecutionResponse; +use crate::services::{router, ExecutionResponse, RouterResponse}; use crate::services::SubgraphResponse; use crate::services::SupergraphResponse; use crate::services::router::ClientRequestAccepts; @@ -151,6 +151,37 @@ pub(crate) async fn count_execution_errors( } } +pub(crate) async fn count_router_errors( + response: RouterResponse, + errors_config: &ErrorsConfiguration, +) -> RouterResponse { + let context = response.context.clone(); + let errors_config = errors_config.clone(); + + let (parts, body) = response.response.into_parts(); + + // TODO is this a bad idea? Probably... + // Deserialize the response body back into a response obj so we can pull the errors + let bytes = router::body::into_bytes(body) + .await + .unwrap(); + let response_body: graphql::Response = serde_json::from_slice(&bytes).unwrap(); + + if !response_body.errors.is_empty() { + count_operation_errors(&response_body.errors, &context, &errors_config); + } + + // Refresh context with the most up-to-date list of errors + context + .insert(COUNTED_ERRORS, to_map(&response_body.errors)) + .expect("Unable to insert errors into context."); + + RouterResponse { + context: response.context, + response: http::Response::from_parts(parts, router::body::from_bytes(bytes)), + } +} + // TODO how do we parse the json response to capture SERVICE_UNAVAILABLE or INVALID_ACCEPT_HEADER in a count_router_errors()? 
fn to_map(errors: &[Error]) -> HashMap { diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index cca1b901d1..a59328d376 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -113,7 +113,7 @@ use crate::plugins::telemetry::consts::OTEL_STATUS_CODE_OK; use crate::plugins::telemetry::consts::REQUEST_SPAN_NAME; use crate::plugins::telemetry::consts::ROUTER_SPAN_NAME; use crate::plugins::telemetry::dynamic_attribute::SpanDynAttribute; -use crate::plugins::telemetry::error_counter::count_execution_errors; +use crate::plugins::telemetry::error_counter::{count_execution_errors, count_router_errors}; use crate::plugins::telemetry::error_counter::count_subgraph_errors; use crate::plugins::telemetry::error_counter::count_supergraph_errors; use crate::plugins::telemetry::fmt_layer::create_fmt_layer; @@ -487,7 +487,7 @@ impl PluginPrivate for Telemetry { async move { let span = Span::current(); span.set_span_dyn_attributes(custom_attributes); - let response: Result = fut.await; + let mut response: Result = fut.await; span.record( APOLLO_PRIVATE_DURATION_NS, @@ -565,6 +565,13 @@ impl PluginPrivate for Telemetry { custom_events.on_error(err, &ctx); } + // TODO should I just move this to the above ok? Or maybe we want to count even if we have an Err? + // TODO or move to an and_then() like execution service? 
+ if let Ok(resp) = response { + response = Ok(count_router_errors(resp, &config.apollo.errors).await); + } + + response } }, From d2e652f1204bd82e1127c8753e31ea3387c3e19d Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 30 Apr 2025 11:57:22 -0400 Subject: [PATCH 15/46] remove now unused count_operation_error_codes --- .../src/plugins/telemetry/error_counter.rs | 167 +----------------- 1 file changed, 2 insertions(+), 165 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 270d000d5c..76c237eb2a 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -92,11 +92,6 @@ pub(crate) async fn count_supergraph_errors( if !response_body.errors.is_empty() { count_operation_errors(&response_body.errors, &context, &errors_config); } - } else { - // TODO supposedly this is unreachable in router service. Will we be able to pick this up in a router service plugin callback instead? - // TODO I'm guessing no b/c at the plugin layer, we'd have to parse the response as json. 
- // TODO As is, this feels really bad b/c the error will be defined _AFTER_ we count it in router/service.rs - count_operation_error_codes(&["INVALID_ACCEPT_HEADER"], &context, &errors_config); } // Refresh context with the most up-to-date list of errors @@ -176,6 +171,8 @@ pub(crate) async fn count_router_errors( .insert(COUNTED_ERRORS, to_map(&response_body.errors)) .expect("Unable to insert errors into context."); + // TODO confirm the count_operation_error_codes() case is handled here + RouterResponse { context: response.context, response: http::Response::from_parts(parts, router::body::from_bytes(bytes)), @@ -196,24 +193,6 @@ fn to_map(errors: &[Error]) -> HashMap { map } -fn count_operation_error_codes( - codes: &[&str], - context: &Context, - errors_config: &ErrorsConfiguration, -) { - let errors: Vec = codes - .iter() - .map(|c| { - graphql::Error::builder() - .message("") - .extension_code(*c) - .build() - }) - .collect(); - - count_operation_errors(&errors, context, errors_config); -} - fn count_value_completion_errors( value_completion: &Value, context: &Context, @@ -366,7 +345,6 @@ mod test { use crate::plugins::telemetry::CLIENT_VERSION; use crate::plugins::telemetry::apollo::ErrorsConfiguration; use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; - use crate::plugins::telemetry::error_counter::count_operation_error_codes; use crate::plugins::telemetry::error_counter::count_operation_errors; use crate::plugins::telemetry::error_counter::count_supergraph_errors; use crate::query_planner::APOLLO_OPERATION_ID; @@ -539,147 +517,6 @@ mod test { .await; } - #[tokio::test] - async fn test_count_operation_error_codes_with_extended_config_enabled() { - async { - let config = ErrorsConfiguration { - preview_extended_error_metrics: ExtendedErrorMetricsMode::Enabled, - ..Default::default() - }; - - let context = Context::default(); - let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); - let _ = context.insert(OPERATION_NAME, 
"SomeOperation".to_string()); - let _ = context.insert(OPERATION_KIND, "query".to_string()); - let _ = context.insert(CLIENT_NAME, "client-1".to_string()); - let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); - - count_operation_error_codes( - &["GRAPHQL_VALIDATION_FAILED", "MY_CUSTOM_ERROR", "400"], - &context, - &config, - ); - - assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); - assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "MY_CUSTOM_ERROR", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); - - assert_counter!( - "apollo.router.operations.error", - 1, - "apollo.operation.id" = "some-id", - "graphql.operation.name" = "SomeOperation", - "graphql.operation.type" = "query", - "apollo.client.name" = "client-1", - "apollo.client.version" = "version-1", - "graphql.error.extensions.code" = "400", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); - - assert_counter!( - "apollo.router.graphql_error", - 1, - code = "GRAPHQL_VALIDATION_FAILED" - ); - assert_counter!("apollo.router.graphql_error", 1, code = "MY_CUSTOM_ERROR"); - assert_counter!("apollo.router.graphql_error", 1, code = "400"); - } - .with_metrics() - .await; - } - - #[tokio::test] - async fn 
test_count_operation_error_codes_with_extended_config_disabled() { - async { - let config = ErrorsConfiguration { - preview_extended_error_metrics: ExtendedErrorMetricsMode::Disabled, - ..Default::default() - }; - - let context = Context::default(); - count_operation_error_codes( - &["GRAPHQL_VALIDATION_FAILED", "MY_CUSTOM_ERROR", "400"], - &context, - &config, - ); - - assert_counter_not_exists!( - "apollo.router.operations.error", - u64, - "apollo.operation.id" = "", - "graphql.operation.name" = "", - "graphql.operation.type" = "", - "apollo.client.name" = "", - "apollo.client.version" = "", - "graphql.error.extensions.code" = "GRAPHQL_VALIDATION_FAILED", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); - assert_counter_not_exists!( - "apollo.router.operations.error", - u64, - "apollo.operation.id" = "", - "graphql.operation.name" = "", - "graphql.operation.type" = "", - "apollo.client.name" = "", - "apollo.client.version" = "", - "graphql.error.extensions.code" = "MY_CUSTOM_ERROR", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); - assert_counter_not_exists!( - "apollo.router.operations.error", - u64, - "apollo.operation.id" = "", - "graphql.operation.name" = "", - "graphql.operation.type" = "", - "apollo.client.name" = "", - "apollo.client.version" = "", - "graphql.error.extensions.code" = "400", - "graphql.error.extensions.severity" = "ERROR", - "graphql.error.path" = "", - "apollo.router.error.service" = "" - ); - - assert_counter!( - "apollo.router.graphql_error", - 1, - code = "GRAPHQL_VALIDATION_FAILED" - ); - assert_counter!("apollo.router.graphql_error", 1, code = "MY_CUSTOM_ERROR"); - assert_counter!("apollo.router.graphql_error", 1, code = "400"); - } - .with_metrics() - .await; - } - #[tokio::test] async fn test_count_operation_errors_with_extended_config_enabled() { async { From 
ec4171c5436ead370731c63c291eeda26bdbb4ad Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 30 Apr 2025 11:58:59 -0400 Subject: [PATCH 16/46] inline count value completion errors --- .../src/plugins/telemetry/error_counter.rs | 27 ++++++------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 76c237eb2a..40576e5abf 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -84,8 +84,13 @@ pub(crate) async fn count_supergraph_errors( .extensions .get(EXTENSIONS_VALUE_COMPLETION_KEY) { - // TODO inline this func? - count_value_completion_errors(value_completion, &context, &errors_config); + if let Some(vc_array) = value_completion.as_array() { + let errors: Vec = vc_array + .iter() + .filter_map(graphql::Error::from_value_completion_value) + .collect(); + count_operation_errors(&errors, &context, &errors_config); + } } } else if accepts_multipart_defer || accepts_multipart_subscription { // TODO can we combine this with above? @@ -171,7 +176,7 @@ pub(crate) async fn count_router_errors( .insert(COUNTED_ERRORS, to_map(&response_body.errors)) .expect("Unable to insert errors into context."); - // TODO confirm the count_operation_error_codes() case is handled here + // TODO confirm the count_operation_error_codes() INVALID_ACCEPT_HEADER case is handled here RouterResponse { context: response.context, @@ -179,8 +184,6 @@ pub(crate) async fn count_router_errors( } } -// TODO how do we parse the json response to capture SERVICE_UNAVAILABLE or INVALID_ACCEPT_HEADER in a count_router_errors()? 
- fn to_map(errors: &[Error]) -> HashMap { let mut map: HashMap = HashMap::new(); errors.iter().for_each(|error| { @@ -193,20 +196,6 @@ fn to_map(errors: &[Error]) -> HashMap { map } -fn count_value_completion_errors( - value_completion: &Value, - context: &Context, - errors_config: &ErrorsConfiguration, -) { - if let Some(vc_array) = value_completion.as_array() { - let errors: Vec = vc_array - .iter() - .filter_map(graphql::Error::from_value_completion_value) - .collect(); - count_operation_errors(&errors, context, errors_config); - } -} - fn count_operation_errors( errors: &[Error], context: &Context, From 64ec25e0d3f0a5bba1a3f7dd800ea0c58ba3caa9 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Thu, 1 May 2025 09:13:30 -0400 Subject: [PATCH 17/46] Add uuid to gql error struct. Fix struct literals --- apollo-router/src/error.rs | 16 ++- apollo-router/src/graphql/mod.rs | 39 ++++++- apollo-router/src/plugins/rhai/execution.rs | 22 ++-- apollo-router/src/plugins/rhai/mod.rs | 9 +- apollo-router/src/plugins/rhai/router.rs | 22 ++-- apollo-router/src/plugins/rhai/subgraph.rs | 22 ++-- apollo-router/src/plugins/rhai/supergraph.rs | 22 ++-- .../src/plugins/telemetry/error_counter.rs | 17 +-- apollo-router/src/plugins/telemetry/mod.rs | 1 + apollo-router/src/query_planner/fetch.rs | 38 +++--- apollo-router/src/services/layers/apq.rs | 110 ++++++++---------- .../src/services/supergraph/service.rs | 17 ++- apollo-router/src/spec/query.rs | 46 +++++--- 13 files changed, 219 insertions(+), 162 deletions(-) diff --git a/apollo-router/src/error.rs b/apollo-router/src/error.rs index 0a258ee610..360188acee 100644 --- a/apollo-router/src/error.rs +++ b/apollo-router/src/error.rs @@ -154,12 +154,16 @@ impl FetchError { } } - Error { - message: self.to_string(), - locations: Default::default(), - path, - extensions: value.as_object().unwrap().to_owned(), - } + Error::builder() + .message(self.to_string()) + .locations(Vec::default()) + .and_path(path) + // Extension code is 
required, but is only used if extensions doesn't have it. We always + // have a code so this value will be ignored. + // TODO better way to unwrap the actual code from value just in case? + .extension_code("") + .extensions(value.as_object().unwrap().to_owned()) + .build() } /// Convert the error to an appropriate response. diff --git a/apollo-router/src/graphql/mod.rs b/apollo-router/src/graphql/mod.rs index 3f84cf525e..70509335dc 100644 --- a/apollo-router/src/graphql/mod.rs +++ b/apollo-router/src/graphql/mod.rs @@ -6,7 +6,7 @@ mod visitor; use std::fmt; use std::pin::Pin; - +use std::str::FromStr; use apollo_compiler::response::GraphQLError as CompilerExecutionError; use apollo_compiler::response::ResponseDataPathSegment; use futures::Stream; @@ -20,6 +20,7 @@ use serde::Serialize; use serde_json_bytes::ByteString; use serde_json_bytes::Map as JsonMap; use serde_json_bytes::Value; +use uuid::Uuid; pub(crate) use visitor::ResponseVisitor; use crate::json_ext::Object; @@ -70,6 +71,11 @@ pub struct Error { /// The optional GraphQL extensions for this error. #[serde(default, skip_serializing_if = "Object::is_empty")] pub extensions: Object, + + // TODO do we need to implement a random one for default? + /// A unique identifier for this error + apollo_id: Uuid + // TODO add attr to mark as counted, skip serialize? // TODO would include_subgraph_errors or a customer's plugin change this? // TODO Does serialization happen btwn layers (which would break this also)? 
@@ -120,6 +126,7 @@ impl Error { extension_code: T, // Skip the `Object` type alias in order to use buildstructor’s map special-casing mut extensions: JsonMap, + apollo_id: Option ) -> Self { extensions .entry("code") @@ -129,10 +136,13 @@ impl Error { locations, path, extensions, + apollo_id: apollo_id.unwrap_or_else(|| Uuid::new_v4()) } } pub(crate) fn from_value(value: Value) -> Result { + let _value_str = value.to_string(); // TODO temp debug remove + let mut object = ensure_object!(value).map_err(|error| MalformedResponseError { reason: format!("invalid error within `errors`: {}", error), })?; @@ -167,12 +177,24 @@ impl Error { .map_err(|err| MalformedResponseError { reason: format!("invalid `path` within error: {}", err), })?; + // TODO confirm camelcase key + let apollo_id = match extract_key_value_from_object!(object, "apolloId", Value::String(s) => s) + { + Ok(Some(s)) => Uuid::from_str(s.as_str()).map_err(|err| MalformedResponseError { + reason: format!("invalid `apolloId` within error: {}", err) + }), + Ok(None) => Ok(Uuid::new_v4()), + Err(err) => Err(MalformedResponseError { + reason: format!("invalid `apolloId` within error: {}", err), + }), + }?; Ok(Error { message, locations, path, extensions, + apollo_id }) } @@ -208,8 +230,22 @@ impl Error { locations, path, extensions, + apollo_id: Uuid::new_v4() }) } + + pub fn extension_code(&self) -> Option { + self.extensions.get("code").and_then(|c| match c { + Value::String(s) => Some(s.as_str().to_owned()), + Value::Bool(b) => Some(format!("{b}")), + Value::Number(n) => Some(n.to_string()), + Value::Null | Value::Array(_) | Value::Object(_) => None, + }) + } + + pub fn apollo_id(&self) -> Uuid { + self.apollo_id + } } /// GraphQL spec require that both "line" and "column" are positive numbers. 
@@ -289,6 +325,7 @@ impl From for Error { locations, path, extensions, + apollo_id: Uuid::new_v4() } } } diff --git a/apollo-router/src/plugins/rhai/execution.rs b/apollo-router/src/plugins/rhai/execution.rs index bdcd646577..b59c8605b4 100644 --- a/apollo-router/src/plugins/rhai/execution.rs +++ b/apollo-router/src/plugins/rhai/execution.rs @@ -28,10 +28,13 @@ pub(super) fn request_failure( .build()? } else { Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .context(context) .status_code(error_details.status) .build()? @@ -53,10 +56,13 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .build() } else { Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .status_code(error_details.status) .context(context) .build() diff --git a/apollo-router/src/plugins/rhai/mod.rs b/apollo-router/src/plugins/rhai/mod.rs index 3cf5f21fbc..4b6d054c65 100644 --- a/apollo-router/src/plugins/rhai/mod.rs +++ b/apollo-router/src/plugins/rhai/mod.rs @@ -558,10 +558,11 @@ macro_rules! gen_map_deferred_response { let mut guard = shared_response.lock(); let response_opt = guard.take(); let $base::DeferredResponse { mut response, .. 
} = response_opt.unwrap(); - let error = Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }; + let error = Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build(); response.errors = vec![error]; return Some(response); } diff --git a/apollo-router/src/plugins/rhai/router.rs b/apollo-router/src/plugins/rhai/router.rs index b968b28270..9e3f1f35be 100644 --- a/apollo-router/src/plugins/rhai/router.rs +++ b/apollo-router/src/plugins/rhai/router.rs @@ -30,10 +30,13 @@ pub(super) fn request_failure( .build()? } else { crate::services::router::Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .context(context) .status_code(error_details.status) .build()? 
@@ -58,10 +61,13 @@ pub(super) fn response_failure( .build() } else { crate::services::router::Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .status_code(error_details.status) .context(context) .build() diff --git a/apollo-router/src/plugins/rhai/subgraph.rs b/apollo-router/src/plugins/rhai/subgraph.rs index 4e80b97ffe..96db937020 100644 --- a/apollo-router/src/plugins/rhai/subgraph.rs +++ b/apollo-router/src/plugins/rhai/subgraph.rs @@ -26,10 +26,13 @@ pub(super) fn request_failure( .build() } else { Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .context(context) .status_code(error_details.status) .subgraph_name(String::default()) // XXX: We don't know the subgraph name @@ -53,10 +56,13 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .build() } else { Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .status_code(error_details.status) .context(context) .subgraph_name(String::default()) // XXX: We don't know the subgraph name diff --git a/apollo-router/src/plugins/rhai/supergraph.rs b/apollo-router/src/plugins/rhai/supergraph.rs index 2b5b7bc804..43bf372dd7 100644 --- a/apollo-router/src/plugins/rhai/supergraph.rs +++ 
b/apollo-router/src/plugins/rhai/supergraph.rs @@ -28,10 +28,13 @@ pub(super) fn request_failure( .build()? } else { Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .context(context) .status_code(error_details.status) .build()? @@ -53,10 +56,13 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .build() } else { Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .status_code(error_details.status) .context(context) .build() diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 40576e5abf..49db4798ac 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -5,7 +5,6 @@ use futures::StreamExt; use futures::future::ready; use futures::stream::once; use serde::de::DeserializeOwned; -use serde_json_bytes::Value; use crate::Context; use crate::apollo_studio_interop::UsageReporting; @@ -188,7 +187,7 @@ fn to_map(errors: &[Error]) -> HashMap { let mut map: HashMap = HashMap::new(); errors.iter().for_each(|error| { // TODO hash the full error more uniquely - map.entry(get_code(error).unwrap_or_default()) + map.entry(error.extension_code().unwrap_or_default()) .and_modify(|count| *count += 1) .or_insert(1); }); @@ -201,6 +200,9 @@ fn count_operation_errors( context: &Context, errors_config: &ErrorsConfiguration, ) { + let _id_str = errors[0].apollo_id().to_string(); // TODO DEBUG 
REMOVE + let _msg_str = errors[0].message.clone(); // TODO DEBUG REMOVE + let previously_counted_errors_map: HashMap = unwrap_from_context(context, COUNTED_ERRORS); @@ -229,7 +231,7 @@ fn count_operation_errors( // TODO how do we account for redacted errors when comparing? Likely skip them completely (they will have been counted with correct codes in subgraph layer) let mut diff_map = previously_counted_errors_map.clone(); for error in errors { - let code = get_code(error).unwrap_or_default(); + let code = error.extension_code().unwrap_or_default(); // If we already counted this error in a previous layer, then skip counting it again if let Some(count) = diff_map.get_mut(&code) { @@ -298,15 +300,6 @@ fn unwrap_from_context(context: &Context, key: &s .unwrap_or_default() // -> T (defaults on Err) } -fn get_code(error: &Error) -> Option { - error.extensions.get("code").and_then(|c| match c { - Value::String(s) => Some(s.as_str().to_owned()), - Value::Bool(b) => Some(format!("{b}")), - Value::Number(n) => Some(n.to_string()), - Value::Null | Value::Array(_) | Value::Object(_) => None, - }) -} - fn count_graphql_error(count: u64, code: String) { // TODO ensure an empty string matches when we used a None optional before u64_counter!( diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index a59328d376..1a4dc6522f 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -911,6 +911,7 @@ impl PluginPrivate for Telemetry { // TODO merge into above match? Move into its own and_then()? if let Ok(resp) = result { + // TODO handle Err() case? 
result = Ok(count_subgraph_errors(resp, &conf.apollo.errors).await); } diff --git a/apollo-router/src/query_planner/fetch.rs b/apollo-router/src/query_planner/fetch.rs index 566c61d2ed..5febc22710 100644 --- a/apollo-router/src/query_planner/fetch.rs +++ b/apollo-router/src/query_planner/fetch.rs @@ -362,16 +362,24 @@ impl FetchNode { for values_path in inverted_paths.get(*i).iter().flat_map(|v| v.iter()) { - errors.push(Error { - locations: error.locations.clone(), + errors.push(Error::builder() + .locations(error.locations.clone()) // append to the entitiy's path the error's path without //`_entities` and the index - path: Some(Path::from_iter( - values_path.0.iter().chain(&path.0[2..]).cloned(), - )), - message: error.message.clone(), - extensions: error.extensions.clone(), - }) + .path( + Path::from_iter( + values_path + .0 + .iter() + .chain(&path.0[2..]) + .cloned(), + ) + ) + .message(error.message.clone()) + .extension_code(error.extension_code().unwrap_or_default()) + .extensions(error.extensions.clone()) + .build() + ) } } _ => { @@ -450,12 +458,14 @@ impl FetchNode { }) .unwrap_or_else(|| current_dir.clone()); - Error { - locations: error.locations, - path: Some(path), - message: error.message, - extensions: error.extensions, - } + Error::builder() + .locations(error.locations.clone()) + .path(path) + .message(error.message.clone()) + .extension_code(error.extension_code().unwrap_or_default()) + .extensions(error.extensions.clone()) + .apollo_id(error.apollo_id()) + .build() }) .collect(); let mut data = response.data.unwrap_or_default(); diff --git a/apollo-router/src/services/layers/apq.rs b/apollo-router/src/services/layers/apq.rs index 250b12e573..89c4505fe1 100644 --- a/apollo-router/src/services/layers/apq.rs +++ b/apollo-router/src/services/layers/apq.rs @@ -8,11 +8,11 @@ use http::StatusCode; use http::header::CACHE_CONTROL; use serde::Deserialize; use serde_json_bytes::Value; -use serde_json_bytes::json; use sha2::Digest; use sha2::Sha256; use 
crate::cache::DeduplicatingCache; +use crate::json_ext::Path; use crate::services::SupergraphRequest; use crate::services::SupergraphResponse; @@ -131,15 +131,13 @@ async fn apq_request( Ok(request) } else { tracing::debug!("apq: graphql request doesn't match provided sha256Hash"); - let errors = vec![crate::error::Error { - message: "provided sha does not match query".to_string(), - locations: Default::default(), - path: Default::default(), - extensions: serde_json_bytes::from_value(json!({ - "code": "PERSISTED_QUERY_HASH_MISMATCH", - })) - .unwrap(), - }]; + let errors = vec![crate::error::Error::builder() + .message("provided sha does not match query".to_string()) + .locations(Default::default()) + .path(Path::default()) + .extension_code("PERSISTED_QUERY_HASH_MISMATCH") + .build(), + ]; let res = SupergraphResponse::builder() .status_code(StatusCode::BAD_REQUEST) .data(Value::default()) @@ -164,15 +162,13 @@ async fn apq_request( } else { let _ = request.context.insert(PERSISTED_QUERY_CACHE_HIT, false); tracing::trace!("apq: cache miss"); - let errors = vec![crate::error::Error { - message: "PersistedQueryNotFound".to_string(), - locations: Default::default(), - path: Default::default(), - extensions: serde_json_bytes::from_value(json!({ - "code": "PERSISTED_QUERY_NOT_FOUND", - })) - .unwrap(), - }]; + let errors = vec![crate::error::Error::builder() + .message("PersistedQueryNotFound".to_string()) + .locations(Default::default()) + .path(Path::default()) + .extension_code("PERSISTED_QUERY_NOT_FOUND") + .build(), + ]; let res = SupergraphResponse::builder() .data(Value::default()) .errors(errors) @@ -217,15 +213,13 @@ async fn disabled_apq_request( .extensions .contains_key("persistedQuery") { - let errors = vec![crate::error::Error { - message: "PersistedQueryNotSupported".to_string(), - locations: Default::default(), - path: Default::default(), - extensions: serde_json_bytes::from_value(json!({ - "code": "PERSISTED_QUERY_NOT_SUPPORTED", - })) - .unwrap(), - 
}]; + let errors = vec![crate::error::Error::builder() + .message("PersistedQueryNotSupported".to_string()) + .locations(Default::default()) + .path(Path::default()) + .extension_code("PERSISTED_QUERY_NOT_SUPPORTED") + .build(), + ]; let res = SupergraphResponse::builder() .data(Value::default()) .errors(errors) @@ -263,15 +257,12 @@ mod apq_tests { let hash = Cow::from("ecf4edb46db40b5132295c0291d62fb65d6759a9eedfa4d5d612dd5ec54a6b38"); let hash2 = hash.clone(); - let expected_apq_miss_error = Error { - message: "PersistedQueryNotFound".to_string(), - locations: Default::default(), - path: Default::default(), - extensions: serde_json_bytes::from_value(json!({ - "code": "PERSISTED_QUERY_NOT_FOUND", - })) - .unwrap(), - }; + let expected_apq_miss_error = Error::builder() + .message("PersistedQueryNotFound".to_string()) + .locations(Default::default()) + .path(Path::default()) + .extension_code("PERSISTED_QUERY_NOT_FOUND") + .build(); let mut router_service = from_supergraph_mock_callback(move |req| { let body = req.supergraph_request.body(); @@ -389,15 +380,12 @@ mod apq_tests { let hash = Cow::from("ecf4edb46db40b5132295c0291d62fb65d6759a9eedfa4d5d612dd5ec54a6b36"); let hash2 = hash.clone(); - let expected_apq_miss_error = Error { - message: "PersistedQueryNotFound".to_string(), - locations: Default::default(), - path: Default::default(), - extensions: serde_json_bytes::from_value(json!({ - "code": "PERSISTED_QUERY_NOT_FOUND", - })) - .unwrap(), - }; + let expected_apq_miss_error = Error::builder() + .message("PersistedQueryNotFound".to_string()) + .locations(Default::default()) + .path(Path::default()) + .extension_code("PERSISTED_QUERY_NOT_FOUND") + .build(); let mut router_service = from_supergraph_mock_callback(move |req| { let body = req.supergraph_request.body(); @@ -491,15 +479,12 @@ mod apq_tests { .await .unwrap() .unwrap(); - let expected_apq_insert_failed_error = Error { - message: "provided sha does not match query".to_string(), - locations: 
Default::default(), - path: Default::default(), - extensions: serde_json_bytes::from_value(json!({ - "code": "PERSISTED_QUERY_HASH_MISMATCH", - })) - .unwrap(), - }; + let expected_apq_insert_failed_error = Error::builder() + .message("provided sha does not match query".to_string()) + .locations(Default::default()) + .path(Path::default()) + .extension_code("PERSISTED_QUERY_HASH_MISMATCH") + .build(); assert_eq!(graphql_response.errors[0], expected_apq_insert_failed_error); // apq insert failed, this call will miss @@ -522,15 +507,12 @@ mod apq_tests { #[tokio::test] async fn return_not_supported_when_disabled() { - let expected_apq_miss_error = Error { - message: "PersistedQueryNotSupported".to_string(), - locations: Default::default(), - path: Default::default(), - extensions: serde_json_bytes::from_value(json!({ - "code": "PERSISTED_QUERY_NOT_SUPPORTED", - })) - .unwrap(), - }; + let expected_apq_miss_error =Error::builder() + .message("PersistedQueryNotSupported".to_string()) + .locations(Default::default()) + .path(Path::default()) + .extension_code("PERSISTED_QUERY_NOT_SUPPORTED") + .build(); let mut config = Configuration::default(); config.apq.enabled = false; diff --git a/apollo-router/src/services/supergraph/service.rs b/apollo-router/src/services/supergraph/service.rs index 606a8cf279..08e45eafca 100644 --- a/apollo-router/src/services/supergraph/service.rs +++ b/apollo-router/src/services/supergraph/service.rs @@ -166,16 +166,13 @@ impl Service for SupergraphService { self.license, ) .or_else(|error: BoxError| async move { - let errors = vec![crate::error::Error { - message: error.to_string(), - extensions: serde_json_bytes::json!({ - "code": "INTERNAL_SERVER_ERROR", - }) - .as_object() - .unwrap() - .to_owned(), - ..Default::default() - }]; + let errors = vec![ + crate::error::Error::builder() + .message(error.to_string()) + // TODO this doesn't precisely match previous behavior + .extension_code("INTERNAL_SERVER_ERROR") + .build() + ]; 
Ok(SupergraphResponse::infallible_builder() .errors(errors) diff --git a/apollo-router/src/spec/query.rs b/apollo-router/src/spec/query.rs index 9e2c4e41a3..fcdd473887 100644 --- a/apollo-router/src/spec/query.rs +++ b/apollo-router/src/spec/query.rs @@ -370,11 +370,14 @@ impl Query { ), _ => todo!(), }; - parameters.errors.push(Error { - message, - path: Some(Path::from_response_slice(path)), - ..Error::default() - }); + parameters.errors.push( + Error::builder() + .message(message) + .path(Path::from_response_slice(path)) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ); Err(InvalidValue) } else { @@ -643,14 +646,17 @@ impl Query { output.insert((*field_name).clone(), Value::Null); } if field_type.is_non_null() { - parameters.errors.push(Error { - message: format!( - "Cannot return null for non-nullable field {current_type}.{}", - field_name.as_str() - ), - path: Some(Path::from_response_slice(path)), - ..Error::default() - }); + parameters.errors.push( + Error::builder() + .message(format!( + "Cannot return null for non-nullable field {current_type}.{}", + field_name.as_str() + )) + .path(Path::from_response_slice(path)) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ); return Err(InvalidValue); } @@ -799,14 +805,16 @@ impl Query { path.pop(); res? 
} else if field_type.is_non_null() { - parameters.errors.push(Error { - message: format!( + parameters.errors.push(Error::builder() + .message(format!( "Cannot return null for non-nullable field {}.{field_name_str}", root_type_name - ), - path: Some(Path::from_response_slice(path)), - ..Error::default() - }); + )) + .path(Path::from_response_slice(path)) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ); return Err(InvalidValue); } else { output.insert(field_name.clone(), Value::Null); From 1db5e0a43c159081d083f5bfd93c84be24f7693e Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 7 May 2025 13:59:16 -0400 Subject: [PATCH 18/46] store/pull router layer errors in/from context --- apollo-router/src/context/mod.rs | 2 ++ .../src/plugins/telemetry/error_counter.rs | 20 ++++++------------- apollo-router/src/services/router.rs | 5 ++++- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/apollo-router/src/context/mod.rs b/apollo-router/src/context/mod.rs index 1b86d41637..97e6971661 100644 --- a/apollo-router/src/context/mod.rs +++ b/apollo-router/src/context/mod.rs @@ -76,6 +76,8 @@ pub(crate) const DEPRECATED_OPERATION_KIND: &str = "operation_kind"; pub(crate) const CONTAINS_GRAPHQL_ERROR: &str = "apollo::telemetry::contains_graphql_error"; /// The key to a map of errors that were already counted in a previous layer pub(crate) const COUNTED_ERRORS: &str = "apollo::telemetry::counted_errors"; +/// The key for the full list of errors in the router response. This allows us to pull the value in plugins without having to deserialize the router response. +pub(crate) const ROUTER_RESPONSE_ERRORS: &str = "apollo::router::response_errors"; /// Holds [`Context`] entries. 
pub(crate) type Entries = Arc>; diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 49db4798ac..ca56d48b8e 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -8,7 +8,7 @@ use serde::de::DeserializeOwned; use crate::Context; use crate::apollo_studio_interop::UsageReporting; -use crate::context::COUNTED_ERRORS; +use crate::context::{COUNTED_ERRORS, ROUTER_RESPONSE_ERRORS}; use crate::context::OPERATION_KIND; use crate::context::OPERATION_NAME; use crate::graphql; @@ -157,29 +157,21 @@ pub(crate) async fn count_router_errors( let context = response.context.clone(); let errors_config = errors_config.clone(); - let (parts, body) = response.response.into_parts(); - - // TODO is this a bad idea? Probably... - // Deserialize the response body back into a response obj so we can pull the errors - let bytes = router::body::into_bytes(body) - .await - .unwrap(); - let response_body: graphql::Response = serde_json::from_slice(&bytes).unwrap(); - - if !response_body.errors.is_empty() { - count_operation_errors(&response_body.errors, &context, &errors_config); + let errors: Vec = unwrap_from_context(&context, ROUTER_RESPONSE_ERRORS); + if !errors.is_empty() { + count_operation_errors(&errors, &context, &errors_config); } // Refresh context with the most up-to-date list of errors context - .insert(COUNTED_ERRORS, to_map(&response_body.errors)) + .insert(COUNTED_ERRORS, to_map(&errors)) .expect("Unable to insert errors into context."); // TODO confirm the count_operation_error_codes() INVALID_ACCEPT_HEADER case is handled here RouterResponse { context: response.context, - response: http::Response::from_parts(parts, router::body::from_bytes(bytes)), + response: response.response, } } diff --git a/apollo-router/src/services/router.rs b/apollo-router/src/services/router.rs index f91e820fcc..f452735d66 100644 --- 
a/apollo-router/src/services/router.rs +++ b/apollo-router/src/services/router.rs @@ -144,7 +144,7 @@ impl Request { use displaydoc::Display; use thiserror::Error; -use crate::context::CONTAINS_GRAPHQL_ERROR; +use crate::context::{CONTAINS_GRAPHQL_ERROR, ROUTER_RESPONSE_ERRORS}; #[derive(Error, Display, Debug)] pub enum ParseError { @@ -232,6 +232,9 @@ impl Response { ) -> Result { if !errors.is_empty() { context.insert_json_value(CONTAINS_GRAPHQL_ERROR, serde_json_bytes::Value::Bool(true)); + context + .insert(ROUTER_RESPONSE_ERRORS, errors.clone()) + .expect("Unable to serialize router response errors list for context"); } // Build a response let b = graphql::Response::builder() From 3aa06b53531e7789b6cab02b9b42c0d3e41f739b Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 21 May 2025 11:21:34 -0400 Subject: [PATCH 19/46] new err default impl. Fix from_value --- apollo-router/src/graphql/mod.rs | 40 +++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/apollo-router/src/graphql/mod.rs b/apollo-router/src/graphql/mod.rs index 70509335dc..ebd6ad7f42 100644 --- a/apollo-router/src/graphql/mod.rs +++ b/apollo-router/src/graphql/mod.rs @@ -53,7 +53,7 @@ pub struct Location { /// as may be found in the `errors` field of a GraphQL [`Response`]. /// /// Converted to (or from) JSON with serde. 
-#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Default)] +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] #[non_exhaustive] pub struct Error { @@ -178,23 +178,27 @@ impl Error { reason: format!("invalid `path` within error: {}", err), })?; // TODO confirm camelcase key - let apollo_id = match extract_key_value_from_object!(object, "apolloId", Value::String(s) => s) - { - Ok(Some(s)) => Uuid::from_str(s.as_str()).map_err(|err| MalformedResponseError { - reason: format!("invalid `apolloId` within error: {}", err) - }), - Ok(None) => Ok(Uuid::new_v4()), - Err(err) => Err(MalformedResponseError { + let apollo_id: Option = extract_key_value_from_object!( + object, + "apolloId", + Value::String(s) => s + ) + .map_err(|err| MalformedResponseError { + reason: format!("invalid `apolloId` within error: {}", err), + })? + .map(|s| + Uuid::from_str(s.as_str()).map_err(|err| MalformedResponseError { reason: format!("invalid `apolloId` within error: {}", err), - }), - }?; + }) + ) + .transpose()?; Ok(Error { message, locations, path, extensions, - apollo_id + apollo_id: apollo_id.unwrap_or_else(Uuid::new_v4) }) } @@ -248,6 +252,20 @@ impl Error { } } + +impl Default for Error { + fn default() -> Self { + Error { + message: String::default(), + locations: Vec::default(), + path: None, + extensions: Object::default(), + // Always generate a new UUID + apollo_id: Uuid::new_v4(), + } + } +} + /// GraphQL spec require that both "line" and "column" are positive numbers. /// However GraphQL Java and GraphQL Kotlin return `{ "line": -1, "column": -1 }` /// if they can't determine error location inside query. 
From 3d1d6d24af9bfd6d932d5dd291b5c97a370adea5 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 21 May 2025 12:22:39 -0400 Subject: [PATCH 20/46] Revert away from using builder everywhere --- apollo-router/src/graphql/mod.rs | 2 +- apollo-router/src/plugins/rhai/execution.rs | 22 ++++----- apollo-router/src/plugins/rhai/mod.rs | 9 ++-- apollo-router/src/plugins/rhai/router.rs | 22 ++++----- apollo-router/src/plugins/rhai/subgraph.rs | 22 ++++----- apollo-router/src/plugins/rhai/supergraph.rs | 22 ++++----- .../src/services/supergraph/service.rs | 1 - apollo-router/src/spec/query.rs | 46 ++++++++----------- 8 files changed, 56 insertions(+), 90 deletions(-) diff --git a/apollo-router/src/graphql/mod.rs b/apollo-router/src/graphql/mod.rs index ebd6ad7f42..08bc1a2d4e 100644 --- a/apollo-router/src/graphql/mod.rs +++ b/apollo-router/src/graphql/mod.rs @@ -74,7 +74,7 @@ pub struct Error { // TODO do we need to implement a random one for default? /// A unique identifier for this error - apollo_id: Uuid + pub apollo_id: Uuid // TODO add attr to mark as counted, skip serialize? // TODO would include_subgraph_errors or a customer's plugin change this? diff --git a/apollo-router/src/plugins/rhai/execution.rs b/apollo-router/src/plugins/rhai/execution.rs index b59c8605b4..bdcd646577 100644 --- a/apollo-router/src/plugins/rhai/execution.rs +++ b/apollo-router/src/plugins/rhai/execution.rs @@ -28,13 +28,10 @@ pub(super) fn request_failure( .build()? } else { Response::error_builder() - .errors(vec![ - Error::builder() - .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") - .build() - ]) + .errors(vec![Error { + message: error_details.message.unwrap_or_default(), + ..Default::default() + }]) .context(context) .status_code(error_details.status) .build()? 
@@ -56,13 +53,10 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .build() } else { Response::error_builder() - .errors(vec![ - Error::builder() - .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") - .build() - ]) + .errors(vec![Error { + message: error_details.message.unwrap_or_default(), + ..Default::default() + }]) .status_code(error_details.status) .context(context) .build() diff --git a/apollo-router/src/plugins/rhai/mod.rs b/apollo-router/src/plugins/rhai/mod.rs index 4b6d054c65..3cf5f21fbc 100644 --- a/apollo-router/src/plugins/rhai/mod.rs +++ b/apollo-router/src/plugins/rhai/mod.rs @@ -558,11 +558,10 @@ macro_rules! gen_map_deferred_response { let mut guard = shared_response.lock(); let response_opt = guard.take(); let $base::DeferredResponse { mut response, .. } = response_opt.unwrap(); - let error = Error::builder() - .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") - .build(); + let error = Error { + message: error_details.message.unwrap_or_default(), + ..Default::default() + }; response.errors = vec![error]; return Some(response); } diff --git a/apollo-router/src/plugins/rhai/router.rs b/apollo-router/src/plugins/rhai/router.rs index 9e3f1f35be..b968b28270 100644 --- a/apollo-router/src/plugins/rhai/router.rs +++ b/apollo-router/src/plugins/rhai/router.rs @@ -30,13 +30,10 @@ pub(super) fn request_failure( .build()? } else { crate::services::router::Response::error_builder() - .errors(vec![ - Error::builder() - .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") - .build() - ]) + .errors(vec![Error { + message: error_details.message.unwrap_or_default(), + ..Default::default() + }]) .context(context) .status_code(error_details.status) .build()? 
@@ -61,13 +58,10 @@ pub(super) fn response_failure( .build() } else { crate::services::router::Response::error_builder() - .errors(vec![ - Error::builder() - .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") - .build() - ]) + .errors(vec![Error { + message: error_details.message.unwrap_or_default(), + ..Default::default() + }]) .status_code(error_details.status) .context(context) .build() diff --git a/apollo-router/src/plugins/rhai/subgraph.rs b/apollo-router/src/plugins/rhai/subgraph.rs index 96db937020..4e80b97ffe 100644 --- a/apollo-router/src/plugins/rhai/subgraph.rs +++ b/apollo-router/src/plugins/rhai/subgraph.rs @@ -26,13 +26,10 @@ pub(super) fn request_failure( .build() } else { Response::error_builder() - .errors(vec![ - Error::builder() - .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") - .build() - ]) + .errors(vec![Error { + message: error_details.message.unwrap_or_default(), + ..Default::default() + }]) .context(context) .status_code(error_details.status) .subgraph_name(String::default()) // XXX: We don't know the subgraph name @@ -56,13 +53,10 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .build() } else { Response::error_builder() - .errors(vec![ - Error::builder() - .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") - .build() - ]) + .errors(vec![Error { + message: error_details.message.unwrap_or_default(), + ..Default::default() + }]) .status_code(error_details.status) .context(context) .subgraph_name(String::default()) // XXX: We don't know the subgraph name diff --git a/apollo-router/src/plugins/rhai/supergraph.rs b/apollo-router/src/plugins/rhai/supergraph.rs index 43bf372dd7..2b5b7bc804 100644 --- a/apollo-router/src/plugins/rhai/supergraph.rs +++ 
b/apollo-router/src/plugins/rhai/supergraph.rs @@ -28,13 +28,10 @@ pub(super) fn request_failure( .build()? } else { Response::error_builder() - .errors(vec![ - Error::builder() - .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") - .build() - ]) + .errors(vec![Error { + message: error_details.message.unwrap_or_default(), + ..Default::default() + }]) .context(context) .status_code(error_details.status) .build()? @@ -56,13 +53,10 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .build() } else { Response::error_builder() - .errors(vec![ - Error::builder() - .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") - .build() - ]) + .errors(vec![Error { + message: error_details.message.unwrap_or_default(), + ..Default::default() + }]) .status_code(error_details.status) .context(context) .build() diff --git a/apollo-router/src/services/supergraph/service.rs b/apollo-router/src/services/supergraph/service.rs index 08e45eafca..d579bc8961 100644 --- a/apollo-router/src/services/supergraph/service.rs +++ b/apollo-router/src/services/supergraph/service.rs @@ -169,7 +169,6 @@ impl Service for SupergraphService { let errors = vec![ crate::error::Error::builder() .message(error.to_string()) - // TODO this doesn't precisely match previous behavior .extension_code("INTERNAL_SERVER_ERROR") .build() ]; diff --git a/apollo-router/src/spec/query.rs b/apollo-router/src/spec/query.rs index fcdd473887..9e2c4e41a3 100644 --- a/apollo-router/src/spec/query.rs +++ b/apollo-router/src/spec/query.rs @@ -370,14 +370,11 @@ impl Query { ), _ => todo!(), }; - parameters.errors.push( - Error::builder() - .message(message) - .path(Path::from_response_slice(path)) - // TODO this doesn't precisely match previous behavior - .extension_code("") - .build() - ); + parameters.errors.push(Error { + message, + path: 
Some(Path::from_response_slice(path)), + ..Error::default() + }); Err(InvalidValue) } else { @@ -646,17 +643,14 @@ impl Query { output.insert((*field_name).clone(), Value::Null); } if field_type.is_non_null() { - parameters.errors.push( - Error::builder() - .message(format!( - "Cannot return null for non-nullable field {current_type}.{}", - field_name.as_str() - )) - .path(Path::from_response_slice(path)) - // TODO this doesn't precisely match previous behavior - .extension_code("") - .build() - ); + parameters.errors.push(Error { + message: format!( + "Cannot return null for non-nullable field {current_type}.{}", + field_name.as_str() + ), + path: Some(Path::from_response_slice(path)), + ..Error::default() + }); return Err(InvalidValue); } @@ -805,16 +799,14 @@ impl Query { path.pop(); res? } else if field_type.is_non_null() { - parameters.errors.push(Error::builder() - .message(format!( + parameters.errors.push(Error { + message: format!( "Cannot return null for non-nullable field {}.{field_name_str}", root_type_name - )) - .path(Path::from_response_slice(path)) - // TODO this doesn't precisely match previous behavior - .extension_code("") - .build() - ); + ), + path: Some(Path::from_response_slice(path)), + ..Error::default() + }); return Err(InvalidValue); } else { output.insert(field_name.clone(), Value::Null); From 90129d7f2421dd1472b0555b99713e72ee7bda69 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 21 May 2025 12:42:30 -0400 Subject: [PATCH 21/46] Revert "Revert away from using builder everywhere" This reverts commit 3d1d6d24af9bfd6d932d5dd291b5c97a370adea5. 
--- apollo-router/src/graphql/mod.rs | 2 +- apollo-router/src/plugins/rhai/execution.rs | 22 +++++---- apollo-router/src/plugins/rhai/mod.rs | 9 ++-- apollo-router/src/plugins/rhai/router.rs | 22 +++++---- apollo-router/src/plugins/rhai/subgraph.rs | 22 +++++---- apollo-router/src/plugins/rhai/supergraph.rs | 22 +++++---- .../src/services/supergraph/service.rs | 1 + apollo-router/src/spec/query.rs | 46 +++++++++++-------- 8 files changed, 90 insertions(+), 56 deletions(-) diff --git a/apollo-router/src/graphql/mod.rs b/apollo-router/src/graphql/mod.rs index 08bc1a2d4e..ebd6ad7f42 100644 --- a/apollo-router/src/graphql/mod.rs +++ b/apollo-router/src/graphql/mod.rs @@ -74,7 +74,7 @@ pub struct Error { // TODO do we need to implement a random one for default? /// A unique identifier for this error - pub apollo_id: Uuid + apollo_id: Uuid // TODO add attr to mark as counted, skip serialize? // TODO would include_subgraph_errors or a customer's plugin change this? diff --git a/apollo-router/src/plugins/rhai/execution.rs b/apollo-router/src/plugins/rhai/execution.rs index bdcd646577..b59c8605b4 100644 --- a/apollo-router/src/plugins/rhai/execution.rs +++ b/apollo-router/src/plugins/rhai/execution.rs @@ -28,10 +28,13 @@ pub(super) fn request_failure( .build()? } else { Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .context(context) .status_code(error_details.status) .build()? 
@@ -53,10 +56,13 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .build() } else { Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .status_code(error_details.status) .context(context) .build() diff --git a/apollo-router/src/plugins/rhai/mod.rs b/apollo-router/src/plugins/rhai/mod.rs index 3cf5f21fbc..4b6d054c65 100644 --- a/apollo-router/src/plugins/rhai/mod.rs +++ b/apollo-router/src/plugins/rhai/mod.rs @@ -558,10 +558,11 @@ macro_rules! gen_map_deferred_response { let mut guard = shared_response.lock(); let response_opt = guard.take(); let $base::DeferredResponse { mut response, .. } = response_opt.unwrap(); - let error = Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }; + let error = Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build(); response.errors = vec![error]; return Some(response); } diff --git a/apollo-router/src/plugins/rhai/router.rs b/apollo-router/src/plugins/rhai/router.rs index b968b28270..9e3f1f35be 100644 --- a/apollo-router/src/plugins/rhai/router.rs +++ b/apollo-router/src/plugins/rhai/router.rs @@ -30,10 +30,13 @@ pub(super) fn request_failure( .build()? } else { crate::services::router::Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .context(context) .status_code(error_details.status) .build()? 
@@ -58,10 +61,13 @@ pub(super) fn response_failure( .build() } else { crate::services::router::Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .status_code(error_details.status) .context(context) .build() diff --git a/apollo-router/src/plugins/rhai/subgraph.rs b/apollo-router/src/plugins/rhai/subgraph.rs index 4e80b97ffe..96db937020 100644 --- a/apollo-router/src/plugins/rhai/subgraph.rs +++ b/apollo-router/src/plugins/rhai/subgraph.rs @@ -26,10 +26,13 @@ pub(super) fn request_failure( .build() } else { Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .context(context) .status_code(error_details.status) .subgraph_name(String::default()) // XXX: We don't know the subgraph name @@ -53,10 +56,13 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .build() } else { Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .status_code(error_details.status) .context(context) .subgraph_name(String::default()) // XXX: We don't know the subgraph name diff --git a/apollo-router/src/plugins/rhai/supergraph.rs b/apollo-router/src/plugins/rhai/supergraph.rs index 2b5b7bc804..43bf372dd7 100644 --- a/apollo-router/src/plugins/rhai/supergraph.rs +++ 
b/apollo-router/src/plugins/rhai/supergraph.rs @@ -28,10 +28,13 @@ pub(super) fn request_failure( .build()? } else { Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .context(context) .status_code(error_details.status) .build()? @@ -53,10 +56,13 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .build() } else { Response::error_builder() - .errors(vec![Error { - message: error_details.message.unwrap_or_default(), - ..Default::default() - }]) + .errors(vec![ + Error::builder() + .message(error_details.message.unwrap_or_default()) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ]) .status_code(error_details.status) .context(context) .build() diff --git a/apollo-router/src/services/supergraph/service.rs b/apollo-router/src/services/supergraph/service.rs index d579bc8961..08e45eafca 100644 --- a/apollo-router/src/services/supergraph/service.rs +++ b/apollo-router/src/services/supergraph/service.rs @@ -169,6 +169,7 @@ impl Service for SupergraphService { let errors = vec![ crate::error::Error::builder() .message(error.to_string()) + // TODO this doesn't precisely match previous behavior .extension_code("INTERNAL_SERVER_ERROR") .build() ]; diff --git a/apollo-router/src/spec/query.rs b/apollo-router/src/spec/query.rs index 9e2c4e41a3..fcdd473887 100644 --- a/apollo-router/src/spec/query.rs +++ b/apollo-router/src/spec/query.rs @@ -370,11 +370,14 @@ impl Query { ), _ => todo!(), }; - parameters.errors.push(Error { - message, - path: Some(Path::from_response_slice(path)), - ..Error::default() - }); + parameters.errors.push( + Error::builder() + .message(message) + .path(Path::from_response_slice(path)) + // TODO this doesn't precisely 
match previous behavior + .extension_code("") + .build() + ); Err(InvalidValue) } else { @@ -643,14 +646,17 @@ impl Query { output.insert((*field_name).clone(), Value::Null); } if field_type.is_non_null() { - parameters.errors.push(Error { - message: format!( - "Cannot return null for non-nullable field {current_type}.{}", - field_name.as_str() - ), - path: Some(Path::from_response_slice(path)), - ..Error::default() - }); + parameters.errors.push( + Error::builder() + .message(format!( + "Cannot return null for non-nullable field {current_type}.{}", + field_name.as_str() + )) + .path(Path::from_response_slice(path)) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ); return Err(InvalidValue); } @@ -799,14 +805,16 @@ impl Query { path.pop(); res? } else if field_type.is_non_null() { - parameters.errors.push(Error { - message: format!( + parameters.errors.push(Error::builder() + .message(format!( "Cannot return null for non-nullable field {}.{field_name_str}", root_type_name - ), - path: Some(Path::from_response_slice(path)), - ..Error::default() - }); + )) + .path(Path::from_response_slice(path)) + // TODO this doesn't precisely match previous behavior + .extension_code("") + .build() + ); return Err(InvalidValue); } else { output.insert(field_name.clone(), Value::Null); From dc4b371d5c2e2defd22188f8834ed70109c28afc Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 21 May 2025 13:25:15 -0400 Subject: [PATCH 22/46] Revert default. 
Make extension_code Option --- apollo-router/src/graphql/mod.rs | 38 +++++++++---------- apollo-router/src/graphql/response.rs | 35 ++++++++--------- apollo-router/src/plugins/rhai/execution.rs | 4 -- apollo-router/src/plugins/rhai/mod.rs | 2 - apollo-router/src/plugins/rhai/router.rs | 4 -- apollo-router/src/plugins/rhai/subgraph.rs | 4 -- apollo-router/src/plugins/rhai/supergraph.rs | 4 -- .../src/services/supergraph/service.rs | 1 - apollo-router/src/spec/query.rs | 6 --- 9 files changed, 33 insertions(+), 65 deletions(-) diff --git a/apollo-router/src/graphql/mod.rs b/apollo-router/src/graphql/mod.rs index ebd6ad7f42..3144f8aae9 100644 --- a/apollo-router/src/graphql/mod.rs +++ b/apollo-router/src/graphql/mod.rs @@ -116,21 +116,33 @@ impl Error { /// Optional, may be called multiple times. /// Adds one item to the [`Error::extensions`] map. /// + /// * `.extension_code(impl Into<`[`String`]`>)` + /// Optional. + /// Sets the "code" in the extension map. Will be ignored if extension already has this key + /// set. + /// + /// * `.apollo_id(impl Into<`[`UUID`]`>)` + /// Optional. + /// Sets the unique identifier for this Error. This should only be used in cases of + /// deserialization or testing. If not given, the ID will be auto-generated. + /// /// * `.build()` /// Finishes the builder and returns a GraphQL [`Error`]. 
#[builder(visibility = "pub")] - fn new>( + fn new( message: String, locations: Vec, path: Option, - extension_code: T, + extension_code: Option, // Skip the `Object` type alias in order to use buildstructor’s map special-casing mut extensions: JsonMap, apollo_id: Option ) -> Self { - extensions - .entry("code") - .or_insert_with(|| extension_code.into().into()); + if let Some(code) = extension_code { + extensions + .entry("code") + .or_insert(Value::String(ByteString::from(code))); + } Self { message, locations, @@ -141,8 +153,6 @@ impl Error { } pub(crate) fn from_value(value: Value) -> Result { - let _value_str = value.to_string(); // TODO temp debug remove - let mut object = ensure_object!(value).map_err(|error| MalformedResponseError { reason: format!("invalid error within `errors`: {}", error), })?; @@ -252,20 +262,6 @@ impl Error { } } - -impl Default for Error { - fn default() -> Self { - Error { - message: String::default(), - locations: Vec::default(), - path: None, - extensions: Object::default(), - // Always generate a new UUID - apollo_id: Uuid::new_v4(), - } - } -} - /// GraphQL spec require that both "line" and "column" are positive numbers. /// However GraphQL Java and GraphQL Kotlin return `{ "line": -1, "column": -1 }` /// if they can't determine error location inside query. 
diff --git a/apollo-router/src/graphql/response.rs b/apollo-router/src/graphql/response.rs index f14c2762bf..1cb220ba9a 100644 --- a/apollo-router/src/graphql/response.rs +++ b/apollo-router/src/graphql/response.rs @@ -265,27 +265,23 @@ mod tests { #[test] fn test_append_errors_path_fallback_and_override() { let expected_errors = vec![ - Error { - message: "Something terrible happened!".to_string(), - path: Some(Path::from("here")), - ..Default::default() - }, - Error { - message: "I mean for real".to_string(), - ..Default::default() - }, + Error::builder() + .message("Something terrible happened!") + .path(Path::from("here")) + .build(), + Error::builder() + .message("I mean for real") + .build(), ]; let mut errors_to_append = vec![ - Error { - message: "Something terrible happened!".to_string(), - path: Some(Path::from("here")), - ..Default::default() - }, - Error { - message: "I mean for real".to_string(), - ..Default::default() - }, + Error::builder() + .message("Something terrible happened!") + .path(Path::from("here")) + .build(), + Error::builder() + .message("I mean for real") + .build(), ]; let mut response = Response::builder().build(); @@ -365,7 +361,8 @@ mod tests { }) .as_object() .cloned() - .unwrap() + .unwrap(), + // TODO need to ignore the apollo id for comparison }]) .extensions( bjson!({ diff --git a/apollo-router/src/plugins/rhai/execution.rs b/apollo-router/src/plugins/rhai/execution.rs index b59c8605b4..621df1dfa7 100644 --- a/apollo-router/src/plugins/rhai/execution.rs +++ b/apollo-router/src/plugins/rhai/execution.rs @@ -31,8 +31,6 @@ pub(super) fn request_failure( .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") .build() ]) .context(context) @@ -59,8 +57,6 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - // TODO 
this doesn't precisely match previous behavior - .extension_code("") .build() ]) .status_code(error_details.status) diff --git a/apollo-router/src/plugins/rhai/mod.rs b/apollo-router/src/plugins/rhai/mod.rs index 4b6d054c65..9ab1478d77 100644 --- a/apollo-router/src/plugins/rhai/mod.rs +++ b/apollo-router/src/plugins/rhai/mod.rs @@ -560,8 +560,6 @@ macro_rules! gen_map_deferred_response { let $base::DeferredResponse { mut response, .. } = response_opt.unwrap(); let error = Error::builder() .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") .build(); response.errors = vec![error]; return Some(response); diff --git a/apollo-router/src/plugins/rhai/router.rs b/apollo-router/src/plugins/rhai/router.rs index 9e3f1f35be..0423399fb4 100644 --- a/apollo-router/src/plugins/rhai/router.rs +++ b/apollo-router/src/plugins/rhai/router.rs @@ -33,8 +33,6 @@ pub(super) fn request_failure( .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") .build() ]) .context(context) @@ -64,8 +62,6 @@ pub(super) fn response_failure( .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") .build() ]) .status_code(error_details.status) diff --git a/apollo-router/src/plugins/rhai/subgraph.rs b/apollo-router/src/plugins/rhai/subgraph.rs index 96db937020..1f292a75f6 100644 --- a/apollo-router/src/plugins/rhai/subgraph.rs +++ b/apollo-router/src/plugins/rhai/subgraph.rs @@ -29,8 +29,6 @@ pub(super) fn request_failure( .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") .build() ]) .context(context) @@ -59,8 +57,6 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> 
.errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") .build() ]) .status_code(error_details.status) diff --git a/apollo-router/src/plugins/rhai/supergraph.rs b/apollo-router/src/plugins/rhai/supergraph.rs index 43bf372dd7..f4d7120534 100644 --- a/apollo-router/src/plugins/rhai/supergraph.rs +++ b/apollo-router/src/plugins/rhai/supergraph.rs @@ -31,8 +31,6 @@ pub(super) fn request_failure( .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") .build() ]) .context(context) @@ -59,8 +57,6 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - // TODO this doesn't precisely match previous behavior - .extension_code("") .build() ]) .status_code(error_details.status) diff --git a/apollo-router/src/services/supergraph/service.rs b/apollo-router/src/services/supergraph/service.rs index 08e45eafca..d579bc8961 100644 --- a/apollo-router/src/services/supergraph/service.rs +++ b/apollo-router/src/services/supergraph/service.rs @@ -169,7 +169,6 @@ impl Service for SupergraphService { let errors = vec![ crate::error::Error::builder() .message(error.to_string()) - // TODO this doesn't precisely match previous behavior .extension_code("INTERNAL_SERVER_ERROR") .build() ]; diff --git a/apollo-router/src/spec/query.rs b/apollo-router/src/spec/query.rs index fcdd473887..7596cf15ad 100644 --- a/apollo-router/src/spec/query.rs +++ b/apollo-router/src/spec/query.rs @@ -374,8 +374,6 @@ impl Query { Error::builder() .message(message) .path(Path::from_response_slice(path)) - // TODO this doesn't precisely match previous behavior - .extension_code("") .build() ); @@ -653,8 +651,6 @@ impl Query { field_name.as_str() )) .path(Path::from_response_slice(path)) - // TODO 
this doesn't precisely match previous behavior - .extension_code("") .build() ); @@ -811,8 +807,6 @@ impl Query { root_type_name )) .path(Path::from_response_slice(path)) - // TODO this doesn't precisely match previous behavior - .extension_code("") .build() ); return Err(InvalidValue); From f506777222944e842c3eba3aa5e2a5ac3a58f882 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 21 May 2025 13:43:57 -0400 Subject: [PATCH 23/46] fix one more error builder spot --- apollo-router/src/error.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/apollo-router/src/error.rs b/apollo-router/src/error.rs index 360188acee..a9bbe8e04f 100644 --- a/apollo-router/src/error.rs +++ b/apollo-router/src/error.rs @@ -158,10 +158,6 @@ impl FetchError { .message(self.to_string()) .locations(Vec::default()) .and_path(path) - // Extension code is required, but is only used if extensions doesn't have it. We always - // have a code so this value will be ignored. - // TODO better way to unwrap the actual code from value just in case? 
- .extension_code("") .extensions(value.as_object().unwrap().to_owned()) .build() } From 1aecdc6b2d2f0de02a07fb1a8c0739ff4240828f Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Fri, 23 May 2025 15:18:23 -0400 Subject: [PATCH 24/46] Failed attempt converting to builder (to be reverted)` --- .../src/plugins/telemetry/error_counter.rs | 34 +++---- apollo-router/src/services/router.rs | 60 +++++++++++-- apollo-router/src/services/router/service.rs | 88 ++++++++----------- 3 files changed, 105 insertions(+), 77 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index ca56d48b8e..44b7108c94 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -34,11 +34,9 @@ pub(crate) async fn count_subgraph_errors( let response_body = response.response.body(); if !response_body.errors.is_empty() { count_operation_errors(&response_body.errors, &context, &errors_config); + // Refresh context with the most up-to-date list of errors + let _ = context.insert(COUNTED_ERRORS, to_map(&response_body.errors)); } - context - .insert(COUNTED_ERRORS, to_map(&response_body.errors)) - .expect("Unable to insert errors into context."); - SubgraphResponse { context: response.context, subgraph_name: response.subgraph_name, @@ -95,13 +93,10 @@ pub(crate) async fn count_supergraph_errors( // TODO can we combine this with above? 
if !response_body.errors.is_empty() { count_operation_errors(&response_body.errors, &context, &errors_config); + // Refresh context with the most up-to-date list of errors + let _ = context.insert(COUNTED_ERRORS, to_map(&response_body.errors)); } } - - // Refresh context with the most up-to-date list of errors - context - .insert(COUNTED_ERRORS, to_map(&response_body.errors)) - .expect("Unable to insert errors into context."); }); let (first_response, rest) = StreamExt::into_future(stream).await; @@ -130,10 +125,9 @@ pub(crate) async fn count_execution_errors( let stream = stream.inspect(move |response_body| { if !response_body.errors.is_empty() { count_operation_errors(&response_body.errors, &context, &errors_config); + // Refresh context with the most up-to-date list of errors + let _ = context.insert(COUNTED_ERRORS, to_map(&response_body.errors)); } - context - .insert(COUNTED_ERRORS, to_map(&response_body.errors)) - .expect("Unable to insert errors into context."); }); let (first_response, rest) = StreamExt::into_future(stream).await; @@ -157,16 +151,16 @@ pub(crate) async fn count_router_errors( let context = response.context.clone(); let errors_config = errors_config.clone(); + // We look at context for our current errors instead of the existing config so that we don't + // have to do a full deserialization of the response let errors: Vec = unwrap_from_context(&context, ROUTER_RESPONSE_ERRORS); if !errors.is_empty() { count_operation_errors(&errors, &context, &errors_config); + // Refresh context ONLY when we have errors. This + // TODO don't overwrite, append? 
+ let _ = context.insert(COUNTED_ERRORS, to_map(&errors)); } - // Refresh context with the most up-to-date list of errors - context - .insert(COUNTED_ERRORS, to_map(&errors)) - .expect("Unable to insert errors into context."); - // TODO confirm the count_operation_error_codes() INVALID_ACCEPT_HEADER case is handled here RouterResponse { @@ -287,9 +281,9 @@ fn count_operation_errors( fn unwrap_from_context(context: &Context, key: &str) -> V { context - .get::<_, V>(key) // -> Option> - .unwrap_or_default() // -> Result (defaults to Ok(T::default())) - .unwrap_or_default() // -> T (defaults on Err) + .get::<_, V>(key) + .unwrap_or_default() + .unwrap_or_default() } fn count_graphql_error(count: u64, code: String) { diff --git a/apollo-router/src/services/router.rs b/apollo-router/src/services/router.rs index 4278c72171..3e9c3c0fc9 100644 --- a/apollo-router/src/services/router.rs +++ b/apollo-router/src/services/router.rs @@ -2,17 +2,19 @@ use std::any::Any; use std::mem; - +use buildstructor::builder; use bytes::Bytes; use displaydoc::Display; use futures::Stream; use futures::StreamExt; -use futures::future::Either; +use futures::future::{ready, Either}; +use futures::stream::once; use http::HeaderValue; use http::Method; use http::StatusCode; use http::header::CONTENT_TYPE; use http::header::HeaderName; +use http::response::Parts; use http_body_util::BodyExt; use multer::Multipart; use multimap::MultiMap; @@ -21,9 +23,9 @@ use serde_json_bytes::Map as JsonMap; use static_assertions::assert_impl_all; use thiserror::Error; use tower::BoxError; - +use wiremock::matchers::body_string; use self::body::RouterBody; -use super::supergraph; +use super::{router, supergraph}; use crate::Context; use crate::context::CONTAINS_GRAPHQL_ERROR; use crate::graphql; @@ -145,6 +147,7 @@ impl Request { } use crate::context::ROUTER_RESPONSE_ERRORS; +use crate::protocols::multipart::ProtocolMode; #[derive(Error, Display, Debug)] pub enum ParseError { @@ -235,12 +238,10 @@ impl Response 
{ status_code: Option, headers: MultiMap, context: Context, + protocol_mode: Option ) -> Result { if !errors.is_empty() { - context.insert_json_value(CONTAINS_GRAPHQL_ERROR, serde_json_bytes::Value::Bool(true)); - context - .insert(ROUTER_RESPONSE_ERRORS, errors.clone()) - .expect("Unable to serialize router response errors list for context"); + let context = Self::update_error_context(context, errors); } // Build a response let b = graphql::Response::builder() @@ -266,6 +267,24 @@ impl Response { let body_string = serde_json::to_string(&res)?; let body = body::from_bytes(body_string.clone()); + let body = match protocol_mode { + None => { body } + Some(mode) => { + context + .extensions() + .with_lock(|lock| lock.insert(mode)); + let response_multipart = match mode { + ProtocolMode::Subscription => { + // TODO RouterBody doesn't implement stream trait + crate::protocols::multipart::Multipart::new(body, mode) + } + ProtocolMode::Defer => { + crate::protocols::multipart::Multipart::new(once(ready(res)).chain(body), mode) + } + }; + body::from_result_stream(response_multipart) + } + }; let response = builder.body(body)?; // Stash the body in the extensions so we can access it later let mut response = Self { response, context }; @@ -274,6 +293,31 @@ impl Response { Ok(response) } + #[builder(visibility = "pub")] + fn parts_new( + response: http::Response, + context: Context, + ) -> Result { + if !response.body(). { + Self::update_error_context() + } + let response = http::Response::from_parts(parts, body); + } + + fn update_error_context(context: Context, errors: Vec) -> Context { + context.insert_json_value(CONTAINS_GRAPHQL_ERROR, serde_json_bytes::Value::Bool(true)); + // This will ONLY capture errors if any were added during router service processing. + // We will avoid this path if no router service errors exist, even if errors were passed + // from the supergraph service, because that path builds the router::Response using the + // constructor instead of new(). 
This is ok because we only need this context to count + // errors introduced in the router service. + context + .insert(ROUTER_RESPONSE_ERRORS, errors.clone()) + .expect("Unable to serialize router response errors list for context"); + + context + } + /// This is the constructor (or builder) to use when constructing a Response that represents a global error. /// It has no path and no response data. /// This is useful for things such as authentication errors. diff --git a/apollo-router/src/services/router/service.rs b/apollo-router/src/services/router/service.rs index 6e785f4a59..61616b9d55 100644 --- a/apollo-router/src/services/router/service.rs +++ b/apollo-router/src/services/router/service.rs @@ -23,6 +23,7 @@ use mime::APPLICATION_JSON; use multimap::MultiMap; use opentelemetry::KeyValue; use opentelemetry_semantic_conventions::trace::HTTP_REQUEST_METHOD; +use serde_json::json; use tower::BoxError; use tower::ServiceBuilder; use tower::ServiceExt; @@ -290,45 +291,38 @@ impl RouterService { match body.next().await { None => { tracing::error!("router service is not available to process request",); - Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::SERVICE_UNAVAILABLE) - .body(router::body::from_bytes( - "router service is not available to process request", - )) - .expect("cannot fail"), - context, - }) + // TODO Ideally this would be done as a different from_parts builder + Ok(router::Response::builder() + .error( + graphql::Error::builder() + .message(String::from("router service is not available to process request")) + .extension_code(StatusCode::SERVICE_UNAVAILABLE.to_string()) + .build(), + ) + .status_code(StatusCode::SERVICE_UNAVAILABLE) + .data(json!({"message": "router service is not available to process request"})) + .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) + .context(context) + .build() + .expect("cannot fail") + ) } Some(response) => { if !response.has_next.unwrap_or(false) && 
!response.subscribed.unwrap_or(false) && (accepts_json || accepts_wildcard) { - let body: Result = tracing::trace_span!("serialize_response") - .in_scope(|| { - let body = serde_json::to_string(&response)?; - Ok(body) - }); - let body = body?; - // XXX(@goto-bus-stop): I strongly suspect that it would be better to move this into its own layer. - let display_router_response = context - .extensions() - .with_lock(|ext| ext.get::().is_some()); - - let mut res = router::Response { - response: Response::from_parts( - parts, - router::body::from_bytes(body.clone()), - ), - context, - }; - - if display_router_response { - res.stash_the_body_in_extensions(body); - } - - Ok(res) + // TODO we will now ALWAYS stash body + // TODO we are no longer generating a body ser trace + router::Response::builder() + .and_label(response.label.clone()) + .data(response.data.clone()) + .and_path(response.path.clone()) + .errors(response.errors.clone()) + .extensions(response.extensions.clone()) + .headers(parts.headers.clone().into()) + .context(context.clone()) + .build() } else if accepts_multipart_defer || accepts_multipart_subscription { // Useful when you're using a proxy like nginx which enable proxy_buffering by default (http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_buffering) parts.headers.insert( @@ -344,23 +338,17 @@ impl RouterService { } else { ProtocolMode::Defer }; - context - .extensions() - .with_lock(|lock| lock.insert(protocol_mode)); - - let response_multipart = match protocol_mode { - ProtocolMode::Subscription => Multipart::new(body, protocol_mode), - ProtocolMode::Defer => { - Multipart::new(once(ready(response)).chain(body), protocol_mode) - } - }; - - let response = http::Response::from_parts( - parts, - router::body::from_result_stream(response_multipart), - ); - Ok(RouterResponse { response, context }) + router::Response::builder() + .and_label(response.label.clone()) + .data(response.data.clone()) + .and_path(response.path.clone()) + 
.errors(response.errors.clone()) + .extensions(response.extensions.clone()) + .headers(parts.headers.clone().into()) + .context(context.clone()) + .protocol_mode(protocol_mode) + .build() } else { // this should be unreachable due to a previous check, but just to be sure... Ok(invalid_accept_header_response().into()) @@ -460,6 +448,8 @@ impl RouterService { } bytes.put_u8(b']'); + // TODO there's no easy way to pull the errors from the body or http::Response here, + // TODO so we still can't store the errors in context for the router Ok(RouterResponse { response: http::Response::from_parts( parts, From 9c88ed7316d9d2e52e711247860fcbc2680ec0ee Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Tue, 3 Jun 2025 15:22:46 -0400 Subject: [PATCH 25/46] convert all router constructors into builders --- apollo-router/src/axum_factory/listeners.rs | 27 +- .../plugins/cache/invalidation_endpoint.rs | 102 +++---- apollo-router/src/plugins/coprocessor/mod.rs | 9 +- apollo-router/src/plugins/fleet_detector.rs | 75 +++-- apollo-router/src/plugins/healthcheck/mod.rs | 13 +- .../src/plugins/record_replay/record.rs | 12 +- apollo-router/src/plugins/subscription.rs | 281 ++++++++++-------- .../plugins/telemetry/metrics/prometheus.rs | 15 +- .../src/services/layers/static_page.rs | 20 +- apollo-router/src/services/router.rs | 58 ++-- apollo-router/src/services/router/service.rs | 55 ++-- 11 files changed, 340 insertions(+), 327 deletions(-) diff --git a/apollo-router/src/axum_factory/listeners.rs b/apollo-router/src/axum_factory/listeners.rs index e77ac0d20c..edc60bad90 100644 --- a/apollo-router/src/axum_factory/listeners.rs +++ b/apollo-router/src/axum_factory/listeners.rs @@ -550,14 +550,13 @@ mod tests { .unwrap(); let endpoint = service_fn(|req: router::Request| async move { - Ok::<_, BoxError>(router::Response { - response: http::Response::builder() - .body::(body::from_bytes( - "this is a test".to_string(), - )) - .unwrap(), - context: req.context, - }) + Ok::<_, BoxError>( + 
router::Response::builder() + .data("this is a test") + .context(req.context) + .build() + .unwrap() + ) }) .boxed(); @@ -591,14 +590,10 @@ mod tests { .build() .unwrap(); let endpoint = service_fn(|req: router::Request| async move { - Ok::<_, BoxError>(router::Response { - response: http::Response::builder() - .body::(body::from_bytes( - "this is a test".to_string(), - )) - .unwrap(), - context: req.context, - }) + router::Response::builder() + .data("this is a test") + .context(req.context) + .build() }) .boxed(); diff --git a/apollo-router/src/plugins/cache/invalidation_endpoint.rs b/apollo-router/src/plugins/cache/invalidation_endpoint.rs index 77e76edec9..6b0c6969f7 100644 --- a/apollo-router/src/plugins/cache/invalidation_endpoint.rs +++ b/apollo-router/src/plugins/cache/invalidation_endpoint.rs @@ -18,7 +18,7 @@ use tracing_futures::Instrument; use super::entity::Subgraph; use super::invalidation::Invalidation; use super::invalidation::InvalidationOrigin; -use crate::ListenAddr; +use crate::{graphql, ListenAddr}; use crate::configuration::subgraph::SubgraphConfiguration; use crate::plugins::cache::invalidation::InvalidationRequest; use crate::plugins::telemetry::consts::OTEL_STATUS_CODE; @@ -91,7 +91,7 @@ impl InvalidationService { } } -impl Service for InvalidationService { +impl Service for InvalidationService { type Response = router::Response; type Error = BoxError; type Future = BoxFuture<'static, Result>; @@ -108,13 +108,15 @@ impl Service for InvalidationService { let (parts, body) = req.router_request.into_parts(); if !parts.headers.contains_key(AUTHORIZATION) { Span::current().record(OTEL_STATUS_CODE, OTEL_STATUS_CODE_ERROR); - return Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::UNAUTHORIZED) - .body(router::body::from_bytes("Missing authorization header")) - .map_err(BoxError::from)?, - context: req.context, - }); + return router::Response::error_builder() + .status_code(StatusCode::UNAUTHORIZED) + 
.error(graphql::Error::builder() + .message(String::from("Missing authorization header")) + .extension_code(StatusCode::UNAUTHORIZED.to_string()) + .build() + ) + .context(req.context) + .build(); } match parts.method { Method::POST => { @@ -156,66 +158,66 @@ impl Service for InvalidationService { if !valid_shared_key { Span::current() .record(OTEL_STATUS_CODE, OTEL_STATUS_CODE_ERROR); - return Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::UNAUTHORIZED) - .body(router::body::from_bytes( - "Invalid authorization header", - )) - .map_err(BoxError::from)?, - context: req.context, - }); + return router::Response::error_builder() + .status_code(StatusCode::UNAUTHORIZED) + .error(graphql::Error::builder() + .message("Invalid authorization header") + .extension_code(StatusCode::UNAUTHORIZED.to_string()) + .build() + ) + .context(req.context) + .build(); } match invalidation .invalidate(InvalidationOrigin::Endpoint, body) .instrument(tracing::info_span!("invalidate")) .await { - Ok(count) => Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::ACCEPTED) - .body(router::body::from_bytes(serde_json::to_string( - &json!({ - "count": count - }), - )?)) - .map_err(BoxError::from)?, - context: req.context, - }), + Ok(count) => router::Response::builder() + .data(json!({ "count": count })) + .status_code(StatusCode::ACCEPTED) + .context( req.context) + .build(), Err(err) => { Span::current() .record(OTEL_STATUS_CODE, OTEL_STATUS_CODE_ERROR); - Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::BAD_REQUEST) - .body(router::body::from_bytes(err.to_string())) - .map_err(BoxError::from)?, - context: req.context, - }) + router::Response::error_builder() + .status_code(StatusCode::BAD_REQUEST) + .error(graphql::Error::builder() + .message(err.to_string()) + .extension_code(StatusCode::BAD_REQUEST.to_string()) + .build() + ) + .context(req.context) + .build() } } } Err(err) => { 
Span::current().record(OTEL_STATUS_CODE, OTEL_STATUS_CODE_ERROR); - Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::BAD_REQUEST) - .body(router::body::from_bytes(err)) - .map_err(BoxError::from)?, - context: req.context, - }) + router::Response::error_builder() + .status_code(StatusCode::BAD_REQUEST) + .error(graphql::Error::builder() + .message(err) + .extension_code(StatusCode::BAD_REQUEST.to_string()) + .build() + ) + .context(req.context) + .build() } } } _ => { Span::current().record(OTEL_STATUS_CODE, OTEL_STATUS_CODE_ERROR); - Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::METHOD_NOT_ALLOWED) - .body(router::body::from_bytes("".to_string())) - .map_err(BoxError::from)?, - context: req.context, - }) + router::Response::error_builder() + .status_code(StatusCode::METHOD_NOT_ALLOWED) + .error(graphql::Error::builder() + .message("".to_string()) + .extension_code(StatusCode::METHOD_NOT_ALLOWED.to_string()) + .build() + ) + .context(req.context) + .build() } } } diff --git a/apollo-router/src/plugins/coprocessor/mod.rs b/apollo-router/src/plugins/coprocessor/mod.rs index 6d0b881324..c1d3d70e8c 100644 --- a/apollo-router/src/plugins/coprocessor/mod.rs +++ b/apollo-router/src/plugins/coprocessor/mod.rs @@ -1105,10 +1105,11 @@ where )); // Finally, return a response which has a Body that wraps our stream of response chunks. 
- Ok(router::Response { - context, - response: http::Response::from_parts(parts, final_stream), - }) + router::Response::parts_builder() + .context(context) + .parts(parts) + .body(final_stream) + .build() } // ----------------------------------------------------------------------------------------------------- diff --git a/apollo-router/src/plugins/fleet_detector.rs b/apollo-router/src/plugins/fleet_detector.rs index 30c41bc6ff..ffeab20c57 100644 --- a/apollo-router/src/plugins/fleet_detector.rs +++ b/apollo-router/src/plugins/fleet_detector.rs @@ -270,19 +270,23 @@ impl PluginPrivate for FleetDetector { context: req.context, }) // Count the number of response bytes from the router to clients - .map_response(move |res: router::Response| router::Response { - response: res.response.map(move |body| { - router::body::from_result_stream(body.into_data_stream().inspect(|res| { - if let Ok(bytes) = res { - u64_counter!( - "apollo.router.operations.response_size", - "Total number of response bytes to clients", - bytes.len() as u64 - ); - } - })) - }), - context: res.context, + .map_response(move |res: router::Response| { + let (parts, body) = res.response.into_parts(); + let body = router::body::from_result_stream(body.into_data_stream().inspect(|res| { + if let Ok(bytes) = res { + u64_counter!( + "apollo.router.operations.response_size", + "Total number of response bytes to clients", + bytes.len() as u64 + ); + } + })); + router::Response::parts_builder() + .parts(parts) + .body(body) + .context(res.context) + .build() + .expect("cannot fail") // TODO better error handling }) .boxed() } @@ -530,13 +534,14 @@ mod tests { use http::StatusCode; use tower::Service as _; - + use crate::graphql; use super::*; use crate::metrics::FutureMetricsExt as _; use crate::metrics::collect_metrics; use crate::metrics::test_utils::MetricType; use crate::plugin::test::MockHttpClientService; use crate::plugin::test::MockRouterService; + use 
crate::query_planner::build_operation_with_aliasing; use crate::services::router::Body; #[tokio::test] @@ -551,15 +556,18 @@ mod tests { .expect_call() .times(1) .returning(|req: router::Request| { - Ok(router::Response { - context: req.context, - response: http::Response::builder() - .status(StatusCode::BAD_REQUEST) - .header("content-type", "application/json") - // making sure the request body is consumed - .body(req.router_request.into_body()) - .unwrap(), - }) + // making sure the request body is consumed + req.router_request.into_body(); + router::Response::error_builder() + .context(req.context) + .status_code(StatusCode::BAD_REQUEST) + .header("content-type", "application/json") + .error(graphql::Error::builder() + .message("bad request") + .extension_code(StatusCode::BAD_REQUEST.to_string()) + .build() + ) + .build() }); let mut bad_request_router_service = plugin.router_service(mock_bad_request_service.boxed()); @@ -609,15 +617,18 @@ mod tests { .expect_call() .times(1) .returning(|req: router::Request| { - Ok(router::Response { - context: req.context, - response: http::Response::builder() - .status(StatusCode::BAD_REQUEST) - .header("content-type", "application/json") - // making sure the request body is consumed - .body(req.router_request.into_body()) - .unwrap(), - }) + // making sure the request body is consumed + req.router_request.into_body(); + router::Response::error_builder() + .context(req.context) + .status_code(StatusCode::BAD_REQUEST) + .header("content-type", "application/json") + .error(graphql::Error::builder() + .message("bad request") + .extension_code(StatusCode::BAD_REQUEST.to_string()) + .build() + ) + .build() }); let mut bad_request_router_service = plugin.router_service(mock_bad_request_service.boxed()); diff --git a/apollo-router/src/plugins/healthcheck/mod.rs b/apollo-router/src/plugins/healthcheck/mod.rs index 6c399fe8ca..22ed30e847 100644 --- a/apollo-router/src/plugins/healthcheck/mod.rs +++ 
b/apollo-router/src/plugins/healthcheck/mod.rs @@ -290,14 +290,11 @@ impl PluginPrivate for HealthCheck { }; tracing::trace!(?health, request = ?req.router_request, "health check"); async move { - Ok(router::Response { - response: http::Response::builder().status(status_code).body( - router::body::from_bytes( - serde_json::to_vec(&health).map_err(BoxError::from)?, - ), - )?, - context: req.context, - }) + router::Response::builder() + .status_code(status_code) + .data(serde_json_bytes::to_value(&health).map_err(BoxError::from)?) + .context(req.context) + .build() } }) .boxed(), diff --git a/apollo-router/src/plugins/record_replay/record.rs b/apollo-router/src/plugins/record_replay/record.rs index 7c4cc1169a..f14293231b 100644 --- a/apollo-router/src/plugins/record_replay/record.rs +++ b/apollo-router/src/plugins/record_replay/record.rs @@ -127,13 +127,11 @@ impl Plugin for Record { let stream = stream.into_data_stream().chain(after_complete); - Ok(router::Response { - context: res.context, - response: http::Response::from_parts( - parts, - router::body::from_result_stream(stream), - ), - }) + router::Response::parts_builder() + .context(res.context) + .parts(parts) + .body(router::body::from_result_stream(stream)) + .build() } }) .service(service) diff --git a/apollo-router/src/plugins/subscription.rs b/apollo-router/src/plugins/subscription.rs index e9dbb2bfdd..aed734b0ec 100644 --- a/apollo-router/src/plugins/subscription.rs +++ b/apollo-router/src/plugins/subscription.rs @@ -465,13 +465,15 @@ impl Service for CallbackService { let cb_body = match cb_body { Ok(cb_body) => cb_body, Err(err) => { - return Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::BAD_REQUEST) - .body(router::body::from_bytes(err)) - .map_err(BoxError::from)?, - context: req.context, - }); + return router::Response::error_builder() + .status_code(StatusCode::BAD_REQUEST) + .error(graphql::Error::builder() + .message(err) + 
.extension_code(StatusCode::BAD_REQUEST.to_string()) + .build() + ) + .context(req.context) + .build(); } }; let id = cb_body.id().clone(); @@ -492,13 +494,15 @@ impl Service for CallbackService { let expected_hashed_verifier = verifier_hasher.finalize(); if hashed_verifier != expected_hashed_verifier { - return Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::UNAUTHORIZED) - .body(router::body::from_bytes("verifier doesn't match")) - .map_err(BoxError::from)?, - context: req.context, - }); + return router::Response::error_builder() + .status_code(StatusCode::UNAUTHORIZED) + .error(graphql::Error::builder() + .message("verifier doesn't match") + .extension_code(StatusCode::UNAUTHORIZED.to_string()) + .build() + ) + .context(req.context) + .build(); } if let Err(res) = ensure_id_consistency(&req.context, &sub_id, &id) { @@ -513,13 +517,15 @@ impl Service for CallbackService { let mut handle = match notify.subscribe_if_exist(id).await? { Some(handle) => handle.into_sink(), None => { - return Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::NOT_FOUND) - .body(router::body::from_bytes("suscription doesn't exist")) - .map_err(BoxError::from)?, - context: req.context, - }); + return router::Response::error_builder() + .status_code(StatusCode::NOT_FOUND) + .error(graphql::Error::builder() + .message("subscription doesn't exist") + .extension_code(StatusCode::NOT_FOUND.to_string()) + .build() + ) + .context(req.context) + .build(); } }; // Keep the subscription to the client opened @@ -532,35 +538,35 @@ impl Service for CallbackService { ); handle.send_sync(payload)?; - Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::OK) - .body(router::body::empty()) - .map_err(BoxError::from)?, - context: req.context, - }) + router::Response::builder() + .context(req.context) + .build() } CallbackPayload::Subscription(SubscriptionPayload::Check { .. }) => { if notify.exist(id).await? 
{ - Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::NO_CONTENT) - .header(HeaderName::from_static(CALLBACK_SUBSCRIPTION_HEADER_NAME), HeaderValue::from_static(CALLBACK_SUBSCRIPTION_HEADER_VALUE)) - .body(router::body::empty()) - .map_err(BoxError::from)?, - context: req.context, - }) + router::Response::error_builder() + .status_code(StatusCode::NO_CONTENT) + .header(HeaderName::from_static(CALLBACK_SUBSCRIPTION_HEADER_NAME), HeaderValue::from_static(CALLBACK_SUBSCRIPTION_HEADER_VALUE)) + .error(graphql::Error::builder() + .message(String::default()) + .extension_code(StatusCode::NO_CONTENT.to_string()) + .build() + ) + .context(req.context) + .build() } else { - Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::NOT_FOUND) - .header(HeaderName::from_static(CALLBACK_SUBSCRIPTION_HEADER_NAME), HeaderValue::from_static(CALLBACK_SUBSCRIPTION_HEADER_VALUE)) - .body(router::body::from_bytes("suscription doesn't exist")) - .map_err(BoxError::from)?, - context: req.context, - }) + router::Response::error_builder() + .status_code(StatusCode::NOT_FOUND) + .header(HeaderName::from_static(CALLBACK_SUBSCRIPTION_HEADER_NAME), HeaderValue::from_static(CALLBACK_SUBSCRIPTION_HEADER_VALUE)) + .error(graphql::Error::builder() + .message("subscription doesn't exist") + .extension_code(StatusCode::NOT_FOUND.to_string()) + .build() + ) + .context(req.context) + .build() } } CallbackPayload::Subscription(SubscriptionPayload::Heartbeat { @@ -569,32 +575,38 @@ impl Service for CallbackService { verifier, }) => { if !ids.contains(&id) { - return Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::UNAUTHORIZED) - .body(router::body::from_bytes("id used for the verifier is not part of ids array")) - .map_err(BoxError::from)?, - context: req.context, - }); + return router::Response::error_builder() + .status_code(StatusCode::UNAUTHORIZED) + .error(graphql::Error::builder() + .message("id used for 
the verifier is not part of ids array") + .extension_code(StatusCode::UNAUTHORIZED.to_string()) + .build() + ) + .context(req.context) + .build() } let (mut valid_ids, invalid_ids) = notify.invalid_ids(ids).await?; if invalid_ids.is_empty() { - Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::NO_CONTENT) - .body(router::body::empty()) - .map_err(BoxError::from)?, - context: req.context, - }) + router::Response::error_builder() + .status_code(StatusCode::NO_CONTENT) + .error(graphql::Error::builder() + .message(String::default()) + .extension_code(StatusCode::NO_CONTENT.to_string()) + .build() + ) + .context(req.context) + .build() } else if valid_ids.is_empty() { - Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::NOT_FOUND) - .body(router::body::from_bytes("suscriptions don't exist")) - .map_err(BoxError::from)?, - context: req.context, - }) + router::Response::error_builder() + .status_code(StatusCode::NOT_FOUND) + .error(graphql::Error::builder() + .message("subscriptions don't exist") + .extension_code(StatusCode::NOT_FOUND.to_string()) + .build() + ) + .context(req.context) + .build() } else { let (id, verifier) = if invalid_ids.contains(&id) { (id, verifier) @@ -610,19 +622,19 @@ impl Service for CallbackService { (new_id, verifier) }; - Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::NOT_FOUND) - .body(router::body::from_bytes( - serde_json::to_string_pretty(&InvalidIdsPayload{ - invalid_ids, - id, - verifier, - })?, - )) - .map_err(BoxError::from)?, - context: req.context, - }) + router::Response::error_builder() + .status_code(StatusCode::NOT_FOUND) + .error(graphql::Error::builder() + .message(serde_json::to_string_pretty(&InvalidIdsPayload{ + invalid_ids, + id, + verifier, + }).map_err(BoxError::from)?) 
+ .extension_code(StatusCode::NOT_FOUND.to_string()) + .build() + ) + .context(req.context) + .build() } } CallbackPayload::Subscription(SubscriptionPayload::Complete { @@ -633,22 +645,26 @@ impl Service for CallbackService { let mut handle = match notify.subscribe(id.clone()).await { Ok(handle) => handle.into_sink(), Err(NotifyError::UnknownTopic) => { - return Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::NOT_FOUND) - .body(router::body::from_bytes("unknown topic")) - .map_err(BoxError::from)?, - context: req.context, - }); + return router::Response::error_builder() + .status_code(StatusCode::NOT_FOUND) + .error(graphql::Error::builder() + .message("unknown topic") + .extension_code(StatusCode::NOT_FOUND.to_string()) + .build() + ) + .context(req.context) + .build(); }, Err(err) => { - return Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::NOT_FOUND) - .body(router::body::from_bytes(err.to_string())) - .map_err(BoxError::from)?, - context: req.context, - }); + return router::Response::error_builder() + .status_code(StatusCode::NOT_FOUND) + .error(graphql::Error::builder() + .message(err.to_string()) + .extension_code(StatusCode::NOT_FOUND.to_string()) + .build() + ) + .context(req.context) + .build(); } }; u64_counter!( @@ -661,41 +677,45 @@ impl Service for CallbackService { if let Err(_err) = handle.send_sync( graphql::Response::builder().errors(errors).build(), ) { - return Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::NOT_FOUND) - .body(router::body::from_bytes("cannot send errors to the client")) - .map_err(BoxError::from)?, - context: req.context, - }); + return router::Response::error_builder() + .status_code(StatusCode::NOT_FOUND) + .error(graphql::Error::builder() + .message("cannot send errors to the client") + .extension_code(StatusCode::NOT_FOUND.to_string()) + .build() + ) + .context(req.context) + .build(); } } if let Err(_err) = 
notify.force_delete(id).await { - return Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::NOT_FOUND) - .body(router::body::from_bytes("cannot force delete")) - .map_err(BoxError::from)?, - context: req.context, - }); + return router::Response::error_builder() + .status_code(StatusCode::NOT_FOUND) + .error(graphql::Error::builder() + .message("cannot force delete") + .extension_code(StatusCode::NOT_FOUND.to_string()) + .build() + ) + .context(req.context) + .build(); } - Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::ACCEPTED) - .body(router::body::empty()) - .map_err(BoxError::from)?, - context: req.context, - }) + + router::Response::error_builder() + .status_code(StatusCode::ACCEPTED) + .context(req.context) + .build() } } } - _ => Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::METHOD_NOT_ALLOWED) - .body(router::body::empty()) - .map_err(BoxError::from)?, - context: req.context, - }), + _ => router::Response::error_builder() + .status_code(StatusCode::METHOD_NOT_ALLOWED) + .error(graphql::Error::builder() + .message(String::default()) + .extension_code(StatusCode::METHOD_NOT_ALLOWED.to_string()) + .build() + ) + .context(req.context) + .build() } } .instrument(tracing::info_span!("subscription_callback")), @@ -722,15 +742,18 @@ fn ensure_id_consistency( ) -> Result<(), router::Response> { (id_from_path != id_from_body) .then(|| { - Err(router::Response { - response: http::Response::builder() - .status(StatusCode::BAD_REQUEST) - .body(router::body::from_bytes( - "id from url path and id from body are different", - )) - .expect("this body is valid"), - context: context.clone(), - }) + Err( + router::Response::error_builder() + .status_code(StatusCode::BAD_REQUEST) + .error(graphql::Error::builder() + .message("id from url path and id from body are different") + .extension_code(StatusCode::BAD_REQUEST.to_string()) + .build() + ) + .context(context.clone()) + 
.build() + .expect("this response is valid") + ) }) .unwrap_or_else(|| Ok(())) } diff --git a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs index 4d3791e214..2cec78a385 100644 --- a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs +++ b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs @@ -186,14 +186,13 @@ impl Service for PrometheusService { // Let's remove any problems they may have created for us. let stats = String::from_utf8_lossy(&result); let modified_stats = stats.replace("_total_total", "_total"); - Ok(router::Response { - response: http::Response::builder() - .status(StatusCode::OK) - .header(http::header::CONTENT_TYPE, "text/plain; version=0.0.4") - .body(router::body::from_bytes(modified_stats)) - .map_err(BoxError::from)?, - context: req.context, - }) + + router::Response::builder() + .status_code(StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/plain; version=0.0.4") + .data(modified_stats) + .context(req.context) + .build() }) } } diff --git a/apollo-router/src/services/layers/static_page.rs b/apollo-router/src/services/layers/static_page.rs index a6ec6e91a5..7955ed88d6 100644 --- a/apollo-router/src/services/layers/static_page.rs +++ b/apollo-router/src/services/layers/static_page.rs @@ -59,17 +59,15 @@ where let res = if req.router_request.method() == Method::GET && accepts_html(req.router_request.headers()) { - let response = http::Response::builder() - .header( - CONTENT_TYPE, - HeaderValue::from_static(mime::TEXT_HTML_UTF_8.as_ref()), - ) - .body(router::body::from_bytes(page.clone())) - .unwrap(); - ControlFlow::Break(router::Response { - response, - context: req.context, - }) + ControlFlow::Break( + router::Response::builder() + .header(CONTENT_TYPE, + HeaderValue::from_static(mime::TEXT_HTML_UTF_8.as_ref())) + .data(serde_json_bytes::Value::from_bytes(page.clone()).map_err(BoxError::from)?) 
+ .context(req.context) + .build() + .unwrap() + ) } else { ControlFlow::Continue(req) }; diff --git a/apollo-router/src/services/router.rs b/apollo-router/src/services/router.rs index 3e9c3c0fc9..4dc27f5b67 100644 --- a/apollo-router/src/services/router.rs +++ b/apollo-router/src/services/router.rs @@ -238,10 +238,17 @@ impl Response { status_code: Option, headers: MultiMap, context: Context, - protocol_mode: Option ) -> Result { if !errors.is_empty() { - let context = Self::update_error_context(context, errors); + context.insert_json_value(CONTAINS_GRAPHQL_ERROR, serde_json_bytes::Value::Bool(true)); + // This will ONLY capture errors if any were added during router service processing. + // We will avoid this path if no router service errors exist, even if errors were passed + // from the supergraph service, because that path builds the router::Response using the + // constructor instead of new(). This is ok because we only need this context to count + // errors introduced in the router service. 
+ context + .insert(ROUTER_RESPONSE_ERRORS, errors.clone()) + .expect("Unable to serialize router response errors list for context"); } // Build a response let b = graphql::Response::builder() @@ -267,24 +274,6 @@ impl Response { let body_string = serde_json::to_string(&res)?; let body = body::from_bytes(body_string.clone()); - let body = match protocol_mode { - None => { body } - Some(mode) => { - context - .extensions() - .with_lock(|lock| lock.insert(mode)); - let response_multipart = match mode { - ProtocolMode::Subscription => { - // TODO RouterBody doesn't implement stream trait - crate::protocols::multipart::Multipart::new(body, mode) - } - ProtocolMode::Defer => { - crate::protocols::multipart::Multipart::new(once(ready(res)).chain(body), mode) - } - }; - body::from_result_stream(response_multipart) - } - }; let response = builder.body(body)?; // Stash the body in the extensions so we can access it later let mut response = Self { response, context }; @@ -295,27 +284,20 @@ impl Response { #[builder(visibility = "pub")] fn parts_new( - response: http::Response, + parts: Parts, + body: Body, context: Context, + body_to_stash: Option, ) -> Result { - if !response.body(). { - Self::update_error_context() + let response = http::Response::from_parts(parts, body); + let mut res = Self { + response, + context + }; + if body_to_stash.is_some() { + res.stash_the_body_in_extensions(body_to_stash.unwrap()) } - let response = http::Response::from_parts(parts, body); - } - - fn update_error_context(context: Context, errors: Vec) -> Context { - context.insert_json_value(CONTAINS_GRAPHQL_ERROR, serde_json_bytes::Value::Bool(true)); - // This will ONLY capture errors if any were added during router service processing. - // We will avoid this path if no router service errors exist, even if errors were passed - // from the supergraph service, because that path builds the router::Response using the - // constructor instead of new(). 
This is ok because we only need this context to count - // errors introduced in the router service. - context - .insert(ROUTER_RESPONSE_ERRORS, errors.clone()) - .expect("Unable to serialize router response errors list for context"); - - context + Ok(res) } /// This is the constructor (or builder) to use when constructing a Response that represents a global error. diff --git a/apollo-router/src/services/router/service.rs b/apollo-router/src/services/router/service.rs index 61616b9d55..e42cecc838 100644 --- a/apollo-router/src/services/router/service.rs +++ b/apollo-router/src/services/router/service.rs @@ -291,8 +291,7 @@ impl RouterService { match body.next().await { None => { tracing::error!("router service is not available to process request",); - // TODO Ideally this would be done as a different from_parts builder - Ok(router::Response::builder() + router::Response::error_builder() .error( graphql::Error::builder() .message(String::from("router service is not available to process request")) @@ -300,28 +299,31 @@ impl RouterService { .build(), ) .status_code(StatusCode::SERVICE_UNAVAILABLE) - .data(json!({"message": "router service is not available to process request"})) .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) .context(context) .build() - .expect("cannot fail") - ) } Some(response) => { if !response.has_next.unwrap_or(false) && !response.subscribed.unwrap_or(false) && (accepts_json || accepts_wildcard) { - // TODO we will now ALWAYS stash body - // TODO we are no longer generating a body ser trace - router::Response::builder() - .and_label(response.label.clone()) - .data(response.data.clone()) - .and_path(response.path.clone()) - .errors(response.errors.clone()) - .extensions(response.extensions.clone()) - .headers(parts.headers.clone().into()) - .context(context.clone()) + let body: Result = tracing::trace_span!("serialize_response") + .in_scope(|| { + let body = serde_json::to_string(&response)?; + Ok(body) + }); + let body = body?; + // 
XXX(@goto-bus-stop): I strongly suspect that it would be better to move this into its own layer. + let display_router_response = context + .extensions() + .with_lock(|ext| ext.get::().is_some()); + + router::Response::parts_builder() + .parts(parts) + .body(router::body::from_bytes(body.clone())) + .and_body_to_stash(if display_router_response { Some(body) } else { None }) + .context(context) .build() } else if accepts_multipart_defer || accepts_multipart_subscription { // Useful when you're using a proxy like nginx which enable proxy_buffering by default (http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_buffering) @@ -338,16 +340,21 @@ impl RouterService { } else { ProtocolMode::Defer }; + context + .extensions() + .with_lock(|lock| lock.insert(protocol_mode)); + + let response_multipart = match protocol_mode { + ProtocolMode::Subscription => Multipart::new(body, protocol_mode), + ProtocolMode::Defer => { + Multipart::new(once(ready(response)).chain(body), protocol_mode) + } + }; - router::Response::builder() - .and_label(response.label.clone()) - .data(response.data.clone()) - .and_path(response.path.clone()) - .errors(response.errors.clone()) - .extensions(response.extensions.clone()) - .headers(parts.headers.clone().into()) - .context(context.clone()) - .protocol_mode(protocol_mode) + RouterResponse::parts_builder() + .parts(parts) + .body(router::body::from_result_stream(response_multipart)) + .context(context) .build() } else { // this should be unreachable due to a previous check, but just to be sure... 
From cc4fd6c4aad8e0ffda6e071ee013b936e3e86feb Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Tue, 3 Jun 2025 15:33:06 -0400 Subject: [PATCH 26/46] fix comment --- apollo-router/src/services/router.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/apollo-router/src/services/router.rs b/apollo-router/src/services/router.rs index 4dc27f5b67..f6599b0dd6 100644 --- a/apollo-router/src/services/router.rs +++ b/apollo-router/src/services/router.rs @@ -241,11 +241,12 @@ impl Response { ) -> Result { if !errors.is_empty() { context.insert_json_value(CONTAINS_GRAPHQL_ERROR, serde_json_bytes::Value::Bool(true)); - // This will ONLY capture errors if any were added during router service processing. - // We will avoid this path if no router service errors exist, even if errors were passed - // from the supergraph service, because that path builds the router::Response using the - // constructor instead of new(). This is ok because we only need this context to count - // errors introduced in the router service. + // This is ONLY guaranteed to capture errors if any were added during router service + // processing. We will sometimes avoid this path if no router service errors exist, even + // if errors were passed from the supergraph service, because that path builds the + // router::Response using parts_new(). This is ok because we only need this context to + // count errors introduced in the router service; however, it means that we handle error + // counting differently in this layer than others. 
context .insert(ROUTER_RESPONSE_ERRORS, errors.clone()) .expect("Unable to serialize router response errors list for context"); From 8c1c4d7ac98bccd395c3e8473f7b7059ac090619 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 4 Jun 2025 11:00:30 -0400 Subject: [PATCH 27/46] Use apollo id for error counting --- apollo-router/src/graphql/mod.rs | 28 +++++----- .../src/plugins/telemetry/error_counter.rs | 55 +++++++++---------- 2 files changed, 38 insertions(+), 45 deletions(-) diff --git a/apollo-router/src/graphql/mod.rs b/apollo-router/src/graphql/mod.rs index 3144f8aae9..4c3005fe6c 100644 --- a/apollo-router/src/graphql/mod.rs +++ b/apollo-router/src/graphql/mod.rs @@ -72,17 +72,8 @@ pub struct Error { #[serde(default, skip_serializing_if = "Object::is_empty")] pub extensions: Object, - // TODO do we need to implement a random one for default? /// A unique identifier for this error apollo_id: Uuid - - // TODO add attr to mark as counted, skip serialize? - // TODO would include_subgraph_errors or a customer's plugin change this? - // TODO Does serialization happen btwn layers (which would break this also)? - // TODO if customer's are the only cause then maybe we can warn that this could double count. - // TODO OR make an "apollo error ID" that is serialized. Use this as hash key in context - // TODO make on init, public getter - // TODO OR can we store a list of errors in context. 
Assumes that Eq is actually strict equality } // Implement getter and getter_mut to not use pub field directly @@ -203,13 +194,14 @@ impl Error { ) .transpose()?; - Ok(Error { + Ok(Self::new( message, locations, path, + None, extensions, - apollo_id: apollo_id.unwrap_or_else(Uuid::new_v4) - }) + apollo_id + )) } pub(crate) fn from_value_completion_value(value: &Value) -> Option { @@ -239,13 +231,19 @@ impl Error { .and_then(|p: &serde_json_bytes::Value| -> Option { serde_json_bytes::from_value(p.clone()).ok() }); - Some(Error { + let apollo_id = value_completion + .get("apolloId") + .and_then(|id| id.as_str()) + .map(|id| Uuid::from_str(id).ok())?; + + Some(Self::new( message, locations, path, + None, extensions, - apollo_id: Uuid::new_v4() - }) + apollo_id, // TODO confirm this exists from serialized error + )) } pub fn extension_code(&self) -> Option { diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 44b7108c94..bdc3c528df 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -1,11 +1,11 @@ use std::collections::HashMap; use std::sync::Arc; - +use ahash::{HashSet, HashSetExt}; use futures::StreamExt; use futures::future::ready; use futures::stream::once; use serde::de::DeserializeOwned; - +use uuid::Uuid; use crate::Context; use crate::apollo_studio_interop::UsageReporting; use crate::context::{COUNTED_ERRORS, ROUTER_RESPONSE_ERRORS}; @@ -35,7 +35,7 @@ pub(crate) async fn count_subgraph_errors( if !response_body.errors.is_empty() { count_operation_errors(&response_body.errors, &context, &errors_config); // Refresh context with the most up-to-date list of errors - let _ = context.insert(COUNTED_ERRORS, to_map(&response_body.errors)); + let _ = context.insert(COUNTED_ERRORS, to_set(&response_body.errors)); } SubgraphResponse { context: response.context, @@ -94,7 +94,7 @@ pub(crate) async fn count_supergraph_errors( 
if !response_body.errors.is_empty() { count_operation_errors(&response_body.errors, &context, &errors_config); // Refresh context with the most up-to-date list of errors - let _ = context.insert(COUNTED_ERRORS, to_map(&response_body.errors)); + let _ = context.insert(COUNTED_ERRORS, to_set(&response_body.errors)); } } }); @@ -126,7 +126,7 @@ pub(crate) async fn count_execution_errors( if !response_body.errors.is_empty() { count_operation_errors(&response_body.errors, &context, &errors_config); // Refresh context with the most up-to-date list of errors - let _ = context.insert(COUNTED_ERRORS, to_map(&response_body.errors)); + let _ = context.insert(COUNTED_ERRORS, to_set(&response_body.errors)); } }); @@ -151,14 +151,17 @@ pub(crate) async fn count_router_errors( let context = response.context.clone(); let errors_config = errors_config.clone(); - // We look at context for our current errors instead of the existing config so that we don't - // have to do a full deserialization of the response + // We look at context for our current errors instead of the existing response to avoid a full + // response deserialization. let errors: Vec = unwrap_from_context(&context, ROUTER_RESPONSE_ERRORS); if !errors.is_empty() { count_operation_errors(&errors, &context, &errors_config); - // Refresh context ONLY when we have errors. This - // TODO don't overwrite, append? - let _ = context.insert(COUNTED_ERRORS, to_map(&errors)); + // Router layer handling is unique in that the list of new errors from context may not + // include errors we previously counted. Thus, we must combine the set of previously counted + // errors with the set of new errors here before adding to context. 
+ let mut counted_errors: HashSet = unwrap_from_context(&context, COUNTED_ERRORS); + counted_errors.extend(errors.iter().map(Error::apollo_id)); + let _ = context.insert(COUNTED_ERRORS, counted_errors); } // TODO confirm the count_operation_error_codes() INVALID_ACCEPT_HEADER case is handled here @@ -169,16 +172,11 @@ pub(crate) async fn count_router_errors( } } -fn to_map(errors: &[Error]) -> HashMap { - let mut map: HashMap = HashMap::new(); - errors.iter().for_each(|error| { - // TODO hash the full error more uniquely - map.entry(error.extension_code().unwrap_or_default()) - .and_modify(|count| *count += 1) - .or_insert(1); - }); - - map +fn to_set(errors: &[Error]) -> HashSet { + errors + .iter() + .map(Error::apollo_id) + .collect() } fn count_operation_errors( @@ -189,7 +187,7 @@ fn count_operation_errors( let _id_str = errors[0].apollo_id().to_string(); // TODO DEBUG REMOVE let _msg_str = errors[0].message.clone(); // TODO DEBUG REMOVE - let previously_counted_errors_map: HashMap = + let previously_counted_errors_map: HashSet = unwrap_from_context(context, COUNTED_ERRORS); let mut operation_id: String = unwrap_from_context(context, APOLLO_OPERATION_ID); @@ -215,21 +213,16 @@ fn count_operation_errors( } // TODO how do we account for redacted errors when comparing? 
Likely skip them completely (they will have been counted with correct codes in subgraph layer) - let mut diff_map = previously_counted_errors_map.clone(); + // TODO ^This might not matter now that we're using apollo_id for error in errors { - let code = error.extension_code().unwrap_or_default(); + let apollo_id = error.apollo_id(); // If we already counted this error in a previous layer, then skip counting it again - if let Some(count) = diff_map.get_mut(&code) { - *count = count.saturating_sub(1); - if *count == 0 { - diff_map.remove(&code); - } + if previously_counted_errors_map.contains(&apollo_id) { continue; } - // If we haven't seen this error before, or we see more occurrences than we've counted - // before, then count the error + // If we haven't seen this error before, then count it let service = error .extensions .get("service") @@ -256,6 +249,8 @@ fn count_operation_errors( ) }; + let code = error.extension_code().unwrap_or_default(); + if send_otlp_errors { let severity_str = severity .unwrap_or(tracing::Level::ERROR.as_str()) From e91ec8691778ee80ab94f9e2141e783857aaea3e Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 4 Jun 2025 13:58:13 -0400 Subject: [PATCH 28/46] fix test error comparisons --- apollo-router/src/graphql/response.rs | 63 +++++++++++-------- .../layers/allow_only_http_post_mutations.rs | 34 +++++----- 2 files changed, 52 insertions(+), 45 deletions(-) diff --git a/apollo-router/src/graphql/response.rs b/apollo-router/src/graphql/response.rs index 1cb220ba9a..ab9eeca2c6 100644 --- a/apollo-router/src/graphql/response.rs +++ b/apollo-router/src/graphql/response.rs @@ -330,8 +330,16 @@ mod tests { .to_string() .as_str(), ); + let response = result.unwrap(); + let actual_error_apollo_id = response + .clone() + .errors + .get(0) + .unwrap() + .apollo_id + .clone(); assert_eq!( - result.unwrap(), + response, Response::builder() .data(json!({ "hero": { @@ -352,18 +360,16 @@ mod tests { ] } })) - .errors(vec![Error { - message: "Name 
for character with ID 1002 could not be fetched.".into(), - locations: vec!(Location { line: 6, column: 7 }), - path: Some(Path::from("hero/heroFriends/1/name")), - extensions: bjson!({ - "error-extension": 5, - }) - .as_object() - .cloned() - .unwrap(), - // TODO need to ignore the apollo id for comparison - }]) + .errors(vec![ + Error::builder() + .message("Name for character with ID 1002 could not be fetched.") + .locations(vec!(Location { line: 6, column: 7 })) + .path(Path::from("hero/heroFriends/1/name")) + .extensions(bjson!({ "error-extension": 5, }).as_object().cloned().unwrap()) + // Use actual's generated UUID for comparison + .apollo_id(actual_error_apollo_id) + .build() + ]) .extensions( bjson!({ "response-extension": 3, @@ -420,8 +426,16 @@ mod tests { .to_string() .as_str(), ); + let response = result.unwrap(); + let actual_error_apollo_id = response + .clone() + .errors + .get(0) + .unwrap() + .apollo_id + .clone(); assert_eq!( - result.unwrap(), + response, Response::builder() .label("part".to_owned()) .data(json!({ @@ -444,17 +458,16 @@ mod tests { } })) .path(Path::from("hero/heroFriends/1/name")) - .errors(vec![Error { - message: "Name for character with ID 1002 could not be fetched.".into(), - locations: vec!(Location { line: 6, column: 7 }), - path: Some(Path::from("hero/heroFriends/1/name")), - extensions: bjson!({ - "error-extension": 5, - }) - .as_object() - .cloned() - .unwrap() - }]) + .errors(vec![ + Error::builder() + .message("Name for character with ID 1002 could not be fetched.") + .locations(vec!(Location { line: 6, column: 7 })) + .path(Path::from("hero/heroFriends/1/name")) + .extensions(bjson!({ "error-extension": 5, }).as_object().cloned().unwrap()) + // Use actual's generated UUID for comparison + .apollo_id(actual_error_apollo_id) + .build() + ]) .extensions( bjson!({ "response-extension": 3, diff --git a/apollo-router/src/services/layers/allow_only_http_post_mutations.rs 
b/apollo-router/src/services/layers/allow_only_http_post_mutations.rs index f16bf23ddc..df9526586a 100644 --- a/apollo-router/src/services/layers/allow_only_http_post_mutations.rs +++ b/apollo-router/src/services/layers/allow_only_http_post_mutations.rs @@ -149,7 +149,7 @@ mod forbid_http_get_mutations_tests { use tower::ServiceExt; use super::*; - use crate::Context; + use crate::{json_ext, Context}; use crate::error::Error; use crate::graphql::Response; use crate::plugin::test::MockSupergraphService; @@ -239,17 +239,6 @@ mod forbid_http_get_mutations_tests { #[tokio::test] async fn it_doesnt_let_non_http_post_mutations_pass_through() { - let expected_error = Error { - message: "Mutations can only be sent over HTTP POST".to_string(), - locations: Default::default(), - path: Default::default(), - extensions: serde_json_bytes::json!({ - "code": "MUTATION_FORBIDDEN" - }) - .as_object() - .unwrap() - .to_owned(), - }; let expected_status = StatusCode::METHOD_NOT_ALLOWED; let expected_allow_header = "POST"; @@ -276,21 +265,26 @@ mod forbid_http_get_mutations_tests { let mut service_stack = AllowOnlyHttpPostMutationsLayer::default().layer(mock_service); let services = service_stack.ready().await.unwrap(); - let mut actual_error = services.call(request).await.unwrap(); + let mut error_response = services.call(request).await.unwrap(); + let response = error_response.next_response().await.unwrap(); + let actual_error = response.errors[0].clone(); - assert_eq!(expected_status, actual_error.response.status()); + let expected_error = Error::builder() + .message( "Mutations can only be sent over HTTP POST".to_string()) + .extension_code("MUTATION_FORBIDDEN") + // Take UUID from actual to ensure equality + .apollo_id(actual_error.apollo_id()) + .build(); + + assert_eq!(expected_status, error_response.response.status()); assert_eq!( expected_allow_header, - actual_error.response.headers().get("Allow").unwrap() + error_response.response.headers().get("Allow").unwrap() ); - 
assert_error_matches(&expected_error, actual_error.next_response().await.unwrap()); + assert_eq!(actual_error, expected_error); } } - fn assert_error_matches(expected_error: &Error, response: Response) { - assert_eq!(&response.errors[0], expected_error); - } - fn create_request(method: Method, operation_kind: OperationKind) -> SupergraphRequest { let query = match operation_kind { OperationKind::Query => { From 50528ef9a75f329ac3bc117cc7da983e7d3b0d1e Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 4 Jun 2025 15:54:03 -0400 Subject: [PATCH 29/46] lint fixes --- apollo-router/src/axum_factory/listeners.rs | 3 +- apollo-router/src/graphql/mod.rs | 32 +++++------ apollo-router/src/graphql/response.rs | 38 ++++++------- .../plugins/cache/invalidation_endpoint.rs | 56 +++++++++++-------- apollo-router/src/plugins/fleet_detector.rs | 41 +++++++------- apollo-router/src/plugins/rhai/execution.rs | 4 +- apollo-router/src/plugins/rhai/router.rs | 4 +- apollo-router/src/plugins/rhai/subgraph.rs | 4 +- apollo-router/src/plugins/rhai/supergraph.rs | 4 +- apollo-router/src/plugins/subscription.rs | 19 +++---- .../src/plugins/telemetry/error_counter.rs | 23 ++++---- apollo-router/src/plugins/telemetry/mod.rs | 4 +- apollo-router/src/query_planner/fetch.rs | 29 +++++----- .../layers/allow_only_http_post_mutations.rs | 5 +- apollo-router/src/services/layers/apq.rs | 41 +++++++------- .../src/services/layers/static_page.rs | 13 +++-- apollo-router/src/services/router.rs | 23 +++----- apollo-router/src/services/router/service.rs | 11 +++- .../src/services/supergraph/service.rs | 2 +- apollo-router/src/spec/query.rs | 19 ++++--- 20 files changed, 189 insertions(+), 186 deletions(-) diff --git a/apollo-router/src/axum_factory/listeners.rs b/apollo-router/src/axum_factory/listeners.rs index edc60bad90..e49a0b0def 100644 --- a/apollo-router/src/axum_factory/listeners.rs +++ b/apollo-router/src/axum_factory/listeners.rs @@ -522,7 +522,6 @@ mod tests { use 
crate::configuration::Sandbox; use crate::configuration::Supergraph; use crate::services::router; - use crate::services::router::body; #[tokio::test] async fn it_makes_sure_same_listenaddrs_are_accepted() { @@ -555,7 +554,7 @@ mod tests { .data("this is a test") .context(req.context) .build() - .unwrap() + .unwrap(), ) }) .boxed(); diff --git a/apollo-router/src/graphql/mod.rs b/apollo-router/src/graphql/mod.rs index 4c3005fe6c..9f72093fe3 100644 --- a/apollo-router/src/graphql/mod.rs +++ b/apollo-router/src/graphql/mod.rs @@ -7,6 +7,7 @@ mod visitor; use std::fmt; use std::pin::Pin; use std::str::FromStr; + use apollo_compiler::response::GraphQLError as CompilerExecutionError; use apollo_compiler::response::ResponseDataPathSegment; use futures::Stream; @@ -73,7 +74,7 @@ pub struct Error { pub extensions: Object, /// A unique identifier for this error - apollo_id: Uuid + apollo_id: Uuid, } // Implement getter and getter_mut to not use pub field directly @@ -112,7 +113,7 @@ impl Error { /// Sets the "code" in the extension map. Will be ignored if extension already has this key /// set. /// - /// * `.apollo_id(impl Into<`[`UUID`]`>)` + /// * `.apollo_id(impl Into<`[`Uuid`]`>)` /// Optional. /// Sets the unique identifier for this Error. This should only be used in cases of /// deserialization or testing. If not given, the ID will be auto-generated. 
@@ -127,9 +128,9 @@ impl Error { extension_code: Option, // Skip the `Object` type alias in order to use buildstructor’s map special-casing mut extensions: JsonMap, - apollo_id: Option + apollo_id: Option, ) -> Self { - if let Some(code) = extension_code { + if let Some(code) = extension_code { extensions .entry("code") .or_insert(Value::String(ByteString::from(code))); @@ -139,7 +140,7 @@ impl Error { locations, path, extensions, - apollo_id: apollo_id.unwrap_or_else(|| Uuid::new_v4()) + apollo_id: apollo_id.unwrap_or_else(Uuid::new_v4), } } @@ -187,20 +188,15 @@ impl Error { .map_err(|err| MalformedResponseError { reason: format!("invalid `apolloId` within error: {}", err), })? - .map(|s| + .map(|s| { Uuid::from_str(s.as_str()).map_err(|err| MalformedResponseError { reason: format!("invalid `apolloId` within error: {}", err), }) - ) + }) .transpose()?; Ok(Self::new( - message, - locations, - path, - None, - extensions, - apollo_id + message, locations, path, None, extensions, apollo_id, )) } @@ -237,15 +233,12 @@ impl Error { .map(|id| Uuid::from_str(id).ok())?; Some(Self::new( - message, - locations, - path, - None, - extensions, + message, locations, path, None, extensions, apollo_id, // TODO confirm this exists from serialized error )) } + /// Extract the error code from [`Error::extensions`] as a String if it is set. 
pub fn extension_code(&self) -> Option { self.extensions.get("code").and_then(|c| match c { Value::String(s) => Some(s.as_str().to_owned()), @@ -255,6 +248,7 @@ impl Error { }) } + /// Retrieve the internal Apollo unique ID for this error pub fn apollo_id(&self) -> Uuid { self.apollo_id } @@ -337,7 +331,7 @@ impl From for Error { locations, path, extensions, - apollo_id: Uuid::new_v4() + apollo_id: Uuid::new_v4(), } } } diff --git a/apollo-router/src/graphql/response.rs b/apollo-router/src/graphql/response.rs index ab9eeca2c6..89600efaf6 100644 --- a/apollo-router/src/graphql/response.rs +++ b/apollo-router/src/graphql/response.rs @@ -269,9 +269,7 @@ mod tests { .message("Something terrible happened!") .path(Path::from("here")) .build(), - Error::builder() - .message("I mean for real") - .build(), + Error::builder().message("I mean for real").build(), ]; let mut errors_to_append = vec![ @@ -279,9 +277,7 @@ mod tests { .message("Something terrible happened!") .path(Path::from("here")) .build(), - Error::builder() - .message("I mean for real") - .build(), + Error::builder().message("I mean for real").build(), ]; let mut response = Response::builder().build(); @@ -331,13 +327,7 @@ mod tests { .as_str(), ); let response = result.unwrap(); - let actual_error_apollo_id = response - .clone() - .errors - .get(0) - .unwrap() - .apollo_id - .clone(); + let actual_error_apollo_id = response.clone().errors.first().unwrap().apollo_id; assert_eq!( response, Response::builder() @@ -365,7 +355,12 @@ mod tests { .message("Name for character with ID 1002 could not be fetched.") .locations(vec!(Location { line: 6, column: 7 })) .path(Path::from("hero/heroFriends/1/name")) - .extensions(bjson!({ "error-extension": 5, }).as_object().cloned().unwrap()) + .extensions( + bjson!({ "error-extension": 5, }) + .as_object() + .cloned() + .unwrap() + ) // Use actual's generated UUID for comparison .apollo_id(actual_error_apollo_id) .build() @@ -427,13 +422,7 @@ mod tests { .as_str(), ); let 
response = result.unwrap(); - let actual_error_apollo_id = response - .clone() - .errors - .get(0) - .unwrap() - .apollo_id - .clone(); + let actual_error_apollo_id = response.clone().errors.first().unwrap().apollo_id; assert_eq!( response, Response::builder() @@ -463,7 +452,12 @@ mod tests { .message("Name for character with ID 1002 could not be fetched.") .locations(vec!(Location { line: 6, column: 7 })) .path(Path::from("hero/heroFriends/1/name")) - .extensions(bjson!({ "error-extension": 5, }).as_object().cloned().unwrap()) + .extensions( + bjson!({ "error-extension": 5, }) + .as_object() + .cloned() + .unwrap() + ) // Use actual's generated UUID for comparison .apollo_id(actual_error_apollo_id) .build() diff --git a/apollo-router/src/plugins/cache/invalidation_endpoint.rs b/apollo-router/src/plugins/cache/invalidation_endpoint.rs index 6b0c6969f7..191135a539 100644 --- a/apollo-router/src/plugins/cache/invalidation_endpoint.rs +++ b/apollo-router/src/plugins/cache/invalidation_endpoint.rs @@ -18,8 +18,9 @@ use tracing_futures::Instrument; use super::entity::Subgraph; use super::invalidation::Invalidation; use super::invalidation::InvalidationOrigin; -use crate::{graphql, ListenAddr}; +use crate::ListenAddr; use crate::configuration::subgraph::SubgraphConfiguration; +use crate::graphql; use crate::plugins::cache::invalidation::InvalidationRequest; use crate::plugins::telemetry::consts::OTEL_STATUS_CODE; use crate::plugins::telemetry::consts::OTEL_STATUS_CODE_ERROR; @@ -91,7 +92,7 @@ impl InvalidationService { } } -impl Service for InvalidationService { +impl Service for InvalidationService { type Response = router::Response; type Error = BoxError; type Future = BoxFuture<'static, Result>; @@ -110,10 +111,11 @@ impl Service for InvalidationService { Span::current().record(OTEL_STATUS_CODE, OTEL_STATUS_CODE_ERROR); return router::Response::error_builder() .status_code(StatusCode::UNAUTHORIZED) - .error(graphql::Error::builder() - .message(String::from("Missing 
authorization header")) - .extension_code(StatusCode::UNAUTHORIZED.to_string()) - .build() + .error( + graphql::Error::builder() + .message(String::from("Missing authorization header")) + .extension_code(StatusCode::UNAUTHORIZED.to_string()) + .build(), ) .context(req.context) .build(); @@ -160,10 +162,13 @@ impl Service for InvalidationService { .record(OTEL_STATUS_CODE, OTEL_STATUS_CODE_ERROR); return router::Response::error_builder() .status_code(StatusCode::UNAUTHORIZED) - .error(graphql::Error::builder() - .message("Invalid authorization header") - .extension_code(StatusCode::UNAUTHORIZED.to_string()) - .build() + .error( + graphql::Error::builder() + .message("Invalid authorization header") + .extension_code( + StatusCode::UNAUTHORIZED.to_string(), + ) + .build(), ) .context(req.context) .build(); @@ -176,17 +181,20 @@ impl Service for InvalidationService { Ok(count) => router::Response::builder() .data(json!({ "count": count })) .status_code(StatusCode::ACCEPTED) - .context( req.context) + .context(req.context) .build(), Err(err) => { Span::current() .record(OTEL_STATUS_CODE, OTEL_STATUS_CODE_ERROR); router::Response::error_builder() .status_code(StatusCode::BAD_REQUEST) - .error(graphql::Error::builder() - .message(err.to_string()) - .extension_code(StatusCode::BAD_REQUEST.to_string()) - .build() + .error( + graphql::Error::builder() + .message(err.to_string()) + .extension_code( + StatusCode::BAD_REQUEST.to_string(), + ) + .build(), ) .context(req.context) .build() @@ -197,10 +205,11 @@ impl Service for InvalidationService { Span::current().record(OTEL_STATUS_CODE, OTEL_STATUS_CODE_ERROR); router::Response::error_builder() .status_code(StatusCode::BAD_REQUEST) - .error(graphql::Error::builder() - .message(err) - .extension_code(StatusCode::BAD_REQUEST.to_string()) - .build() + .error( + graphql::Error::builder() + .message(err) + .extension_code(StatusCode::BAD_REQUEST.to_string()) + .build(), ) .context(req.context) .build() @@ -211,10 +220,11 @@ impl 
Service for InvalidationService { Span::current().record(OTEL_STATUS_CODE, OTEL_STATUS_CODE_ERROR); router::Response::error_builder() .status_code(StatusCode::METHOD_NOT_ALLOWED) - .error(graphql::Error::builder() - .message("".to_string()) - .extension_code(StatusCode::METHOD_NOT_ALLOWED.to_string()) - .build() + .error( + graphql::Error::builder() + .message("".to_string()) + .extension_code(StatusCode::METHOD_NOT_ALLOWED.to_string()) + .build(), ) .context(req.context) .build() diff --git a/apollo-router/src/plugins/fleet_detector.rs b/apollo-router/src/plugins/fleet_detector.rs index ffeab20c57..8b82fac8ff 100644 --- a/apollo-router/src/plugins/fleet_detector.rs +++ b/apollo-router/src/plugins/fleet_detector.rs @@ -272,15 +272,16 @@ impl PluginPrivate for FleetDetector { // Count the number of response bytes from the router to clients .map_response(move |res: router::Response| { let (parts, body) = res.response.into_parts(); - let body = router::body::from_result_stream(body.into_data_stream().inspect(|res| { - if let Ok(bytes) = res { - u64_counter!( - "apollo.router.operations.response_size", - "Total number of response bytes to clients", - bytes.len() as u64 - ); - } - })); + let body = + router::body::from_result_stream(body.into_data_stream().inspect(|res| { + if let Ok(bytes) = res { + u64_counter!( + "apollo.router.operations.response_size", + "Total number of response bytes to clients", + bytes.len() as u64 + ); + } + })); router::Response::parts_builder() .parts(parts) .body(body) @@ -534,14 +535,14 @@ mod tests { use http::StatusCode; use tower::Service as _; - use crate::graphql; + use super::*; + use crate::graphql; use crate::metrics::FutureMetricsExt as _; use crate::metrics::collect_metrics; use crate::metrics::test_utils::MetricType; use crate::plugin::test::MockHttpClientService; use crate::plugin::test::MockRouterService; - use crate::query_planner::build_operation_with_aliasing; use crate::services::router::Body; #[tokio::test] @@ -562,10 
+563,11 @@ mod tests { .context(req.context) .status_code(StatusCode::BAD_REQUEST) .header("content-type", "application/json") - .error(graphql::Error::builder() - .message("bad request") - .extension_code(StatusCode::BAD_REQUEST.to_string()) - .build() + .error( + graphql::Error::builder() + .message("bad request") + .extension_code(StatusCode::BAD_REQUEST.to_string()) + .build(), ) .build() }); @@ -623,10 +625,11 @@ mod tests { .context(req.context) .status_code(StatusCode::BAD_REQUEST) .header("content-type", "application/json") - .error(graphql::Error::builder() - .message("bad request") - .extension_code(StatusCode::BAD_REQUEST.to_string()) - .build() + .error( + graphql::Error::builder() + .message("bad request") + .extension_code(StatusCode::BAD_REQUEST.to_string()) + .build(), ) .build() }); diff --git a/apollo-router/src/plugins/rhai/execution.rs b/apollo-router/src/plugins/rhai/execution.rs index 621df1dfa7..5af1fabc93 100644 --- a/apollo-router/src/plugins/rhai/execution.rs +++ b/apollo-router/src/plugins/rhai/execution.rs @@ -31,7 +31,7 @@ pub(super) fn request_failure( .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - .build() + .build(), ]) .context(context) .status_code(error_details.status) @@ -57,7 +57,7 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - .build() + .build(), ]) .status_code(error_details.status) .context(context) diff --git a/apollo-router/src/plugins/rhai/router.rs b/apollo-router/src/plugins/rhai/router.rs index 0423399fb4..06d9faa911 100644 --- a/apollo-router/src/plugins/rhai/router.rs +++ b/apollo-router/src/plugins/rhai/router.rs @@ -33,7 +33,7 @@ pub(super) fn request_failure( .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - .build() + .build(), ]) .context(context) .status_code(error_details.status) @@ -62,7 +62,7 @@ pub(super) fn 
response_failure( .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - .build() + .build(), ]) .status_code(error_details.status) .context(context) diff --git a/apollo-router/src/plugins/rhai/subgraph.rs b/apollo-router/src/plugins/rhai/subgraph.rs index 1f292a75f6..17ecc41405 100644 --- a/apollo-router/src/plugins/rhai/subgraph.rs +++ b/apollo-router/src/plugins/rhai/subgraph.rs @@ -29,7 +29,7 @@ pub(super) fn request_failure( .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - .build() + .build(), ]) .context(context) .status_code(error_details.status) @@ -57,7 +57,7 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - .build() + .build(), ]) .status_code(error_details.status) .context(context) diff --git a/apollo-router/src/plugins/rhai/supergraph.rs b/apollo-router/src/plugins/rhai/supergraph.rs index f4d7120534..4b7ac475dc 100644 --- a/apollo-router/src/plugins/rhai/supergraph.rs +++ b/apollo-router/src/plugins/rhai/supergraph.rs @@ -31,7 +31,7 @@ pub(super) fn request_failure( .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - .build() + .build(), ]) .context(context) .status_code(error_details.status) @@ -57,7 +57,7 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .errors(vec![ Error::builder() .message(error_details.message.unwrap_or_default()) - .build() + .build(), ]) .status_code(error_details.status) .context(context) diff --git a/apollo-router/src/plugins/subscription.rs b/apollo-router/src/plugins/subscription.rs index 744d9fa3d0..1f79ff257d 100644 --- a/apollo-router/src/plugins/subscription.rs +++ b/apollo-router/src/plugins/subscription.rs @@ -742,18 +742,17 @@ fn ensure_id_consistency( id_from_body: &str, ) -> Result<(), router::Response> { if id_from_path != id_from_body { - Err( - 
router::Response::error_builder() - .status_code(StatusCode::BAD_REQUEST) - .error(graphql::Error::builder() + Err(router::Response::error_builder() + .status_code(StatusCode::BAD_REQUEST) + .error( + graphql::Error::builder() .message("id from url path and id from body are different") .extension_code(StatusCode::BAD_REQUEST.to_string()) - .build() - ) - .context(context.clone()) - .build() - .expect("this response is valid") - ) + .build(), + ) + .context(context.clone()) + .build() + .expect("this response is valid")) } else { Ok(()) } diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index bdc3c528df..b5bb3381d5 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -1,27 +1,30 @@ -use std::collections::HashMap; use std::sync::Arc; -use ahash::{HashSet, HashSetExt}; + +use ahash::HashSet; use futures::StreamExt; use futures::future::ready; use futures::stream::once; use serde::de::DeserializeOwned; use uuid::Uuid; + use crate::Context; use crate::apollo_studio_interop::UsageReporting; -use crate::context::{COUNTED_ERRORS, ROUTER_RESPONSE_ERRORS}; +use crate::context::COUNTED_ERRORS; use crate::context::OPERATION_KIND; use crate::context::OPERATION_NAME; +use crate::context::ROUTER_RESPONSE_ERRORS; use crate::graphql; use crate::graphql::Error; +use crate::plugins::content_negotiation::ClientRequestAccepts; use crate::plugins::telemetry::CLIENT_NAME; use crate::plugins::telemetry::CLIENT_VERSION; use crate::plugins::telemetry::apollo::ErrorsConfiguration; use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; use crate::query_planner::APOLLO_OPERATION_ID; -use crate::services::{router, ExecutionResponse, RouterResponse}; +use crate::services::ExecutionResponse; +use crate::services::RouterResponse; use crate::services::SubgraphResponse; use crate::services::SupergraphResponse; -use 
crate::plugins::content_negotiation::ClientRequestAccepts; use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; pub(crate) async fn count_subgraph_errors( @@ -173,10 +176,7 @@ pub(crate) async fn count_router_errors( } fn to_set(errors: &[Error]) -> HashSet { - errors - .iter() - .map(Error::apollo_id) - .collect() + errors.iter().map(Error::apollo_id).collect() } fn count_operation_errors( @@ -187,8 +187,7 @@ fn count_operation_errors( let _id_str = errors[0].apollo_id().to_string(); // TODO DEBUG REMOVE let _msg_str = errors[0].message.clone(); // TODO DEBUG REMOVE - let previously_counted_errors_map: HashSet = - unwrap_from_context(context, COUNTED_ERRORS); + let previously_counted_errors_map: HashSet = unwrap_from_context(context, COUNTED_ERRORS); let mut operation_id: String = unwrap_from_context(context, APOLLO_OPERATION_ID); let mut operation_name: String = unwrap_from_context(context, OPERATION_NAME); @@ -304,6 +303,7 @@ mod test { use crate::graphql; use crate::json_ext::Path; use crate::metrics::FutureMetricsExt; + use crate::plugins::content_negotiation::ClientRequestAccepts; use crate::plugins::telemetry::CLIENT_NAME; use crate::plugins::telemetry::CLIENT_VERSION; use crate::plugins::telemetry::apollo::ErrorsConfiguration; @@ -312,7 +312,6 @@ mod test { use crate::plugins::telemetry::error_counter::count_supergraph_errors; use crate::query_planner::APOLLO_OPERATION_ID; use crate::services::SupergraphResponse; - use crate::plugins::content_negotiation::ClientRequestAccepts; #[tokio::test] async fn test_count_errors_with_no_previously_counted_errors() { diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 6886e04ce3..68d9ccfdf2 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -113,7 +113,8 @@ use crate::plugins::telemetry::consts::OTEL_STATUS_CODE_OK; use crate::plugins::telemetry::consts::REQUEST_SPAN_NAME; use 
crate::plugins::telemetry::consts::ROUTER_SPAN_NAME; use crate::plugins::telemetry::dynamic_attribute::SpanDynAttribute; -use crate::plugins::telemetry::error_counter::{count_execution_errors, count_router_errors}; +use crate::plugins::telemetry::error_counter::count_execution_errors; +use crate::plugins::telemetry::error_counter::count_router_errors; use crate::plugins::telemetry::error_counter::count_subgraph_errors; use crate::plugins::telemetry::error_counter::count_supergraph_errors; use crate::plugins::telemetry::fmt_layer::create_fmt_layer; @@ -614,7 +615,6 @@ impl PluginPrivate for Telemetry { response = Ok(count_router_errors(resp, &config.apollo.errors).await); } - response } }, diff --git a/apollo-router/src/query_planner/fetch.rs b/apollo-router/src/query_planner/fetch.rs index 5febc22710..70e7ecebd1 100644 --- a/apollo-router/src/query_planner/fetch.rs +++ b/apollo-router/src/query_planner/fetch.rs @@ -362,23 +362,20 @@ impl FetchNode { for values_path in inverted_paths.get(*i).iter().flat_map(|v| v.iter()) { - errors.push(Error::builder() - .locations(error.locations.clone()) - // append to the entitiy's path the error's path without - //`_entities` and the index - .path( - Path::from_iter( - values_path - .0 - .iter() - .chain(&path.0[2..]) - .cloned(), + errors.push( + Error::builder() + .locations(error.locations.clone()) + // append to the entitiy's path the error's path without + //`_entities` and the index + .path(Path::from_iter( + values_path.0.iter().chain(&path.0[2..]).cloned(), + )) + .message(error.message.clone()) + .extension_code( + error.extension_code().unwrap_or_default(), ) - ) - .message(error.message.clone()) - .extension_code(error.extension_code().unwrap_or_default()) - .extensions(error.extensions.clone()) - .build() + .extensions(error.extensions.clone()) + .build(), ) } } diff --git a/apollo-router/src/services/layers/allow_only_http_post_mutations.rs b/apollo-router/src/services/layers/allow_only_http_post_mutations.rs index 
df9526586a..d3a153ea5d 100644 --- a/apollo-router/src/services/layers/allow_only_http_post_mutations.rs +++ b/apollo-router/src/services/layers/allow_only_http_post_mutations.rs @@ -149,9 +149,8 @@ mod forbid_http_get_mutations_tests { use tower::ServiceExt; use super::*; - use crate::{json_ext, Context}; + use crate::Context; use crate::error::Error; - use crate::graphql::Response; use crate::plugin::test::MockSupergraphService; use crate::query_planner::fetch::OperationKind; use crate::services::layers::query_analysis::ParsedDocumentInner; @@ -270,7 +269,7 @@ mod forbid_http_get_mutations_tests { let actual_error = response.errors[0].clone(); let expected_error = Error::builder() - .message( "Mutations can only be sent over HTTP POST".to_string()) + .message("Mutations can only be sent over HTTP POST".to_string()) .extension_code("MUTATION_FORBIDDEN") // Take UUID from actual to ensure equality .apollo_id(actual_error.apollo_id()) diff --git a/apollo-router/src/services/layers/apq.rs b/apollo-router/src/services/layers/apq.rs index 89c4505fe1..950846b1f9 100644 --- a/apollo-router/src/services/layers/apq.rs +++ b/apollo-router/src/services/layers/apq.rs @@ -131,12 +131,13 @@ async fn apq_request( Ok(request) } else { tracing::debug!("apq: graphql request doesn't match provided sha256Hash"); - let errors = vec![crate::error::Error::builder() - .message("provided sha does not match query".to_string()) - .locations(Default::default()) - .path(Path::default()) - .extension_code("PERSISTED_QUERY_HASH_MISMATCH") - .build(), + let errors = vec![ + crate::error::Error::builder() + .message("provided sha does not match query".to_string()) + .locations(Default::default()) + .path(Path::default()) + .extension_code("PERSISTED_QUERY_HASH_MISMATCH") + .build(), ]; let res = SupergraphResponse::builder() .status_code(StatusCode::BAD_REQUEST) @@ -162,12 +163,13 @@ async fn apq_request( } else { let _ = request.context.insert(PERSISTED_QUERY_CACHE_HIT, false); 
tracing::trace!("apq: cache miss"); - let errors = vec![crate::error::Error::builder() - .message("PersistedQueryNotFound".to_string()) - .locations(Default::default()) - .path(Path::default()) - .extension_code("PERSISTED_QUERY_NOT_FOUND") - .build(), + let errors = vec![ + crate::error::Error::builder() + .message("PersistedQueryNotFound".to_string()) + .locations(Default::default()) + .path(Path::default()) + .extension_code("PERSISTED_QUERY_NOT_FOUND") + .build(), ]; let res = SupergraphResponse::builder() .data(Value::default()) @@ -213,12 +215,13 @@ async fn disabled_apq_request( .extensions .contains_key("persistedQuery") { - let errors = vec![crate::error::Error::builder() - .message("PersistedQueryNotSupported".to_string()) - .locations(Default::default()) - .path(Path::default()) - .extension_code("PERSISTED_QUERY_NOT_SUPPORTED") - .build(), + let errors = vec![ + crate::error::Error::builder() + .message("PersistedQueryNotSupported".to_string()) + .locations(Default::default()) + .path(Path::default()) + .extension_code("PERSISTED_QUERY_NOT_SUPPORTED") + .build(), ]; let res = SupergraphResponse::builder() .data(Value::default()) @@ -507,7 +510,7 @@ mod apq_tests { #[tokio::test] async fn return_not_supported_when_disabled() { - let expected_apq_miss_error =Error::builder() + let expected_apq_miss_error = Error::builder() .message("PersistedQueryNotSupported".to_string()) .locations(Default::default()) .path(Path::default()) diff --git a/apollo-router/src/services/layers/static_page.rs b/apollo-router/src/services/layers/static_page.rs index 7955ed88d6..97fdab7521 100644 --- a/apollo-router/src/services/layers/static_page.rs +++ b/apollo-router/src/services/layers/static_page.rs @@ -61,12 +61,17 @@ where { ControlFlow::Break( router::Response::builder() - .header(CONTENT_TYPE, - HeaderValue::from_static(mime::TEXT_HTML_UTF_8.as_ref())) - .data(serde_json_bytes::Value::from_bytes(page.clone()).map_err(BoxError::from)?) 
+ .header( + CONTENT_TYPE, + HeaderValue::from_static(mime::TEXT_HTML_UTF_8.as_ref()), + ) + .data( + serde_json_bytes::Value::from_bytes(page.clone()) + .map_err(BoxError::from)?, + ) .context(req.context) .build() - .unwrap() + .unwrap(), ) } else { ControlFlow::Continue(req) diff --git a/apollo-router/src/services/router.rs b/apollo-router/src/services/router.rs index f6599b0dd6..a0809dc2c6 100644 --- a/apollo-router/src/services/router.rs +++ b/apollo-router/src/services/router.rs @@ -2,13 +2,12 @@ use std::any::Any; use std::mem; -use buildstructor::builder; + use bytes::Bytes; use displaydoc::Display; use futures::Stream; use futures::StreamExt; -use futures::future::{ready, Either}; -use futures::stream::once; +use futures::future::Either; use http::HeaderValue; use http::Method; use http::StatusCode; @@ -23,9 +22,9 @@ use serde_json_bytes::Map as JsonMap; use static_assertions::assert_impl_all; use thiserror::Error; use tower::BoxError; -use wiremock::matchers::body_string; + use self::body::RouterBody; -use super::{router, supergraph}; +use super::supergraph; use crate::Context; use crate::context::CONTAINS_GRAPHQL_ERROR; use crate::graphql; @@ -147,7 +146,6 @@ impl Request { } use crate::context::ROUTER_RESPONSE_ERRORS; -use crate::protocols::multipart::ProtocolMode; #[derive(Error, Display, Debug)] pub enum ParseError { @@ -289,14 +287,11 @@ impl Response { body: Body, context: Context, body_to_stash: Option, - ) -> Result { - let response = http::Response::from_parts(parts, body); - let mut res = Self { - response, - context - }; - if body_to_stash.is_some() { - res.stash_the_body_in_extensions(body_to_stash.unwrap()) + ) -> Result { + let response = http::Response::from_parts(parts, body); + let mut res = Self { response, context }; + if let Some(body_to_stash) = body_to_stash { + res.stash_the_body_in_extensions(body_to_stash) } Ok(res) } diff --git a/apollo-router/src/services/router/service.rs b/apollo-router/src/services/router/service.rs index 
80931f9263..903c5d33a0 100644 --- a/apollo-router/src/services/router/service.rs +++ b/apollo-router/src/services/router/service.rs @@ -23,7 +23,6 @@ use mime::APPLICATION_JSON; use multimap::MultiMap; use opentelemetry::KeyValue; use opentelemetry_semantic_conventions::trace::HTTP_REQUEST_METHOD; -use serde_json::json; use tower::BoxError; use tower::ServiceBuilder; use tower::ServiceExt; @@ -294,7 +293,9 @@ impl RouterService { router::Response::error_builder() .error( graphql::Error::builder() - .message(String::from("router service is not available to process request")) + .message(String::from( + "router service is not available to process request", + )) .extension_code(StatusCode::SERVICE_UNAVAILABLE.to_string()) .build(), ) @@ -322,7 +323,11 @@ impl RouterService { router::Response::parts_builder() .parts(parts) .body(router::body::from_bytes(body.clone())) - .and_body_to_stash(if display_router_response { Some(body) } else { None }) + .and_body_to_stash(if display_router_response { + Some(body) + } else { + None + }) .context(context) .build() } else if accepts_multipart_defer || accepts_multipart_subscription { diff --git a/apollo-router/src/services/supergraph/service.rs b/apollo-router/src/services/supergraph/service.rs index d579bc8961..2f17341ae5 100644 --- a/apollo-router/src/services/supergraph/service.rs +++ b/apollo-router/src/services/supergraph/service.rs @@ -170,7 +170,7 @@ impl Service for SupergraphService { crate::error::Error::builder() .message(error.to_string()) .extension_code("INTERNAL_SERVER_ERROR") - .build() + .build(), ]; Ok(SupergraphResponse::infallible_builder() diff --git a/apollo-router/src/spec/query.rs b/apollo-router/src/spec/query.rs index 8dd49c2a91..3c9ccd419a 100644 --- a/apollo-router/src/spec/query.rs +++ b/apollo-router/src/spec/query.rs @@ -374,8 +374,8 @@ impl Query { Error::builder() .message(message) .path(Path::from_response_slice(path)) - .build() - ); + .build(), + ); Err(InvalidValue) } else { @@ -797,13 +797,14 
@@ impl Query { path.pop(); res? } else if field_type.is_non_null() { - parameters.errors.push(Error::builder() - .message(format!( - "Cannot return null for non-nullable field {}.{field_name_str}", - root_type_name - )) - .path(Path::from_response_slice(path)) - .build() + parameters.errors.push( + Error::builder() + .message(format!( + "Cannot return null for non-nullable field {}.{field_name_str}", + root_type_name + )) + .path(Path::from_response_slice(path)) + .build(), ); return Err(InvalidValue); } else { From b6ebe24e2ec867707db5ef78482d9f88e694ad51 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 4 Jun 2025 16:58:15 -0400 Subject: [PATCH 30/46] fix err counting tests --- .../src/plugins/telemetry/error_counter.rs | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index b5bb3381d5..e81ea8b56b 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -96,10 +96,10 @@ pub(crate) async fn count_supergraph_errors( // TODO can we combine this with above? 
if !response_body.errors.is_empty() { count_operation_errors(&response_body.errors, &context, &errors_config); - // Refresh context with the most up-to-date list of errors - let _ = context.insert(COUNTED_ERRORS, to_set(&response_body.errors)); } } + // Refresh context with the most up-to-date list of errors + let _ = context.insert(COUNTED_ERRORS, to_set(&response_body.errors)); }); let (first_response, rest) = StreamExt::into_future(stream).await; @@ -292,10 +292,11 @@ fn count_graphql_error(count: u64, code: String) { #[cfg(test)] mod test { + use std::collections::HashSet; use http::StatusCode; use serde_json_bytes::Value; use serde_json_bytes::json; - + use uuid::Uuid; use crate::Context; use crate::context::COUNTED_ERRORS; use crate::context::OPERATION_KIND; @@ -308,7 +309,7 @@ mod test { use crate::plugins::telemetry::CLIENT_VERSION; use crate::plugins::telemetry::apollo::ErrorsConfiguration; use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; - use crate::plugins::telemetry::error_counter::count_operation_errors; + use crate::plugins::telemetry::error_counter::{count_operation_errors, unwrap_from_context}; use crate::plugins::telemetry::error_counter::count_supergraph_errors; use crate::query_planner::APOLLO_OPERATION_ID; use crate::services::SupergraphResponse; @@ -338,6 +339,7 @@ mod test { let _ = context.insert(CLIENT_NAME, "client-1".to_string()); let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); + let error_id = Uuid::new_v4(); let new_response = count_supergraph_errors( SupergraphResponse::fake_builder() .header("Accept", "application/json") @@ -347,6 +349,7 @@ mod test { graphql::Error::builder() .message("You did a bad request.") .extension_code("GRAPHQL_VALIDATION_FAILED") + .apollo_id(error_id) .build(), ]) .build() @@ -376,8 +379,8 @@ mod test { ); assert_eq!( - new_response.context.get_json_value(COUNTED_ERRORS), - Some(json!({"GRAPHQL_VALIDATION_FAILED": 1})) + unwrap_from_context::>(&new_response.context, 
COUNTED_ERRORS), + HashSet::from([error_id]) ) } .with_metrics() @@ -403,7 +406,10 @@ mod test { }) }); - let _ = context.insert(COUNTED_ERRORS, json!({"GRAPHQL_VALIDATION_FAILED": 1})); + let validation_error_id = Uuid::new_v4(); + let custom_error_id = Uuid::new_v4(); + + let _ = context.insert(COUNTED_ERRORS, HashSet::from([validation_error_id])); let _ = context.insert(APOLLO_OPERATION_ID, "some-id".to_string()); let _ = context.insert(OPERATION_NAME, "SomeOperation".to_string()); @@ -411,6 +417,8 @@ mod test { let _ = context.insert(CLIENT_NAME, "client-1".to_string()); let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); + + let new_response = count_supergraph_errors( SupergraphResponse::fake_builder() .header("Accept", "application/json") @@ -420,12 +428,14 @@ mod test { graphql::Error::builder() .message("You did a bad request.") .extension_code("GRAPHQL_VALIDATION_FAILED") + .apollo_id(validation_error_id) .build(), ) .error( graphql::Error::builder() .message("Custom error text") .extension_code("CUSTOM_ERROR") + .apollo_id(custom_error_id) .build(), ) .build() @@ -471,8 +481,8 @@ mod test { ); assert_eq!( - new_response.context.get_json_value(COUNTED_ERRORS), - Some(json!({"GRAPHQL_VALIDATION_FAILED": 1, "CUSTOM_ERROR": 1})) + unwrap_from_context::>(&new_response.context, COUNTED_ERRORS), + HashSet::from([validation_error_id, custom_error_id]) ) } .with_metrics() @@ -706,4 +716,4 @@ mod test { .with_metrics() .await; } -} +} \ No newline at end of file From 03674a0eb05fb87cb3317a9f225613ccc4fcbd0c Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Thu, 5 Jun 2025 14:41:46 -0400 Subject: [PATCH 31/46] serde default random uuid. Skip serialize. 
Remove debug --- apollo-router/src/graphql/mod.rs | 7 +++++++ apollo-router/src/plugins/telemetry/error_counter.rs | 5 ----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/apollo-router/src/graphql/mod.rs b/apollo-router/src/graphql/mod.rs index 9f72093fe3..a9af85aba0 100644 --- a/apollo-router/src/graphql/mod.rs +++ b/apollo-router/src/graphql/mod.rs @@ -74,6 +74,7 @@ pub struct Error { pub extensions: Object, /// A unique identifier for this error + #[serde(default = "generate_uuid", skip_serializing)] apollo_id: Uuid, } // Implement getter and getter_mut to not use pub field directly @@ -254,6 +255,12 @@ impl Error { } } +/// Generate a random Uuid. For use in generating a default [`Error:apollo_id`] when not supplied +/// during deserialization. +fn generate_uuid() -> Uuid { + Uuid::new_v4() +} + /// GraphQL spec require that both "line" and "column" are positive numbers. /// However GraphQL Java and GraphQL Kotlin return `{ "line": -1, "column": -1 }` /// if they can't determine error location inside query. 
diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index e81ea8b56b..a9ffa32a6b 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -167,8 +167,6 @@ pub(crate) async fn count_router_errors( let _ = context.insert(COUNTED_ERRORS, counted_errors); } - // TODO confirm the count_operation_error_codes() INVALID_ACCEPT_HEADER case is handled here - RouterResponse { context: response.context, response: response.response, @@ -184,9 +182,6 @@ fn count_operation_errors( context: &Context, errors_config: &ErrorsConfiguration, ) { - let _id_str = errors[0].apollo_id().to_string(); // TODO DEBUG REMOVE - let _msg_str = errors[0].message.clone(); // TODO DEBUG REMOVE - let previously_counted_errors_map: HashSet = unwrap_from_context(context, COUNTED_ERRORS); let mut operation_id: String = unwrap_from_context(context, APOLLO_OPERATION_ID); From aba73243e39aa1b606e1b812a2eb97f14936f42a Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Thu, 5 Jun 2025 15:22:40 -0400 Subject: [PATCH 32/46] fix auth tests --- .../src/plugins/authentication/tests.rs | 37 ++++++++++++++++--- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/apollo-router/src/plugins/authentication/tests.rs b/apollo-router/src/plugins/authentication/tests.rs index 3e865a7563..3de1915a68 100644 --- a/apollo-router/src/plugins/authentication/tests.rs +++ b/apollo-router/src/plugins/authentication/tests.rs @@ -233,6 +233,8 @@ async fn it_rejects_when_there_is_no_auth_header() { let expected_error = graphql::Error::builder() .message("The request is not authenticated") .extension_code("AUTH_ERROR") + // Overwrite expected id with actual id + .apollo_id(response.errors.first().unwrap().apollo_id()) .build(); assert_eq!(response.errors, vec![expected_error]); @@ -272,6 +274,8 @@ async fn it_rejects_when_auth_prefix_is_missing() { http::header::AUTHORIZATION, )) 
.extension_code("AUTH_ERROR") + // Overwrite expected id with actual id + .apollo_id(response.errors.first().unwrap().apollo_id()) .build(); assert_eq!(response.errors, vec![expected_error]); @@ -311,6 +315,8 @@ async fn it_rejects_when_auth_prefix_has_no_jwt_token() { http::header::AUTHORIZATION, )) .extension_code("AUTH_ERROR") + // Overwrite expected id with actual id + .apollo_id(response.errors.first().unwrap().apollo_id()) .build(); assert_eq!(response.errors, vec![expected_error]); @@ -349,6 +355,8 @@ async fn it_rejects_when_auth_prefix_has_invalid_format_jwt() { "'{HEADER_TOKEN_TRUNCATED}' is not a valid JWT header: InvalidToken" )) .extension_code("AUTH_ERROR") + // Overwrite expected id with actual id + .apollo_id(response.errors.first().unwrap().apollo_id()) .build(); assert_eq!(response.errors, vec![expected_error]); @@ -386,9 +394,11 @@ async fn it_rejects_when_auth_prefix_has_correct_format_but_invalid_jwt() { .unwrap(); let expected_error = graphql::Error::builder() - .message(format!("'{HEADER_TOKEN_TRUNCATED}' is not a valid JWT header: Base64 error: Invalid last symbol 114, offset 5.")) - .extension_code("AUTH_ERROR") - .build(); + .message(format!("'{HEADER_TOKEN_TRUNCATED}' is not a valid JWT header: Base64 error: Invalid last symbol 114, offset 5.")) + .extension_code("AUTH_ERROR") + // Overwrite expected id with actual id + .apollo_id(response.errors.first().unwrap().apollo_id()) + .build(); assert_eq!(response.errors, vec![expected_error]); @@ -427,6 +437,8 @@ async fn it_rejects_when_auth_prefix_has_correct_format_and_invalid_jwt() { let expected_error = graphql::Error::builder() .message("Cannot decode JWT: InvalidSignature") .extension_code("AUTH_ERROR") + // Overwrite expected id with actual id + .apollo_id(response.errors.first().unwrap().apollo_id()) .build(); assert_eq!(response.errors, vec![expected_error]); @@ -775,6 +787,8 @@ async fn it_inserts_failure_jwt_status_into_context() { let expected_error = graphql::Error::builder() 
.message("Cannot decode JWT: InvalidSignature") .extension_code("AUTH_ERROR") + // Overwrite expected id with actual id + .apollo_id(response.errors.first().unwrap().apollo_id()) .build(); assert_eq!(response.errors, vec![expected_error]); @@ -1278,7 +1292,13 @@ async fn issuer_check() { ) .unwrap(); assert_eq!(response, graphql::Response::builder() - .errors(vec![graphql::Error::builder().extension_code("AUTH_ERROR").message("Invalid issuer: the token's `iss` was 'hallo', but signed with a key from JWKS configured to only accept from 'hello'").build()]).build()); + .errors(vec![graphql::Error::builder() + .extension_code("AUTH_ERROR") + .message("Invalid issuer: the token's `iss` was 'hallo', but signed with a key from JWKS configured to only accept from 'hello'") + // Overwrite expected id with actual id + .apollo_id(response.errors.first().unwrap().apollo_id()) + .build() + ]).build()); } ControlFlow::Continue(req) => { println!("got req with issuer check"); @@ -1318,7 +1338,12 @@ async fn issuer_check() { ) .unwrap(); assert_eq!(response, graphql::Response::builder() - .errors(vec![graphql::Error::builder().extension_code("AUTH_ERROR").message("Invalid issuer: the token's `iss` was 'AAAA', but signed with a key from JWKS configured to only accept from 'goodbye, hello'").build()]).build()); + .errors(vec![graphql::Error::builder() + .extension_code("AUTH_ERROR") + .message("Invalid issuer: the token's `iss` was 'AAAA', but signed with a key from JWKS configured to only accept from 'goodbye, hello'") + // Overwrite expected id with actual id + .apollo_id(response.errors.first().unwrap().apollo_id()) + .build()]).build()); } ControlFlow::Continue(_) => { panic!("issuer check should have failed") @@ -1506,6 +1531,8 @@ async fn audience_check() { graphql::Error::builder() .extension_code("AUTH_ERROR") .message("Invalid audience: the token's `aud` was 'AAAA', but 'goodbye, hello' was expected") + // Overwrite expected id with actual id + 
.apollo_id(response.errors.first().unwrap().apollo_id()) .build() ]).build()); } From 85e76de9e209aaa302f4ffbb72cda15a85f2dccd Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Fri, 6 Jun 2025 14:16:17 -0400 Subject: [PATCH 33/46] fix some tests --- apollo-router/src/error.rs | 2 +- apollo-router/src/graphql/mod.rs | 8 ++++++++ apollo-router/src/graphql/response.rs | 16 +++++++++++++--- .../src/plugins/connectors/handle_responses.rs | 15 ++++++++++++++- apollo-router/src/plugins/coprocessor/test.rs | 4 +++- apollo-router/src/plugins/fleet_detector.rs | 2 +- apollo-router/src/plugins/forbid_mutations.rs | 16 ++++++++-------- apollo-router/src/query_planner/fetch.rs | 6 ++---- 8 files changed, 50 insertions(+), 19 deletions(-) diff --git a/apollo-router/src/error.rs b/apollo-router/src/error.rs index 0143eb9714..bd7bdb40a0 100644 --- a/apollo-router/src/error.rs +++ b/apollo-router/src/error.rs @@ -660,6 +660,6 @@ mod tests { ) .build(); - assert_eq!(expected_gql_error, error.to_graphql_error(None)); + assert_eq!(expected_gql_error.with_null_id(), error.to_graphql_error(None).with_null_id()); } } diff --git a/apollo-router/src/graphql/mod.rs b/apollo-router/src/graphql/mod.rs index a9af85aba0..6c15ada018 100644 --- a/apollo-router/src/graphql/mod.rs +++ b/apollo-router/src/graphql/mod.rs @@ -253,6 +253,14 @@ impl Error { pub fn apollo_id(&self) -> Uuid { self.apollo_id } + + #[cfg(test)] + /// Null out the ID for comparing errors in tests where you cannot extract the randomly + /// generated Uuid + pub fn with_null_id(mut self) -> Self { + self.apollo_id = Uuid::nil(); + self + } } /// Generate a random Uuid. 
For use in generating a default [`Error:apollo_id`] when not supplied diff --git a/apollo-router/src/graphql/response.rs b/apollo-router/src/graphql/response.rs index 89600efaf6..4e52c8ec42 100644 --- a/apollo-router/src/graphql/response.rs +++ b/apollo-router/src/graphql/response.rs @@ -258,26 +258,36 @@ impl From for Response { mod tests { use serde_json::json; use serde_json_bytes::json as bjson; - + use uuid::Uuid; use super::*; use crate::graphql::Location; #[test] fn test_append_errors_path_fallback_and_override() { + let uuid1 = Uuid::new_v4(); + let uuid2 = Uuid::new_v4(); let expected_errors = vec![ Error::builder() .message("Something terrible happened!") .path(Path::from("here")) + .apollo_id(uuid1) + .build(), + Error::builder() + .message("I mean for real") + .apollo_id(uuid2) .build(), - Error::builder().message("I mean for real").build(), ]; let mut errors_to_append = vec![ Error::builder() .message("Something terrible happened!") .path(Path::from("here")) + .apollo_id(uuid1) + .build(), + Error::builder() + .message("I mean for real") + .apollo_id(uuid2) .build(), - Error::builder().message("I mean for real").build(), ]; let mut response = Response::builder().build(); diff --git a/apollo-router/src/plugins/connectors/handle_responses.rs b/apollo-router/src/plugins/connectors/handle_responses.rs index f9a61e7ecf..53ef9ab14a 100644 --- a/apollo-router/src/plugins/connectors/handle_responses.rs +++ b/apollo-router/src/plugins/connectors/handle_responses.rs @@ -1325,7 +1325,7 @@ mod tests { .unwrap(), ); - let res = super::aggregate_responses(vec![ + let mut res = super::aggregate_responses(vec![ process_response( Ok(response_plaintext), response_key_plaintext, @@ -1373,6 +1373,16 @@ mod tests { ]) .unwrap(); + // Take ownership of the original errors then null out error IDs so we can compare without + // randomness of Uuid::new_v4() + let body = res.response.body_mut(); + let old_errors = std::mem::take(&mut body.errors); + let new_errors = old_errors 
+ .into_iter() + .map(|e| e.with_null_id()) + .collect(); + body.errors = new_errors; + assert_debug_snapshot!(res, @r#" Response { response: Response { @@ -1432,6 +1442,7 @@ mod tests { "subgraph_name", ), }, + apollo_id: 00000000-0000-0000-0000-000000000000, }, Error { message: "Request failed", @@ -1465,6 +1476,7 @@ mod tests { "subgraph_name", ), }, + apollo_id: 00000000-0000-0000-0000-000000000000, }, Error { message: "Request failed", @@ -1498,6 +1510,7 @@ mod tests { "subgraph_name", ), }, + apollo_id: 00000000-0000-0000-0000-000000000000, }, ], extensions: {}, diff --git a/apollo-router/src/plugins/coprocessor/test.rs b/apollo-router/src/plugins/coprocessor/test.rs index 86c8332261..e80d8d877d 100644 --- a/apollo-router/src/plugins/coprocessor/test.rs +++ b/apollo-router/src/plugins/coprocessor/test.rs @@ -976,7 +976,9 @@ mod tests { "message": "my error message", "extensions": { "code": "ERROR" - } + }, + // Override id to avoid comparing random value + "apolloId": actual_response.errors.first().unwrap().apollo_id() }] })) .unwrap(), diff --git a/apollo-router/src/plugins/fleet_detector.rs b/apollo-router/src/plugins/fleet_detector.rs index 8b82fac8ff..6d27a2a91b 100644 --- a/apollo-router/src/plugins/fleet_detector.rs +++ b/apollo-router/src/plugins/fleet_detector.rs @@ -535,7 +535,6 @@ mod tests { use http::StatusCode; use tower::Service as _; - use super::*; use crate::graphql; use crate::metrics::FutureMetricsExt as _; @@ -651,6 +650,7 @@ mod tests { .unwrap(); // THEN operation size metrics should exist + // TODO check with fleet people to see if the value here actually matters assert_counter!("apollo.router.operations.request_size", 7, &[]); assert_counter!("apollo.router.operations.response_size", 7, &[]); } diff --git a/apollo-router/src/plugins/forbid_mutations.rs b/apollo-router/src/plugins/forbid_mutations.rs index aad5750b9d..1830341bc3 100644 --- a/apollo-router/src/plugins/forbid_mutations.rs +++ 
b/apollo-router/src/plugins/forbid_mutations.rs @@ -117,7 +117,8 @@ mod forbid_http_get_mutations_tests { let expected_error = Error::builder() .message("Mutations are forbidden".to_string()) .extension_code("MUTATION_FORBIDDEN") - .build(); + .build() + .with_null_id(); let expected_status = StatusCode::BAD_REQUEST; let service_stack = ForbidMutations::new(PluginInit::fake_new( @@ -129,10 +130,13 @@ mod forbid_http_get_mutations_tests { .execution_service(MockExecutionService::new().boxed()); let request = create_request(Method::GET, OperationKind::Mutation); - let mut actual_error = service_stack.oneshot(request).await.unwrap(); + let mut response = service_stack.oneshot(request).await.unwrap(); + + assert_eq!(expected_status, response.response.status()); + + let actual_error = &response.next_response().await.unwrap().errors[0]; - assert_eq!(expected_status, actual_error.response.status()); - assert_error_matches(&expected_error, actual_error.next_response().await.unwrap()); + assert_eq!(actual_error.clone().with_null_id(), expected_error); } #[tokio::test] @@ -163,10 +167,6 @@ mod forbid_http_get_mutations_tests { .unwrap(); } - fn assert_error_matches(expected_error: &Error, response: Response) { - assert_eq!(&response.errors[0], expected_error); - } - fn create_request(method: Method, operation_kind: OperationKind) -> ExecutionRequest { let root: PlanNode = if operation_kind == OperationKind::Mutation { serde_json::from_value(json!({ diff --git a/apollo-router/src/query_planner/fetch.rs b/apollo-router/src/query_planner/fetch.rs index 70e7ecebd1..9468a62a04 100644 --- a/apollo-router/src/query_planner/fetch.rs +++ b/apollo-router/src/query_planner/fetch.rs @@ -371,9 +371,7 @@ impl FetchNode { values_path.0.iter().chain(&path.0[2..]).cloned(), )) .message(error.message.clone()) - .extension_code( - error.extension_code().unwrap_or_default(), - ) + .and_extension_code(error.extension_code()) .extensions(error.extensions.clone()) .build(), ) @@ -459,7 +457,7 @@ 
impl FetchNode { .locations(error.locations.clone()) .path(path) .message(error.message.clone()) - .extension_code(error.extension_code().unwrap_or_default()) + .and_extension_code(error.extension_code()) .extensions(error.extensions.clone()) .apollo_id(error.apollo_id()) .build() From 956ddca8c6ac00892fb0a87c86000e0a193cafdf Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Mon, 9 Jun 2025 12:30:57 -0400 Subject: [PATCH 34/46] parts builder --> http::response builder to avoid changing response body --- apollo-router/src/axum_factory/listeners.rs | 19 +++++++++++++++---- apollo-router/src/plugins/coprocessor/mod.rs | 5 ++--- apollo-router/src/plugins/fleet_detector.rs | 18 ++++++++---------- apollo-router/src/plugins/healthcheck/mod.rs | 11 +++++++---- .../src/plugins/record_replay/record.rs | 8 +++++--- .../plugins/telemetry/metrics/prometheus.rs | 11 +++++++---- .../src/services/layers/static_page.rs | 17 +++++++++-------- apollo-router/src/services/router.rs | 6 ++---- apollo-router/src/services/router/service.rs | 13 +++++++------ 9 files changed, 62 insertions(+), 46 deletions(-) diff --git a/apollo-router/src/axum_factory/listeners.rs b/apollo-router/src/axum_factory/listeners.rs index e49a0b0def..89f2bd97a2 100644 --- a/apollo-router/src/axum_factory/listeners.rs +++ b/apollo-router/src/axum_factory/listeners.rs @@ -522,6 +522,7 @@ mod tests { use crate::configuration::Sandbox; use crate::configuration::Supergraph; use crate::services::router; + use crate::services::router::body; #[tokio::test] async fn it_makes_sure_same_listenaddrs_are_accepted() { @@ -550,8 +551,13 @@ mod tests { let endpoint = service_fn(|req: router::Request| async move { Ok::<_, BoxError>( - router::Response::builder() - .data("this is a test") + router::Response::http_response_builder() + .response( + http::Response::builder() + .body::(body::from_bytes( + "this is a test".to_string(), + ))? 
+ ) .context(req.context) .build() .unwrap(), @@ -589,8 +595,13 @@ mod tests { .build() .unwrap(); let endpoint = service_fn(|req: router::Request| async move { - router::Response::builder() - .data("this is a test") + router::Response::http_response_builder() + .response( + http::Response::builder() + .body::(body::from_bytes( + "this is a test".to_string(), + ))? + ) .context(req.context) .build() }) diff --git a/apollo-router/src/plugins/coprocessor/mod.rs b/apollo-router/src/plugins/coprocessor/mod.rs index c1d3d70e8c..5439249b71 100644 --- a/apollo-router/src/plugins/coprocessor/mod.rs +++ b/apollo-router/src/plugins/coprocessor/mod.rs @@ -1105,10 +1105,9 @@ where )); // Finally, return a response which has a Body that wraps our stream of response chunks. - router::Response::parts_builder() + router::Response::http_response_builder() .context(context) - .parts(parts) - .body(final_stream) + .response(http::Response::from_parts(parts, final_stream)) .build() } // ----------------------------------------------------------------------------------------------------- diff --git a/apollo-router/src/plugins/fleet_detector.rs b/apollo-router/src/plugins/fleet_detector.rs index 6d27a2a91b..bd3ad172b0 100644 --- a/apollo-router/src/plugins/fleet_detector.rs +++ b/apollo-router/src/plugins/fleet_detector.rs @@ -271,20 +271,18 @@ impl PluginPrivate for FleetDetector { }) // Count the number of response bytes from the router to clients .map_response(move |res: router::Response| { - let (parts, body) = res.response.into_parts(); - let body = - router::body::from_result_stream(body.into_data_stream().inspect(|res| { - if let Ok(bytes) = res { - u64_counter!( + router::Response::http_response_builder() + .response(res.response.map(move |body| { + router::body::from_result_stream(body.into_data_stream().inspect(|res| { + if let Ok(bytes) = res { + u64_counter!( "apollo.router.operations.response_size", "Total number of response bytes to clients", bytes.len() as u64 ); - } - 
})); - router::Response::parts_builder() - .parts(parts) - .body(body) + } + })) + })) .context(res.context) .build() .expect("cannot fail") // TODO better error handling diff --git a/apollo-router/src/plugins/healthcheck/mod.rs b/apollo-router/src/plugins/healthcheck/mod.rs index 22ed30e847..07f6fd4d7f 100644 --- a/apollo-router/src/plugins/healthcheck/mod.rs +++ b/apollo-router/src/plugins/healthcheck/mod.rs @@ -13,7 +13,7 @@ use std::sync::atomic::AtomicBool; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; use std::time::Duration; - +use http::response::Parts; use http::StatusCode; use multimap::MultiMap; use schemars::JsonSchema; @@ -290,9 +290,12 @@ impl PluginPrivate for HealthCheck { }; tracing::trace!(?health, request = ?req.router_request, "health check"); async move { - router::Response::builder() - .status_code(status_code) - .data(serde_json_bytes::to_value(&health).map_err(BoxError::from)?) + router::Response::http_response_builder() + .response(http::Response::builder().status(status_code).body( + router::body::from_bytes( + serde_json::to_vec(&health).map_err(BoxError::from)?, + ) + )?) 
.context(req.context) .build() } diff --git a/apollo-router/src/plugins/record_replay/record.rs b/apollo-router/src/plugins/record_replay/record.rs index 99966d3da5..dde27a641d 100644 --- a/apollo-router/src/plugins/record_replay/record.rs +++ b/apollo-router/src/plugins/record_replay/record.rs @@ -127,10 +127,12 @@ impl Plugin for Record { let stream = stream.into_data_stream().chain(after_complete); - router::Response::parts_builder() + router::Response::http_response_builder() .context(res.context) - .parts(parts) - .body(router::body::from_result_stream(stream)) + .response(http::Response::from_parts( + parts, + router::body::from_result_stream(stream), + )) .build() } }) diff --git a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs index 2cec78a385..77457c9e36 100644 --- a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs +++ b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs @@ -187,10 +187,13 @@ impl Service for PrometheusService { let stats = String::from_utf8_lossy(&result); let modified_stats = stats.replace("_total_total", "_total"); - router::Response::builder() - .status_code(StatusCode::OK) - .header(http::header::CONTENT_TYPE, "text/plain; version=0.0.4") - .data(modified_stats) + router::Response::http_response_builder() + .response(http::Response::builder() + .status(StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/plain; version=0.0.4") + .body(router::body::from_bytes(modified_stats)) + .map_err(BoxError::from)? 
+ ) .context(req.context) .build() }) diff --git a/apollo-router/src/services/layers/static_page.rs b/apollo-router/src/services/layers/static_page.rs index 97fdab7521..d9d943a836 100644 --- a/apollo-router/src/services/layers/static_page.rs +++ b/apollo-router/src/services/layers/static_page.rs @@ -60,14 +60,15 @@ where && accepts_html(req.router_request.headers()) { ControlFlow::Break( - router::Response::builder() - .header( - CONTENT_TYPE, - HeaderValue::from_static(mime::TEXT_HTML_UTF_8.as_ref()), - ) - .data( - serde_json_bytes::Value::from_bytes(page.clone()) - .map_err(BoxError::from)?, + router::Response::http_response_builder() + .response( + http::Response::builder() + .header( + CONTENT_TYPE, + HeaderValue::from_static(mime::TEXT_HTML_UTF_8.as_ref()), + ) + .body(router::body::from_bytes(page.clone())) + .unwrap() ) .context(req.context) .build() diff --git a/apollo-router/src/services/router.rs b/apollo-router/src/services/router.rs index a0809dc2c6..256bede07b 100644 --- a/apollo-router/src/services/router.rs +++ b/apollo-router/src/services/router.rs @@ -282,13 +282,11 @@ impl Response { } #[builder(visibility = "pub")] - fn parts_new( - parts: Parts, - body: Body, + fn http_response_new( + response: http::Response, context: Context, body_to_stash: Option, ) -> Result { - let response = http::Response::from_parts(parts, body); let mut res = Self { response, context }; if let Some(body_to_stash) = body_to_stash { res.stash_the_body_in_extensions(body_to_stash) diff --git a/apollo-router/src/services/router/service.rs b/apollo-router/src/services/router/service.rs index 903c5d33a0..b8b86e0678 100644 --- a/apollo-router/src/services/router/service.rs +++ b/apollo-router/src/services/router/service.rs @@ -320,9 +320,8 @@ impl RouterService { .extensions() .with_lock(|ext| ext.get::().is_some()); - router::Response::parts_builder() - .parts(parts) - .body(router::body::from_bytes(body.clone())) + router::Response::http_response_builder() + 
.response(Response::from_parts(parts, router::body::from_bytes(body.clone()))) .and_body_to_stash(if display_router_response { Some(body) } else { @@ -356,9 +355,11 @@ impl RouterService { } }; - RouterResponse::parts_builder() - .parts(parts) - .body(router::body::from_result_stream(response_multipart)) + RouterResponse::http_response_builder() + .response(http::Response::from_parts( + parts, + router::body::from_result_stream(response_multipart), + )) .context(context) .build() } else { From c2ad7d09a8194eba10786379d0456730df7ccdcd Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Mon, 9 Jun 2025 14:42:58 -0400 Subject: [PATCH 35/46] Fix most remaining tests --- apollo-router/src/axum_factory/tests.rs | 18 ++++--- .../src/plugins/license_enforcement/mod.rs | 1 + apollo-router/src/plugins/rhai/tests.rs | 3 ++ apollo-router/src/plugins/subscription.rs | 8 +++- .../src/plugins/telemetry/error_counter.rs | 3 ++ apollo-router/src/protocols/websocket.rs | 6 +++ apollo-router/src/services/layers/apq.rs | 4 +- .../services/layers/persisted_queries/mod.rs | 47 +++++++++++++++---- .../src/services/subgraph_service.rs | 29 +++++++++--- 9 files changed, 93 insertions(+), 26 deletions(-) diff --git a/apollo-router/src/axum_factory/tests.rs b/apollo-router/src/axum_factory/tests.rs index 0e53641a66..c7494d1f4a 100644 --- a/apollo-router/src/axum_factory/tests.rs +++ b/apollo-router/src/axum_factory/tests.rs @@ -1047,7 +1047,7 @@ async fn response_failure() -> Result<(), ApolloRouterError> { .await; let (server, client) = init(router_service).await; - let response = client + let mut response = client .post(format!( "{}/", server.graphql_listen_address().as_ref().unwrap() @@ -1066,14 +1066,18 @@ async fn response_failure() -> Result<(), ApolloRouterError> { .await .unwrap(); + let mut expected_response = crate::error::FetchError::SubrequestHttpError { + status_code: Some(200), + service: "Mock service".to_string(), + reason: "Mock error".to_string(), + }.to_response(); + // 
Overwrite error IDs to avoid random Uuid mismatch + response.errors[0] = response.errors[0].clone().with_null_id(); + expected_response.errors[0] = expected_response.errors[0].clone().with_null_id(); + assert_eq!( response, - crate::error::FetchError::SubrequestHttpError { - status_code: Some(200), - service: "Mock service".to_string(), - reason: "Mock error".to_string(), - } - .to_response() + expected_response ); server.shutdown().await } diff --git a/apollo-router/src/plugins/license_enforcement/mod.rs b/apollo-router/src/plugins/license_enforcement/mod.rs index e884277f05..5266730f96 100644 --- a/apollo-router/src/plugins/license_enforcement/mod.rs +++ b/apollo-router/src/plugins/license_enforcement/mod.rs @@ -165,6 +165,7 @@ mod test { } #[tokio::test] + // TODO CONVERT THIS INTO INTEGRATION TEST WITH free license plugin + telemetry async fn it_emits_metrics_when_tps_enforced() { async { // GIVEN diff --git a/apollo-router/src/plugins/rhai/tests.rs b/apollo-router/src/plugins/rhai/tests.rs index 882fff38f6..c274b2a709 100644 --- a/apollo-router/src/plugins/rhai/tests.rs +++ b/apollo-router/src/plugins/rhai/tests.rs @@ -634,6 +634,8 @@ async fn it_can_process_om_subgraph_forbidden_with_graphql_payload() { .unwrap_err(); let processed_error = process_error(error); + // Overwrite error ID to avoid random Uuid mismatch + let error_id = processed_error.body.clone().unwrap().errors[0].apollo_id(); assert_eq!(processed_error.status, StatusCode::FORBIDDEN); assert_eq!( processed_error.body, @@ -643,6 +645,7 @@ async fn it_can_process_om_subgraph_forbidden_with_graphql_payload() { Error::builder() .message("I have raised a 403") .extension_code("ACCESS_DENIED") + .apollo_id(error_id) .build() }]) .build() diff --git a/apollo-router/src/plugins/subscription.rs b/apollo-router/src/plugins/subscription.rs index 1f79ff257d..d3a9681b77 100644 --- a/apollo-router/src/plugins/subscription.rs +++ b/apollo-router/src/plugins/subscription.rs @@ -1139,7 +1139,8 @@ mod tests { let 
resp = web_endpoint.clone().oneshot(http_req).await.unwrap(); assert_eq!(resp.status(), http::StatusCode::ACCEPTED); let msg = handler.next().await.unwrap(); - + // Overwrite error ID to avoid random Uuid mismatch + let error_id = msg.errors[0].apollo_id(); assert_eq!( msg, graphql::Response::builder() @@ -1147,6 +1148,7 @@ mod tests { graphql::Error::builder() .message("cannot complete the subscription") .extension_code("SUBSCRIPTION_ERROR") + .apollo_id(error_id) .build() ]) .build() @@ -1222,6 +1224,9 @@ mod tests { .await .unwrap(); + // Overwrite error ID to avoid random Uuid mismatch + let error_id = subgraph_response.response.body().errors[0].apollo_id(); + assert_eq!( subgraph_response.response.body(), &graphql::Response::builder() @@ -1232,6 +1237,7 @@ mod tests { "cannot execute a subscription if it's not enabled in the configuration" ) .extension_code("SUBSCRIPTION_DISABLED") + .apollo_id(error_id) .build() ) .extensions(Object::default()) diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index a9ffa32a6b..8935f11ed8 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -182,6 +182,9 @@ fn count_operation_errors( context: &Context, errors_config: &ErrorsConfiguration, ) { + let _id_str = errors[0].apollo_id().to_string(); // TODO DEBUG REMOVE + let _msg_str = errors[0].message.clone(); // TODO DEBUG REMOVE + let previously_counted_errors_map: HashSet = unwrap_from_context(context, COUNTED_ERRORS); let mut operation_id: String = unwrap_from_context(context, APOLLO_OPERATION_ID); diff --git a/apollo-router/src/protocols/websocket.rs b/apollo-router/src/protocols/websocket.rs index 199b57cdb0..57cae3322f 100644 --- a/apollo-router/src/protocols/websocket.rs +++ b/apollo-router/src/protocols/websocket.rs @@ -982,12 +982,15 @@ mod tests { .unwrap(); let next_payload = gql_read_stream.next().await.unwrap(); + // 
Overwrite error ID to avoid random Uuid mismatch + let error_id = next_payload.errors[0].apollo_id(); assert_eq!(next_payload, graphql::Response::builder() .error( graphql::Error::builder() .message( "cannot deserialize websocket server message: Error(\"expected value\", line: 1, column: 1)".to_string()) .extension_code("INVALID_WEBSOCKET_SERVER_MESSAGE_FORMAT") + .apollo_id(error_id) .build(), ) .build() @@ -1044,12 +1047,15 @@ mod tests { .unwrap(); let next_payload = gql_read_stream.next().await.unwrap(); + // Overwrite error ID to avoid random Uuid mismatch + let error_id = next_payload.errors[0].apollo_id(); assert_eq!(next_payload, graphql::Response::builder() .error( graphql::Error::builder() .message( "cannot deserialize websocket server message: Error(\"expected value\", line: 1, column: 1)".to_string()) .extension_code("INVALID_WEBSOCKET_SERVER_MESSAGE_FORMAT") + .apollo_id(error_id) .build(), ) .build() diff --git a/apollo-router/src/services/layers/apq.rs b/apollo-router/src/services/layers/apq.rs index 950846b1f9..b778230a31 100644 --- a/apollo-router/src/services/layers/apq.rs +++ b/apollo-router/src/services/layers/apq.rs @@ -488,7 +488,7 @@ mod apq_tests { .path(Path::default()) .extension_code("PERSISTED_QUERY_HASH_MISMATCH") .build(); - assert_eq!(graphql_response.errors[0], expected_apq_insert_failed_error); + assert_eq!(graphql_response.errors[0].clone().with_null_id(), expected_apq_insert_failed_error.with_null_id()); // apq insert failed, this call will miss let second_apq_error = router_service @@ -616,7 +616,7 @@ mod apq_tests { } fn assert_error_matches(expected_error: &Error, res: Response) { - assert_eq!(&res.errors[0], expected_error); + assert_eq!(res.errors[0].clone().with_null_id(), expected_error.clone().with_null_id()); } fn new_context() -> Context { diff --git a/apollo-router/src/services/layers/persisted_queries/mod.rs b/apollo-router/src/services/layers/persisted_queries/mod.rs index 3e4defe184..5b38e8356e 100644 --- 
a/apollo-router/src/services/layers/persisted_queries/mod.rs +++ b/apollo-router/src/services/layers/persisted_queries/mod.rs @@ -459,7 +459,7 @@ fn supergraph_err( #[cfg(test)] mod tests { use std::time::Duration; - + use http_body_util::BodyExt; use serde_json::json; use tracing::instrument::WithSubscriber; @@ -471,6 +471,7 @@ mod tests { use crate::configuration::PersistedQueries; use crate::configuration::PersistedQueriesSafelist; use crate::configuration::Supergraph; + use crate::graphql::Error; use crate::metrics::FutureMetricsExt; use crate::services::layers::persisted_queries::freeform_graphql_behavior::FreeformGraphQLBehavior; use crate::services::layers::query_analysis::QueryAnalysisLayer; @@ -704,8 +705,11 @@ mod tests { .await .expect("could not get response from pq layer"); assert_eq!( - response.errors, - vec![graphql_err_operation_not_found(invalid_id)] + response.errors + .iter() + .map(|e| e.clone().with_null_id()) + .collect::>(), + vec![graphql_err_operation_not_found(invalid_id).with_null_id()] ); } @@ -838,8 +842,11 @@ mod tests { .await .expect("could not get response from pq layer"); assert_eq!( - response.errors, - vec![graphql_err_operation_not_in_safelist()] + response.errors + .iter() + .map(|e| e.clone().with_null_id()) + .collect::>(), + vec![graphql_err_operation_not_in_safelist().with_null_id()] ); let mut metric_attributes = vec![opentelemetry::KeyValue::new( "persisted_queries.safelist.rejected.unknown".to_string(), @@ -1079,8 +1086,11 @@ mod tests { .await .expect("could not get response from pq layer"); assert_eq!( - response.errors, - vec![graphql_err_operation_not_found(invalid_id)] + response.errors + .iter() + .map(|e| e.clone().with_null_id()) + .collect::>(), + vec![graphql_err_operation_not_found(invalid_id).with_null_id()] ); } @@ -1202,7 +1212,12 @@ mod tests { .next_response() .await .expect("could not get response from pq layer"); - assert_eq!(response.errors, vec![graphql_err_pq_id_required()]); + assert_eq!( + 
response.errors + .iter() + .map(|e| e.clone().with_null_id()) + .collect::>(), + vec![graphql_err_pq_id_required().with_null_id()]); // Try again skipping enforcement. let context = Context::new(); @@ -1330,7 +1345,13 @@ mod tests { .next_response() .await .expect("could not get response from pq layer"); - assert_eq!(response.errors, vec![graphql_err_cannot_send_id_and_body()]); + assert_eq!( + response.errors + .iter() + .map(|e| e.clone().with_null_id()) + .collect::>(), + vec![graphql_err_cannot_send_id_and_body().with_null_id()] + ); } #[tokio::test(flavor = "multi_thread")] @@ -1363,6 +1384,12 @@ mod tests { .next_response() .await .expect("could not get response from pq layer"); - assert_eq!(response.errors, vec![graphql_err_cannot_send_id_and_body()]); + assert_eq!( + response.errors + .iter() + .map(|e| e.clone().with_null_id()) + .collect::>(), + vec![graphql_err_cannot_send_id_and_body().with_null_id()] + ); } } diff --git a/apollo-router/src/services/subgraph_service.rs b/apollo-router/src/services/subgraph_service.rs index ebffe2cc2e..06f31f93a5 100644 --- a/apollo-router/src/services/subgraph_service.rs +++ b/apollo-router/src/services/subgraph_service.rs @@ -3226,12 +3226,17 @@ mod tests { .body(None) .unwrap() .into_parts(); - let actual = super::http_response_to_graphql_response( + let mut actual = super::http_response_to_graphql_response( "test_service", Ok(ContentType::ApplicationGraphqlResponseJson), body, &parts, ); + // Null out error IDs so we can avoid random Uuid mismatch + actual.errors = actual.errors + .into_iter() + .map(|e| e.clone().with_null_id()) + .collect(); let expected = graphql::Response::builder() .error( @@ -3240,7 +3245,8 @@ mod tests { service: "test_service".into(), reason: "418: I'm a teapot".into(), } - .to_graphql_error(None), + .to_graphql_error(None) + .with_null_id(), ) .build(); assert_eq!(actual, expected); @@ -3293,16 +3299,21 @@ mod tests { .unwrap() .into_parts(); - let actual = 
super::http_response_to_graphql_response( + let mut actual = super::http_response_to_graphql_response( "test_service", Ok(ContentType::ApplicationGraphqlResponseJson), body, &parts, ); + // Null out error IDs so we can avoid random Uuid mismatch + actual.errors = actual.errors + .into_iter() + .map(|e| e.clone().with_null_id()) + .collect(); let expected = graphql::Response::builder() .data(json["data"].take()) - .error(error) + .error(error.with_null_id()) .build(); assert_eq!(actual, expected); } @@ -3327,12 +3338,17 @@ mod tests { .unwrap() .into_parts(); - let actual = super::http_response_to_graphql_response( + let mut actual = super::http_response_to_graphql_response( "test_service", Ok(ContentType::ApplicationGraphqlResponseJson), body, &parts, ); + // Null out error IDs so we can avoid random Uuid mismatch + actual.errors = actual.errors + .into_iter() + .map(|e| e.clone().with_null_id()) + .collect(); let expected = graphql::Response::builder() .data(json["data"].take()) @@ -3342,7 +3358,8 @@ mod tests { service: "test_service".into(), reason: "418: I'm a teapot".into(), } - .to_graphql_error(None), + .to_graphql_error(None) + .with_null_id(), ) .error(error) .build(); From fdc7bba2462285dea2d09dc6c4e8ca155fdcec9b Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Mon, 9 Jun 2025 15:18:05 -0400 Subject: [PATCH 36/46] lint fixes --- apollo-router/src/axum_factory/listeners.rs | 14 +++++------ apollo-router/src/axum_factory/tests.rs | 8 +++---- apollo-router/src/error.rs | 5 +++- apollo-router/src/graphql/response.rs | 1 + .../plugins/connectors/handle_responses.rs | 5 +--- apollo-router/src/plugins/coprocessor/test.rs | 2 +- apollo-router/src/plugins/fleet_detector.rs | 9 ++++---- apollo-router/src/plugins/forbid_mutations.rs | 1 - apollo-router/src/plugins/healthcheck/mod.rs | 4 ++-- .../src/plugins/telemetry/error_counter.rs | 9 ++++---- .../plugins/telemetry/metrics/prometheus.rs | 11 +++++---- apollo-router/src/services/layers/apq.rs | 10 ++++++-- 
.../services/layers/persisted_queries/mod.rs | 23 ++++++++++++------- .../src/services/layers/static_page.rs | 6 +++-- apollo-router/src/services/router.rs | 1 - apollo-router/src/services/router/service.rs | 5 +++- .../src/services/subgraph_service.rs | 9 +++++--- 17 files changed, 71 insertions(+), 52 deletions(-) diff --git a/apollo-router/src/axum_factory/listeners.rs b/apollo-router/src/axum_factory/listeners.rs index 89f2bd97a2..5cbe7891da 100644 --- a/apollo-router/src/axum_factory/listeners.rs +++ b/apollo-router/src/axum_factory/listeners.rs @@ -553,10 +553,9 @@ mod tests { Ok::<_, BoxError>( router::Response::http_response_builder() .response( - http::Response::builder() - .body::(body::from_bytes( - "this is a test".to_string(), - ))? + http::Response::builder().body::( + body::from_bytes("this is a test".to_string()), + )?, ) .context(req.context) .build() @@ -597,10 +596,9 @@ mod tests { let endpoint = service_fn(|req: router::Request| async move { router::Response::http_response_builder() .response( - http::Response::builder() - .body::(body::from_bytes( - "this is a test".to_string(), - ))? 
+ http::Response::builder().body::( + body::from_bytes("this is a test".to_string()), + )?, ) .context(req.context) .build() diff --git a/apollo-router/src/axum_factory/tests.rs b/apollo-router/src/axum_factory/tests.rs index c7494d1f4a..851e2dcda9 100644 --- a/apollo-router/src/axum_factory/tests.rs +++ b/apollo-router/src/axum_factory/tests.rs @@ -1070,15 +1070,13 @@ async fn response_failure() -> Result<(), ApolloRouterError> { status_code: Some(200), service: "Mock service".to_string(), reason: "Mock error".to_string(), - }.to_response(); + } + .to_response(); // Overwrite error IDs to avoid random Uuid mismatch response.errors[0] = response.errors[0].clone().with_null_id(); expected_response.errors[0] = expected_response.errors[0].clone().with_null_id(); - assert_eq!( - response, - expected_response - ); + assert_eq!(response, expected_response); server.shutdown().await } diff --git a/apollo-router/src/error.rs b/apollo-router/src/error.rs index bd7bdb40a0..a49b612695 100644 --- a/apollo-router/src/error.rs +++ b/apollo-router/src/error.rs @@ -660,6 +660,9 @@ mod tests { ) .build(); - assert_eq!(expected_gql_error.with_null_id(), error.to_graphql_error(None).with_null_id()); + assert_eq!( + expected_gql_error.with_null_id(), + error.to_graphql_error(None).with_null_id() + ); } } diff --git a/apollo-router/src/graphql/response.rs b/apollo-router/src/graphql/response.rs index 4e52c8ec42..0e3d04220c 100644 --- a/apollo-router/src/graphql/response.rs +++ b/apollo-router/src/graphql/response.rs @@ -259,6 +259,7 @@ mod tests { use serde_json::json; use serde_json_bytes::json as bjson; use uuid::Uuid; + use super::*; use crate::graphql::Location; diff --git a/apollo-router/src/plugins/connectors/handle_responses.rs b/apollo-router/src/plugins/connectors/handle_responses.rs index 53ef9ab14a..14cedfe314 100644 --- a/apollo-router/src/plugins/connectors/handle_responses.rs +++ b/apollo-router/src/plugins/connectors/handle_responses.rs @@ -1377,10 +1377,7 @@ mod tests { 
// randomness of Uuid::new_v4() let body = res.response.body_mut(); let old_errors = std::mem::take(&mut body.errors); - let new_errors = old_errors - .into_iter() - .map(|e| e.with_null_id()) - .collect(); + let new_errors = old_errors.into_iter().map(|e| e.with_null_id()).collect(); body.errors = new_errors; assert_debug_snapshot!(res, @r#" diff --git a/apollo-router/src/plugins/coprocessor/test.rs b/apollo-router/src/plugins/coprocessor/test.rs index e80d8d877d..6a25e1a9d1 100644 --- a/apollo-router/src/plugins/coprocessor/test.rs +++ b/apollo-router/src/plugins/coprocessor/test.rs @@ -978,7 +978,7 @@ mod tests { "code": "ERROR" }, // Override id to avoid comparing random value - "apolloId": actual_response.errors.first().unwrap().apollo_id() + "apolloId": actual_response.errors.first().unwrap().apollo_id() }] })) .unwrap(), diff --git a/apollo-router/src/plugins/fleet_detector.rs b/apollo-router/src/plugins/fleet_detector.rs index bd3ad172b0..2867d30df3 100644 --- a/apollo-router/src/plugins/fleet_detector.rs +++ b/apollo-router/src/plugins/fleet_detector.rs @@ -276,10 +276,10 @@ impl PluginPrivate for FleetDetector { router::body::from_result_stream(body.into_data_stream().inspect(|res| { if let Ok(bytes) = res { u64_counter!( - "apollo.router.operations.response_size", - "Total number of response bytes to clients", - bytes.len() as u64 - ); + "apollo.router.operations.response_size", + "Total number of response bytes to clients", + bytes.len() as u64 + ); } })) })) @@ -533,6 +533,7 @@ mod tests { use http::StatusCode; use tower::Service as _; + use super::*; use crate::graphql; use crate::metrics::FutureMetricsExt as _; diff --git a/apollo-router/src/plugins/forbid_mutations.rs b/apollo-router/src/plugins/forbid_mutations.rs index 1830341bc3..e21a7c5811 100644 --- a/apollo-router/src/plugins/forbid_mutations.rs +++ b/apollo-router/src/plugins/forbid_mutations.rs @@ -76,7 +76,6 @@ mod forbid_http_get_mutations_tests { use super::*; use crate::graphql; - use 
crate::graphql::Response; use crate::http_ext::Request; use crate::plugin::PluginInit; use crate::plugin::test::MockExecutionService; diff --git a/apollo-router/src/plugins/healthcheck/mod.rs b/apollo-router/src/plugins/healthcheck/mod.rs index 07f6fd4d7f..6645e3f75f 100644 --- a/apollo-router/src/plugins/healthcheck/mod.rs +++ b/apollo-router/src/plugins/healthcheck/mod.rs @@ -13,7 +13,7 @@ use std::sync::atomic::AtomicBool; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; use std::time::Duration; -use http::response::Parts; + use http::StatusCode; use multimap::MultiMap; use schemars::JsonSchema; @@ -294,7 +294,7 @@ impl PluginPrivate for HealthCheck { .response(http::Response::builder().status(status_code).body( router::body::from_bytes( serde_json::to_vec(&health).map_err(BoxError::from)?, - ) + ), )?) .context(req.context) .build() diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index 8935f11ed8..ec3e424b3b 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -291,10 +291,12 @@ fn count_graphql_error(count: u64, code: String) { #[cfg(test)] mod test { use std::collections::HashSet; + use http::StatusCode; use serde_json_bytes::Value; use serde_json_bytes::json; use uuid::Uuid; + use crate::Context; use crate::context::COUNTED_ERRORS; use crate::context::OPERATION_KIND; @@ -307,8 +309,9 @@ mod test { use crate::plugins::telemetry::CLIENT_VERSION; use crate::plugins::telemetry::apollo::ErrorsConfiguration; use crate::plugins::telemetry::apollo::ExtendedErrorMetricsMode; - use crate::plugins::telemetry::error_counter::{count_operation_errors, unwrap_from_context}; + use crate::plugins::telemetry::error_counter::count_operation_errors; use crate::plugins::telemetry::error_counter::count_supergraph_errors; + use crate::plugins::telemetry::error_counter::unwrap_from_context; use 
crate::query_planner::APOLLO_OPERATION_ID; use crate::services::SupergraphResponse; @@ -415,8 +418,6 @@ mod test { let _ = context.insert(CLIENT_NAME, "client-1".to_string()); let _ = context.insert(CLIENT_VERSION, "version-1".to_string()); - - let new_response = count_supergraph_errors( SupergraphResponse::fake_builder() .header("Accept", "application/json") @@ -714,4 +715,4 @@ mod test { .with_metrics() .await; } -} \ No newline at end of file +} diff --git a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs index 77457c9e36..9f357136f8 100644 --- a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs +++ b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs @@ -188,11 +188,12 @@ impl Service for PrometheusService { let modified_stats = stats.replace("_total_total", "_total"); router::Response::http_response_builder() - .response(http::Response::builder() - .status(StatusCode::OK) - .header(http::header::CONTENT_TYPE, "text/plain; version=0.0.4") - .body(router::body::from_bytes(modified_stats)) - .map_err(BoxError::from)? 
+ .response( + http::Response::builder() + .status(StatusCode::OK) + .header(http::header::CONTENT_TYPE, "text/plain; version=0.0.4") + .body(router::body::from_bytes(modified_stats)) + .map_err(BoxError::from)?, ) .context(req.context) .build() diff --git a/apollo-router/src/services/layers/apq.rs b/apollo-router/src/services/layers/apq.rs index b778230a31..14db3a1819 100644 --- a/apollo-router/src/services/layers/apq.rs +++ b/apollo-router/src/services/layers/apq.rs @@ -488,7 +488,10 @@ mod apq_tests { .path(Path::default()) .extension_code("PERSISTED_QUERY_HASH_MISMATCH") .build(); - assert_eq!(graphql_response.errors[0].clone().with_null_id(), expected_apq_insert_failed_error.with_null_id()); + assert_eq!( + graphql_response.errors[0].clone().with_null_id(), + expected_apq_insert_failed_error.with_null_id() + ); // apq insert failed, this call will miss let second_apq_error = router_service @@ -616,7 +619,10 @@ mod apq_tests { } fn assert_error_matches(expected_error: &Error, res: Response) { - assert_eq!(res.errors[0].clone().with_null_id(), expected_error.clone().with_null_id()); + assert_eq!( + res.errors[0].clone().with_null_id(), + expected_error.clone().with_null_id() + ); } fn new_context() -> Context { diff --git a/apollo-router/src/services/layers/persisted_queries/mod.rs b/apollo-router/src/services/layers/persisted_queries/mod.rs index 5b38e8356e..7ce5683499 100644 --- a/apollo-router/src/services/layers/persisted_queries/mod.rs +++ b/apollo-router/src/services/layers/persisted_queries/mod.rs @@ -459,7 +459,7 @@ fn supergraph_err( #[cfg(test)] mod tests { use std::time::Duration; - use http_body_util::BodyExt; + use serde_json::json; use tracing::instrument::WithSubscriber; @@ -705,7 +705,8 @@ mod tests { .await .expect("could not get response from pq layer"); assert_eq!( - response.errors + response + .errors .iter() .map(|e| e.clone().with_null_id()) .collect::>(), @@ -842,7 +843,8 @@ mod tests { .await .expect("could not get response from pq 
layer"); assert_eq!( - response.errors + response + .errors .iter() .map(|e| e.clone().with_null_id()) .collect::>(), @@ -1086,7 +1088,8 @@ mod tests { .await .expect("could not get response from pq layer"); assert_eq!( - response.errors + response + .errors .iter() .map(|e| e.clone().with_null_id()) .collect::>(), @@ -1213,11 +1216,13 @@ mod tests { .await .expect("could not get response from pq layer"); assert_eq!( - response.errors + response + .errors .iter() .map(|e| e.clone().with_null_id()) .collect::>(), - vec![graphql_err_pq_id_required().with_null_id()]); + vec![graphql_err_pq_id_required().with_null_id()] + ); // Try again skipping enforcement. let context = Context::new(); @@ -1346,7 +1351,8 @@ mod tests { .await .expect("could not get response from pq layer"); assert_eq!( - response.errors + response + .errors .iter() .map(|e| e.clone().with_null_id()) .collect::>(), @@ -1385,7 +1391,8 @@ mod tests { .await .expect("could not get response from pq layer"); assert_eq!( - response.errors + response + .errors .iter() .map(|e| e.clone().with_null_id()) .collect::>(), diff --git a/apollo-router/src/services/layers/static_page.rs b/apollo-router/src/services/layers/static_page.rs index d9d943a836..e530554483 100644 --- a/apollo-router/src/services/layers/static_page.rs +++ b/apollo-router/src/services/layers/static_page.rs @@ -65,10 +65,12 @@ where http::Response::builder() .header( CONTENT_TYPE, - HeaderValue::from_static(mime::TEXT_HTML_UTF_8.as_ref()), + HeaderValue::from_static( + mime::TEXT_HTML_UTF_8.as_ref(), + ), ) .body(router::body::from_bytes(page.clone())) - .unwrap() + .unwrap(), ) .context(req.context) .build() diff --git a/apollo-router/src/services/router.rs b/apollo-router/src/services/router.rs index 256bede07b..4540dfc978 100644 --- a/apollo-router/src/services/router.rs +++ b/apollo-router/src/services/router.rs @@ -13,7 +13,6 @@ use http::Method; use http::StatusCode; use http::header::CONTENT_TYPE; use http::header::HeaderName; -use 
http::response::Parts; use http_body_util::BodyExt; use multer::Multipart; use multimap::MultiMap; diff --git a/apollo-router/src/services/router/service.rs b/apollo-router/src/services/router/service.rs index b8b86e0678..30894ba998 100644 --- a/apollo-router/src/services/router/service.rs +++ b/apollo-router/src/services/router/service.rs @@ -321,7 +321,10 @@ impl RouterService { .with_lock(|ext| ext.get::().is_some()); router::Response::http_response_builder() - .response(Response::from_parts(parts, router::body::from_bytes(body.clone()))) + .response(Response::from_parts( + parts, + router::body::from_bytes(body.clone()), + )) .and_body_to_stash(if display_router_response { Some(body) } else { diff --git a/apollo-router/src/services/subgraph_service.rs b/apollo-router/src/services/subgraph_service.rs index 06f31f93a5..aa7678a776 100644 --- a/apollo-router/src/services/subgraph_service.rs +++ b/apollo-router/src/services/subgraph_service.rs @@ -3233,7 +3233,8 @@ mod tests { &parts, ); // Null out error IDs so we can avoid random Uuid mismatch - actual.errors = actual.errors + actual.errors = actual + .errors .into_iter() .map(|e| e.clone().with_null_id()) .collect(); @@ -3306,7 +3307,8 @@ mod tests { &parts, ); // Null out error IDs so we can avoid random Uuid mismatch - actual.errors = actual.errors + actual.errors = actual + .errors .into_iter() .map(|e| e.clone().with_null_id()) .collect(); @@ -3345,7 +3347,8 @@ mod tests { &parts, ); // Null out error IDs so we can avoid random Uuid mismatch - actual.errors = actual.errors + actual.errors = actual + .errors .into_iter() .map(|e| e.clone().with_null_id()) .collect(); From b38b8ce06cb3a16c67a9bd0fe60fef91ea6c83b0 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Wed, 11 Jun 2025 09:15:51 -0400 Subject: [PATCH 37/46] WIP convert license unit test to integration test --- .../src/plugins/license_enforcement/mod.rs | 78 +++++++++++++++---- 1 file changed, 64 insertions(+), 14 deletions(-) diff --git 
a/apollo-router/src/plugins/license_enforcement/mod.rs b/apollo-router/src/plugins/license_enforcement/mod.rs index 5266730f96..6dc9e64082 100644 --- a/apollo-router/src/plugins/license_enforcement/mod.rs +++ b/apollo-router/src/plugins/license_enforcement/mod.rs @@ -102,9 +102,17 @@ register_private_plugin!("apollo", "license_enforcement", LicenseEnforcement); #[cfg(test)] mod test { + use serde_json::Value; + use tower_service::Service; + use tracing_subscriber::filter::FilterExt; use super::*; use crate::metrics::FutureMetricsExt; - use crate::plugins::test::PluginTestHarness; + use crate::plugin::test::MockRouterService; + use crate::plugins::telemetry::apollo_exporter::Sender; + use crate::plugins::telemetry::Telemetry; + use crate::plugins::test::{FakeDefault, PluginTestHarness}; + use crate::services::supergraph; + use crate::TestHarness; use crate::uplink::license_enforcement::LicenseLimits; use crate::uplink::license_enforcement::LicenseState; use crate::uplink::license_enforcement::TpsLimit; @@ -180,25 +188,67 @@ mod test { }), }; - let test_harness: PluginTestHarness = PluginTestHarness::builder() - .license(license) + let license_plugin = LicenseEnforcement::new( + PluginInit::fake_builder() + .config(LicenseEnforcementConfig {}) + .license(license) + .build() + ).await.expect("license plugin"); + + let full_config = serde_yaml::from_str::(r#" + telemetry: + apollo: + endpoint: "http://example.com" + client_name_header: "name_header" + client_version_header: "version_header" + buffer_size: 10000 + schema_id: "schema_sha" + "#).unwrap(); + + let telemetry_config = full_config + .as_object() + .expect("must be an object") + .get("telemetry") + .expect("telemetry must be a root key"); + + let init = PluginInit::fake_builder() + .config(telemetry_config.clone()) + .full_config(full_config) .build() + .with_deserialized_config() + .expect("unable to deserialize telemetry config"); + let mut telemetry_plugin = Telemetry::new(init) .await - .expect("test 
harness"); + .expect("telemetry plugin"); - let service = test_harness.router_service(|_req| async { - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - Ok(router::Response::fake_builder() - .data(serde_json::json!({"data": {"field": "value"}})) - .header("x-custom-header", "test-value") - .build() - .unwrap()) - }); + let mut router_service = MockRouterService::new(); + router_service.expect_clone().return_once(move || { + let mut mock_service = test::MockRouterService::new(); + mock_service.expect_call() + .times(2) + .returning(move |_| { + // TODO do we need the async wait? + Ok(router::Response::fake_builder() + .data(serde_json::json!({"data": {"field": "value"}})) + .header("x-custom-header", "test-value") + .build() + .unwrap()) + }); + mock_service + }); + + let mut test_harness = TestHarness::builder() + .extra_private_plugin(telemetry_plugin) + .extra_private_plugin(license_plugin) + .router_hook(move |_| router_service.clone().boxed()) + .build_router() + .await + .unwrap(); // WHEN // * two reqs happen - let _ = service.call_default().await; - let _ = service.call_default().await; + let _ = test_harness.call(router::Request::default()).await; + let _ = test_harness.call(router::Request::default()).await; // THEN // * we get a metric saying the tps limit was enforced From dd0d382326b37db85e2a265a15be1782ad8af42d Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Thu, 12 Jun 2025 12:05:59 -0400 Subject: [PATCH 38/46] additional attempts to fix license enforcement test --- .../src/plugins/license_enforcement/mod.rs | 49 ++++++++++++++++--- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/apollo-router/src/plugins/license_enforcement/mod.rs b/apollo-router/src/plugins/license_enforcement/mod.rs index 6dc9e64082..cb77707e13 100644 --- a/apollo-router/src/plugins/license_enforcement/mod.rs +++ b/apollo-router/src/plugins/license_enforcement/mod.rs @@ -72,6 +72,12 @@ impl PluginPrivate for LicenseEnforcement { match response {
Ok(ok) => Ok(ok), Err(err) if err.is::() => { + u64_counter!( // TODO TEMP REMOVE + "apollo.router.graphql_error", + "Number of GraphQL error responses returned by the router", + 1, + code = "ROUTER_FREE_PLAN_RATE_LIMIT_REACHED" + ); let error = graphql::Error::builder() .message("Your request has been rate limited. You've reached the limits for the Free plan. Consider upgrading to a higher plan for increased limits.") .extension_code("ROUTER_FREE_PLAN_RATE_LIMIT_REACHED") @@ -102,7 +108,7 @@ register_private_plugin!("apollo", "license_enforcement", LicenseEnforcement); #[cfg(test)] mod test { - use serde_json::Value; + use serde_json::{json, Value}; use tower_service::Service; use tracing_subscriber::filter::FilterExt; use super::*; @@ -110,9 +116,9 @@ mod test { use crate::plugin::test::MockRouterService; use crate::plugins::telemetry::apollo_exporter::Sender; use crate::plugins::telemetry::Telemetry; - use crate::plugins::test::{FakeDefault, PluginTestHarness}; + use crate::plugins::test::{FakeDefault, PluginTestHarness, RequestTestExt}; use crate::services::supergraph; - use crate::TestHarness; + use crate::{Context, TestHarness}; use crate::uplink::license_enforcement::LicenseLimits; use crate::uplink::license_enforcement::LicenseState; use crate::uplink::license_enforcement::TpsLimit; @@ -217,7 +223,7 @@ mod test { .build() .with_deserialized_config() .expect("unable to deserialize telemetry config"); - let mut telemetry_plugin = Telemetry::new(init) + let telemetry_plugin = Telemetry::new(init) .await .expect("telemetry plugin"); @@ -238,8 +244,8 @@ mod test { }); let mut test_harness = TestHarness::builder() - .extra_private_plugin(telemetry_plugin) .extra_private_plugin(license_plugin) + .extra_private_plugin(telemetry_plugin) .router_hook(move |_| router_service.clone().boxed()) .build_router() .await @@ -247,11 +253,38 @@ mod test { // WHEN // * two reqs happen - let _ = test_harness.call(router::Request::default()).await; - let _ = 
test_harness.call(router::Request::default()).await; + let _first_response = test_harness + .ready() + .await + .unwrap() + .call(router::Request::fake_builder() + .header("content-type", "application/json") + .build() + .unwrap() + ) + .await + .unwrap() + .next_response() + .await + .unwrap(); + let _second_response = test_harness + .ready() + .await + .unwrap() + .call(router::Request::fake_builder() + .header("content-type", "application/json") + .build() + .unwrap() + ) + .await + .unwrap() + .next_response() + .await + .unwrap(); + // THEN - // * we get a metric saying the tps limit was enforced + // * we get a metric from the telemetry plugin saying the tps limit was enforced assert_counter!( "apollo.router.graphql_error", 1, From 87445fb810e1ee17bb680c31dcbbcacf109142a8 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Fri, 13 Jun 2025 11:06:06 -0400 Subject: [PATCH 39/46] fix operations test and move to telemetry plugin --- apollo-router/src/graphql/mod.rs | 12 +- .../src/plugins/license_enforcement/mod.rs | 10 +- .../src/plugins/telemetry/error_counter.rs | 52 ++---- apollo-router/src/plugins/telemetry/mod.rs | 176 +++++++++++++++++- apollo-router/src/services/router/tests.rs | 175 +---------------- 5 files changed, 207 insertions(+), 218 deletions(-) diff --git a/apollo-router/src/graphql/mod.rs b/apollo-router/src/graphql/mod.rs index 6c15ada018..186f52c080 100644 --- a/apollo-router/src/graphql/mod.rs +++ b/apollo-router/src/graphql/mod.rs @@ -228,14 +228,14 @@ impl Error { .and_then(|p: &serde_json_bytes::Value| -> Option { serde_json_bytes::from_value(p.clone()).ok() }); - let apollo_id = value_completion - .get("apolloId") - .and_then(|id| id.as_str()) - .map(|id| Uuid::from_str(id).ok())?; Some(Self::new( - message, locations, path, None, extensions, - apollo_id, // TODO confirm this exists from serialized error + message, + locations, + path, + None, + extensions, + None // apollo_id is not serialized, so it will never exist in a serialized vc 
error )) } diff --git a/apollo-router/src/plugins/license_enforcement/mod.rs b/apollo-router/src/plugins/license_enforcement/mod.rs index cb77707e13..029fe1771d 100644 --- a/apollo-router/src/plugins/license_enforcement/mod.rs +++ b/apollo-router/src/plugins/license_enforcement/mod.rs @@ -112,13 +112,13 @@ mod test { use tower_service::Service; use tracing_subscriber::filter::FilterExt; use super::*; - use crate::metrics::FutureMetricsExt; + use crate::metrics::{meter_provider, FutureMetricsExt}; use crate::plugin::test::MockRouterService; use crate::plugins::telemetry::apollo_exporter::Sender; use crate::plugins::telemetry::Telemetry; use crate::plugins::test::{FakeDefault, PluginTestHarness, RequestTestExt}; use crate::services::supergraph; - use crate::{Context, TestHarness}; + use crate::{Context, TestHarness, _private}; use crate::uplink::license_enforcement::LicenseLimits; use crate::uplink::license_enforcement::LicenseState; use crate::uplink::license_enforcement::TpsLimit; @@ -243,7 +243,7 @@ mod test { mock_service }); - let mut test_harness = TestHarness::builder() + let mut router_service = TestHarness::builder() .extra_private_plugin(license_plugin) .extra_private_plugin(telemetry_plugin) .router_hook(move |_| router_service.clone().boxed()) @@ -253,7 +253,7 @@ mod test { // WHEN // * two reqs happen - let _first_response = test_harness + let _first_response = router_service .ready() .await .unwrap() @@ -267,7 +267,7 @@ mod test { .next_response() .await .unwrap(); - let _second_response = test_harness + let _second_response = router_service .ready() .await .unwrap() diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index ec3e424b3b..ff93394984 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -61,43 +61,24 @@ pub(crate) async fn count_supergraph_errors( let (parts, stream) = response.response.into_parts(); 
let stream = stream.inspect(move |response_body| { - // TODO do we really need this? - let ClientRequestAccepts { - wildcard: accepts_wildcard, - json: accepts_json, - multipart_defer: accepts_multipart_defer, - multipart_subscription: accepts_multipart_subscription, - } = context - .extensions() - .with_lock(|lock| lock.get().cloned()) - .unwrap_or_default(); - - if !response_body.has_next.unwrap_or(false) - && !response_body.subscribed.unwrap_or(false) - && (accepts_json || accepts_wildcard) + // TODO ensure free plan is captured + if !response_body.errors.is_empty() { + count_operation_errors(&response_body.errors, &context, &errors_config); + } + if let Some(value_completion) = response_body + .extensions + .get(EXTENSIONS_VALUE_COMPLETION_KEY) { - // TODO ensure free plan is captured - if !response_body.errors.is_empty() { - count_operation_errors(&response_body.errors, &context, &errors_config); - } - if let Some(value_completion) = response_body - .extensions - .get(EXTENSIONS_VALUE_COMPLETION_KEY) - { - if let Some(vc_array) = value_completion.as_array() { - let errors: Vec = vc_array - .iter() - .filter_map(graphql::Error::from_value_completion_value) - .collect(); - count_operation_errors(&errors, &context, &errors_config); - } - } - } else if accepts_multipart_defer || accepts_multipart_subscription { - // TODO can we combine this with above? 
- if !response_body.errors.is_empty() { - count_operation_errors(&response_body.errors, &context, &errors_config); + if let Some(vc_array) = value_completion.as_array() { + // We only count these in the supergraph layer to avoid double counting + let errors: Vec = vc_array + .iter() + .filter_map(graphql::Error::from_value_completion_value) + .collect(); + count_operation_errors(&errors, &context, &errors_config); } } + // Refresh context with the most up-to-date list of errors let _ = context.insert(COUNTED_ERRORS, to_set(&response_body.errors)); }); @@ -182,9 +163,6 @@ fn count_operation_errors( context: &Context, errors_config: &ErrorsConfiguration, ) { - let _id_str = errors[0].apollo_id().to_string(); // TODO DEBUG REMOVE - let _msg_str = errors[0].message.clone(); // TODO DEBUG REMOVE - let previously_counted_errors_map: HashSet = unwrap_from_context(context, COUNTED_ERRORS); let mut operation_id: String = unwrap_from_context(context, APOLLO_OPERATION_ID); diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 16e9c27e94..1b183a84fc 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -2046,12 +2046,14 @@ mod tests { use std::sync::atomic::Ordering; use axum_extra::headers::HeaderName; - use http::HeaderMap; + use http::{HeaderMap, Method, Uri}; use http::HeaderValue; use http::StatusCode; use http::header::CONTENT_TYPE; use insta::assert_snapshot; use itertools::Itertools; + use mime::APPLICATION_JSON; + use opentelemetry::KeyValue; use opentelemetry::propagation::Injector; use opentelemetry::propagation::TextMapPropagator; use opentelemetry::trace::SpanContext; @@ -2067,17 +2069,18 @@ mod tests { use tower::ServiceExt; use tower::util::BoxService; - use super::CustomTraceIdPropagator; + use super::{CustomTraceIdPropagator, CLIENT_NAME, CLIENT_VERSION}; use super::EnabledFeatures; use super::Telemetry; use super::apollo::ForwardHeaders; use 
crate::error::FetchError; - use crate::graphql; + use crate::{graphql, Configuration, Context, TestHarness}; + use crate::context::{OPERATION_KIND, OPERATION_NAME}; use crate::graphql::Error; use crate::graphql::IntoGraphQLErrors; use crate::graphql::Request; use crate::http_ext; - use crate::json_ext::Object; + use crate::json_ext::{Object, Path}; use crate::metrics::FutureMetricsExt; use crate::plugin::DynPlugin; use crate::plugin::PluginInit; @@ -2091,13 +2094,17 @@ mod tests { use crate::plugins::demand_control::DemandControlError; use crate::plugins::telemetry::EnableSubgraphFtv1; use crate::plugins::telemetry::config::TraceIdFormat; - use crate::services::RouterRequest; + use crate::plugins::test::PluginTestHarness; + use crate::query_planner::APOLLO_OPERATION_ID; + use crate::services::{supergraph, RouterRequest}; use crate::services::RouterResponse; use crate::services::SubgraphRequest; use crate::services::SubgraphResponse; use crate::services::SupergraphRequest; use crate::services::SupergraphResponse; use crate::services::router; + use crate::services::router::service::from_supergraph_mock_callback_and_configuration; + use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; macro_rules! 
assert_prometheus_metrics { ($plugin:expr) => {{ @@ -3372,4 +3379,163 @@ mod tests { .with_metrics() .await; } + + #[tokio::test] + async fn test_operation_errors_emitted_when_config_is_enabled() { + async { + let query = "query operationName { __typename }"; + let operation_name = "operationName"; + let operation_type = "query"; + let operation_id = "opId"; + let client_name = "client"; + let client_version = "version"; + + let config = json!({ + "telemetry":{ + "apollo": { + "errors": { + "preview_extended_error_metrics": "enabled", + "subgraph": { + "subgraphs": { + "myIgnoredSubgraph": { + "send": false, + } + } + } + } + } + } + }).to_string(); + + let test_harness: PluginTestHarness = PluginTestHarness::builder() + .config(&config) + .build() + .await + .expect("test harness"); + + let router_service = test_harness + .supergraph_service(|req| async { + let example_response = graphql::Response::builder() + .data(json!({"data": null})) + .extension(EXTENSIONS_VALUE_COMPLETION_KEY, json!([{ + "message": "Cannot return null for non-nullable field SomeType.someField", + "path": Path::from("someType/someField") + }])) + .errors(vec![ + graphql::Error::builder() + .message("some error") + .extension_code("SOME_ERROR_CODE") + .extension("service", "mySubgraph") + .path(Path::from("obj/field")) + .build(), + graphql::Error::builder() + .message("some other error") + .extension_code("SOME_OTHER_ERROR_CODE") + .extension("service", "myOtherSubgraph") + .path(Path::from("obj/arr/@/firstElementField")) + .build(), + graphql::Error::builder() + .message("some ignored error") + .extension_code("SOME_IGNORED_ERROR_CODE") + .extension("service", "myIgnoredSubgraph") + .path(Path::from("obj/arr/@/firstElementField")) + .build(), + ]) + .build(); + Ok(SupergraphResponse::new_from_graphql_response( + example_response, + req.context, + )) + }); + + let context = Context::new(); + context.insert_json_value(APOLLO_OPERATION_ID, operation_id.into()); + 
context.insert_json_value(OPERATION_NAME, operation_name.into()); + context.insert_json_value(OPERATION_KIND, operation_type.into()); + context.insert_json_value(CLIENT_NAME, client_name.into()); + context.insert_json_value(CLIENT_VERSION, client_version.into()); + + let post_request = supergraph::Request::builder() + .query(query) + .operation_name(operation_name) + .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) + .uri(Uri::from_static("/")) + .method(Method::POST) + .context(context) + .build() + .unwrap(); + + router_service + .call(post_request.try_into().unwrap()) + .await + .unwrap(); + + assert_counter!( + "apollo.router.operations.error", + 1, + &[ + KeyValue::new("apollo.operation.id", operation_id), + KeyValue::new("graphql.operation.name", operation_name), + KeyValue::new("graphql.operation.type", operation_type), + KeyValue::new("apollo.client.name", client_name), + KeyValue::new("apollo.client.version", client_version), + KeyValue::new("graphql.error.extensions.code", "SOME_ERROR_CODE"), + KeyValue::new("graphql.error.extensions.severity", "ERROR"), + KeyValue::new("graphql.error.path", "/obj/field"), + KeyValue::new("apollo.router.error.service", "mySubgraph"), + ] + ); + assert_counter!( + "apollo.router.operations.error", + 1, + &[ + KeyValue::new("apollo.operation.id", operation_id), + KeyValue::new("graphql.operation.name", operation_name), + KeyValue::new("graphql.operation.type", operation_type), + KeyValue::new("apollo.client.name", client_name), + KeyValue::new("apollo.client.version", client_version), + KeyValue::new("graphql.error.extensions.code", "SOME_OTHER_ERROR_CODE"), + KeyValue::new("graphql.error.extensions.severity", "ERROR"), + KeyValue::new("graphql.error.path", "/obj/arr/@/firstElementField"), + KeyValue::new("apollo.router.error.service", "myOtherSubgraph"), + ] + ); + assert_counter!( + "apollo.router.operations.error", + 1, + &[ + KeyValue::new("apollo.operation.id", operation_id), + 
KeyValue::new("graphql.operation.name", operation_name), + KeyValue::new("graphql.operation.type", operation_type), + KeyValue::new("apollo.client.name", client_name), + KeyValue::new("apollo.client.version", client_version), + KeyValue::new( + "graphql.error.extensions.code", + "RESPONSE_VALIDATION_FAILED" + ), + KeyValue::new("graphql.error.extensions.severity", "WARN"), + KeyValue::new("graphql.error.path", "/someType/someField"), + KeyValue::new("apollo.router.error.service", ""), + ] + ); + assert_counter_not_exists!( + "apollo.router.operations.error", + u64, + &[ + KeyValue::new("apollo.operation.id", operation_id), + KeyValue::new("graphql.operation.name", operation_name), + KeyValue::new("graphql.operation.type", operation_type), + KeyValue::new("apollo.client.name", client_name), + KeyValue::new("apollo.client.version", client_version), + KeyValue::new("graphql.error.extensions.code", "SOME_IGNORED_ERROR_CODE"), + KeyValue::new("graphql.error.extensions.severity", "ERROR"), + KeyValue::new("graphql.error.path", "/obj/arr/@/firstElementField"), + KeyValue::new("apollo.router.error.service", "myIgnoredSubgraph"), + ] + ); + } + .with_metrics() + .await; + } + } diff --git a/apollo-router/src/services/router/tests.rs b/apollo-router/src/services/router/tests.rs index 16b29a4d4b..ac0e747842 100644 --- a/apollo-router/src/services/router/tests.rs +++ b/apollo-router/src/services/router/tests.rs @@ -12,22 +12,27 @@ use serde_json_bytes::json; use tower::ServiceExt; use tower_service::Service; -use crate::Configuration; +use crate::{Configuration, TestHarness}; use crate::Context; use crate::context::OPERATION_KIND; use crate::context::OPERATION_NAME; use crate::graphql; use crate::json_ext::Path; -use crate::metrics::FutureMetricsExt; +use crate::metrics::{meter_provider, FutureMetricsExt}; +use crate::metrics::test_utils::Metrics; +use crate::plugin::{PluginInit, PluginPrivate}; +use crate::plugin::test::MockSupergraphService; use 
crate::plugins::content_negotiation::MULTIPART_DEFER_CONTENT_TYPE_HEADER_VALUE; -use crate::plugins::telemetry::CLIENT_NAME; +use crate::plugins::telemetry::{Telemetry, CLIENT_NAME}; use crate::plugins::telemetry::CLIENT_VERSION; use crate::query_planner::APOLLO_OPERATION_ID; -use crate::services::SupergraphRequest; +use crate::services::layers::query_analysis::QueryAnalysisLayer; +use crate::services::{HasSchema, SupergraphRequest}; +use crate::services::layers::persisted_queries::PersistedQueryLayer; use crate::services::SupergraphResponse; use crate::services::router; use crate::services::router::body::RouterBody; -use crate::services::router::service::from_supergraph_mock_callback; +use crate::services::router::service::{from_supergraph_mock_callback, RouterCreator}; use crate::services::router::service::from_supergraph_mock_callback_and_configuration; use crate::services::subgraph; use crate::services::supergraph; @@ -566,166 +571,6 @@ async fn escaped_quotes_in_string_literal() { assert!(subgraph_query.contains(r#"reviewsForAuthor(authorID: "\"1\"")"#)); } -#[tokio::test] -async fn it_stores_operation_error_when_config_is_enabled() { - async { - let query = "query operationName { __typename }"; - let operation_name = "operationName"; - let operation_type = "query"; - let operation_id = "opId"; - let client_name = "client"; - let client_version = "version"; - - let mut config = Configuration::default(); - config.apollo_plugins.plugins.insert( - "telemetry".to_string(), - serde_json::json!({ - "apollo": { - "errors": { - "preview_extended_error_metrics": "enabled", - "subgraph": { - "subgraphs": { - "myIgnoredSubgraph": { - "send": false, - } - } - } - } - } - }), - ); - - let mut router_service = from_supergraph_mock_callback_and_configuration( - move |req| { - let example_response = graphql::Response::builder() - .data(json!({"data": null})) - .extension(EXTENSIONS_VALUE_COMPLETION_KEY, json!([{ - "message": "Cannot return null for non-nullable field 
SomeType.someField", - "path": Path::from("someType/someField") - }])) - .errors(vec![ - graphql::Error::builder() - .message("some error") - .extension_code("SOME_ERROR_CODE") - .extension("service", "mySubgraph") - .path(Path::from("obj/field")) - .build(), - graphql::Error::builder() - .message("some other error") - .extension_code("SOME_OTHER_ERROR_CODE") - .extension("service", "myOtherSubgraph") - .path(Path::from("obj/arr/@/firstElementField")) - .build(), - graphql::Error::builder() - .message("some ignored error") - .extension_code("SOME_IGNORED_ERROR_CODE") - .extension("service", "myIgnoredSubgraph") - .path(Path::from("obj/arr/@/firstElementField")) - .build(), - ]) - .build(); - Ok(SupergraphResponse::new_from_graphql_response( - example_response, - req.context, - )) - }, - Arc::new(config), - ) - .await; - - let context = Context::new(); - context.insert_json_value(APOLLO_OPERATION_ID, operation_id.into()); - context.insert_json_value(OPERATION_NAME, operation_name.into()); - context.insert_json_value(OPERATION_KIND, query.into()); - context.insert_json_value(CLIENT_NAME, client_name.into()); - context.insert_json_value(CLIENT_VERSION, client_version.into()); - - let post_request = supergraph::Request::builder() - .query(query) - .operation_name(operation_name) - .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) - .uri(Uri::from_static("/")) - .method(Method::POST) - .context(context) - .build() - .unwrap(); - - router_service - .ready() - .await - .unwrap() - .call(post_request.try_into().unwrap()) - .await - .unwrap(); - - assert_counter!( - "apollo.router.operations.error", - 1, - &[ - KeyValue::new("apollo.operation.id", operation_id), - KeyValue::new("graphql.operation.name", operation_name), - KeyValue::new("graphql.operation.type", operation_type), - KeyValue::new("apollo.client.name", client_name), - KeyValue::new("apollo.client.version", client_version), - KeyValue::new("graphql.error.extensions.code", "SOME_ERROR_CODE"), - 
KeyValue::new("graphql.error.extensions.severity", "ERROR"), - KeyValue::new("graphql.error.path", "/obj/field"), - KeyValue::new("apollo.router.error.service", "mySubgraph"), - ] - ); - assert_counter!( - "apollo.router.operations.error", - 1, - &[ - KeyValue::new("apollo.operation.id", operation_id), - KeyValue::new("graphql.operation.name", operation_name), - KeyValue::new("graphql.operation.type", operation_type), - KeyValue::new("apollo.client.name", client_name), - KeyValue::new("apollo.client.version", client_version), - KeyValue::new("graphql.error.extensions.code", "SOME_OTHER_ERROR_CODE"), - KeyValue::new("graphql.error.extensions.severity", "ERROR"), - KeyValue::new("graphql.error.path", "/obj/arr/@/firstElementField"), - KeyValue::new("apollo.router.error.service", "myOtherSubgraph"), - ] - ); - assert_counter!( - "apollo.router.operations.error", - 1, - &[ - KeyValue::new("apollo.operation.id", operation_id), - KeyValue::new("graphql.operation.name", operation_name), - KeyValue::new("graphql.operation.type", operation_type), - KeyValue::new("apollo.client.name", client_name), - KeyValue::new("apollo.client.version", client_version), - KeyValue::new( - "graphql.error.extensions.code", - "RESPONSE_VALIDATION_FAILED" - ), - KeyValue::new("graphql.error.extensions.severity", "WARN"), - KeyValue::new("graphql.error.path", "/someType/someField"), - KeyValue::new("apollo.router.error.service", ""), - ] - ); - assert_counter_not_exists!( - "apollo.router.operations.error", - u64, - &[ - KeyValue::new("apollo.operation.id", operation_id), - KeyValue::new("graphql.operation.name", operation_name), - KeyValue::new("graphql.operation.type", operation_type), - KeyValue::new("apollo.client.name", client_name), - KeyValue::new("apollo.client.version", client_version), - KeyValue::new("graphql.error.extensions.code", "SOME_IGNORED_ERROR_CODE"), - KeyValue::new("graphql.error.extensions.severity", "ERROR"), - KeyValue::new("graphql.error.path", 
"/obj/arr/@/firstElementField"), - KeyValue::new("apollo.router.error.service", "myIgnoredSubgraph"), - ] - ); - } - .with_metrics() - .await; -} - #[tokio::test] async fn it_processes_a_valid_query_batch_with_maximum_size() { let expected_response: serde_json::Value = serde_json::from_str(include_str!( From b58dc6b66595cb5eeb301fc70f360e51cc4c7f1e Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Fri, 13 Jun 2025 11:19:38 -0400 Subject: [PATCH 40/46] lint --- apollo-router/src/graphql/mod.rs | 8 +- apollo-router/src/metrics/mod.rs | 6 +- .../src/plugins/license_enforcement/mod.rs | 56 +++--- .../src/plugins/telemetry/error_counter.rs | 1 - apollo-router/src/plugins/telemetry/mod.rs | 184 +++++++++--------- apollo-router/src/services/router/tests.rs | 21 +- 6 files changed, 131 insertions(+), 145 deletions(-) diff --git a/apollo-router/src/graphql/mod.rs b/apollo-router/src/graphql/mod.rs index 186f52c080..85470ee82b 100644 --- a/apollo-router/src/graphql/mod.rs +++ b/apollo-router/src/graphql/mod.rs @@ -230,12 +230,8 @@ impl Error { }); Some(Self::new( - message, - locations, - path, - None, - extensions, - None // apollo_id is not serialized, so it will never exist in a serialized vc error + message, locations, path, None, extensions, + None, // apollo_id is not serialized, so it will never exist in a serialized vc error )) } diff --git a/apollo-router/src/metrics/mod.rs b/apollo-router/src/metrics/mod.rs index 13c81f2846..569ad5dcfe 100644 --- a/apollo-router/src/metrics/mod.rs +++ b/apollo-router/src/metrics/mod.rs @@ -1815,7 +1815,7 @@ mod test { async move { u64_counter!("apollo.router.test", "metric", 2); } - .with_current_meter_provider(), + .with_current_meter_provider(), ); // Wait for the spawned task to complete @@ -1824,7 +1824,7 @@ mod test { // The metric should now be 3 since both tasks contributed assert_counter!("apollo.router.test", 3); } - .with_metrics() - .await; + .with_metrics() + .await; } } diff --git 
a/apollo-router/src/plugins/license_enforcement/mod.rs b/apollo-router/src/plugins/license_enforcement/mod.rs index 029fe1771d..7287fbb0f5 100644 --- a/apollo-router/src/plugins/license_enforcement/mod.rs +++ b/apollo-router/src/plugins/license_enforcement/mod.rs @@ -108,17 +108,15 @@ register_private_plugin!("apollo", "license_enforcement", LicenseEnforcement); #[cfg(test)] mod test { - use serde_json::{json, Value}; + use serde_json::Value; use tower_service::Service; - use tracing_subscriber::filter::FilterExt; + use super::*; - use crate::metrics::{meter_provider, FutureMetricsExt}; + use crate::TestHarness; + use crate::metrics::FutureMetricsExt; use crate::plugin::test::MockRouterService; - use crate::plugins::telemetry::apollo_exporter::Sender; use crate::plugins::telemetry::Telemetry; - use crate::plugins::test::{FakeDefault, PluginTestHarness, RequestTestExt}; - use crate::services::supergraph; - use crate::{Context, TestHarness, _private}; + use crate::plugins::test::PluginTestHarness; use crate::uplink::license_enforcement::LicenseLimits; use crate::uplink::license_enforcement::LicenseState; use crate::uplink::license_enforcement::TpsLimit; @@ -198,10 +196,13 @@ mod test { PluginInit::fake_builder() .config(LicenseEnforcementConfig {}) .license(license) - .build() - ).await.expect("license plugin"); + .build(), + ) + .await + .expect("license plugin"); - let full_config = serde_yaml::from_str::(r#" + let full_config = serde_yaml::from_str::( + r#" telemetry: apollo: endpoint: "http://example.com" @@ -209,7 +210,9 @@ mod test { client_version_header: "version_header" buffer_size: 10000 schema_id: "schema_sha" - "#).unwrap(); + "#, + ) + .unwrap(); let telemetry_config = full_config .as_object() @@ -223,16 +226,12 @@ mod test { .build() .with_deserialized_config() .expect("unable to deserialize telemetry config"); - let telemetry_plugin = Telemetry::new(init) - .await - .expect("telemetry plugin"); + let telemetry_plugin = 
Telemetry::new(init).await.expect("telemetry plugin"); let mut router_service = MockRouterService::new(); router_service.expect_clone().return_once(move || { let mut mock_service = test::MockRouterService::new(); - mock_service.expect_call() - .times(2) - .returning(move |_| { + mock_service.expect_call().times(2).returning(move |_| { // TODO do we need the async wait? Ok(router::Response::fake_builder() .data(serde_json::json!({"data": {"field": "value"}})) @@ -240,8 +239,8 @@ mod test { .build() .unwrap()) }); - mock_service - }); + mock_service + }); let mut router_service = TestHarness::builder() .extra_private_plugin(license_plugin) @@ -257,10 +256,11 @@ mod test { .ready() .await .unwrap() - .call(router::Request::fake_builder() - .header("content-type", "application/json") - .build() - .unwrap() + .call( + router::Request::fake_builder() + .header("content-type", "application/json") + .build() + .unwrap(), ) .await .unwrap() @@ -271,10 +271,11 @@ mod test { .ready() .await .unwrap() - .call(router::Request::fake_builder() - .header("content-type", "application/json") - .build() - .unwrap() + .call( + router::Request::fake_builder() + .header("content-type", "application/json") + .build() + .unwrap(), ) .await .unwrap() @@ -282,7 +283,6 @@ mod test { .await .unwrap(); - // THEN // * we get a metric from the telemetry plugin saying the tps limit was enforced assert_counter!( diff --git a/apollo-router/src/plugins/telemetry/error_counter.rs b/apollo-router/src/plugins/telemetry/error_counter.rs index ff93394984..e1f4f75020 100644 --- a/apollo-router/src/plugins/telemetry/error_counter.rs +++ b/apollo-router/src/plugins/telemetry/error_counter.rs @@ -15,7 +15,6 @@ use crate::context::OPERATION_NAME; use crate::context::ROUTER_RESPONSE_ERRORS; use crate::graphql; use crate::graphql::Error; -use crate::plugins::content_negotiation::ClientRequestAccepts; use crate::plugins::telemetry::CLIENT_NAME; use crate::plugins::telemetry::CLIENT_VERSION; use 
crate::plugins::telemetry::apollo::ErrorsConfiguration; diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 1b183a84fc..d036becffb 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -2046,9 +2046,11 @@ mod tests { use std::sync::atomic::Ordering; use axum_extra::headers::HeaderName; - use http::{HeaderMap, Method, Uri}; + use http::HeaderMap; use http::HeaderValue; + use http::Method; use http::StatusCode; + use http::Uri; use http::header::CONTENT_TYPE; use insta::assert_snapshot; use itertools::Itertools; @@ -2069,18 +2071,23 @@ mod tests { use tower::ServiceExt; use tower::util::BoxService; - use super::{CustomTraceIdPropagator, CLIENT_NAME, CLIENT_VERSION}; + use super::CLIENT_NAME; + use super::CLIENT_VERSION; + use super::CustomTraceIdPropagator; use super::EnabledFeatures; use super::Telemetry; use super::apollo::ForwardHeaders; + use crate::Context; + use crate::context::OPERATION_KIND; + use crate::context::OPERATION_NAME; use crate::error::FetchError; - use crate::{graphql, Configuration, Context, TestHarness}; - use crate::context::{OPERATION_KIND, OPERATION_NAME}; + use crate::graphql; use crate::graphql::Error; use crate::graphql::IntoGraphQLErrors; use crate::graphql::Request; use crate::http_ext; - use crate::json_ext::{Object, Path}; + use crate::json_ext::Object; + use crate::json_ext::Path; use crate::metrics::FutureMetricsExt; use crate::plugin::DynPlugin; use crate::plugin::PluginInit; @@ -2096,14 +2103,14 @@ mod tests { use crate::plugins::telemetry::config::TraceIdFormat; use crate::plugins::test::PluginTestHarness; use crate::query_planner::APOLLO_OPERATION_ID; - use crate::services::{supergraph, RouterRequest}; + use crate::services::RouterRequest; use crate::services::RouterResponse; use crate::services::SubgraphRequest; use crate::services::SubgraphResponse; use crate::services::SupergraphRequest; use 
crate::services::SupergraphResponse; use crate::services::router; - use crate::services::router::service::from_supergraph_mock_callback_and_configuration; + use crate::services::supergraph; use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; macro_rules! assert_prometheus_metrics { @@ -3405,7 +3412,8 @@ mod tests { } } } - }).to_string(); + }) + .to_string(); let test_harness: PluginTestHarness = PluginTestHarness::builder() .config(&config) @@ -3413,9 +3421,8 @@ mod tests { .await .expect("test harness"); - let router_service = test_harness - .supergraph_service(|req| async { - let example_response = graphql::Response::builder() + let router_service = test_harness.supergraph_service(|req| async { + let example_response = graphql::Response::builder() .data(json!({"data": null})) .extension(EXTENSIONS_VALUE_COMPLETION_KEY, json!([{ "message": "Cannot return null for non-nullable field SomeType.someField", @@ -3442,11 +3449,11 @@ mod tests { .build(), ]) .build(); - Ok(SupergraphResponse::new_from_graphql_response( - example_response, - req.context, - )) - }); + Ok(SupergraphResponse::new_from_graphql_response( + example_response, + req.context, + )) + }); let context = Context::new(); context.insert_json_value(APOLLO_OPERATION_ID, operation_id.into()); @@ -3455,87 +3462,86 @@ mod tests { context.insert_json_value(CLIENT_NAME, client_name.into()); context.insert_json_value(CLIENT_VERSION, client_version.into()); - let post_request = supergraph::Request::builder() - .query(query) - .operation_name(operation_name) - .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) - .uri(Uri::from_static("/")) - .method(Method::POST) - .context(context) - .build() - .unwrap(); - router_service - .call(post_request.try_into().unwrap()) + .call( + supergraph::Request::builder() + .query(query) + .operation_name(operation_name) + .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) + .uri(Uri::from_static("/")) + .method(Method::POST) + .context(context) + .build() + .unwrap(), 
+ ) .await .unwrap(); assert_counter!( - "apollo.router.operations.error", - 1, - &[ - KeyValue::new("apollo.operation.id", operation_id), - KeyValue::new("graphql.operation.name", operation_name), - KeyValue::new("graphql.operation.type", operation_type), - KeyValue::new("apollo.client.name", client_name), - KeyValue::new("apollo.client.version", client_version), - KeyValue::new("graphql.error.extensions.code", "SOME_ERROR_CODE"), - KeyValue::new("graphql.error.extensions.severity", "ERROR"), - KeyValue::new("graphql.error.path", "/obj/field"), - KeyValue::new("apollo.router.error.service", "mySubgraph"), - ] - ); + "apollo.router.operations.error", + 1, + &[ + KeyValue::new("apollo.operation.id", operation_id), + KeyValue::new("graphql.operation.name", operation_name), + KeyValue::new("graphql.operation.type", operation_type), + KeyValue::new("apollo.client.name", client_name), + KeyValue::new("apollo.client.version", client_version), + KeyValue::new("graphql.error.extensions.code", "SOME_ERROR_CODE"), + KeyValue::new("graphql.error.extensions.severity", "ERROR"), + KeyValue::new("graphql.error.path", "/obj/field"), + KeyValue::new("apollo.router.error.service", "mySubgraph"), + ] + ); assert_counter!( - "apollo.router.operations.error", - 1, - &[ - KeyValue::new("apollo.operation.id", operation_id), - KeyValue::new("graphql.operation.name", operation_name), - KeyValue::new("graphql.operation.type", operation_type), - KeyValue::new("apollo.client.name", client_name), - KeyValue::new("apollo.client.version", client_version), - KeyValue::new("graphql.error.extensions.code", "SOME_OTHER_ERROR_CODE"), - KeyValue::new("graphql.error.extensions.severity", "ERROR"), - KeyValue::new("graphql.error.path", "/obj/arr/@/firstElementField"), - KeyValue::new("apollo.router.error.service", "myOtherSubgraph"), - ] - ); + "apollo.router.operations.error", + 1, + &[ + KeyValue::new("apollo.operation.id", operation_id), + KeyValue::new("graphql.operation.name", operation_name), + 
KeyValue::new("graphql.operation.type", operation_type), + KeyValue::new("apollo.client.name", client_name), + KeyValue::new("apollo.client.version", client_version), + KeyValue::new("graphql.error.extensions.code", "SOME_OTHER_ERROR_CODE"), + KeyValue::new("graphql.error.extensions.severity", "ERROR"), + KeyValue::new("graphql.error.path", "/obj/arr/@/firstElementField"), + KeyValue::new("apollo.router.error.service", "myOtherSubgraph"), + ] + ); assert_counter!( - "apollo.router.operations.error", - 1, - &[ - KeyValue::new("apollo.operation.id", operation_id), - KeyValue::new("graphql.operation.name", operation_name), - KeyValue::new("graphql.operation.type", operation_type), - KeyValue::new("apollo.client.name", client_name), - KeyValue::new("apollo.client.version", client_version), - KeyValue::new( - "graphql.error.extensions.code", - "RESPONSE_VALIDATION_FAILED" - ), - KeyValue::new("graphql.error.extensions.severity", "WARN"), - KeyValue::new("graphql.error.path", "/someType/someField"), - KeyValue::new("apollo.router.error.service", ""), - ] - ); + "apollo.router.operations.error", + 1, + &[ + KeyValue::new("apollo.operation.id", operation_id), + KeyValue::new("graphql.operation.name", operation_name), + KeyValue::new("graphql.operation.type", operation_type), + KeyValue::new("apollo.client.name", client_name), + KeyValue::new("apollo.client.version", client_version), + KeyValue::new( + "graphql.error.extensions.code", + "RESPONSE_VALIDATION_FAILED" + ), + KeyValue::new("graphql.error.extensions.severity", "WARN"), + KeyValue::new("graphql.error.path", "/someType/someField"), + KeyValue::new("apollo.router.error.service", ""), + ] + ); assert_counter_not_exists!( - "apollo.router.operations.error", - u64, - &[ - KeyValue::new("apollo.operation.id", operation_id), - KeyValue::new("graphql.operation.name", operation_name), - KeyValue::new("graphql.operation.type", operation_type), - KeyValue::new("apollo.client.name", client_name), - 
KeyValue::new("apollo.client.version", client_version), - KeyValue::new("graphql.error.extensions.code", "SOME_IGNORED_ERROR_CODE"), - KeyValue::new("graphql.error.extensions.severity", "ERROR"), - KeyValue::new("graphql.error.path", "/obj/arr/@/firstElementField"), - KeyValue::new("apollo.router.error.service", "myIgnoredSubgraph"), - ] - ); + "apollo.router.operations.error", + u64, + &[ + KeyValue::new("apollo.operation.id", operation_id), + KeyValue::new("graphql.operation.name", operation_name), + KeyValue::new("graphql.operation.type", operation_type), + KeyValue::new("apollo.client.name", client_name), + KeyValue::new("apollo.client.version", client_version), + KeyValue::new("graphql.error.extensions.code", "SOME_IGNORED_ERROR_CODE"), + KeyValue::new("graphql.error.extensions.severity", "ERROR"), + KeyValue::new("graphql.error.path", "/obj/arr/@/firstElementField"), + KeyValue::new("apollo.router.error.service", "myIgnoredSubgraph"), + ] + ); } - .with_metrics() - .await; + .with_metrics() + .await; } - } diff --git a/apollo-router/src/services/router/tests.rs b/apollo-router/src/services/router/tests.rs index ac0e747842..9308af92b5 100644 --- a/apollo-router/src/services/router/tests.rs +++ b/apollo-router/src/services/router/tests.rs @@ -6,37 +6,22 @@ use http::Request; use http::Uri; use http::header::CONTENT_TYPE; use mime::APPLICATION_JSON; -use opentelemetry::KeyValue; use parking_lot::Mutex; use serde_json_bytes::json; use tower::ServiceExt; use tower_service::Service; -use crate::{Configuration, TestHarness}; use crate::Context; -use crate::context::OPERATION_KIND; -use crate::context::OPERATION_NAME; use crate::graphql; -use crate::json_ext::Path; -use crate::metrics::{meter_provider, FutureMetricsExt}; -use crate::metrics::test_utils::Metrics; -use crate::plugin::{PluginInit, PluginPrivate}; -use crate::plugin::test::MockSupergraphService; +use crate::metrics::FutureMetricsExt; use 
crate::plugins::content_negotiation::MULTIPART_DEFER_CONTENT_TYPE_HEADER_VALUE; -use crate::plugins::telemetry::{Telemetry, CLIENT_NAME}; -use crate::plugins::telemetry::CLIENT_VERSION; -use crate::query_planner::APOLLO_OPERATION_ID; -use crate::services::layers::query_analysis::QueryAnalysisLayer; -use crate::services::{HasSchema, SupergraphRequest}; -use crate::services::layers::persisted_queries::PersistedQueryLayer; +use crate::services::SupergraphRequest; use crate::services::SupergraphResponse; use crate::services::router; use crate::services::router::body::RouterBody; -use crate::services::router::service::{from_supergraph_mock_callback, RouterCreator}; -use crate::services::router::service::from_supergraph_mock_callback_and_configuration; +use crate::services::router::service::from_supergraph_mock_callback; use crate::services::subgraph; use crate::services::supergraph; -use crate::spec::query::EXTENSIONS_VALUE_COMPLETION_KEY; use crate::test_harness::make_fake_batch; #[tokio::test] From 7ef58581b93bf8451fa552b40882456e09823c0e Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Fri, 13 Jun 2025 14:54:07 -0400 Subject: [PATCH 41/46] fix licesnse enforcement test --- .../src/plugins/license_enforcement/mod.rs | 121 ++++++------------ 1 file changed, 41 insertions(+), 80 deletions(-) diff --git a/apollo-router/src/plugins/license_enforcement/mod.rs b/apollo-router/src/plugins/license_enforcement/mod.rs index 7287fbb0f5..a9694a1836 100644 --- a/apollo-router/src/plugins/license_enforcement/mod.rs +++ b/apollo-router/src/plugins/license_enforcement/mod.rs @@ -72,12 +72,6 @@ impl PluginPrivate for LicenseEnforcement { match response { Ok(ok) => Ok(ok), Err(err) if err.is::() => { - u64_counter!( // TODO TEMP REMOVE - "apollo.router.graphql_error", - "Number of GraphQL error responses returned by the router", - 1, - code = "ROUTER_FREE_PLAN_RATE_LIMIT_REACHED" - ); let error = graphql::Error::builder() .message("Your request has been rate limited. 
You've reached the limits for the Free plan. Consider upgrading to a higher plan for increased limits.") .extension_code("ROUTER_FREE_PLAN_RATE_LIMIT_REACHED") @@ -108,13 +102,11 @@ register_private_plugin!("apollo", "license_enforcement", LicenseEnforcement); #[cfg(test)] mod test { - use serde_json::Value; - use tower_service::Service; + use std::sync::Arc; + use std::sync::Mutex; use super::*; - use crate::TestHarness; use crate::metrics::FutureMetricsExt; - use crate::plugin::test::MockRouterService; use crate::plugins::telemetry::Telemetry; use crate::plugins::test::PluginTestHarness; use crate::uplink::license_enforcement::LicenseLimits; @@ -177,7 +169,6 @@ mod test { } #[tokio::test] - // TODO CONVERT THIS INTO INTEGRATION TEST WITH free license plugin + telemetry async fn it_emits_metrics_when_tps_enforced() { async { // GIVEN @@ -192,85 +183,58 @@ mod test { }), }; - let license_plugin = LicenseEnforcement::new( - PluginInit::fake_builder() - .config(LicenseEnforcementConfig {}) - .license(license) - .build(), - ) - .await - .expect("license plugin"); - - let full_config = serde_yaml::from_str::( - r#" - telemetry: - apollo: - endpoint: "http://example.com" - client_name_header: "name_header" - client_version_header: "version_header" - buffer_size: 10000 - schema_id: "schema_sha" - "#, - ) - .unwrap(); - - let telemetry_config = full_config - .as_object() - .expect("must be an object") - .get("telemetry") - .expect("telemetry must be a root key"); - - let init = PluginInit::fake_builder() - .config(telemetry_config.clone()) - .full_config(full_config) + let license_service = PluginTestHarness::::builder() + .license(license) .build() - .with_deserialized_config() - .expect("unable to deserialize telemetry config"); - let telemetry_plugin = Telemetry::new(init).await.expect("telemetry plugin"); - - let mut router_service = MockRouterService::new(); - router_service.expect_clone().return_once(move || { - let mut mock_service = test::MockRouterService::new(); 
- mock_service.expect_call().times(2).returning(move |_| { - // TODO do we need the async wait? + .await + .unwrap() + .router_service(|req| async { Ok(router::Response::fake_builder() .data(serde_json::json!({"data": {"field": "value"}})) .header("x-custom-header", "test-value") + .context(req.context) .build() .unwrap()) }); - mock_service - }); - - let mut router_service = TestHarness::builder() - .extra_private_plugin(license_plugin) - .extra_private_plugin(telemetry_plugin) - .router_hook(move |_| router_service.clone().boxed()) - .build_router() - .await - .unwrap(); // WHEN // * two reqs happen - let _first_response = router_service - .ready() - .await - .unwrap() - .call( - router::Request::fake_builder() - .header("content-type", "application/json") - .build() - .unwrap(), + // * and the telemetry plugin receives the second response with errors to count + + let _first_response = license_service.call_default().await; + let license_plugin_error_response = license_service.call_default().await.unwrap(); + + // Put the error response in an arc and mutex so we can share it with telemetry threads + let slot = Arc::new(Mutex::new(Some(license_plugin_error_response))); + // We have to do a weird thing where we take the response from the license plugin and feed + // it as the mock response of the telemetry plugin so that telemetry plugin will count + // the errors. Ideally this would be done using a TestHarness, but using a "full" + // router with the Telemetry plugin will hit reload_metrics() on activation thus + // breaking async(){}.with_metrics() by shutting down its metrics provider. + // Ultimately this is the best way anyone could think of to simulate this scenario. 
+ let _telemetry_service = PluginTestHarness::::builder() + .config( + r#" + telemetry: + apollo: + endpoint: "http://example.com" + client_name_header: "name_header" + client_version_header: "version_header" + buffer_size: 10000 + "#, ) + .build() .await .unwrap() - .next_response() - .await - .unwrap(); - let _second_response = router_service - .ready() - .await - .unwrap() + .router_service(move |_req| { + let slot = Arc::clone(&slot); + async move { + // pull out our one error‐response + let mut guard = slot.lock().unwrap(); + let resp = guard.take().unwrap(); + Ok(resp) + } + }) .call( router::Request::fake_builder() .header("content-type", "application/json") @@ -278,9 +242,6 @@ mod test { .unwrap(), ) .await - .unwrap() - .next_response() - .await .unwrap(); // THEN From 127decc7e204a6a80e3d815a619f0dd7f927b644 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Fri, 13 Jun 2025 15:04:03 -0400 Subject: [PATCH 42/46] fix sugraph convert errors to gql test --- apollo-router/src/services/subgraph_service.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apollo-router/src/services/subgraph_service.rs b/apollo-router/src/services/subgraph_service.rs index 2488d773f5..3824743ddb 100644 --- a/apollo-router/src/services/subgraph_service.rs +++ b/apollo-router/src/services/subgraph_service.rs @@ -3367,7 +3367,7 @@ mod tests { .to_graphql_error(None) .with_null_id(), ) - .error(error) + .error(error.with_null_id()) .build(); assert_eq!(actual, expected); } From 8578da5a6145bc69cae7b7d391814856f1ac72f4 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Fri, 13 Jun 2025 15:16:18 -0400 Subject: [PATCH 43/46] carry over apollo id after removing _entities --- apollo-router/src/query_planner/fetch.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apollo-router/src/query_planner/fetch.rs b/apollo-router/src/query_planner/fetch.rs index 9468a62a04..6ddeb39922 100644 --- a/apollo-router/src/query_planner/fetch.rs +++ 
b/apollo-router/src/query_planner/fetch.rs @@ -373,6 +373,8 @@ impl FetchNode { .message(error.message.clone()) .and_extension_code(error.extension_code()) .extensions(error.extensions.clone()) + // re-use the original ID so we don't double count this error + .apollo_id(error.apollo_id()) .build(), ) } From 496c33caf793f82faa9a3d60ef85f833baa00f32 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Mon, 16 Jun 2025 10:38:25 -0400 Subject: [PATCH 44/46] fix some integration tests --- apollo-router/src/services/layers/apq.rs | 10 +--- apollo-router/tests/integration_tests.rs | 75 +++++++++++++++++------- 2 files changed, 56 insertions(+), 29 deletions(-) diff --git a/apollo-router/src/services/layers/apq.rs b/apollo-router/src/services/layers/apq.rs index 5d3baf754f..87eae59d3a 100644 --- a/apollo-router/src/services/layers/apq.rs +++ b/apollo-router/src/services/layers/apq.rs @@ -133,7 +133,6 @@ async fn apq_request( crate::error::Error::builder() .message("provided sha does not match query".to_string()) .locations(Default::default()) - .path(Path::default()) .extension_code("PERSISTED_QUERY_HASH_MISMATCH") .build(), ]; @@ -164,9 +163,7 @@ async fn apq_request( let errors = vec![ crate::error::Error::builder() .message("PersistedQueryNotFound".to_string()) - .locations(Default::default()) - .path(Path::default()) - .extension_code("PERSISTED_QUERY_NOT_FOUND") + .locations(Default::default()).extension_code("PERSISTED_QUERY_NOT_FOUND") .build(), ]; let res = SupergraphResponse::builder() @@ -217,7 +214,6 @@ async fn disabled_apq_request( crate::error::Error::builder() .message("PersistedQueryNotSupported".to_string()) .locations(Default::default()) - .path(Path::default()) .extension_code("PERSISTED_QUERY_NOT_SUPPORTED") .build(), ]; @@ -261,7 +257,6 @@ mod apq_tests { let expected_apq_miss_error = Error::builder() .message("PersistedQueryNotFound".to_string()) .locations(Default::default()) - .path(Path::default()) .extension_code("PERSISTED_QUERY_NOT_FOUND") 
.build(); @@ -384,7 +379,6 @@ mod apq_tests { let expected_apq_miss_error = Error::builder() .message("PersistedQueryNotFound".to_string()) .locations(Default::default()) - .path(Path::default()) .extension_code("PERSISTED_QUERY_NOT_FOUND") .build(); @@ -483,7 +477,6 @@ mod apq_tests { let expected_apq_insert_failed_error = Error::builder() .message("provided sha does not match query".to_string()) .locations(Default::default()) - .path(Path::default()) .extension_code("PERSISTED_QUERY_HASH_MISMATCH") .build(); assert_eq!( @@ -514,7 +507,6 @@ mod apq_tests { let expected_apq_miss_error = Error::builder() .message("PersistedQueryNotSupported".to_string()) .locations(Default::default()) - .path(Path::default()) .extension_code("PERSISTED_QUERY_NOT_SUPPORTED") .build(); diff --git a/apollo-router/tests/integration_tests.rs b/apollo-router/tests/integration_tests.rs index 55b16d9e36..191ab2f1a4 100644 --- a/apollo-router/tests/integration_tests.rs +++ b/apollo-router/tests/integration_tests.rs @@ -30,8 +30,11 @@ use parking_lot::Mutex; use serde_json_bytes::json; use tower::BoxError; use tower::ServiceExt; +use tower_http::follow_redirect::policy::PolicyExt; +use uuid::Uuid; use walkdir::DirEntry; use walkdir::WalkDir; +use apollo_router::graphql::{Error, Response}; mod integration; @@ -118,10 +121,6 @@ async fn simple_queries_should_not_work() { Please either specify a 'content-type' header \ (with a mime-type that is not one of application/x-www-form-urlencoded, multipart/form-data, text/plain) \ or provide one of the following headers: x-apollo-operation-name, apollo-require-preflight"; - let expected_error = graphql::Error::builder() - .message(message) - .extension_code("CSRF_ERROR") - .build(); let mut get_request: router::Request = supergraph::Request::builder() .query("{ topProducts { upc name reviews {id product { name } author { id name } } } }") @@ -145,6 +144,13 @@ async fn simple_queries_should_not_work() { let actual = query_with_router(router, 
get_request).await; + let expected_error = graphql::Error::builder() + .message(message) + .extension_code("CSRF_ERROR") + // Overwrite error ID to avoid comparing random Uuids + .apollo_id(actual.errors[0].apollo_id()) + .build(); + assert_eq!( 1, actual.errors.len(), @@ -179,6 +185,7 @@ async fn empty_posts_should_not_work() { .message(message) .extension_code("INVALID_GRAPHQL_REQUEST") .extensions(extensions_map) + .apollo_id(actual.errors[0].apollo_id()) .build(); assert_eq!(expected_error, actual.errors[0]); assert_eq!(registry.totals(), hashmap! {}); @@ -235,11 +242,6 @@ async fn service_errors_should_be_propagated() { let message = "Unknown operation named \"invalidOperationName\""; let mut extensions_map = serde_json_bytes::map::Map::new(); extensions_map.insert("code", "GRAPHQL_UNKNOWN_OPERATION_NAME".into()); - let expected_error = apollo_router::graphql::Error::builder() - .message(message) - .extensions(extensions_map) - .extension_code("VALIDATION_ERROR") - .build(); let request = supergraph::Request::fake_builder() .query(r#"{ topProducts { name } }"#) @@ -251,6 +253,15 @@ async fn service_errors_should_be_propagated() { let (actual, registry) = query_rust(request).await; + let expected_error = apollo_router::graphql::Error::builder() + .message(message) + .extensions(extensions_map) + .extension_code("VALIDATION_ERROR") + // Overwrite error ID to avoid comparing random Uuids + .apollo_id(actual.errors[0].apollo_id()) + .build(); + + assert_eq!(expected_error, actual.errors[0]); assert_eq!(registry.totals(), expected_service_hits); } @@ -339,11 +350,6 @@ async fn mutation_should_work_over_post() { async fn automated_persisted_queries() { let (router, registry) = setup_router_and_registry(serde_json::json!({})).await; - let expected_apq_miss_error = apollo_router::graphql::Error::builder() - .message("PersistedQueryNotFound") - .extension_code("PERSISTED_QUERY_NOT_FOUND") - .build(); - let persisted = json!({ "version" : 1u8, "sha256Hash" : 
"9d1474aa069127ff795d3412b11dfc1f1be0853aed7a54c4a619ee0b1725382e" @@ -361,6 +367,12 @@ async fn automated_persisted_queries() { let actual = query_with_router(router.clone(), apq_only_request.try_into().unwrap()).await; + let expected_apq_miss_error = apollo_router::graphql::Error::builder() + .message("PersistedQueryNotFound") + .extension_code("PERSISTED_QUERY_NOT_FOUND") + .apollo_id(actual.errors[0].apollo_id()) + .build(); + assert_eq!(expected_apq_miss_error, actual.errors[0]); assert_eq!(1, actual.errors.len()); assert_eq!(registry.totals(), expected_service_hits); @@ -473,6 +485,8 @@ async fn persisted_queries() { "Persisted query '{UNKNOWN_QUERY_ID}' not found in the persisted query list" )) .extension_code("PERSISTED_QUERY_NOT_IN_LIST") + // Overwrite error ID to avoid comparing random Uuids + .apollo_id(actual.errors[0].apollo_id()) .build() ] ); @@ -572,11 +586,15 @@ async fn missing_variables() { ) .unwrap(); - let mut expected = vec![ + let mut normalized_actual_errors = normalize_errors(response.errors); + normalized_actual_errors.sort_by_key(|e| e.message.clone()); + + let mut expected_errors = vec![ graphql::Error::builder() .message("missing variable `$missingVariable`: for required GraphQL type `Int!`") .extension_code("VALIDATION_INVALID_TYPE_VARIABLE") .extension("name", "missingVariable") + .apollo_id(Uuid::nil()) .build(), graphql::Error::builder() .message( @@ -584,11 +602,12 @@ async fn missing_variables() { ) .extension_code("VALIDATION_INVALID_TYPE_VARIABLE") .extension("name", "yetAnotherMissingVariable") + .apollo_id(Uuid::nil()) .build(), ]; - response.errors.sort_by_key(|e| e.message.clone()); - expected.sort_by_key(|e| e.message.clone()); - assert_eq!(response.errors, expected); + + expected_errors.sort_by_key(|e| e.message.clone()); + assert_eq!(normalized_actual_errors, expected_errors); } /// @@ -674,7 +693,8 @@ async fn input_object_variable_validation() { .next_response() .await .unwrap(); - 
insta::assert_debug_snapshot!(&response.errors, @r###" + let normalized_errors = normalize_errors(response.errors); + insta::assert_debug_snapshot!(normalized_errors, @r###" [ Error { message: "missing input value at `$x.coordinates[0].longitude`: for required GraphQL type `Float!`", @@ -688,6 +708,7 @@ async fn input_object_variable_validation() { "VALIDATION_INVALID_TYPE_VARIABLE", ), }, + apollo_id: 00000000-0000-0000-0000-000000000000, }, ] "###); @@ -695,9 +716,9 @@ async fn input_object_variable_validation() { const PARSER_LIMITS_TEST_QUERY: &str = r#"{ me { reviews { author { reviews { author { name } } } } } }"#; + const PARSER_LIMITS_TEST_QUERY_TOKEN_COUNT: usize = 36; const PARSER_LIMITS_TEST_QUERY_RECURSION: usize = 6; - #[tokio::test(flavor = "multi_thread")] async fn query_just_under_recursion_limit() { let config = serde_json::json!({ @@ -1521,3 +1542,17 @@ fn it_will_not_start_with_loose_file_permissions() { "Apollo key file permissions (0o777) are too permissive\n" ) } + +fn normalize_errors(errors: Vec) -> Vec { + let normalized_actual_errors: Vec<_> = errors.into_iter().map(|e| { + Error::builder() + // Overwrite error ID to avoid comparing random Uuids + .apollo_id(Uuid::nil()) + .message(e.message) + .locations(e.locations) + .and_path(e.path) + .extensions(e.extensions) + .build() + }).collect(); + normalized_actual_errors +} From 41439abf63ada88694771673bbecae4a170b0554 Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Mon, 16 Jun 2025 11:18:15 -0400 Subject: [PATCH 45/46] fix fleet detector test. 
Revert to parts builder --- apollo-router/src/plugins/fleet_detector.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/apollo-router/src/plugins/fleet_detector.rs b/apollo-router/src/plugins/fleet_detector.rs index 2867d30df3..b9c3d6400e 100644 --- a/apollo-router/src/plugins/fleet_detector.rs +++ b/apollo-router/src/plugins/fleet_detector.rs @@ -617,17 +617,14 @@ mod tests { .expect_call() .times(1) .returning(|req: router::Request| { - // making sure the request body is consumed - req.router_request.into_body(); - router::Response::error_builder() + router::Response::http_response_builder() .context(req.context) - .status_code(StatusCode::BAD_REQUEST) - .header("content-type", "application/json") - .error( - graphql::Error::builder() - .message("bad request") - .extension_code(StatusCode::BAD_REQUEST.to_string()) - .build(), + .response( http::Response::builder() + .status(StatusCode::BAD_REQUEST) + .header("content-type", "application/json") + // making sure the request body is consumed + .body(req.router_request.into_body()) + .unwrap() ) .build() }); @@ -649,7 +646,6 @@ mod tests { .unwrap(); // THEN operation size metrics should exist - // TODO check with fleet people to see if the value here actually matters assert_counter!("apollo.router.operations.request_size", 7, &[]); assert_counter!("apollo.router.operations.response_size", 7, &[]); } From b3c1c4e74f2484431d0a79933aa775ebaeeb91eb Mon Sep 17 00:00:00 2001 From: Ross Regitsky Date: Mon, 16 Jun 2025 11:19:11 -0400 Subject: [PATCH 46/46] lint --- apollo-router/src/plugins/fleet_detector.rs | 13 ++++----- apollo-router/src/services/layers/apq.rs | 4 +-- apollo-router/tests/integration_tests.rs | 30 ++++++++++----------- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/apollo-router/src/plugins/fleet_detector.rs b/apollo-router/src/plugins/fleet_detector.rs index b9c3d6400e..7d36fa15e6 100644 --- a/apollo-router/src/plugins/fleet_detector.rs +++ 
b/apollo-router/src/plugins/fleet_detector.rs @@ -619,12 +619,13 @@ mod tests { .returning(|req: router::Request| { router::Response::http_response_builder() .context(req.context) - .response( http::Response::builder() - .status(StatusCode::BAD_REQUEST) - .header("content-type", "application/json") - // making sure the request body is consumed - .body(req.router_request.into_body()) - .unwrap() + .response( + http::Response::builder() + .status(StatusCode::BAD_REQUEST) + .header("content-type", "application/json") + // making sure the request body is consumed + .body(req.router_request.into_body()) + .unwrap(), ) .build() }); diff --git a/apollo-router/src/services/layers/apq.rs b/apollo-router/src/services/layers/apq.rs index 87eae59d3a..11026060a3 100644 --- a/apollo-router/src/services/layers/apq.rs +++ b/apollo-router/src/services/layers/apq.rs @@ -12,7 +12,6 @@ use sha2::Digest; use sha2::Sha256; use crate::cache::DeduplicatingCache; -use crate::json_ext::Path; use crate::services::SupergraphRequest; use crate::services::SupergraphResponse; @@ -163,7 +162,8 @@ async fn apq_request( let errors = vec![ crate::error::Error::builder() .message("PersistedQueryNotFound".to_string()) - .locations(Default::default()).extension_code("PERSISTED_QUERY_NOT_FOUND") + .locations(Default::default()) + .extension_code("PERSISTED_QUERY_NOT_FOUND") .build(), ]; let res = SupergraphResponse::builder() diff --git a/apollo-router/tests/integration_tests.rs b/apollo-router/tests/integration_tests.rs index 2d0470bc32..f5c9136f82 100644 --- a/apollo-router/tests/integration_tests.rs +++ b/apollo-router/tests/integration_tests.rs @@ -11,6 +11,7 @@ use apollo_router::_private::create_test_service_factory_from_yaml; use apollo_router::Configuration; use apollo_router::Context; use apollo_router::graphql; +use apollo_router::graphql::Error; use apollo_router::plugin::Plugin; use apollo_router::plugin::PluginInit; use apollo_router::services::router; @@ -30,11 +31,9 @@ use 
parking_lot::Mutex; use serde_json_bytes::json; use tower::BoxError; use tower::ServiceExt; -use tower_http::follow_redirect::policy::PolicyExt; use uuid::Uuid; use walkdir::DirEntry; use walkdir::WalkDir; -use apollo_router::graphql::{Error, Response}; mod integration; @@ -261,7 +260,6 @@ async fn service_errors_should_be_propagated() { .apollo_id(actual.errors[0].apollo_id()) .build(); - assert_eq!(expected_error, actual.errors[0]); assert_eq!(registry.totals(), expected_service_hits); } @@ -575,7 +573,7 @@ async fn missing_variables() { assert_eq!(StatusCode::BAD_REQUEST, http_response.response.status()); - let mut response = serde_json::from_slice::( + let response = serde_json::from_slice::( http_response .next_response() .await @@ -1542,15 +1540,17 @@ fn it_will_not_start_with_loose_file_permissions() { } fn normalize_errors(errors: Vec) -> Vec { - let normalized_actual_errors: Vec<_> = errors.into_iter().map(|e| { - Error::builder() - // Overwrite error ID to avoid comparing random Uuids - .apollo_id(Uuid::nil()) - .message(e.message) - .locations(e.locations) - .and_path(e.path) - .extensions(e.extensions) - .build() - }).collect(); - normalized_actual_errors + errors + .into_iter() + .map(|e| { + Error::builder() + // Overwrite error ID to avoid comparing random Uuids + .apollo_id(Uuid::nil()) + .message(e.message) + .locations(e.locations) + .and_path(e.path) + .extensions(e.extensions) + .build() + }) + .collect() }