diff --git a/.changesets/fix_njm_pq_usage_by_body.md b/.changesets/fix_njm_pq_usage_by_body.md new file mode 100644 index 0000000000..c0e9db6e86 --- /dev/null +++ b/.changesets/fix_njm_pq_usage_by_body.md @@ -0,0 +1,5 @@ +### Report PQ usage when an operation is requested by safelisted operation body ([PR #8168](https://github.com/apollographql/router/pull/8168)) + +Previously we would only record PQ metrics for operations that were requested by the PQ ID. This change updates usage reporting so that we also report usage if the PQ operation is requested by the safelisted operation body. + +By [@bonnici](https://github.com/bonnici) in https://github.com/apollographql/router/pull/8168 \ No newline at end of file diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 2d7e310791..c4e565ec8f 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -150,7 +150,7 @@ use crate::services::SupergraphResponse; use crate::services::connector; use crate::services::execution; use crate::services::layers::apq::PERSISTED_QUERY_CACHE_HIT; -use crate::services::layers::persisted_queries::UsedQueryIdFromManifest; +use crate::services::layers::persisted_queries::RequestPersistedQueryId; use crate::services::router; use crate::services::subgraph; use crate::services::supergraph; @@ -1580,7 +1580,7 @@ impl Telemetry { let maybe_pq_id = context .extensions() - .with_lock(|lock| lock.get::().cloned()) + .with_lock(|lock| lock.get::().cloned()) .map(|u| u.pq_id); let usage_reporting = if let Some(pq_id) = maybe_pq_id { Arc::new(usage_reporting.with_pq_id(pq_id)) diff --git a/apollo-router/src/services/layers/persisted_queries/freeform_graphql_behavior.rs b/apollo-router/src/services/layers/persisted_queries/freeform_graphql_behavior.rs index 05c6e100f5..4c2d354cb8 100644 --- a/apollo-router/src/services/layers/persisted_queries/freeform_graphql_behavior.rs +++ b/apollo-router/src/services/layers/persisted_queries/freeform_graphql_behavior.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::HashMap; use apollo_compiler::ast; @@ -10,6 +10,7 @@ use crate::Configuration; pub(crate) struct FreeformGraphQLAction { pub(crate) should_allow: bool, pub(crate) should_log: bool, + pub(crate) pq_id: Option, } /// How the router should respond to requests that are not resolved as the IDs @@ -45,41 +46,49 @@ impl FreeformGraphQLBehavior { FreeformGraphQLBehavior::AllowAll { .. } => FreeformGraphQLAction { should_allow: true, should_log: false, + pq_id: None, }, // Note that this branch doesn't get called in practice, because we catch // DenyAll at an earlier phase with never_allows_freeform_graphql. FreeformGraphQLBehavior::DenyAll { log_unknown, .. } => FreeformGraphQLAction { should_allow: false, should_log: *log_unknown, + pq_id: None, }, FreeformGraphQLBehavior::AllowIfInSafelist { safelist, log_unknown, .. } => { - if safelist.is_allowed(ast) { + let pq_id = safelist.get_pq_id_for_body(ast); + if pq_id.is_some() { FreeformGraphQLAction { should_allow: true, should_log: false, + pq_id, } } else { FreeformGraphQLAction { should_allow: false, should_log: *log_unknown, + pq_id: None, } } } FreeformGraphQLBehavior::LogUnlessInSafelist { safelist, .. } => { + let pq_id = safelist.get_pq_id_for_body(ast); FreeformGraphQLAction { should_allow: true, - should_log: !safelist.is_allowed(ast), + should_log: pq_id.is_none(), + pq_id, } } } } } -/// The normalized bodies of all operations in the PQ manifest. +/// The normalized bodies of all operations in the PQ manifest. This is a map of +/// normalized body string to PQ operation ID (usually a hash of the operation body). /// /// Normalization currently consists of: /// - Sorting the top-level definitions (operation and fragment definitions) @@ -99,37 +108,39 @@ impl FreeformGraphQLBehavior { /// formatting. #[derive(Debug)] pub(crate) struct FreeformGraphQLSafelist { - normalized_bodies: HashSet, + normalized_bodies: HashMap, } impl FreeformGraphQLSafelist { pub(super) fn new(manifest: &PersistedQueryManifest) -> Self { let mut safelist = Self { - normalized_bodies: HashSet::new(), + normalized_bodies: HashMap::new(), }; - for body in manifest.values() { - safelist.insert_from_manifest(body); + for (key, body) in manifest.iter() { + safelist.insert_from_manifest(body, &key.operation_id); } safelist } - fn insert_from_manifest(&mut self, body_from_manifest: &str) { - self.normalized_bodies.insert( - self.normalize_body( - ast::Document::parse(body_from_manifest, "from_manifest") - .as_ref() - .map_err(|_| body_from_manifest), - ), + fn insert_from_manifest(&mut self, body_from_manifest: &str, operation_id: &str) { + let normalized_body = self.normalize_body( + ast::Document::parse(body_from_manifest, "from_manifest") + .as_ref() + .map_err(|_| body_from_manifest), ); + self.normalized_bodies + .insert(normalized_body, operation_id.to_string()); } - pub(super) fn is_allowed(&self, ast: Result<&ast::Document, &str>) -> bool { + pub(super) fn get_pq_id_for_body(&self, ast: Result<&ast::Document, &str>) -> Option { // Note: consider adding an LRU cache that caches this function's return // value based solely on body_from_request without needing to normalize // the body. - self.normalized_bodies.contains(&self.normalize_body(ast)) + self.normalized_bodies + .get(&self.normalize_body(ast)) + .cloned() } pub(super) fn normalize_body(&self, ast: Result<&ast::Document, &str>) -> String { @@ -229,7 +240,9 @@ mod tests { ])); let is_allowed = |body: &str| -> bool { - safelist.is_allowed(ast::Document::parse(body, "").as_ref().map_err(|_| body)) + safelist + .get_pq_id_for_body(ast::Document::parse(body, "").as_ref().map_err(|_| body)) + .is_some() }; // Precise string matches. diff --git a/apollo-router/src/services/layers/persisted_queries/mod.rs b/apollo-router/src/services/layers/persisted_queries/mod.rs index 3150ab7c1f..0eb75436cc 100644 --- a/apollo-router/src/services/layers/persisted_queries/mod.rs +++ b/apollo-router/src/services/layers/persisted_queries/mod.rs @@ -30,9 +30,13 @@ const PERSISTED_QUERIES_CLIENT_NAME_CONTEXT_KEY: &str = "apollo_persisted_querie const PERSISTED_QUERIES_SAFELIST_SKIP_ENFORCEMENT_CONTEXT_KEY: &str = "apollo_persisted_queries::safelist::skip_enforcement"; -/// Used to identify requests that were expanded from a persisted query ID +/// Marker type for request context to identify requests that were expanded from a persisted query +/// ID. +struct UsedQueryIdFromManifest; + +/// Stores the PQ ID for an operation expanded from a PQ ID or an operation that matches a PQ body in the manifest. #[derive(Clone)] -pub(crate) struct UsedQueryIdFromManifest { +pub(crate) struct RequestPersistedQueryId { pub(crate) pq_id: String, } @@ -178,13 +182,14 @@ impl PersistedQueryLayer { let body = request.supergraph_request.body_mut(); body.query = Some(persisted_query_body); body.extensions.remove("persistedQuery"); - // Record that we actually used our ID, so we can skip the - // safelist check later. - request.context.extensions().with_lock(|lock| { - lock.insert(UsedQueryIdFromManifest { + // Record that we actually used our ID, so we can skip the + // safelist check later. + lock.insert(UsedQueryIdFromManifest); + // Also store the actual PQ ID for usage reporting. + lock.insert(RequestPersistedQueryId { pq_id: persisted_query_id.into(), - }) + }); }); u64_counter!( "apollo.router.operations.persisted_queries", @@ -309,6 +314,15 @@ impl PersistedQueryLayer { true, )); } + + // Store PQ ID for reporting if it was used + if let Some(pq_id) = freeform_graphql_action.pq_id { + request + .context + .extensions() + .with_lock(|lock| lock.insert(RequestPersistedQueryId { pq_id })); + } + u64_counter!( "apollo.router.operations.persisted_queries", "Total requests with persisted queries enabled", diff --git a/apollo-router/tests/apollo_reports.rs b/apollo-router/tests/apollo_reports.rs index 999071ccde..54a3ac3ce5 100644 --- a/apollo-router/tests/apollo_reports.rs +++ b/apollo-router/tests/apollo_reports.rs @@ -806,3 +806,95 @@ async fn test_features_disabled() { .await; assert_report!(report); } + +#[tokio::test(flavor = "multi_thread")] +async fn test_persisted_query_by_id_stats() { + let request = supergraph::Request::fake_builder() + .extension( + "persistedQuery", + serde_json::json!({ + "version": 1, + "sha256Hash": "test_pq_id" + }), + ) + .build() + .unwrap(); + let req: router::Request = request.try_into().expect("could not convert request"); + let reports = Arc::new(Mutex::new(vec![])); + let report = get_metrics_report( + reports, + req, + false, + false, + Some(include_str!("fixtures/reports/pq_enabled.router.yaml")), + ) + .await; + + assert_report!(report); +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_persisted_query_by_safelist_body_stats() { + let request = supergraph::Request::fake_builder() + .query("query{topProducts{name}}") + .build() + .unwrap(); + let req: router::Request = request.try_into().expect("could not convert request"); + let reports = Arc::new(Mutex::new(vec![])); + let report = get_metrics_report( + reports, + req, + false, + false, + Some(include_str!("fixtures/reports/pq_enabled.router.yaml")), + ) + .await; + + assert_report!(report); +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_persisted_query_by_id_logging_only_stats() { + let request = supergraph::Request::fake_builder() + .extension( + "persistedQuery", + serde_json::json!({ + "version": 1, + "sha256Hash": "test_pq_id" + }), + ) + .build() + .unwrap(); + let req: router::Request = request.try_into().expect("could not convert request"); + let reports = Arc::new(Mutex::new(vec![])); + let report = get_metrics_report( + reports, + req, + false, + false, + Some(include_str!("fixtures/reports/pq_logging.router.yaml")), + ) + .await; + + assert_report!(report); +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_persisted_query_by_safelist_body_logging_pq_only_stats() { + let request = supergraph::Request::fake_builder() + .query("query{topProducts{name}}") + .build() + .unwrap(); + let req: router::Request = request.try_into().expect("could not convert request"); + let reports = Arc::new(Mutex::new(vec![])); + let report = get_metrics_report( + reports, + req, + false, + false, + Some(include_str!("fixtures/reports/pq_logging.router.yaml")), + ) + .await; + + assert_report!(report); +} diff --git a/apollo-router/tests/fixtures/reports/pq_enabled.router.yaml b/apollo-router/tests/fixtures/reports/pq_enabled.router.yaml new file mode 100644 index 0000000000..34963dcdad --- /dev/null +++ b/apollo-router/tests/fixtures/reports/pq_enabled.router.yaml @@ -0,0 +1,23 @@ +persisted_queries: + enabled: true + safelist: + enabled: true + require_id: false + local_manifests: + - tests/fixtures/reports/pq_manifest.json + +apq: + enabled: false + +telemetry: + apollo: + client_name_header: apollographql-client-name + client_version_header: apollographql-client-version + endpoint: ENDPOINT + batch_processor: + scheduled_delay: 10ms + experimental_local_field_metrics: false + experimental_otlp_endpoint: "http://127.0.0.1" + otlp_tracing_sampler: always_off + experimental_otlp_tracing_protocol: http + field_level_instrumentation_sampler: always_on diff --git a/apollo-router/tests/fixtures/reports/pq_logging.router.yaml b/apollo-router/tests/fixtures/reports/pq_logging.router.yaml new file mode 100644 index 0000000000..37d74cc55f --- /dev/null +++ b/apollo-router/tests/fixtures/reports/pq_logging.router.yaml @@ -0,0 +1,18 @@ +persisted_queries: + enabled: true + log_unknown: true + local_manifests: + - tests/fixtures/reports/pq_manifest.json + +telemetry: + apollo: + client_name_header: apollographql-client-name + client_version_header: apollographql-client-version + endpoint: ENDPOINT + batch_processor: + scheduled_delay: 10ms + experimental_local_field_metrics: false + experimental_otlp_endpoint: "http://127.0.0.1" + otlp_tracing_sampler: always_off + experimental_otlp_tracing_protocol: http + field_level_instrumentation_sampler: always_on diff --git a/apollo-router/tests/fixtures/reports/pq_manifest.json b/apollo-router/tests/fixtures/reports/pq_manifest.json new file mode 100644 index 0000000000..871c7c4589 --- /dev/null +++ b/apollo-router/tests/fixtures/reports/pq_manifest.json @@ -0,0 +1,10 @@ +{ + "format": "apollo-persisted-query-manifest", + "version": 1, + "operations": [ + { + "id": "test_pq_id", + "body": "query{topProducts{name}}" + } + ] +} \ No newline at end of file diff --git a/apollo-router/tests/snapshots/apollo_reports__persisted_query_by_id_logging_only_stats.snap b/apollo-router/tests/snapshots/apollo_reports__persisted_query_by_id_logging_only_stats.snap new file mode 100644 index 0000000000..fe2771f86d --- /dev/null +++ b/apollo-router/tests/snapshots/apollo_reports__persisted_query_by_id_logging_only_stats.snap @@ -0,0 +1,98 @@ +--- +source: apollo-router/tests/apollo_reports.rs +expression: report +--- +header: + graph_ref: test + hostname: "[hostname]" + agent_version: "[agent_version]" + service_version: "" + runtime_version: rust + uname: "[uname]" + executable_schema_id: "[executable_schema_id]" + agent_id: "[agent_id]" +traces_per_query: + "pq# 473719089c85d3c08b9eb559570ec69824bbec10": + trace: [] + stats_with_context: + - context: + client_name: "" + client_version: "" + operation_type: query + operation_subtype: "" + result: "" + client_library_name: "" + client_library_version: "" + query_latency_stats: + latency_count: "[latency_count]" + request_count: 1 + cache_hits: 0 + persisted_query_hits: 0 + persisted_query_misses: 0 + cache_latency_count: "[cache_latency_count]" + root_error_stats: + children: {} + errors_count: 0 + requests_with_errors_count: 0 + requests_with_errors_count: 0 + public_cache_ttl_count: "[public_cache_ttl_count]" + private_cache_ttl_count: "[private_cache_ttl_count]" + registered_operation_count: 0 + forbidden_operation_count: 0 + requests_without_field_instrumentation: 0 + per_type_stat: + Product: + per_field_stat: + name: + return_type: String + errors_count: 0 + observed_execution_count: 4 + estimated_execution_count: 4 + requests_with_errors_count: 0 + latency_count: "[latency_count]" + Query: + per_field_stat: + topProducts: + return_type: "[Product]" + errors_count: 0 + observed_execution_count: 1 + estimated_execution_count: 1 + requests_with_errors_count: 0 + latency_count: "[latency_count]" + extended_references: ~ + local_per_type_stat: {} + limits_stats: + strategy: "" + cost_estimated: + - 0 + max_cost_estimated: 0 + cost_actual: + - 0 + max_cost_actual: 0 + depth: 2 + height: 2 + alias_count: 0 + root_field_count: 1 + operation_count: 0 + referenced_fields_by_type: + Product: + field_names: + - name + is_interface: false + Query: + field_names: + - topProducts + is_interface: false + query_metadata: + name: "" + signature: "{topProducts{name}}" + pq_id: test_pq_id +end_time: "[end_time]" +operation_count: 0 +operation_count_by_type: + - type: query + subtype: "" + operation_count: 1 +traces_pre_aggregated: true +extended_references_enabled: true +router_features_enabled: [] diff --git a/apollo-router/tests/snapshots/apollo_reports__persisted_query_by_id_stats.snap b/apollo-router/tests/snapshots/apollo_reports__persisted_query_by_id_stats.snap new file mode 100644 index 0000000000..fe2771f86d --- /dev/null +++ b/apollo-router/tests/snapshots/apollo_reports__persisted_query_by_id_stats.snap @@ -0,0 +1,98 @@ +--- +source: apollo-router/tests/apollo_reports.rs +expression: report +--- +header: + graph_ref: test + hostname: "[hostname]" + agent_version: "[agent_version]" + service_version: "" + runtime_version: rust + uname: "[uname]" + executable_schema_id: "[executable_schema_id]" + agent_id: "[agent_id]" +traces_per_query: + "pq# 473719089c85d3c08b9eb559570ec69824bbec10": + trace: [] + stats_with_context: + - context: + client_name: "" + client_version: "" + operation_type: query + operation_subtype: "" + result: "" + client_library_name: "" + client_library_version: "" + query_latency_stats: + latency_count: "[latency_count]" + request_count: 1 + cache_hits: 0 + persisted_query_hits: 0 + persisted_query_misses: 0 + cache_latency_count: "[cache_latency_count]" + root_error_stats: + children: {} + errors_count: 0 + requests_with_errors_count: 0 + requests_with_errors_count: 0 + public_cache_ttl_count: "[public_cache_ttl_count]" + private_cache_ttl_count: "[private_cache_ttl_count]" + registered_operation_count: 0 + forbidden_operation_count: 0 + requests_without_field_instrumentation: 0 + per_type_stat: + Product: + per_field_stat: + name: + return_type: String + errors_count: 0 + observed_execution_count: 4 + estimated_execution_count: 4 + requests_with_errors_count: 0 + latency_count: "[latency_count]" + Query: + per_field_stat: + topProducts: + return_type: "[Product]" + errors_count: 0 + observed_execution_count: 1 + estimated_execution_count: 1 + requests_with_errors_count: 0 + latency_count: "[latency_count]" + extended_references: ~ + local_per_type_stat: {} + limits_stats: + strategy: "" + cost_estimated: + - 0 + max_cost_estimated: 0 + cost_actual: + - 0 + max_cost_actual: 0 + depth: 2 + height: 2 + alias_count: 0 + root_field_count: 1 + operation_count: 0 + referenced_fields_by_type: + Product: + field_names: + - name + is_interface: false + Query: + field_names: + - topProducts + is_interface: false + query_metadata: + name: "" + signature: "{topProducts{name}}" + pq_id: test_pq_id +end_time: "[end_time]" +operation_count: 0 +operation_count_by_type: + - type: query + subtype: "" + operation_count: 1 +traces_pre_aggregated: true +extended_references_enabled: true +router_features_enabled: [] diff --git a/apollo-router/tests/snapshots/apollo_reports__persisted_query_by_safelist_body_logging_pq_only_stats.snap b/apollo-router/tests/snapshots/apollo_reports__persisted_query_by_safelist_body_logging_pq_only_stats.snap new file mode 100644 index 0000000000..fe2771f86d --- /dev/null +++ b/apollo-router/tests/snapshots/apollo_reports__persisted_query_by_safelist_body_logging_pq_only_stats.snap @@ -0,0 +1,98 @@ +--- +source: apollo-router/tests/apollo_reports.rs +expression: report +--- +header: + graph_ref: test + hostname: "[hostname]" + agent_version: "[agent_version]" + service_version: "" + runtime_version: rust + uname: "[uname]" + executable_schema_id: "[executable_schema_id]" + agent_id: "[agent_id]" +traces_per_query: + "pq# 473719089c85d3c08b9eb559570ec69824bbec10": + trace: [] + stats_with_context: + - context: + client_name: "" + client_version: "" + operation_type: query + operation_subtype: "" + result: "" + client_library_name: "" + client_library_version: "" + query_latency_stats: + latency_count: "[latency_count]" + request_count: 1 + cache_hits: 0 + persisted_query_hits: 0 + persisted_query_misses: 0 + cache_latency_count: "[cache_latency_count]" + root_error_stats: + children: {} + errors_count: 0 + requests_with_errors_count: 0 + requests_with_errors_count: 0 + public_cache_ttl_count: "[public_cache_ttl_count]" + private_cache_ttl_count: "[private_cache_ttl_count]" + registered_operation_count: 0 + forbidden_operation_count: 0 + requests_without_field_instrumentation: 0 + per_type_stat: + Product: + per_field_stat: + name: + return_type: String + errors_count: 0 + observed_execution_count: 4 + estimated_execution_count: 4 + requests_with_errors_count: 0 + latency_count: "[latency_count]" + Query: + per_field_stat: + topProducts: + return_type: "[Product]" + errors_count: 0 + observed_execution_count: 1 + estimated_execution_count: 1 + requests_with_errors_count: 0 + latency_count: "[latency_count]" + extended_references: ~ + local_per_type_stat: {} + limits_stats: + strategy: "" + cost_estimated: + - 0 + max_cost_estimated: 0 + cost_actual: + - 0 + max_cost_actual: 0 + depth: 2 + height: 2 + alias_count: 0 + root_field_count: 1 + operation_count: 0 + referenced_fields_by_type: + Product: + field_names: + - name + is_interface: false + Query: + field_names: + - topProducts + is_interface: false + query_metadata: + name: "" + signature: "{topProducts{name}}" + pq_id: test_pq_id +end_time: "[end_time]" +operation_count: 0 +operation_count_by_type: + - type: query + subtype: "" + operation_count: 1 +traces_pre_aggregated: true +extended_references_enabled: true +router_features_enabled: [] diff --git a/apollo-router/tests/snapshots/apollo_reports__persisted_query_by_safelist_body_stats.snap b/apollo-router/tests/snapshots/apollo_reports__persisted_query_by_safelist_body_stats.snap new file mode 100644 index 0000000000..fe2771f86d --- /dev/null +++ b/apollo-router/tests/snapshots/apollo_reports__persisted_query_by_safelist_body_stats.snap @@ -0,0 +1,98 @@ +--- +source: apollo-router/tests/apollo_reports.rs +expression: report +--- +header: + graph_ref: test + hostname: "[hostname]" + agent_version: "[agent_version]" + service_version: "" + runtime_version: rust + uname: "[uname]" + executable_schema_id: "[executable_schema_id]" + agent_id: "[agent_id]" +traces_per_query: + "pq# 473719089c85d3c08b9eb559570ec69824bbec10": + trace: [] + stats_with_context: + - context: + client_name: "" + client_version: "" + operation_type: query + operation_subtype: "" + result: "" + client_library_name: "" + client_library_version: "" + query_latency_stats: + latency_count: "[latency_count]" + request_count: 1 + cache_hits: 0 + persisted_query_hits: 0 + persisted_query_misses: 0 + cache_latency_count: "[cache_latency_count]" + root_error_stats: + children: {} + errors_count: 0 + requests_with_errors_count: 0 + requests_with_errors_count: 0 + public_cache_ttl_count: "[public_cache_ttl_count]" + private_cache_ttl_count: "[private_cache_ttl_count]" + registered_operation_count: 0 + forbidden_operation_count: 0 + requests_without_field_instrumentation: 0 + per_type_stat: + Product: + per_field_stat: + name: + return_type: String + errors_count: 0 + observed_execution_count: 4 + estimated_execution_count: 4 + requests_with_errors_count: 0 + latency_count: "[latency_count]" + Query: + per_field_stat: + topProducts: + return_type: "[Product]" + errors_count: 0 + observed_execution_count: 1 + estimated_execution_count: 1 + requests_with_errors_count: 0 + latency_count: "[latency_count]" + extended_references: ~ + local_per_type_stat: {} + limits_stats: + strategy: "" + cost_estimated: + - 0 + max_cost_estimated: 0 + cost_actual: + - 0 + max_cost_actual: 0 + depth: 2 + height: 2 + alias_count: 0 + root_field_count: 1 + operation_count: 0 + referenced_fields_by_type: + Product: + field_names: + - name + is_interface: false + Query: + field_names: + - topProducts + is_interface: false + query_metadata: + name: "" + signature: "{topProducts{name}}" + pq_id: test_pq_id +end_time: "[end_time]" +operation_count: 0 +operation_count_by_type: + - type: query + subtype: "" + operation_count: 1 +traces_pre_aggregated: true +extended_references_enabled: true +router_features_enabled: []