Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .changesets/feat_zelda_feature_cooperative_cancellation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
## Cooperative Cancellation for Query Planning

This release introduces cooperative cancellation support for query planning operations. This feature allows the router
to gracefully handle query planning timeouts and cancellations, improving resource utilization.
Metrics are emitted for cooperative cancellation:

- Records the "outcome" of query planning on the `apollo.router.query_planning.plan.duration` metric.
- Records the "outcome" of query planning on the `query_planning` span.

### Example

Configuring a timeout in Measure Mode:
```yaml
supergraph:
query_planning:
experimental_cooperative_cancellation:
enabled: true
mode: measure
timeout: 1s
```

By [@Velfi](https://github.com/Velfi) in https://github.com/apollographql/router/pull/7604
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ dependencies = [
"rustls-pemfile",
"ryu",
"schemars",
"scopeguard",
"semver",
"serde",
"serde_derive_default",
Expand Down
11 changes: 9 additions & 2 deletions apollo-federation/src/query_plan/query_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ pub struct QueryPlannerConfig {
// Side-note: implemented as an object instead of single boolean because we expect to add more
// to this soon enough. In particular, once defer-passthrough to subgraphs is implemented, the
// idea would be to add a new `passthrough_subgraphs` option that is the list of subgraphs to
// which we can pass-through some @defer (and it would be empty by default). Similarly, once we
// which we can pass through some @defer (and it would be empty by default). Similarly, once we
// support @stream, grouping the options here will make sense too.
pub incremental_delivery: QueryPlanIncrementalDeliveryConfig,

Expand Down Expand Up @@ -185,7 +185,14 @@ pub struct QueryPlanOptions<'a> {
/// progressive @override feature.
// PORT_NOTE: In JS implementation this was a Map
pub override_conditions: Vec<String>,

/// An optional function that will be called to check if the query plan should be cancelled.
///
/// Cooperative cancellation occurs when the original client has abandoned the query.
/// When this happens, the query plan should be cancelled to free up resources.
///
/// This function should return `ControlFlow::Break` if the query plan should be cancelled.
///
/// Defaults to `None`.
pub check_for_cooperative_cancellation: Option<&'a dyn Fn() -> ControlFlow<()>>,
/// Impose a limit on the number of non-local selections, which can be a
/// performance hazard. On by default.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ fn it_handles_progressive_override_on_root_fields() {
"#,
QueryPlanOptions {
override_conditions: vec!["test".to_string()],
check_for_cooperative_cancellation: None,
..Default::default()
},
@r###"
Expand Down Expand Up @@ -120,7 +119,6 @@ fn it_handles_progressive_override_on_entity_fields() {
"#,
QueryPlanOptions {
override_conditions: vec!["test".to_string()],
check_for_cooperative_cancellation: None,
..Default::default()
},
@r###"
Expand Down Expand Up @@ -281,7 +279,6 @@ fn it_handles_progressive_override_on_nested_entity_fields() {
"#,
QueryPlanOptions {
override_conditions: vec!["test".to_string()],
check_for_cooperative_cancellation: None,
..Default::default()
},
@r###"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ fn it_overrides_to_s2_when_label_is_provided() {
"#,
QueryPlanOptions {
override_conditions: vec!["test".to_string()],
check_for_cooperative_cancellation: None,
..Default::default()
},
@r###"
Expand Down Expand Up @@ -160,7 +159,6 @@ fn it_overrides_f1_to_s3_when_label_is_provided() {
"#,
QueryPlanOptions {
override_conditions: vec!["test".to_string()],
check_for_cooperative_cancellation: None,
..Default::default()
},
@r###"
Expand Down
1 change: 1 addition & 0 deletions apollo-router/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ form_urlencoded = "1.2.1"
apollo-environment-detector = "0.1.0"
log = "0.4.22"
encoding_rs = "0.8.35"
scopeguard = "1.2.0"

[target.'cfg(macos)'.dependencies]
uname = "0.1.1"
Expand Down
74 changes: 74 additions & 0 deletions apollo-router/src/configuration/cooperative_cancellation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
use std::time::Duration;

use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;

use crate::configuration::mode::Mode;

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
#[serde(deny_unknown_fields, default)]
pub(crate) struct CooperativeCancellation {
/// When true, cooperative cancellation is enabled.
enabled: bool,
/// When enabled, this sets whether the router will cancel query planning or
/// merely emit a metric when it would have happened.
mode: Mode,
#[serde(deserialize_with = "humantime_serde::deserialize")]
#[serde(serialize_with = "humantime_serde::serialize")]
#[schemars(with = "Option<String>")]
/// Enable timeout for query planning.
timeout: Option<Duration>,
}

impl Default for CooperativeCancellation {
fn default() -> Self {
Self {
enabled: true,
mode: Mode::Measure,
timeout: None,
}
}
}

impl CooperativeCancellation {
/// Returns the timeout, if configured.
pub(crate) fn timeout(&self) -> Option<Duration> {
self.timeout
}

#[cfg(test)]
/// Create a new `CooperativeCancellation` config in enforcement mode.
pub(crate) fn enabled() -> Self {
Self {
enabled: true,
mode: Mode::Enforce,
timeout: None,
}
}

/// Returns true if cooperative cancellation is enabled.
pub(crate) fn is_enabled(&self) -> bool {
self.enabled
}

/// Returns true if this config is in measure mode.
pub(crate) fn is_measure_mode(&self) -> bool {
self.mode.is_measure_mode()
}

/// Returns true if this config is in enforce mode.
pub(crate) fn is_enforce_mode(&self) -> bool {
self.mode.is_enforce_mode()
}

#[cfg(test)]
/// Create a new `CooperativeCancellation` config in enforcement mode with a timeout.
pub(crate) fn enabled_with_timeout(timeout: Duration) -> Self {
Self {
enabled: true,
mode: Mode::Enforce,
timeout: Some(timeout),
}
}
}
36 changes: 34 additions & 2 deletions apollo-router/src/configuration/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ use regex::Regex;
use rustls::ServerConfig;
use rustls::pki_types::CertificateDer;
use rustls::pki_types::PrivateKeyDer;
use schema::Mode;
use schemars::JsonSchema;
use schemars::r#gen::SchemaGenerator;
use schemars::schema::ObjectValidation;
Expand All @@ -50,6 +49,7 @@ use self::server::Server;
use self::subgraph::SubgraphConfiguration;
use crate::ApolloRouterError;
use crate::cache::DEFAULT_CACHE_CAPACITY;
use crate::configuration::cooperative_cancellation::CooperativeCancellation;
use crate::graphql;
use crate::notification::Notify;
use crate::plugin::plugins;
Expand All @@ -63,10 +63,12 @@ use crate::plugins::subscription::SubscriptionConfig;
use crate::uplink::UplinkConfig;

pub(crate) mod connector;
pub(crate) mod cooperative_cancellation;
pub(crate) mod cors;
pub(crate) mod expansion;
mod experimental;
pub(crate) mod metrics;
pub(crate) mod mode;
mod persisted_queries;
pub(crate) mod schema;
pub(crate) mod server;
Expand Down Expand Up @@ -573,7 +575,8 @@ impl FromStr for Configuration {
type Err = ConfigurationError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
schema::validate_yaml_configuration(s, Expansion::default()?, Mode::Upgrade)?.validate()
schema::validate_yaml_configuration(s, Expansion::default()?, schema::Mode::Upgrade)?
.validate()
}
}

Expand Down Expand Up @@ -900,6 +903,35 @@ pub(crate) struct QueryPlanning {
/// If cache warm up is configured, this will allow the router to keep a query plan created with
/// the old schema, if it determines that the schema update does not affect the corresponding query
pub(crate) experimental_reuse_query_plans: bool,

/// Configures cooperative cancellation of query planning
///
/// See [`CooperativeCancellation`] for more details.
pub(crate) experimental_cooperative_cancellation: CooperativeCancellation,
}

#[buildstructor::buildstructor]
impl QueryPlanning {
#[builder]
#[allow(dead_code)]
pub(crate) fn new(
cache: Option<QueryPlanCache>,
warmed_up_queries: Option<usize>,
experimental_plans_limit: Option<u32>,
experimental_paths_limit: Option<u32>,
experimental_reuse_query_plans: Option<bool>,
experimental_cooperative_cancellation: Option<CooperativeCancellation>,
) -> Self {
Self {
cache: cache.unwrap_or_default(),
warmed_up_queries,
experimental_plans_limit,
experimental_paths_limit,
experimental_reuse_query_plans: experimental_reuse_query_plans.unwrap_or_default(),
experimental_cooperative_cancellation: experimental_cooperative_cancellation
.unwrap_or_default(),
}
}
}

/// Cache configuration
Expand Down
24 changes: 24 additions & 0 deletions apollo-router/src/configuration/mode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;

// Don't add a default here. Instead, Default should be implemented for
// individual cases of Mode<T>.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub(crate) enum Mode {
Measure,
Enforce,
}

impl Mode {
/// Returns true if this config is in measure mode.
pub(crate) fn is_measure_mode(&self) -> bool {
matches!(self, Mode::Measure)
}

/// Returns true if this config is in enforce mode.
pub(crate) fn is_enforce_mode(&self) -> bool {
matches!(self, Mode::Enforce)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2311,6 +2311,27 @@ expression: "&schema"
],
"description": "Configures the context"
},
"CooperativeCancellation": {
"additionalProperties": false,
"properties": {
"enabled": {
"default": true,
"description": "When true, cooperative cancellation is enabled.",
"type": "boolean"
},
"mode": {
"$ref": "#/definitions/Mode",
"description": "#/definitions/Mode"
},
"timeout": {
"default": null,
"description": "Enable timeout for query planning.",
"nullable": true,
"type": "string"
}
},
"type": "object"
},
"Cors": {
"additionalProperties": false,
"description": "Cross origin request configuration.",
Expand Down Expand Up @@ -2658,8 +2679,8 @@ expression: "&schema"
"type": "boolean"
},
"mode": {
"$ref": "#/definitions/Mode",
"description": "#/definitions/Mode"
"$ref": "#/definitions/Mode2",
"description": "#/definitions/Mode2"
},
"strategy": {
"$ref": "#/definitions/StrategyConfig",
Expand Down Expand Up @@ -4574,6 +4595,13 @@ expression: "&schema"
],
"type": "string"
},
"Mode2": {
"enum": [
"measure",
"enforce"
],
"type": "string"
},
"MultipartRequest": {
"additionalProperties": false,
"description": "Configuration for a multipart request for file uploads.\n\nThis protocol conforms to [jaydenseric's multipart spec](https://github.com/jaydenseric/graphql-multipart-request-spec)",
Expand Down Expand Up @@ -5076,6 +5104,10 @@ expression: "&schema"
"$ref": "#/definitions/QueryPlanCache",
"description": "#/definitions/QueryPlanCache"
},
"experimental_cooperative_cancellation": {
"$ref": "#/definitions/CooperativeCancellation",
"description": "#/definitions/CooperativeCancellation"
},
"experimental_paths_limit": {
"default": null,
"description": "Before creating query plans, for each path of fields in the query we compute all the possible options to traverse that path via the subgraphs. Multiple options can arise because fields in the path can be provided by multiple subgraphs, and abstract types (i.e. unions and interfaces) returned by fields sometimes require the query planner to traverse through each constituent object type. The number of options generated in this computation can grow large if the schema or query are sufficiently complex, and that will increase the time spent planning.\n\nThis config allows specifying a per-path limit to the number of options considered. If any path's options exceeds this limit, query planning will abort and the operation will fail.\n\nThe default value is None, which specifies no limit.",
Expand Down
8 changes: 8 additions & 0 deletions apollo-router/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,9 @@ pub(crate) enum QueryPlannerError {

/// Federation error: {0}
FederationError(FederationErrorBridge),

/// Query planning timed out: {0}
Timeout(String),
}

impl From<FederationErrorBridge> for QueryPlannerError {
Expand All @@ -314,6 +317,8 @@ pub(crate) enum FederationErrorBridge {
OperationNameNotProvided(String),
/// {0}
Other(String),
/// {0}
Cancellation(String),
}

impl From<FederationError> for FederationErrorBridge {
Expand All @@ -325,6 +330,9 @@ impl From<FederationError> for FederationErrorBridge {
err @ FederationError::SingleFederationError(
apollo_federation::error::SingleFederationError::OperationNameNotProvided,
) => Self::OperationNameNotProvided(err.to_string()),
err @ FederationError::SingleFederationError(
apollo_federation::error::SingleFederationError::PlanningCancelled,
) => Self::Cancellation(err.to_string()),
err => Self::Other(err.to_string()),
}
}
Expand Down
Loading