diff --git a/.changesets/fix_aaron_nullable_@keys.md b/.changesets/fix_aaron_nullable_@keys.md new file mode 100644 index 0000000000..1e7ee4f7fc --- /dev/null +++ b/.changesets/fix_aaron_nullable_@keys.md @@ -0,0 +1,5 @@ +### Allow nullable `@key`s for response caching + +`@key`s can be nullable, but there was a bug in the response caching feature that blocked nullable `@key`s from being used. This fixes that behavior. Be careful when caching nullable data because it can be null! Docs added to call that out, but be very sure of what you're caching and write cache keys to be as simple and non-nullable as possible. + +By [@aaronArinder](https://github.com/aaronArinder) in https://github.com/apollographql/router/pull/8767 diff --git a/apollo-federation/src/lib.rs b/apollo-federation/src/lib.rs index 253c94a350..f6ea04c4d8 100644 --- a/apollo-federation/src/lib.rs +++ b/apollo-federation/src/lib.rs @@ -466,8 +466,7 @@ mod test_supergraph { .map(|err| err.to_string()) .collect::>(), vec![ - "Each entity field referenced in a @cacheTag format (applied on entity type) must be a member of every @key field set. In other words, when there are multiple @key fields on the type, the referenced field(s) must be limited to their intersection. Bad cacheTag format \"product-{$key.upc}\" on type \"Product\"", - "@cacheTag format references a nullable argument \"first\"" + "Each entity field referenced in a @cacheTag format (applied on entity type) must be a member of every @key field set. In other words, when there are multiple @key fields on the type, the referenced field(s) must be limited to their intersection. Bad cacheTag format \"product-{$key.upc}\" on type \"Product\"" ] ); diff --git a/apollo-federation/src/schema/validators/cache_tag.rs b/apollo-federation/src/schema/validators/cache_tag.rs index cc0c64e7d4..2986069340 100644 --- a/apollo-federation/src/schema/validators/cache_tag.rs +++ b/apollo-federation/src/schema/validators/cache_tag.rs @@ -153,7 +153,7 @@ fn validate_args_on_field( .iter() .map(|arg| (arg.name.clone(), arg.ty.as_ref())) .collect::>(); - validate_args_selection(schema, None, &fields, &var_ref.selection).err() + validate_args_selection(schema, &fields, &var_ref.selection).err() } None => None, }, @@ -166,7 +166,6 @@ fn validate_args_on_field( /// parent_type_name: The name of the parent composite type; None if selection is a field argument. fn validate_args_selection( schema: &FederationSchema, - parent_type_name: Option<&Name>, fields: &IndexMap, selection: &SelectionTrie, ) -> Result<(), CacheTagValidationError> { @@ -190,18 +189,7 @@ fn validate_args_selection( .ok_or_else(|| CacheTagValidationError::CacheTagInvalidFormat { message: format!("unknown field \"{name}\""), })?; - if !is_fully_non_null(field) { - if let Some(parent_type_name) = parent_type_name { - return Err(CacheTagValidationError::CacheTagFormatNullableField { - field_name: name.clone(), - parent_type: parent_type_name.to_string(), - }); - } else { - return Err(CacheTagValidationError::CacheTagFormatNullableArgument { - arg_name: name.clone(), - }); - } - } + let type_name = field.inner_named_type(); let type_def = schema.get_type(type_name.clone())?; if !sel.is_leaf() { @@ -225,7 +213,7 @@ fn validate_args_selection( )) }) .collect::, _>>()?; - validate_args_selection(schema, Some(type_name), &next_fields, sel)?; + validate_args_selection(schema, &next_fields, sel)?; } else { // A leaf field must not be a list. if field.is_list() { @@ -249,17 +237,6 @@ fn validate_args_selection( Ok(()) } -/// Similar to `Type::is_non_null`, but checks if the type is non-null at all nested levels of -/// lists. -fn is_fully_non_null(ty: &Type) -> bool { - match ty { - Type::Named(_) => false, - Type::List(_) => false, - Type::NonNullNamed(_) => true, - Type::NonNullList(inner) => is_fully_non_null(inner), - } -} - fn validate_args_on_object_type( schema: &FederationSchema, errors: &mut Vec, @@ -397,13 +374,6 @@ fn build_selection_set( message: format!("invalid field selection name \"{key}\""), })?; - if !is_fully_non_null(new_field.ty()) { - return Err(CacheTagValidationError::CacheTagFormatNullableField { - field_name: name.clone(), - parent_type: selection_set.ty.to_string(), - }); - } - if !sel.is_leaf() { ObjectOrInterfaceTypeDefinitionPosition::try_from(new_field_type_def).map_err( |_| CacheTagValidationError::CacheTagInvalidFormat { @@ -500,13 +470,6 @@ enum CacheTagValidationError { "Each entity field referenced in a @cacheTag format (applied on entity type) must be a member of every @key field set. In other words, when there are multiple @key fields on the type, the referenced field(s) must be limited to their intersection. Bad cacheTag format \"{format}\" on type \"{type_name}\"" )] CacheTagInvalidFormatFieldSetOnEntity { type_name: Name, format: String }, - #[error("@cacheTag format references a nullable field \"{parent_type}.{field_name}\"")] - CacheTagFormatNullableField { - field_name: Name, - parent_type: String, - }, - #[error("@cacheTag format references a nullable argument \"{arg_name}\"")] - CacheTagFormatNullableArgument { arg_name: Name }, } impl CacheTagValidationError { @@ -647,7 +610,7 @@ mod tests { } #[test] - fn test_invalid_format_string_nullable_args() { + fn test_valid_format_string_nullable_args() { const SCHEMA: &str = r#" type Product @key(fields: "upc name") @cacheTag(format: "product-{$key.upc}-{$key.name}") @@ -660,18 +623,9 @@ mod tests { topProducts(first: Int): [Product] @cacheTag(format: "topProducts") @cacheTag(format: "topProducts-{$args.first}") - productsByCountry(country: [String]!): [Product] - @cacheTag(format: "productsByCountry-{$args.country}") } "#; - assert_eq!( - build_for_errors(SCHEMA), - vec![ - "@cacheTag format references a nullable field \"Product.name\"", - "@cacheTag format references a nullable argument \"first\"", - "@cacheTag format references a nullable argument \"country\"", - ] - ); + build_and_validate(SCHEMA); } #[test] @@ -760,7 +714,6 @@ mod tests { "cacheTag format is invalid: cannot create selection set with \"somethingElse\"", "cacheTag format is invalid: invalid path ending at \"test\", which is not a scalar type or an enum", "Each entity field referenced in a @cacheTag format (applied on entity type) must be a member of every @key field set. In other words, when there are multiple @key fields on the type, the referenced field(s) must be limited to their intersection. Bad cacheTag format \"product-{$key.test.b}\" on type \"Product\"", - "@cacheTag format references a nullable field \"Test.c\"", "cacheTag format is invalid: unknown field \"second\"" ] ); diff --git a/apollo-router/src/plugins/response_cache/plugin.rs b/apollo-router/src/plugins/response_cache/plugin.rs index 144314583d..aa73cb80a5 100644 --- a/apollo-router/src/plugins/response_cache/plugin.rs +++ b/apollo-router/src/plugins/response_cache/plugin.rs @@ -2199,6 +2199,15 @@ pub(in crate::plugins) fn matches_selection_set( matches_array_of_objects(arr, &field.selection_set) } + // We allow nullable fields in selection sets (ie, those fields that identify an entity, usually + // [if not always] listed in `@key`). That _doesn't_ mean that entities definitively + // must allow nullable fields, only that we happen to allow it right now; it's probably + // a bit of a schema-development smell to have an entity identifiable by nullable + // fields, but it makes practical sense if you're wanting to cache partial responses + Value::Null => { + return true; + } + // scalar values _other => { // Mismatch: object or array value was expected. @@ -3390,6 +3399,52 @@ mod tests { ); } + #[test] + fn test_matches_selection_set_handles_null() { + // Note the nullable type, Nullable; this represents when you have some entity you want to + // identify via nullable fields, which is most useful when you're wanting to cache partial + // responses (what does it mean to partially identify a thing? Everything is all and only + // itself, no more or less--be careful in reading this test as saying something important + // about how entities _should_ be identified with respect to nullable fields) + let schema_text = r#" + type Query { + test: Test + } + type Test { + id: ID! + nullable: Nullable + } + type Nullable { + id: ID! + } + "#; + + let schema = Schema::parse_and_validate(schema_text, "test.graphql").unwrap(); + let mut parser = Parser::new(); + let field_set = parser + .parse_field_set( + &schema, + apollo_compiler::ast::NamedType::new("Test").unwrap(), + "id nullable { id }", + "test.graphql", + ) + .unwrap(); + + // Note second location: it's `null` + let representation = json!({ + "id": "TEST123", + "nullable": null, + }) + .as_object() + .unwrap() + .clone(); + + assert!( + matches_selection_set(&representation, &field_set.selection_set), + "complex nested arrays should match" + ); + } + #[test] fn test_take_selection_set_handles_arrays() { // Simulate the real-world Availability type scenario diff --git a/apollo-router/tests/integration/fixtures/entity_key_with_null_fields.graphql b/apollo-router/tests/integration/fixtures/entity_key_with_null_fields.graphql new file mode 100644 index 0000000000..7bc431aa30 --- /dev/null +++ b/apollo-router/tests/integration/fixtures/entity_key_with_null_fields.graphql @@ -0,0 +1,89 @@ +schema + @link(url: "https://specs.apollo.dev/link/v1.0") + @link(url: "https://specs.apollo.dev/cacheTag/v0.1", for: EXECUTION) + @link(url: "https://specs.apollo.dev/join/v0.3", for: EXECUTION) { + query: Query +} + +directive @join__enumValue(graph: join__Graph!) repeatable on ENUM_VALUE + +directive @join__field( + graph: join__Graph + requires: join__FieldSet + provides: join__FieldSet + type: String + external: Boolean + override: String + usedOverridden: Boolean +) repeatable on FIELD_DEFINITION | INPUT_FIELD_DEFINITION + +directive @join__graph(name: String!, url: String!) on ENUM_VALUE + +directive @join__implements( + graph: join__Graph! + interface: String! +) repeatable on OBJECT | INTERFACE + +directive @join__directive( + graphs: [join__Graph!] + name: String! + args: join__DirectiveArguments +) repeatable on SCHEMA | OBJECT | INTERFACE | FIELD_DEFINITION + +directive @join__type( + graph: join__Graph! + key: join__FieldSet + extension: Boolean! = false + resolvable: Boolean! = true + isInterfaceObject: Boolean! = false +) repeatable on OBJECT | INTERFACE | UNION | ENUM | INPUT_OBJECT | SCALAR + +directive @join__unionMember( + graph: join__Graph! + member: String! +) repeatable on UNION + +directive @link( + url: String + as: String + for: link__Purpose + import: [link__Import] +) repeatable on SCHEMA + +scalar join__FieldSet +scalar join__DirectiveArguments +scalar link__Import + +enum join__Graph { + STUFF @join__graph(name: "stuff", url: "http://localhost:4001") + STATUS @join__graph(name: "status", url: "http://localhost:4002") +} + +enum link__Purpose { + SECURITY + EXECUTION +} + +type Query @join__type(graph: STUFF) @join__type(graph: STATUS) { + getStatus(id: String!): Status @join__field(graph: STUFF) +} + +type Status + @join__type(graph: STUFF, key: "id items { id name }") + @join__type(graph: STATUS, key: "id items { id name }") + @join__directive( + graphs: [STATUS] + name: "federation__cacheTag" + args: { format: "status-{$key.id}-{$key.name}" } + ) { + id: String! + items: [Item]! + stuffDetails: String @join__field(graph: STUFF) + statusDetails: String @join__field(graph: STATUS) +} + +type Item @join__type(graph: STUFF) @join__type(graph: STATUS) { + id: String! + # NOTE: intentionally nullable + name: String +} diff --git a/apollo-router/tests/integration/response_cache.rs b/apollo-router/tests/integration/response_cache.rs index a7b6574967..623aa1140c 100644 --- a/apollo-router/tests/integration/response_cache.rs +++ b/apollo-router/tests/integration/response_cache.rs @@ -867,6 +867,96 @@ async fn complex_entity_key_response_cache() { }); } +#[tokio::test] +async fn test_cache_keys_nullable_data() { + // GIVEN: + // * that graphOS is enabled + // * that we have a supergraph with: + // * join__type with a complex key (ie, using an array) + // * a complex key with a nullable field + // * a router running the above + if !graph_os_enabled() { + return; + } + + // NOTE: nulls here to represent nullable data to cache + let subgraphs = json!({ + "stuff": { + "query": { + "getStatus": { + "id": "1", + "items": [{"id": "i1", "name": null}], + "stuffDetails": "stuff we have" + } + } + }, + "status": { + "entities": [{ + "__typename": "Status", + "id": "1", + "items": [{"id": "i1", "name": null}], + "statusDetails": "status details" + }] + } + }); + + let mut config = base_config(); + { + let config_mut = config.as_object_mut().unwrap(); + config_mut.insert("experimental_mock_subgraphs".into(), subgraphs); + config_mut + .get_mut("response_cache") + .unwrap() + .as_object_mut() + .unwrap() + .insert("debug".into(), true.into()); + } + + let router = apollo_router::TestHarness::builder() + .schema(include_str!( + "./fixtures/entity_key_with_null_fields.graphql" + )) + .configuration_json(config) + .unwrap() + .build_http_service() + .await + .unwrap(); + + let mut router = router; + + // WHEN: + // * a query for the Status type but with data from both the stuff and + // status subgraphs + + let query = r#"{ + getStatus(id: "1") { + id + items { id name } + stuffDetails + statusDetails + } + }"#; + + let mut http_req: http::Request = graphql_request(query).into(); + http_req.headers_mut().insert( + HeaderName::from_static("apollo-cache-debugging"), + HeaderValue::from_static("true"), + ); + let (_, body) = make_http_request::(&mut router, http_req).await; + + // THEN: + // * no errors emitted! The key parses correctly + // * we get data from both subgraphs + assert!(body.errors.is_empty()); + + // NOTE: nulls here are what we're testing + let expectation: serde_json_bytes::Value = json!({"getStatus":{"id":"1","items":[{"id":"i1","name": null}],"stuffDetails":"stuff we have","statusDetails":"status details"}}).into(); + assert_eq!(body.data, Some(expectation)); + insta::assert_json_snapshot!(body.extensions, { + ".apolloCacheDebugging.data[].cacheControl.created" => 0 + }); +} + #[tokio::test] async fn cache_control_merging_multi_fetch() { if !graph_os_enabled() { diff --git a/apollo-router/tests/integration/snapshots/integration_tests__integration__response_cache__cache_keys_nullable_data.snap b/apollo-router/tests/integration/snapshots/integration_tests__integration__response_cache__cache_keys_nullable_data.snap new file mode 100644 index 0000000000..4b913a9d93 --- /dev/null +++ b/apollo-router/tests/integration/snapshots/integration_tests__integration__response_cache__cache_keys_nullable_data.snap @@ -0,0 +1,154 @@ +--- +source: apollo-router/tests/integration/response_cache.rs +expression: body.extensions +--- +{ + "apolloCacheDebugging": { + "version": "1.0", + "data": [ + { + "key": "version:1.1:subgraph:stuff:type:Query:hash:d5c0f6a8c8761283e964c030813e6867f9370c1574677b0de5277102f41f02bb:data:070af9367f9025bd796a1b7e0cd1335246f658aa4857c3a4d6284673b7d07fa6", + "invalidationKeys": [], + "kind": { + "rootFields": [ + "getStatus" + ] + }, + "subgraphName": "stuff", + "subgraphRequest": { + "query": "{ getStatus(id: \"1\") { __typename id items { id name } stuffDetails } }" + }, + "source": "subgraph", + "cacheControl": { + "created": 0, + "noStore": true + }, + "shouldStore": false, + "data": { + "data": { + "getStatus": { + "__typename": "Status", + "id": "1", + "items": [ + { + "id": "i1", + "name": null + } + ], + "stuffDetails": "stuff we have" + } + } + }, + "warnings": [ + { + "code": "CACHE_CONTROL_NO_STORE", + "links": [ + { + "url": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Cache-Control", + "title": "Cache-Control header documentation" + } + ], + "message": "The subgraph returned a Cache-Control header containing no-store, so the data was not cached" + }, + { + "code": "CACHE_CONTROL_WITHOUT_MAX_AGE", + "links": [ + { + "url": "https://www.apollographql.com/docs/graphos/routing/performance/caching/response-caching/invalidation#configure-default-ttl", + "title": "Configure default TTL in the Router" + }, + { + "url": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Cache-Control", + "title": "Cache-Control header documentation" + } + ], + "message": "The subgraph returned a 'Cache-Control' header without any max-age set, so the Router will use the default (configured in the Router configuration file)." + }, + { + "code": "NO_CACHE_TAG_ON_ROOT_FIELD", + "links": [ + { + "url": "https://www.apollographql.com/docs/graphos/routing/performance/caching/response-caching/invalidation#invalidation-methods", + "title": "Add '@cacheTag' in your schema" + } + ], + "message": "No cache tags are specified on your root fields query. If you want to use active invalidation, you'll need to add cache tags on your root field." + } + ] + }, + { + "key": "version:1.1:subgraph:status:type:Status:representation:db667609a8d53a2cd37059aed86e44088a10ec65213783b7aeac4ffe6cb761a4:hash:41749e92390f34c2c244668da28c61f4b680fd1997a98d547fe06e6b2a3c5af1:data:070af9367f9025bd796a1b7e0cd1335246f658aa4857c3a4d6284673b7d07fa6", + "invalidationKeys": [ + "status-1-" + ], + "kind": { + "typename": "Status", + "entityKey": { + "id": "1", + "items": [ + { + "id": "i1", + "name": null + } + ] + } + }, + "subgraphName": "status", + "subgraphRequest": { + "query": "query($representations: [_Any!]!) { _entities(representations: $representations) { ... on Status { statusDetails } } }", + "variables": { + "representations": [ + { + "items": [ + { + "id": "i1", + "name": null + } + ], + "id": "1", + "__typename": "Status" + } + ] + } + }, + "source": "subgraph", + "cacheControl": { + "created": 0, + "noStore": true + }, + "shouldStore": false, + "data": { + "data": { + "statusDetails": "status details" + } + }, + "warnings": [ + { + "code": "CACHE_CONTROL_NO_STORE", + "links": [ + { + "url": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Cache-Control", + "title": "Cache-Control header documentation" + } + ], + "message": "The subgraph returned a Cache-Control header containing no-store, so the data was not cached" + }, + { + "code": "CACHE_CONTROL_WITHOUT_MAX_AGE", + "links": [ + { + "url": "https://www.apollographql.com/docs/graphos/routing/performance/caching/response-caching/invalidation#configure-default-ttl", + "title": "Configure default TTL in the Router" + }, + { + "url": "https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Cache-Control", + "title": "Cache-Control header documentation" + } + ], + "message": "The subgraph returned a 'Cache-Control' header without any max-age set, so the Router will use the default (configured in the Router configuration file)." + } + ] + } + ] + } +} diff --git a/docs/source/routing/performance/caching/response-caching/faq.mdx b/docs/source/routing/performance/caching/response-caching/faq.mdx index 1e34a97448..6aae4728d9 100644 --- a/docs/source/routing/performance/caching/response-caching/faq.mdx +++ b/docs/source/routing/performance/caching/response-caching/faq.mdx @@ -31,6 +31,14 @@ Cached data remains valid across schema updates. When a new schema is deployed, No. Responses containing errors aren't cached—this prevents transient errors from being served repeatedly from the cache. +### Can I use compound `@key`s? + +Yes. Compound `@key`s, including lists, are supported. However, their complexity introduces overhead. Prioritize simplicity in your cache key design. + +### Can `@key`s represent nullable data in `@cacheTag`? + +Yes. However, `null` data is interpolated as an empty string. For example, consider an entity key with fields `id` and `name`, where `name` is nullable. If you specify `@cacheTag(format: "user-{$key.id}-{$key.name}")` and the entity key is `{"id": 1, "name": null}`, the generated cache tag is `user-1-`. + ### What if one of my subgraphs is unavailable? The router returns whatever cached data is available and populates the rest of the response tree according to your schema's nullability rules. Fields that can't be resolved are nulled out with corresponding error messages, while successfully cached portions of the response are still returned. diff --git a/docs/source/routing/performance/caching/response-caching/quickstart.mdx b/docs/source/routing/performance/caching/response-caching/quickstart.mdx index 3ac109ca2c..25cf283507 100644 --- a/docs/source/routing/performance/caching/response-caching/quickstart.mdx +++ b/docs/source/routing/performance/caching/response-caching/quickstart.mdx @@ -97,6 +97,10 @@ The left chart shows cache hits per subgraph and type. No cache hits appear beca ### Integrate with your schema Consider an example with two subgraphs: users and posts. + + This example uses a simple, non-nullable `@key`. Simple, non-nullable `@key`s improve reliability and performance, but the router also supports compound, non-nullable `@key`s. For details, see the [FAQ](/routing/performance/caching/response-caching/faq) + + ```graphql title="users.graphql" extend schema @link(