diff --git a/.changesets/fix_caroline_revert_8765.md b/.changesets/fix_caroline_revert_8765.md new file mode 100644 index 0000000000..3abde15307 --- /dev/null +++ b/.changesets/fix_caroline_revert_8765.md @@ -0,0 +1,11 @@ +### Return 503 for rate limit traffic shaping ([PR #9013](https://github.com/apollographql/router/pull/9013)) + +Reverts [PR #8765](https://github.com/apollographql/router/pull/8765). + +When the router's rate limit or buffer capacity is exceeded, it now returns HTTP 503 (Service Unavailable) instead of HTTP 429 (Too Many Requests). + +HTTP 429 implies that a specific client has sent too many requests and should back off. HTTP 503 more accurately reflects the situation: the router is temporarily unable to handle the request due to overall service load, not because of the behavior of any individual client. + +This change affects both router-level and subgraph-level rate limiting. Documentation has been updated to reflect the new status code. + +By [@carodewig](https://github.com/carodewig) in https://github.com/apollographql/router/pull/9013 diff --git a/apollo-router/src/plugins/traffic_shaping/mod.rs b/apollo-router/src/plugins/traffic_shaping/mod.rs index 8ecef81504..03532e70d5 100644 --- a/apollo-router/src/plugins/traffic_shaping/mod.rs +++ b/apollo-router/src/plugins/traffic_shaping/mod.rs @@ -318,7 +318,7 @@ impl PluginPrivate for TrafficShaping { let response: Result = future.await; if matches!(response, Err(ref err) if err.is::()) { Ok(RouterResponse::error_builder() - .status_code(StatusCode::TOO_MANY_REQUESTS) + .status_code(StatusCode::SERVICE_UNAVAILABLE) .error(rate_limit_error()) .context(ctx) .build() @@ -382,7 +382,7 @@ impl PluginPrivate for TrafficShaping { Err(err) if err.is::() => { // TODO add metrics Ok(SubgraphResponse::error_builder() - .status_code(StatusCode::TOO_MANY_REQUESTS) + .status_code(StatusCode::SERVICE_UNAVAILABLE) .subgraph_name(subgraph_name) .error(rate_limit_error()) .context(ctx) @@ -1055,7 +1055,7 @@ mod 
test { .await .expect("it responded"); - assert_eq!(StatusCode::TOO_MANY_REQUESTS, response.response.status()); + assert_eq!(StatusCode::SERVICE_UNAVAILABLE, response.response.status()); tokio::time::sleep(Duration::from_millis(300)).await; @@ -1186,7 +1186,7 @@ mod test { .call(RouterRequest::fake_builder().build().unwrap()) .await .unwrap(); - assert_eq!(StatusCode::TOO_MANY_REQUESTS, response.response.status()); + assert_eq!(StatusCode::SERVICE_UNAVAILABLE, response.response.status()); let j: serde_json::Value = serde_json::from_slice( &router::body::into_bytes(response.response) .await @@ -1284,7 +1284,7 @@ mod test { let mut results = tasks.join_all().await.into_iter(); let response = results.next().unwrap().unwrap().response; - assert_eq!(StatusCode::TOO_MANY_REQUESTS, response.status()); + assert_eq!(StatusCode::SERVICE_UNAVAILABLE, response.status()); let response = results.next().unwrap().unwrap().response; assert_eq!(StatusCode::GATEWAY_TIMEOUT, response.status()); diff --git a/apollo-router/tests/integration/traffic_shaping.rs b/apollo-router/tests/integration/traffic_shaping.rs index 4060001405..816f4102db 100644 --- a/apollo-router/tests/integration/traffic_shaping.rs +++ b/apollo-router/tests/integration/traffic_shaping.rs @@ -261,12 +261,12 @@ async fn test_router_rate_limit() -> Result<(), BoxError> { assert_yaml_snapshot!(response); let (_, response) = router.execute_default_query().await; - assert_eq!(response.status(), 429); + assert_eq!(response.status(), 503); let response = response.text().await?; assert!(response.contains("REQUEST_RATE_LIMITED")); assert_yaml_snapshot!(response); - router.assert_metrics_contains(r#"http_server_request_duration_seconds_count{error_type="Too Many Requests",http_request_method="POST",http_response_status_code="429""#, None).await; + router.assert_metrics_contains(r#"http_server_request_duration_seconds_count{error_type="Service Unavailable",http_request_method="POST",http_response_status_code="503""#, 
None).await; router.graceful_shutdown().await; Ok(()) diff --git a/docs/source/routing/errors.mdx b/docs/source/routing/errors.mdx index a5ec7277fe..968069f495 100644 --- a/docs/source/routing/errors.mdx +++ b/docs/source/routing/errors.mdx @@ -42,11 +42,6 @@ A request's HTTP `Accept` header didn't contain any of the router's supported mi - - -Request traffic exceeded configured rate limits. See [client side traffic shaping](/router/configuration/traffic-shaping/#client-side-traffic-shaping). - - The request was canceled because the client closed the connection, possibly due to a client side timeout. @@ -58,6 +53,12 @@ The router encountered an unexpected issue. [Report](https://github.com/apollogr + + +Request traffic exceeded your configured rate limits. See [client side traffic shaping](/router/configuration/traffic-shaping/#client-side-traffic-shaping). + + + The request was not able to complete within a configured amount of time. See [client side traffic shaping timeouts](/router/configuration/traffic-shaping/#timeouts). diff --git a/docs/source/routing/performance/traffic-shaping.mdx b/docs/source/routing/performance/traffic-shaping.mdx index f7228ebc0b..09a51a40ea 100644 --- a/docs/source/routing/performance/traffic-shaping.mdx +++ b/docs/source/routing/performance/traffic-shaping.mdx @@ -55,6 +55,8 @@ traffic_shaping: This rate limiting applies to all requests, there is no filtering per IP or other criteria. +If the router-level rate limit is hit, Apollo Router returns an [HTTP 503 status code](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/503) to indicate that the service is unavailable. + ### Timeouts The router applies a default timeout of 30 seconds for all requests, including the following: