diff --git a/docs/root/configuration/http_filters/router_filter.rst b/docs/root/configuration/http_filters/router_filter.rst index 0d416ac865026..c235c9faecae9 100644 --- a/docs/root/configuration/http_filters/router_filter.rst +++ b/docs/root/configuration/http_filters/router_filter.rst @@ -79,6 +79,9 @@ gateway-error This policy is similar to the *5xx* policy but will only retry requests that result in a 502, 503, or 504. +reset + Envoy will attempt a retry if the upstream server does not respond at all (disconnect/reset/read timeout.) + connect-failure Envoy will attempt a retry if a request is failed because of a connection failure to the upstream server (connect timeout, etc.). (Included in *5xx*) diff --git a/docs/root/faq/transient_failures.rst b/docs/root/faq/transient_failures.rst index aadfa09865186..2e31976b5eb4a 100644 --- a/docs/root/faq/transient_failures.rst +++ b/docs/root/faq/transient_failures.rst @@ -108,7 +108,7 @@ of times the host has been ejected). .. code-block:: json { - "retry_on": "cancelled,connect-failure,gateway-error,refused-stream,resource-exhausted,unavailable", + "retry_on": "cancelled,connect-failure,gateway-error,refused-stream,reset,resource-exhausted,unavailable", "num_retries": 1, "retry_host_predicate": [ { diff --git a/include/envoy/router/router.h b/include/envoy/router/router.h index c705a6499300b..a14864b4b2871 100644 --- a/include/envoy/router/router.h +++ b/include/envoy/router/router.h @@ -163,6 +163,7 @@ class RetryPolicy { static const uint32_t RETRY_ON_GRPC_UNAVAILABLE = 0x100; static const uint32_t RETRY_ON_GRPC_INTERNAL = 0x200; static const uint32_t RETRY_ON_RETRIABLE_STATUS_CODES = 0x400; + static const uint32_t RETRY_ON_RESET = 0x800; // clang-format on virtual ~RetryPolicy() = default; diff --git a/source/common/http/headers.h b/source/common/http/headers.h index 2b9247747e562..281216f29611a 100644 --- a/source/common/http/headers.h +++ b/source/common/http/headers.h @@ -161,6 +161,7 @@ class HeaderValues { const std::string RefusedStream{"refused-stream"}; const std::string Retriable4xx{"retriable-4xx"}; const std::string RetriableStatusCodes{"retriable-status-codes"}; + const std::string Reset{"reset"}; } EnvoyRetryOnValues; struct { diff --git a/source/common/router/retry_state_impl.cc b/source/common/router/retry_state_impl.cc index c09f485a1c7a2..e7674bd9ba93d 100644 --- a/source/common/router/retry_state_impl.cc +++ b/source/common/router/retry_state_impl.cc @@ -22,6 +22,7 @@ const uint32_t RetryPolicy::RETRY_ON_GATEWAY_ERROR; const uint32_t RetryPolicy::RETRY_ON_CONNECT_FAILURE; const uint32_t RetryPolicy::RETRY_ON_RETRIABLE_4XX; const uint32_t RetryPolicy::RETRY_ON_RETRIABLE_STATUS_CODES; +const uint32_t RetryPolicy::RETRY_ON_RESET; const uint32_t RetryPolicy::RETRY_ON_GRPC_CANCELLED; const uint32_t RetryPolicy::RETRY_ON_GRPC_DEADLINE_EXCEEDED; const uint32_t RetryPolicy::RETRY_ON_GRPC_RESOURCE_EXHAUSTED; @@ -130,6 +131,8 @@ std::pair RetryStateImpl::parseRetryOn(absl::string_view config) ret |= RetryPolicy::RETRY_ON_REFUSED_STREAM; } else if (retry_on == Http::Headers::get().EnvoyRetryOnValues.RetriableStatusCodes) { ret |= RetryPolicy::RETRY_ON_RETRIABLE_STATUS_CODES; + } else if (retry_on == Http::Headers::get().EnvoyRetryOnValues.Reset) { + ret |= RetryPolicy::RETRY_ON_RESET; } else { all_fields_valid = false; } @@ -293,10 +296,13 @@ bool RetryStateImpl::wouldRetryFromReset(const Http::StreamResetReason reset_rea return false; } + if (retry_on_ & RetryPolicy::RETRY_ON_RESET) { + return true; + } + if (retry_on_ & (RetryPolicy::RETRY_ON_5XX | RetryPolicy::RETRY_ON_GATEWAY_ERROR)) { // Currently we count an upstream reset as a "5xx" (since it will result in - // one). We may eventually split this out into its own type. I.e., - // RETRY_ON_RESET. + // one). With RETRY_ON_RESET we may eventually remove these policies. return true; } diff --git a/test/common/router/config_impl_test.cc b/test/common/router/config_impl_test.cc index 20dba970a5c78..755ef68d98d99 100644 --- a/test/common/router/config_impl_test.cc +++ b/test/common/router/config_impl_test.cc @@ -2319,7 +2319,7 @@ TEST_F(RouteMatcherTest, Retry) { retry_policy: per_try_timeout: 1s num_retries: 3 - retry_on: 5xx,gateway-error,connect-failure + retry_on: 5xx,gateway-error,connect-failure,reset )EOF"; TestConfigImpl config(parseRouteConfigurationFromV2Yaml(yaml), factory_context_, true); @@ -2363,7 +2363,7 @@ TEST_F(RouteMatcherTest, Retry) { ->retryPolicy() .numRetries()); EXPECT_EQ(RetryPolicy::RETRY_ON_CONNECT_FAILURE | RetryPolicy::RETRY_ON_5XX | - RetryPolicy::RETRY_ON_GATEWAY_ERROR, + RetryPolicy::RETRY_ON_GATEWAY_ERROR | RetryPolicy::RETRY_ON_RESET, config.route(genHeaders("www.lyft.com", "/", "GET"), 0) ->routeEntry() ->retryPolicy() @@ -2376,7 +2376,7 @@ name: RetryVirtualHostLevel virtual_hosts: - domains: [www.lyft.com] name: www - retry_policy: {num_retries: 3, per_try_timeout: 1s, retry_on: '5xx,gateway-error,connect-failure'} + retry_policy: {num_retries: 3, per_try_timeout: 1s, retry_on: '5xx,gateway-error,connect-failure,reset'} routes: - match: {prefix: /foo} route: @@ -2417,7 +2417,7 @@ name: RetryVirtualHostLevel ->retryPolicy() .numRetries()); EXPECT_EQ(RetryPolicy::RETRY_ON_CONNECT_FAILURE | RetryPolicy::RETRY_ON_5XX | - RetryPolicy::RETRY_ON_GATEWAY_ERROR, + RetryPolicy::RETRY_ON_GATEWAY_ERROR | RetryPolicy::RETRY_ON_RESET, config.route(genHeaders("www.lyft.com", "/bar", "GET"), 0) ->routeEntry() ->retryPolicy() @@ -2432,7 +2432,7 @@ name: RetryVirtualHostLevel ->retryPolicy() .numRetries()); EXPECT_EQ(RetryPolicy::RETRY_ON_CONNECT_FAILURE | RetryPolicy::RETRY_ON_5XX | - RetryPolicy::RETRY_ON_GATEWAY_ERROR, + RetryPolicy::RETRY_ON_GATEWAY_ERROR | RetryPolicy::RETRY_ON_RESET, config.route(genHeaders("www.lyft.com", "/", "GET"), 0) ->routeEntry() ->retryPolicy() diff --git a/test/common/router/retry_state_impl_test.cc b/test/common/router/retry_state_impl_test.cc index 415fa5b32ec28..7650fd5a3e547 100644 --- a/test/common/router/retry_state_impl_test.cc +++ b/test/common/router/retry_state_impl_test.cc @@ -412,6 +412,19 @@ TEST_F(RouterRetryStateImplTest, RetriableStatusCodesHeader) { } } +TEST_F(RouterRetryStateImplTest, PolicyResetRemoteReset) { + Http::TestHeaderMapImpl request_headers{{"x-envoy-retry-on", "reset"}}; + setup(request_headers); + EXPECT_TRUE(state_->enabled()); + + expectTimerCreateAndEnable(); + EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryReset(remote_reset_, callback_)); + EXPECT_CALL(callback_ready_, ready()); + retry_timer_->callback_(); + + EXPECT_EQ(RetryStatus::NoRetryLimitExceeded, state_->shouldRetryReset(remote_reset_, callback_)); +} + TEST_F(RouterRetryStateImplTest, RouteConfigNoHeaderConfig) { policy_.num_retries_ = 1; policy_.retry_on_ = RetryPolicy::RETRY_ON_CONNECT_FAILURE; diff --git a/test/common/router/router_test.cc b/test/common/router/router_test.cc index b9e2f9dc8172f..45e7d6eaa406b 100644 --- a/test/common/router/router_test.cc +++ b/test/common/router/router_test.cc @@ -4305,8 +4305,8 @@ TEST(RouterFilterUtilityTest, StrictCheckValidHeaders) { {"x-envoy-upstream-rq-per-try-timeout-ms", "100"}, {"x-envoy-max-retries", "2"}, {"not-checked", "always passes"}, - {"x-envoy-retry-on", - "5xx,gateway-error,retriable-4xx,refused-stream,connect-failure,retriable-status-codes"}, + {"x-envoy-retry-on", "5xx,gateway-error,retriable-4xx,refused-stream,connect-failure," + "retriable-status-codes,reset"}, {"x-envoy-retry-grpc-on", "cancelled,internal,deadline-exceeded,resource-exhausted,unavailable"}, };