diff --git a/docs/configuration/http_conn_man/route_config/route.rst b/docs/configuration/http_conn_man/route_config/route.rst index 39554c89d8e4d..75a1edbbd6a19 100644 --- a/docs/configuration/http_conn_man/route_config/route.rst +++ b/docs/configuration/http_conn_man/route_config/route.rst @@ -216,7 +216,8 @@ HTTP retry :ref:`architecture overview `. retry_on *(required, string)* specifies the conditions under which retry takes place. These are the same - conditions documented for :ref:`config_http_filters_router_x-envoy-retry-on`. + conditions documented for :ref:`config_http_filters_router_x-envoy-retry-on` and + :ref:`config_http_filters_router_x-envoy-grpc-retry-on`. num_retries *(optional, integer)* specifies the allowed number of retries. This parameter is optional and diff --git a/docs/configuration/http_filters/router_filter.rst b/docs/configuration/http_filters/router_filter.rst index 70acc80ef82cf..be8db6bf3bfb0 100644 --- a/docs/configuration/http_filters/router_filter.rst +++ b/docs/configuration/http_filters/router_filter.rst @@ -51,8 +51,9 @@ x-envoy-max-retries If a :ref:`retry policy ` is in place, Envoy will default to retrying one time unless explicitly specified. The number of retries can be explicitly set in the :ref:`route retry config ` or by using this header. -If a :ref:`retry policy ` is not configured or a -:ref:`config_http_filters_router_x-envoy-retry-on` header is not specified, Envoy will not retry a failed request. +If a :ref:`retry policy ` is not configured and +:ref:`config_http_filters_router_x-envoy-retry-on` or +:ref:`config_http_filters_router_x-envoy-grpc-retry-on` headers are not specified, Envoy will not retry a failed request. A few notes on how Envoy does retries: @@ -120,6 +121,35 @@ Note that retry policies can also be applied at the :ref:`route level By default, Envoy will *not* perform retries unless you've configured them per above. +.. _config_http_filters_router_x-envoy-grpc-retry-on: + +x-envoy-grpc-retry-on +^^^^^^^^^^^^^^^^^^^^^ +Setting this header on egress requests will cause Envoy to attempt to retry failed requests (number of +retries defaults to 1, and can be controlled by +:ref:`x-envoy-max-retries ` +header or the :ref:`route config retry policy `). +gRPC retries are currently only supported for gRPC status codes in response headers. gRPC status codes in +trailers will not trigger retry logic. One or more policies can be specified using a ',' delimited +list. The supported policies are: + +cancelled + Envoy will attempt a retry if the gRPC status code in the response headers is "cancelled" (1) + +deadline-exceeded + Envoy will attempt a retry if the gRPC status code in the response headers is "deadline-exceeded" (4) + +resource-exhausted + Envoy will attempt a retry if the gRPC status code in the response headers is "resource-exhausted" (8) + +As with the x-envoy-grpc-retry-on header, the number of retries can be controlled via the +:ref:`config_http_filters_router_x-envoy-max-retries` header + +Note that retry policies can also be applied at the :ref:`route level +`. + +By default, Envoy will *not* perform retries unless you've configured them per above. + x-envoy-upstream-alt-stat-name ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/include/envoy/grpc/BUILD b/include/envoy/grpc/BUILD index e8d0855242f8b..da93fb6638735 100644 --- a/include/envoy/grpc/BUILD +++ b/include/envoy/grpc/BUILD @@ -17,3 +17,8 @@ envoy_cc_library( "//include/envoy/http:header_map_interface", ], ) + +envoy_cc_library( + name = "status", + hdrs = ["status.h"], +) diff --git a/include/envoy/grpc/status.h b/include/envoy/grpc/status.h new file mode 100644 index 0000000000000..f8c95b520cfcf --- /dev/null +++ b/include/envoy/grpc/status.h @@ -0,0 +1,51 @@ +#pragma once + +namespace Envoy { +namespace Grpc { + +class Status { +public: + enum GrpcStatus { + // The RPC completed successfully. + Ok = 0, + // The RPC was canceled. + Canceled = 1, + // Some unknown error occurred. + Unknown = 2, + // An argument to the RPC was invalid. + InvalidArgument = 3, + // The deadline for the RPC expired before the RPC completed. + DeadlineExceeded = 4, + // Some resource for the RPC was not found. + NotFound = 5, + // A resource the RPC attempted to create already exists. + AlreadyExists = 6, + // Permission was denied for the RPC. + PermissionDenied = 7, + // Some resource is exhausted, resulting in RPC failure. + ResourceExhausted = 8, + // Some precondition for the RPC failed. + FailedPrecondition = 9, + // The RPC was aborted. + Aborted = 10, + // Some operation was requested outside of a legal range. + OutOfRange = 11, + // The RPC requested was not implemented. + Unimplemented = 12, + // Some internal error occurred. + Internal = 13, + // The RPC endpoint is current unavailable. + Unavailable = 14, + // There was some data loss resulting in RPC failure. + DataLoss = 15, + // The RPC does not have required credentials for the RPC to succeed. + Unauthenticated = 16, + + // This is a non-GRPC error code, indicating the status code in gRPC headers + // was invalid. + InvalidCode = -1, + }; +}; + +} // Grpc +} // Envoy diff --git a/include/envoy/http/header_map.h b/include/envoy/http/header_map.h index 50c940dc185d7..3122356d03e57 100644 --- a/include/envoy/http/header_map.h +++ b/include/envoy/http/header_map.h @@ -205,6 +205,7 @@ class HeaderEntry { HEADER_FUNC(EnvoyMaxRetries) \ HEADER_FUNC(EnvoyOriginalPath) \ HEADER_FUNC(EnvoyRetryOn) \ + HEADER_FUNC(EnvoyRetryGrpcOn) \ HEADER_FUNC(EnvoyUpstreamAltStatName) \ HEADER_FUNC(EnvoyUpstreamCanary) \ HEADER_FUNC(EnvoyUpstreamHealthCheckedCluster) \ diff --git a/include/envoy/router/router.h b/include/envoy/router/router.h index 3d91a15f3f363..d0bb03ba991f7 100644 --- a/include/envoy/router/router.h +++ b/include/envoy/router/router.h @@ -37,10 +37,13 @@ class RedirectEntry { class RetryPolicy { public: // clang-format off - static const uint32_t RETRY_ON_5XX = 0x1; - static const uint32_t RETRY_ON_CONNECT_FAILURE = 0x2; - static const uint32_t RETRY_ON_RETRIABLE_4XX = 0x4; - static const uint32_t RETRY_ON_REFUSED_STREAM = 0x8; + static const uint32_t RETRY_ON_5XX = 0x1; + static const uint32_t RETRY_ON_CONNECT_FAILURE = 0x2; + static const uint32_t RETRY_ON_RETRIABLE_4XX = 0x4; + static const uint32_t RETRY_ON_REFUSED_STREAM = 0x8; + static const uint32_t RETRY_ON_GRPC_CANCELLED = 0x10; + static const uint32_t RETRY_ON_GRPC_DEADLINE_EXCEEDED = 0x20; + static const uint32_t RETRY_ON_GRPC_RESOURCE_EXHAUSTED = 0x40; // clang-format on virtual ~RetryPolicy() {} diff --git a/source/common/grpc/BUILD b/source/common/grpc/BUILD index 0f235ef49def4..cdc291da453da 100644 --- a/source/common/grpc/BUILD +++ b/source/common/grpc/BUILD @@ -25,6 +25,7 @@ envoy_cc_library( external_deps = ["protobuf"], deps = [ "//include/envoy/common:optional", + "//include/envoy/grpc:status", "//include/envoy/http:header_map_interface", "//include/envoy/http:message_interface", "//include/envoy/stats:stats_interface", diff --git a/source/common/grpc/common.cc b/source/common/grpc/common.cc index 790b539a4a60c..6ab0d6ff6c155 100644 --- a/source/common/grpc/common.cc +++ b/source/common/grpc/common.cc @@ -24,6 +24,20 @@ namespace Grpc { const std::string Common::GRPC_CONTENT_TYPE{"application/grpc"}; +Optional Common::getGrpcStatus(const Http::HeaderMap& trailers) { + const Http::HeaderEntry* grpc_status_header = trailers.GrpcStatus(); + + uint64_t grpc_status_code; + if (!grpc_status_header || grpc_status_header->value().empty()) { + return Optional(); + } + if (!StringUtil::atoul(grpc_status_header->value().c_str(), grpc_status_code) || + grpc_status_code > Status::GrpcStatus::Unauthenticated) { + return Optional(Status::GrpcStatus::InvalidCode); + } + return Optional(static_cast(grpc_status_code)); +} + void Common::chargeStat(const Upstream::ClusterInfo& cluster, const std::string& grpc_service, const std::string& grpc_method, bool success) { cluster.statsScope() @@ -73,18 +87,17 @@ Http::MessagePtr Common::prepareHeaders(const std::string& upstream_cluster, void Common::checkForHeaderOnlyError(Http::Message& http_response) { // First check for grpc-status in headers. If it is here, we have an error. - const Http::HeaderEntry* grpc_status_header = http_response.headers().GrpcStatus(); - if (!grpc_status_header) { + Optional grpc_status_code = Common::getGrpcStatus(http_response.headers()); + if (!grpc_status_code.valid()) { return; } - uint64_t grpc_status_code; - if (!StringUtil::atoul(grpc_status_header->value().c_str(), grpc_status_code)) { + if (grpc_status_code.value() == Status::GrpcStatus::InvalidCode) { throw Exception(Optional(), "bad grpc-status header"); } const Http::HeaderEntry* grpc_status_message = http_response.headers().GrpcMessage(); - throw Exception(grpc_status_code, + throw Exception(grpc_status_code.value(), grpc_status_message ? grpc_status_message->value().c_str() : EMPTY_STRING); } @@ -100,16 +113,14 @@ void Common::validateResponse(Http::Message& http_response) { throw Exception(Optional(), "no response trailers"); } - const Http::HeaderEntry* grpc_status_header = http_response.trailers()->GrpcStatus(); - uint64_t grpc_status_code; - if (!grpc_status_header || - !StringUtil::atoul(grpc_status_header->value().c_str(), grpc_status_code)) { + Optional grpc_status_code = Common::getGrpcStatus(*http_response.trailers()); + if (!grpc_status_code.valid() || grpc_status_code.value() < 0) { throw Exception(Optional(), "bad grpc-status trailer"); } - if (grpc_status_code != 0) { + if (grpc_status_code.value() != 0) { const Http::HeaderEntry* grpc_status_message = http_response.trailers()->GrpcMessage(); - throw Exception(grpc_status_code, + throw Exception(grpc_status_code.value(), grpc_status_message ? grpc_status_message->value().c_str() : EMPTY_STRING); } } diff --git a/source/common/grpc/common.h b/source/common/grpc/common.h index 0adf2703dcb1c..055fbd0617d83 100644 --- a/source/common/grpc/common.h +++ b/source/common/grpc/common.h @@ -5,6 +5,7 @@ #include "envoy/common/exception.h" #include "envoy/common/optional.h" +#include "envoy/grpc/status.h" #include "envoy/http/header_map.h" #include "envoy/http/message.h" #include "envoy/stats/stats.h" @@ -25,6 +26,13 @@ class Exception : public EnvoyException { class Common { public: + /** + * Returns the GrpcStatus code from a given set of headers, if present. + * @headers the headers to parse. + * @returns the parsed status code or InvalidCode if no valid status is found. + */ + static Optional getGrpcStatus(const Http::HeaderMap& headers); + /** * Charge a success/failure stat to a cluster/service/method. * @param cluster supplies the target cluster. diff --git a/source/common/http/headers.h b/source/common/http/headers.h index 0f67bf11552e2..8010b1f933eb1 100644 --- a/source/common/http/headers.h +++ b/source/common/http/headers.h @@ -30,6 +30,7 @@ class HeaderValues { const LowerCaseString EnvoyMaxRetries{"x-envoy-max-retries"}; const LowerCaseString EnvoyOriginalPath{"x-envoy-original-path"}; const LowerCaseString EnvoyRetryOn{"x-envoy-retry-on"}; + const LowerCaseString EnvoyRetryGrpcOn{"x-envoy-retry-grpc-on"}; const LowerCaseString EnvoyUpstreamAltStatName{"x-envoy-upstream-alt-stat-name"}; const LowerCaseString EnvoyUpstreamCanary{"x-envoy-upstream-canary"}; const LowerCaseString EnvoyUpstreamRequestTimeoutAltResponse{ @@ -92,6 +93,12 @@ class HeaderValues { const std::string Retriable4xx{"retriable-4xx"}; } EnvoyRetryOnValues; + struct { + const std::string Cancelled{"cancelled"}; + const std::string DeadlineExceeded{"deadline-exceeded"}; + const std::string ResourceExhausted{"resource-exhausted"}; + } EnvoyRetryOnGrpcValues; + struct { const std::string _100Continue{"100-continue"}; } ExpectValues; diff --git a/source/common/router/BUILD b/source/common/router/BUILD index d91f5a8c68585..c216c14c97114 100644 --- a/source/common/router/BUILD +++ b/source/common/router/BUILD @@ -79,6 +79,7 @@ envoy_cc_library( "//include/envoy/upstream:upstream_interface", "//source/common/common:assert_lib", "//source/common/common:utility_lib", + "//source/common/grpc:common_lib", "//source/common/http:codes_lib", "//source/common/http:headers_lib", "//source/common/http:utility_lib", diff --git a/source/common/router/config_impl.cc b/source/common/router/config_impl.cc index d2cbf864c0659..f715e2eba160c 100644 --- a/source/common/router/config_impl.cc +++ b/source/common/router/config_impl.cc @@ -40,6 +40,8 @@ RetryPolicyImpl::RetryPolicyImpl(const Json::Object& config) { config.getObject("retry_policy")->getInteger("per_try_timeout_ms", 0)); num_retries_ = config.getObject("retry_policy")->getInteger("num_retries", 1); retry_on_ = RetryStateImpl::parseRetryOn(config.getObject("retry_policy")->getString("retry_on")); + retry_on_ |= + RetryStateImpl::parseRetryGrpcOn(config.getObject("retry_policy")->getString("retry_on")); } ShadowPolicyImpl::ShadowPolicyImpl(const Json::Object& config) { diff --git a/source/common/router/retry_state_impl.cc b/source/common/router/retry_state_impl.cc index 44a677f539e9d..d1c16a534a5d1 100644 --- a/source/common/router/retry_state_impl.cc +++ b/source/common/router/retry_state_impl.cc @@ -7,6 +7,7 @@ #include "common/common/assert.h" #include "common/common/utility.h" +#include "common/grpc/common.h" #include "common/http/codes.h" #include "common/http/headers.h" #include "common/http/utility.h" @@ -19,6 +20,9 @@ namespace Router { const uint32_t RetryPolicy::RETRY_ON_5XX; const uint32_t RetryPolicy::RETRY_ON_CONNECT_FAILURE; const uint32_t RetryPolicy::RETRY_ON_RETRIABLE_4XX; +const uint32_t RetryPolicy::RETRY_ON_GRPC_CANCELLED; +const uint32_t RetryPolicy::RETRY_ON_GRPC_DEADLINE_EXCEEDED; +const uint32_t RetryPolicy::RETRY_ON_GRPC_RESOURCE_EXHAUSTED; RetryStatePtr RetryStateImpl::create(const RetryPolicy& route_policy, Http::HeaderMap& request_headers, @@ -29,7 +33,8 @@ RetryStatePtr RetryStateImpl::create(const RetryPolicy& route_policy, RetryStatePtr ret; // We short circuit here and do not both with an allocation if there is no chance we will retry. - if (request_headers.EnvoyRetryOn() || route_policy.retryOn()) { + if (request_headers.EnvoyRetryOn() || request_headers.EnvoyRetryGrpcOn() || + route_policy.retryOn()) { ret.reset(new RetryStateImpl(route_policy, request_headers, cluster, runtime, random, dispatcher, priority)); } @@ -48,12 +53,15 @@ RetryStateImpl::RetryStateImpl(const RetryPolicy& route_policy, Http::HeaderMap& if (request_headers.EnvoyRetryOn()) { retry_on_ = parseRetryOn(request_headers.EnvoyRetryOn()->value().c_str()); - if (retry_on_ != 0 && request_headers.EnvoyMaxRetries()) { - const char* max_retries = request_headers.EnvoyMaxRetries()->value().c_str(); - uint64_t temp; - if (StringUtil::atoul(max_retries, temp)) { - retries_remaining_ = temp; - } + } + if (request_headers.EnvoyRetryGrpcOn()) { + retry_on_ |= parseRetryGrpcOn(request_headers.EnvoyRetryGrpcOn()->value().c_str()); + } + if (retry_on_ != 0 && request_headers.EnvoyMaxRetries()) { + const char* max_retries = request_headers.EnvoyMaxRetries()->value().c_str(); + uint64_t temp; + if (StringUtil::atoul(max_retries, temp)) { + retries_remaining_ = temp; } } @@ -96,6 +104,22 @@ uint32_t RetryStateImpl::parseRetryOn(const std::string& config) { return ret; } +uint32_t RetryStateImpl::parseRetryGrpcOn(const std::string& retry_grpc_on_header) { + uint32_t ret = 0; + std::vector retry_on_list = StringUtil::split(retry_grpc_on_header, ','); + for (const std::string& retry_on : retry_on_list) { + if (retry_on == Http::Headers::get().EnvoyRetryOnGrpcValues.Cancelled) { + ret |= RetryPolicy::RETRY_ON_GRPC_CANCELLED; + } else if (retry_on == Http::Headers::get().EnvoyRetryOnGrpcValues.DeadlineExceeded) { + ret |= RetryPolicy::RETRY_ON_GRPC_DEADLINE_EXCEEDED; + } else if (retry_on == Http::Headers::get().EnvoyRetryOnGrpcValues.ResourceExhausted) { + ret |= RetryPolicy::RETRY_ON_GRPC_RESOURCE_EXHAUSTED; + } + } + + return ret; +} + void RetryStateImpl::resetRetry() { if (callback_) { cluster_.resourceManager(priority_).retries().dec(); @@ -169,6 +193,23 @@ bool RetryStateImpl::wouldRetry(const Http::HeaderMap* response_headers, } } + if (retry_on_ & + (RetryPolicy::RETRY_ON_GRPC_CANCELLED | RetryPolicy::RETRY_ON_GRPC_DEADLINE_EXCEEDED | + RetryPolicy::RETRY_ON_GRPC_RESOURCE_EXHAUSTED) && + response_headers) { + Optional status = Grpc::Common::getGrpcStatus(*response_headers); + if (status.valid()) { + if ((status.value() == Grpc::Status::Canceled && + (retry_on_ & RetryPolicy::RETRY_ON_GRPC_CANCELLED)) || + (status.value() == Grpc::Status::DeadlineExceeded && + (retry_on_ & RetryPolicy::RETRY_ON_GRPC_DEADLINE_EXCEEDED)) || + (status.value() == Grpc::Status::ResourceExhausted && + (retry_on_ & RetryPolicy::RETRY_ON_GRPC_RESOURCE_EXHAUSTED))) { + return true; + } + } + } + return false; } diff --git a/source/common/router/retry_state_impl.h b/source/common/router/retry_state_impl.h index c83519d177aac..e4133f7e7178b 100644 --- a/source/common/router/retry_state_impl.h +++ b/source/common/router/retry_state_impl.h @@ -27,6 +27,9 @@ class RetryStateImpl : public RetryState { static uint32_t parseRetryOn(const std::string& config); + // Returns the RetryPolicy extracted from the x-envoy-retry-grpc-on header. + static uint32_t parseRetryGrpcOn(const std::string& retry_grpc_on_header); + // Router::RetryState bool enabled() override { return retry_on_ != 0; } bool shouldRetry(const Http::HeaderMap* response_headers, diff --git a/test/common/grpc/BUILD b/test/common/grpc/BUILD index fcd6116a58a82..a34964177bdda 100644 --- a/test/common/grpc/BUILD +++ b/test/common/grpc/BUILD @@ -26,6 +26,7 @@ envoy_cc_test( "//source/common/http:headers_lib", "//test/mocks/upstream:upstream_mocks", "//test/proto:helloworld_proto", + "//test/test_common:utility_lib", ], ) diff --git a/test/common/grpc/common_test.cc b/test/common/grpc/common_test.cc index 7487ff6d0b6f9..56b27ce6ec8c2 100644 --- a/test/common/grpc/common_test.cc +++ b/test/common/grpc/common_test.cc @@ -3,12 +3,30 @@ #include "test/mocks/upstream/mocks.h" #include "test/proto/helloworld.pb.h" +#include "test/test_common/utility.h" #include "gtest/gtest.h" namespace Envoy { namespace Grpc { +TEST(GrpcCommonTest, getGrpcStatus) { + Http::TestHeaderMapImpl ok_trailers{{"grpc-status", "0"}}; + EXPECT_EQ(Status::Ok, Common::getGrpcStatus(ok_trailers).value()); + + Http::TestHeaderMapImpl no_status_trailers{{"foo", "bar"}}; + EXPECT_FALSE(Common::getGrpcStatus(no_status_trailers).valid()); + + Http::TestHeaderMapImpl aborted_trailers{{"grpc-status", "10"}}; + EXPECT_EQ(Status::Aborted, Common::getGrpcStatus(aborted_trailers).value()); + + Http::TestHeaderMapImpl unauth_trailers{{"grpc-status", "16"}}; + EXPECT_EQ(Status::Unauthenticated, Common::getGrpcStatus(unauth_trailers).value()); + + Http::TestHeaderMapImpl invalid_trailers{{"grpc-status", "-1"}}; + EXPECT_EQ(Status::InvalidCode, Common::getGrpcStatus(invalid_trailers).value()); +} + TEST(GrpcCommonTest, chargeStats) { NiceMock cluster; Common::chargeStat(cluster, "service", "method", true); diff --git a/test/common/router/config_impl_test.cc b/test/common/router/config_impl_test.cc index 3544da4ea7a60..8148a50e8d394 100644 --- a/test/common/router/config_impl_test.cc +++ b/test/common/router/config_impl_test.cc @@ -1119,6 +1119,90 @@ TEST(RouteMatcherTest, Retry) { .retryOn()); } +TEST(RouteMatcherTest, GrpcRetry) { + std::string json = R"EOF( +{ + "virtual_hosts": [ + { + "name": "www2", + "domains": ["www.lyft.com"], + "routes": [ + { + "prefix": "/foo", + "cluster": "www2", + "retry_policy": { + "retry_on": "connect-failure" + } + }, + { + "prefix": "/bar", + "cluster": "www2" + }, + { + "prefix": "/", + "cluster": "www2", + "retry_policy": { + "per_try_timeout_ms" : 1000, + "num_retries": 3, + "retry_on": "5xx,deadline-exceeded,resource-exhausted" + } + } + ] + } + ] +} + )EOF"; + + Json::ObjectSharedPtr loader = Json::Factory::loadFromString(json); + NiceMock runtime; + NiceMock cm; + ConfigImpl config(*loader, runtime, cm, true); + + EXPECT_FALSE(config.usesRuntime()); + + EXPECT_EQ(std::chrono::milliseconds(0), config.route(genHeaders("www.lyft.com", "/foo", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .perTryTimeout()); + EXPECT_EQ(1U, config.route(genHeaders("www.lyft.com", "/foo", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .numRetries()); + EXPECT_EQ(RetryPolicy::RETRY_ON_CONNECT_FAILURE, + config.route(genHeaders("www.lyft.com", "/foo", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .retryOn()); + + EXPECT_EQ(std::chrono::milliseconds(0), config.route(genHeaders("www.lyft.com", "/foo", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .perTryTimeout()); + EXPECT_EQ(0U, config.route(genHeaders("www.lyft.com", "/bar", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .numRetries()); + EXPECT_EQ(0U, config.route(genHeaders("www.lyft.com", "/bar", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .retryOn()); + + EXPECT_EQ(std::chrono::milliseconds(1000), config.route(genHeaders("www.lyft.com", "/", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .perTryTimeout()); + EXPECT_EQ(3U, config.route(genHeaders("www.lyft.com", "/", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .numRetries()); + EXPECT_EQ(RetryPolicy::RETRY_ON_5XX | RetryPolicy::RETRY_ON_GRPC_DEADLINE_EXCEEDED | + RetryPolicy::RETRY_ON_GRPC_RESOURCE_EXHAUSTED, + config.route(genHeaders("www.lyft.com", "/", "GET"), 0) + ->routeEntry() + ->retryPolicy() + .retryOn()); +} + TEST(RouteMatcherTest, TestBadDefaultConfig) { std::string json = R"EOF( { diff --git a/test/common/router/retry_state_impl_test.cc b/test/common/router/retry_state_impl_test.cc index 16c7c2ee27773..c2dbcc84a7566 100644 --- a/test/common/router/retry_state_impl_test.cc +++ b/test/common/router/retry_state_impl_test.cc @@ -101,6 +101,48 @@ TEST_F(RouterRetryStateImplTest, Policy5xxRemote503) { EXPECT_FALSE(state_->shouldRetry(&response_headers, no_reset_, callback_)); } +TEST_F(RouterRetryStateImplTest, PolicyGrpcCancelled) { + Http::TestHeaderMapImpl request_headers{{"x-envoy-retry-grpc-on", "cancelled"}}; + setup(request_headers); + EXPECT_TRUE(state_->enabled()); + + Http::TestHeaderMapImpl response_headers{{":status", "200"}, {"grpc-status", "1"}}; + expectTimerCreateAndEnable(); + EXPECT_TRUE(state_->shouldRetry(&response_headers, no_reset_, callback_)); + EXPECT_CALL(callback_ready_, ready()); + retry_timer_->callback_(); + + EXPECT_FALSE(state_->shouldRetry(&response_headers, no_reset_, callback_)); +} + +TEST_F(RouterRetryStateImplTest, PolicyGrpcDeadlineExceeded) { + Http::TestHeaderMapImpl request_headers{{"x-envoy-retry-grpc-on", "deadline-exceeded"}}; + setup(request_headers); + EXPECT_TRUE(state_->enabled()); + + Http::TestHeaderMapImpl response_headers{{":status", "200"}, {"grpc-status", "4"}}; + expectTimerCreateAndEnable(); + EXPECT_TRUE(state_->shouldRetry(&response_headers, no_reset_, callback_)); + EXPECT_CALL(callback_ready_, ready()); + retry_timer_->callback_(); + + EXPECT_FALSE(state_->shouldRetry(&response_headers, no_reset_, callback_)); +} + +TEST_F(RouterRetryStateImplTest, PolicyGrpcResourceExhausted) { + Http::TestHeaderMapImpl request_headers{{"x-envoy-retry-grpc-on", "resource-exhausted"}}; + setup(request_headers); + EXPECT_TRUE(state_->enabled()); + + Http::TestHeaderMapImpl response_headers{{":status", "200"}, {"grpc-status", "8"}}; + expectTimerCreateAndEnable(); + EXPECT_TRUE(state_->shouldRetry(&response_headers, no_reset_, callback_)); + EXPECT_CALL(callback_ready_, ready()); + retry_timer_->callback_(); + + EXPECT_FALSE(state_->shouldRetry(&response_headers, no_reset_, callback_)); +} + TEST_F(RouterRetryStateImplTest, Policy5xxRemote200RemoteReset) { // Don't retry after reply start. Http::TestHeaderMapImpl request_headers{{"x-envoy-retry-on", "5xx"}}; diff --git a/test/common/router/router_test.cc b/test/common/router/router_test.cc index 96f10be1403f3..5333ea9955447 100644 --- a/test/common/router/router_test.cc +++ b/test/common/router/router_test.cc @@ -722,6 +722,49 @@ TEST_F(RouterTest, RetryUpstream5xxNotComplete) { .value()); } +TEST_F(RouterTest, RetryUpstreamGrpcCancelled) { + NiceMock encoder1; + Http::StreamDecoder* response_decoder = nullptr; + EXPECT_CALL(cm_.conn_pool_, newStream(_, _)) + .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks) + -> Http::ConnectionPool::Cancellable* { + response_decoder = &decoder; + callbacks.onPoolReady(encoder1, cm_.conn_pool_.host_); + return nullptr; + })); + expectResponseTimerCreate(); + + Http::TestHeaderMapImpl headers{{"x-envoy-grpc-retry-on", "cancelled"}, + {"x-envoy-internal", "true"}}; + HttpTestUtility::addDefaultHeaders(headers); + router_.decodeHeaders(headers, true); + + // gRPC with status "cancelled" (1) + router_.retry_state_->expectRetry(); + Http::HeaderMapPtr response_headers1( + new Http::TestHeaderMapImpl{{":status", "200"}, {":grpc-status", "1"}}); + EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(200)); + response_decoder->decodeHeaders(std::move(response_headers1), true); + + // We expect the grpc-status to result in a retried request. + EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0); + NiceMock encoder2; + EXPECT_CALL(cm_.conn_pool_, newStream(_, _)) + .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks) + -> Http::ConnectionPool::Cancellable* { + response_decoder = &decoder; + callbacks.onPoolReady(encoder2, cm_.conn_pool_.host_); + return nullptr; + })); + router_.retry_state_->callback_(); + + // Normal response. + EXPECT_CALL(*router_.retry_state_, shouldRetry(_, _, _)).WillOnce(Return(false)); + Http::HeaderMapPtr response_headers(new Http::TestHeaderMapImpl{{":status", "200"}}); + EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(200)); + response_decoder->decodeHeaders(std::move(response_headers), true); +} + TEST_F(RouterTest, Shadow) { callbacks_.route_->route_entry_.shadow_policy_.cluster_ = "foo"; callbacks_.route_->route_entry_.shadow_policy_.runtime_key_ = "bar"; diff --git a/test/integration/http2_integration_test.cc b/test/integration/http2_integration_test.cc index 9c4978ac3f31c..dfa280f173c22 100644 --- a/test/integration/http2_integration_test.cc +++ b/test/integration/http2_integration_test.cc @@ -91,6 +91,8 @@ TEST_P(Http2IntegrationTest, TwoRequests) { testTwoRequests(Http::CodecClient::T TEST_P(Http2IntegrationTest, Retry) { testRetry(Http::CodecClient::Type::HTTP2); } +TEST_P(Http2IntegrationTest, GrpcRetry) { testGrpcRetry(); } + TEST_P(Http2IntegrationTest, MaxHeadersInCodec) { Http::TestHeaderMapImpl big_headers{ {":method", "GET"}, {":path", "/test/long/url"}, {":scheme", "http"}, {":authority", "host"}}; diff --git a/test/integration/http2_upstream_integration_test.cc b/test/integration/http2_upstream_integration_test.cc index 5a5cbcc26e5ae..94f4dffc16e5a 100644 --- a/test/integration/http2_upstream_integration_test.cc +++ b/test/integration/http2_upstream_integration_test.cc @@ -93,6 +93,8 @@ TEST_P(Http2UpstreamIntegrationTest, TwoRequests) { TEST_P(Http2UpstreamIntegrationTest, Retry) { testRetry(Http::CodecClient::Type::HTTP2); } +TEST_P(Http2UpstreamIntegrationTest, GrpcRetry) { testGrpcRetry(); } + TEST_P(Http2UpstreamIntegrationTest, DownstreamResetBeforeResponseComplete) { testDownstreamResetBeforeResponseComplete(); } diff --git a/test/integration/integration.cc b/test/integration/integration.cc index f591c28bc0e4c..a615afff28b4d 100644 --- a/test/integration/integration.cc +++ b/test/integration/integration.cc @@ -699,6 +699,73 @@ void BaseIntegrationTest::testRetry(Http::CodecClient::Type type) { [&]() -> void { fake_upstream_connection->waitForDisconnect(); }}); } +void BaseIntegrationTest::testGrpcRetry() { + IntegrationCodecClientPtr codec_client; + FakeHttpConnectionPtr fake_upstream_connection; + IntegrationStreamDecoderPtr response(new IntegrationStreamDecoder(*dispatcher_)); + FakeStreamPtr request; + Http::TestHeaderMapImpl response_trailers{{"response1", "trailer1"}, {"grpc-status", "0"}}; + executeActions( + {[&]() -> void { + codec_client = makeHttpConnection(lookupPort("http"), Http::CodecClient::Type::HTTP2); + }, + [&]() -> void { + Http::StreamEncoder* request_encoder; + request_encoder = &codec_client->startRequest( + Http::TestHeaderMapImpl{{":method", "POST"}, + {":path", "/test/long/url"}, + {":scheme", "http"}, + {":authority", "host"}, + {"x-forwarded-for", "10.0.0.1"}, + {"x-envoy-retry-grpc-on", "cancelled"}}, + *response); + codec_client->sendData(*request_encoder, 1024, true); + }, + [&]() -> void { + fake_upstream_connection = fake_upstreams_[0]->waitForHttpConnection(*dispatcher_); + }, + [&]() -> void { request = fake_upstream_connection->waitForNewStream(); }, + [&]() -> void { request->waitForEndStream(*dispatcher_); }, + [&]() -> void { + request->encodeHeaders(Http::TestHeaderMapImpl{{":status", "200"}, {"grpc-status", "1"}}, + false); + }, + [&]() -> void { + if (fake_upstreams_[0]->httpType() == FakeHttpConnection::Type::HTTP1) { + fake_upstream_connection->waitForDisconnect(); + fake_upstream_connection = fake_upstreams_[0]->waitForHttpConnection(*dispatcher_); + } else { + request->waitForReset(); + } + }, + [&]() -> void { request = fake_upstream_connection->waitForNewStream(); }, + [&]() -> void { request->waitForEndStream(*dispatcher_); }, + [&]() -> void { + request->encodeHeaders(Http::TestHeaderMapImpl{{":status", "200"}}, false); + request->encodeData(512, + fake_upstreams_[0]->httpType() != FakeHttpConnection::Type::HTTP2); + if (fake_upstreams_[0]->httpType() == FakeHttpConnection::Type::HTTP2) { + request->encodeTrailers(response_trailers); + } + }, + [&]() -> void { + response->waitForEndStream(); + EXPECT_TRUE(request->complete()); + EXPECT_EQ(1024U, request->bodyLength()); + + EXPECT_TRUE(response->complete()); + EXPECT_STREQ("200", response->headers().Status()->value().c_str()); + EXPECT_EQ(512U, response->body().size()); + if (fake_upstreams_[0]->httpType() == FakeHttpConnection::Type::HTTP2) { + EXPECT_THAT(*response->trailers(), HeaderMapEqualRef(&response_trailers)); + } + }, + // Cleanup both downstream and upstream + [&]() -> void { codec_client->close(); }, + [&]() -> void { fake_upstream_connection->close(); }, + [&]() -> void { fake_upstream_connection->waitForDisconnect(); }}); +} + void BaseIntegrationTest::testTwoRequests(Http::CodecClient::Type type) { IntegrationCodecClientPtr codec_client; FakeHttpConnectionPtr fake_upstream_connection; diff --git a/test/integration/integration.h b/test/integration/integration.h index 6c9bddc4f7125..f37f71c39f5b9 100644 --- a/test/integration/integration.h +++ b/test/integration/integration.h @@ -204,6 +204,7 @@ class BaseIntegrationTest : Logger::Loggable { void testBadPath(); void testDrainClose(Http::CodecClient::Type type); void testRetry(Http::CodecClient::Type type); + void testGrpcRetry(); // HTTP/2 client tests. void testDownstreamResetBeforeResponseComplete();