diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst index 2f07e2da2e44e..cad369fa6108c 100644 --- a/docs/root/version_history/current.rst +++ b/docs/root/version_history/current.rst @@ -22,6 +22,7 @@ Bug Fixes * active http health checks: properly handles HTTP/2 GOAWAY frames from the upstream. Previously a GOAWAY frame due to a graceful listener drain could cause improper failed health checks due to streams being refused by the upstream on a connection that is going away. To revert to old GOAWAY handling behavior, set the runtime feature `envoy.reloadable_features.health_check.graceful_goaway_handling` to false. * buffer: tighten network connection read and write buffer high watermarks in preparation to more careful enforcement of read limits. Buffer high-watermark is now set to the exact configured value; previously it was set to value + 1. +* http: reverted a behavioral change in which upstream connect timeouts were temporarily treated differently from other connection failures; connect timeouts are once again reported as connection failures. To temporarily restore the interim behavior (connect timeouts reported as local resets), set `envoy.reloadable_features.treat_upstream_connect_timeout_as_connect_failure` to false. * upstream: fix handling of moving endpoints between priorities when active health checks are enabled. Previously moving to a higher numbered priority was a NOOP, and moving to a lower numbered priority caused an abort. 
Removed Config or Runtime diff --git a/source/common/router/upstream_request.cc b/source/common/router/upstream_request.cc index 6a9243370d27c..f0f42fb8c362d 100644 --- a/source/common/router/upstream_request.cc +++ b/source/common/router/upstream_request.cc @@ -337,7 +337,12 @@ void UpstreamRequest::onPoolFailure(ConnectionPool::PoolFailureReason reason, reset_reason = Http::StreamResetReason::ConnectionFailure; break; case ConnectionPool::PoolFailureReason::Timeout: - reset_reason = Http::StreamResetReason::LocalReset; + if (Runtime::runtimeFeatureEnabled( + "envoy.reloadable_features.treat_upstream_connect_timeout_as_connect_failure")) { + reset_reason = Http::StreamResetReason::ConnectionFailure; + } else { + reset_reason = Http::StreamResetReason::LocalReset; + } } // Mimic an upstream reset. diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index cf5187240fe30..4e870e8cc4945 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -85,6 +85,7 @@ constexpr const char* runtime_features[] = { "envoy.reloadable_features.stop_faking_paths", "envoy.reloadable_features.strict_1xx_and_204_response_headers", "envoy.reloadable_features.tls_use_io_handle_bio", + "envoy.reloadable_features.treat_upstream_connect_timeout_as_connect_failure", "envoy.reloadable_features.upstream_host_weight_change_causes_rebuild", "envoy.reloadable_features.vhds_heartbeats", "envoy.reloadable_features.unify_grpc_handling", diff --git a/test/common/router/router_test.cc b/test/common/router/router_test.cc index f28662ff31009..a540901ec2844 100644 --- a/test/common/router/router_test.cc +++ b/test/common/router/router_test.cc @@ -555,6 +555,78 @@ TEST_F(RouterTest, PoolFailureWithPriority) { "upstream_reset_before_response_started{connection failure,tls version mismatch}"); } +TEST_F(RouterTest, PoolFailureDueToConnectTimeout) { + ON_CALL(callbacks_.route_->route_entry_, priority()) + 
.WillByDefault(Return(Upstream::ResourcePriority::High)); + EXPECT_CALL(cm_.thread_local_cluster_, + httpConnPool(Upstream::ResourcePriority::High, _, &router_)); + EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_, newStream(_, _)) + .WillOnce(Invoke([&](Http::StreamDecoder&, Http::ConnectionPool::Callbacks& callbacks) + -> Http::ConnectionPool::Cancellable* { + callbacks.onPoolFailure(ConnectionPool::PoolFailureReason::Timeout, "connect_timeout", + cm_.thread_local_cluster_.conn_pool_.host_); + return nullptr; + })); + + Http::TestResponseHeaderMapImpl response_headers{ + {":status", "503"}, {"content-length", "134"}, {"content-type", "text/plain"}}; + EXPECT_CALL(callbacks_, encodeHeaders_(HeaderMapEqualRef(&response_headers), false)); + EXPECT_CALL(callbacks_, encodeData(_, true)); + EXPECT_CALL(callbacks_.stream_info_, + setResponseFlag(StreamInfo::ResponseFlag::UpstreamConnectionFailure)); + EXPECT_CALL(callbacks_.stream_info_, onUpstreamHostSelected(_)) + .WillOnce(Invoke([&](const Upstream::HostDescriptionConstSharedPtr host) -> void { + EXPECT_EQ(host_address_, host->address()); + })); + + Http::TestRequestHeaderMapImpl headers; + HttpTestUtility::addDefaultHeaders(headers); + router_.decodeHeaders(headers, true); + EXPECT_TRUE(verifyHostUpstreamStats(0, 1)); + // Pool failure, so upstream request was not initiated. 
+ EXPECT_EQ(0U, + callbacks_.route_->route_entry_.virtual_cluster_.stats().upstream_rq_total_.value()); + EXPECT_EQ(callbacks_.details(), + "upstream_reset_before_response_started{connection failure,connect_timeout}"); +} + +TEST_F(RouterTest, PoolFailureDueToConnectTimeoutLegacy) { + TestScopedRuntime scoped_runtime; + Runtime::LoaderSingleton::getExisting()->mergeValues( + {{"envoy.reloadable_features.treat_upstream_connect_timeout_as_connect_failure", "false"}}); + ON_CALL(callbacks_.route_->route_entry_, priority()) + .WillByDefault(Return(Upstream::ResourcePriority::High)); + EXPECT_CALL(cm_.thread_local_cluster_, + httpConnPool(Upstream::ResourcePriority::High, _, &router_)); + EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_, newStream(_, _)) + .WillOnce(Invoke([&](Http::StreamDecoder&, Http::ConnectionPool::Callbacks& callbacks) + -> Http::ConnectionPool::Cancellable* { + callbacks.onPoolFailure(ConnectionPool::PoolFailureReason::Timeout, "connect_timeout", + cm_.thread_local_cluster_.conn_pool_.host_); + return nullptr; + })); + + Http::TestResponseHeaderMapImpl response_headers{ + {":status", "503"}, {"content-length", "127"}, {"content-type", "text/plain"}}; + EXPECT_CALL(callbacks_, encodeHeaders_(HeaderMapEqualRef(&response_headers), false)); + EXPECT_CALL(callbacks_, encodeData(_, true)); + EXPECT_CALL(callbacks_.stream_info_, setResponseFlag(StreamInfo::ResponseFlag::LocalReset)); + EXPECT_CALL(callbacks_.stream_info_, onUpstreamHostSelected(_)) + .WillOnce(Invoke([&](const Upstream::HostDescriptionConstSharedPtr host) -> void { + EXPECT_EQ(host_address_, host->address()); + })); + + Http::TestRequestHeaderMapImpl headers; + HttpTestUtility::addDefaultHeaders(headers); + router_.decodeHeaders(headers, true); + EXPECT_TRUE(verifyHostUpstreamStats(0, 1)); + // Pool failure, so upstream request was not initiated. 
+ EXPECT_EQ(0U, + callbacks_.route_->route_entry_.virtual_cluster_.stats().upstream_rq_total_.value()); + EXPECT_EQ(callbacks_.details(), + "upstream_reset_before_response_started{local reset,connect_timeout}"); +} + TEST_F(RouterTest, Http1Upstream) { EXPECT_CALL(cm_.thread_local_cluster_, httpConnPool(_, absl::optional(), _)); EXPECT_CALL(cm_.thread_local_cluster_.conn_pool_, newStream(_, _))