diff --git a/api/envoy/config/overload/v3/overload.proto b/api/envoy/config/overload/v3/overload.proto index 55a66500c94ed..968480f270b03 100644 --- a/api/envoy/config/overload/v3/overload.proto +++ b/api/envoy/config/overload/v3/overload.proto @@ -100,6 +100,12 @@ message ScaleTimersOverloadActionConfig { // :ref:`HttpConnectionManager.stream_idle_timeout // ` HTTP_DOWNSTREAM_STREAM_IDLE = 2; + + // Adjusts the timer for how long downstream clients have to finish transport-level negotiations + // before the connection is closed. + // This affects the value of + // :ref:`FilterChain.transport_socket_connect_timeout `. + TRANSPORT_SOCKET_CONNECT = 3; } message ScaleTimer { diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst index 62ab94f0ce40a..03b1b2808b591 100644 --- a/docs/root/version_history/current.rst +++ b/docs/root/version_history/current.rst @@ -46,6 +46,7 @@ New Features * dispatcher: supports a stack of `Envoy::ScopeTrackedObject` instead of a single tracked object. This will allow Envoy to dump more debug information on crash. * http: added support for :ref:`:ref:`preconnecting `. Preconnecting is off by default, but recommended for clusters serving latency-sensitive traffic, especially if using HTTP/1.1. * http: change frame flood and abuse checks to the upstream HTTP/2 codec to ON by default. It can be disabled by setting the `envoy.reloadable_features.upstream_http2_flood_checks` runtime key to false. +* overload: add support for scaling :ref:`transport connection timeouts`. This can be used to reduce the TLS handshake timeout in response to overload. * server: added :ref:`fips_mode ` statistic. * tcp_proxy: add support for converting raw TCP streams into HTTP/1.1 CONNECT requests. See :ref:`upgrade documentation ` for details. diff --git a/generated_api_shadow/envoy/config/overload/v3/overload.proto b/generated_api_shadow/envoy/config/overload/v3/overload.proto index 21dd02811a34a..567f9405c693f 100644 --- a/generated_api_shadow/envoy/config/overload/v3/overload.proto +++ b/generated_api_shadow/envoy/config/overload/v3/overload.proto @@ -98,6 +98,12 @@ message ScaleTimersOverloadActionConfig { // :ref:`HttpConnectionManager.stream_idle_timeout // ` HTTP_DOWNSTREAM_STREAM_IDLE = 2; + + // Adjusts the timer for how long downstream clients have to finish transport-level negotiations + // before the connection is closed. + // This affects the value of + // :ref:`FilterChain.transport_socket_connect_timeout `. + TRANSPORT_SOCKET_CONNECT = 3; } message ScaleTimer { diff --git a/include/envoy/event/scaled_timer.h b/include/envoy/event/scaled_timer.h index 63ac8a5f421e8..0d05c79614260 100644 --- a/include/envoy/event/scaled_timer.h +++ b/include/envoy/event/scaled_timer.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "common/common/interval_value.h" @@ -75,6 +76,10 @@ enum class ScaledTimerType { // The amount of time an HTTP stream from a downstream client can remain idle. This corresponds to // the HTTP_DOWNSTREAM_STREAM_IDLE TimerType in overload.proto. HttpDownstreamIdleStreamTimeout, + // The amount of time a connection to a downstream client can spend waiting for the transport to + // report connection establishment before the connection is closed. This corresponds to the + // TRANSPORT_SOCKET_CONNECT_TIMEOUT TimerType in overload.proto. + TransportSocketConnectTimeout, }; using ScaledTimerTypeMap = absl::flat_hash_map; diff --git a/source/common/network/BUILD b/source/common/network/BUILD index a78cbd4276370..a2abb79098637 100644 --- a/source/common/network/BUILD +++ b/source/common/network/BUILD @@ -80,6 +80,7 @@ envoy_cc_library( "//include/envoy/event:timer_interface", "//include/envoy/network:connection_interface", "//include/envoy/network:filter_interface", + "//include/envoy/server/overload:thread_local_overload_state", "//source/common/buffer:buffer_lib", "//source/common/buffer:watermark_buffer_lib", "//source/common/common:assert_lib", diff --git a/source/common/network/connection_impl.cc b/source/common/network/connection_impl.cc index ac1b2edb5b258..19389f2f1aed1 100644 --- a/source/common/network/connection_impl.cc +++ b/source/common/network/connection_impl.cc @@ -7,6 +7,7 @@ #include "envoy/common/exception.h" #include "envoy/common/platform.h" #include "envoy/config/core/v3/base.pb.h" +#include "envoy/event/scaled_range_timer_manager.h" #include "envoy/event/timer.h" #include "envoy/network/filter.h" #include "envoy/network/socket.h" @@ -788,7 +789,8 @@ void ServerConnectionImpl::setTransportSocketConnectTimeout(std::chrono::millise } if (transport_socket_connect_timer_ == nullptr) { transport_socket_connect_timer_ = - dispatcher_.createTimer([this] { onTransportSocketConnectTimeout(); }); + dispatcher_.createScaledTimer(Event::ScaledTimerType::TransportSocketConnectTimeout, + [this] { onTransportSocketConnectTimeout(); }); } transport_socket_connect_timer_->enableTimer(timeout); } diff --git a/source/server/overload_manager_impl.cc b/source/server/overload_manager_impl.cc index 5c9b7266c4539..a7ec7eaa7e45c 100644 --- a/source/server/overload_manager_impl.cc +++ b/source/server/overload_manager_impl.cc @@ -129,6 +129,8 @@ Event::ScaledTimerType parseTimerType( return Event::ScaledTimerType::HttpDownstreamIdleConnectionTimeout; case Config::HTTP_DOWNSTREAM_STREAM_IDLE: return Event::ScaledTimerType::HttpDownstreamIdleStreamTimeout; + case Config::TRANSPORT_SOCKET_CONNECT: + return Event::ScaledTimerType::TransportSocketConnectTimeout; default: throw EnvoyException(fmt::format("Unknown timer type {}", config_timer_type)); } diff --git a/test/common/network/connection_impl_test.cc b/test/common/network/connection_impl_test.cc index c1c927a26ed43..107bb77f60a32 100644 --- a/test/common/network/connection_impl_test.cc +++ b/test/common/network/connection_impl_test.cc @@ -4,6 +4,7 @@ #include "envoy/common/platform.h" #include "envoy/config/core/v3/base.pb.h" +#include "envoy/event/scaled_range_timer_manager.h" #include "envoy/network/address.h" #include "common/api/os_sys_calls_impl.h" @@ -403,7 +404,10 @@ TEST_P(ConnectionImplTest, SetServerTransportSocketTimeout) { // Avoid setting noDelay on the fake fd of 0. auto local_addr = std::make_shared("/pipe/path"); - auto* mock_timer = new NiceMock(mocks.dispatcher_.get()); + auto* mock_timer = new NiceMock(); + EXPECT_CALL(*mocks.dispatcher_, + createScaledTypedTimer_(Event::ScaledTimerType::TransportSocketConnectTimeout, _)) + .WillOnce(DoAll(SaveArg<1>(&mock_timer->callback_), Return(mock_timer))); auto server_connection = std::make_unique( *mocks.dispatcher_, std::make_unique(std::move(io_handle), local_addr, nullptr), @@ -442,7 +446,10 @@ TEST_P(ConnectionImplTest, ServerTransportSocketTimeoutDisabledOnConnect) { IoHandlePtr io_handle = std::make_unique(0); auto local_addr = std::make_shared("/pipe/path"); - auto* mock_timer = new NiceMock(mocks.dispatcher_.get()); + auto* mock_timer = new NiceMock(); + EXPECT_CALL(*mocks.dispatcher_, + createScaledTypedTimer_(Event::ScaledTimerType::TransportSocketConnectTimeout, _)) + .WillOnce(DoAll(SaveArg<1>(&mock_timer->callback_), Return(mock_timer))); auto server_connection = std::make_unique( *mocks.dispatcher_, std::make_unique(std::move(io_handle), local_addr, nullptr), diff --git a/test/integration/overload_integration_test.cc b/test/integration/overload_integration_test.cc index a3b9eb930983b..3135a6c08254f 100644 --- a/test/integration/overload_integration_test.cc +++ b/test/integration/overload_integration_test.cc @@ -7,10 +7,13 @@ #include "test/common/config/dummy_config.pb.h" #include "test/integration/http_protocol_integration.h" +#include "test/integration/ssl_utility.h" #include "test/test_common/registry.h" #include "absl/strings/str_cat.h" +using testing::InvokeWithoutArgs; + namespace Envoy { using testing::HasSubstr; @@ -357,4 +360,86 @@ TEST_P(OverloadScaledTimerIntegrationTest, CloseIdleHttpStream) { EXPECT_THAT(response->body(), HasSubstr("stream timeout")); } +TEST_P(OverloadScaledTimerIntegrationTest, TlsHandshakeTimeout) { + // Set up the Envoy to expect a TLS connection, with a 20 second timeout that can scale down to 5 + // seconds. + config_helper_.addSslConfig(); + config_helper_.addConfigModifier([&](envoy::config::bootstrap::v3::Bootstrap& bootstrap) -> void { + auto* filter_chain = + bootstrap.mutable_static_resources()->mutable_listeners(0)->mutable_filter_chains(0); + auto* connect_timeout = filter_chain->mutable_transport_socket_connect_timeout(); + connect_timeout->set_seconds(20); + connect_timeout->set_nanos(0); + }); + initializeOverloadManager( + TestUtility::parseYaml(R"EOF( + timer_scale_factors: + - timer: TRANSPORT_SOCKET_CONNECT + min_timeout: 5s + )EOF")); + + // Set up a delinquent transport socket that causes the dispatcher to exit on every read & write + // instead of actually doing anything useful. + auto bad_transport_socket = std::make_unique>(); + Network::TransportSocketCallbacks* transport_callbacks; + EXPECT_CALL(*bad_transport_socket, setTransportSocketCallbacks) + .WillOnce(SaveArgAddress(&transport_callbacks)); + ON_CALL(*bad_transport_socket, doRead).WillByDefault(InvokeWithoutArgs([&] { + Buffer::OwnedImpl buffer; + transport_callbacks->connection().dispatcher().exit(); + // Read some amount of data; what's more important is whether the socket was remote-closed. That + // needs to be propagated to the socket. + return Network::IoResult{transport_callbacks->ioHandle().read(buffer, 2 * 1024).rc_ == 0 + ? Network::PostIoAction::Close + : Network::PostIoAction::KeepOpen, + 0, false}; + })); + ON_CALL(*bad_transport_socket, doWrite).WillByDefault(InvokeWithoutArgs([&] { + transport_callbacks->connection().dispatcher().exit(); + return Network::IoResult{Network::PostIoAction::KeepOpen, 0, false}; + })); + + ConnectionStatusCallbacks connect_callbacks; + Network::Address::InstanceConstSharedPtr address = + Ssl::getSslAddress(version_, lookupPort("http")); + auto bad_ssl_client = + dispatcher_->createClientConnection(address, Network::Address::InstanceConstSharedPtr(), + std::move(bad_transport_socket), nullptr); + bad_ssl_client->addConnectionCallbacks(connect_callbacks); + bad_ssl_client->enableHalfClose(true); + bad_ssl_client->connect(); + + // Run the dispatcher until it exits due to a read/write. + dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit); + + // At this point, the connection should be idle but the SSL handshake isn't done. + EXPECT_FALSE(connect_callbacks.connected()); + + // Set the load so the timer is reduced but not to the minimum value. + updateResource(0.8); + test_server_->waitForGaugeGe("overload.envoy.overload_actions.reduce_timeouts.scale_percent", 50); + + // Advancing past the minimum time shouldn't close the connection, but it shouldn't complete it + // either. + timeSystem().advanceTimeWait(std::chrono::seconds(5)); + EXPECT_FALSE(connect_callbacks.connected()); + + // At this point, Envoy has been waiting for the (bad) client to finish the TLS handshake for 5 + // seconds. Increase the load so that the minimum time has now elapsed. This should cause Envoy to + // close the connection on its end. + updateResource(0.9); + test_server_->waitForGaugeEq("overload.envoy.overload_actions.reduce_timeouts.scale_percent", + 100); + + // The bad client will continue attempting to read, eventually noticing the remote close and + // closing the connection. + while (!connect_callbacks.closed()) { + dispatcher_->run(Event::Dispatcher::RunType::NonBlock); + } + + // The transport-level connection was never completed, and the connection was closed. + EXPECT_FALSE(connect_callbacks.connected()); + EXPECT_TRUE(connect_callbacks.closed()); +} + } // namespace Envoy diff --git a/test/server/overload_manager_impl_test.cc b/test/server/overload_manager_impl_test.cc index cc0e8b29e2878..09d58bfa89e0e 100644 --- a/test/server/overload_manager_impl_test.cc +++ b/test/server/overload_manager_impl_test.cc @@ -4,6 +4,7 @@ #include "envoy/config/overload/v3/overload.pb.h" #include "envoy/event/scaled_range_timer_manager.h" #include "envoy/server/overload/overload_manager.h" +#include "envoy/server/overload/thread_local_overload_state.h" #include "envoy/server/resource_monitor.h" #include "envoy/server/resource_monitor_config.h" @@ -494,7 +495,9 @@ constexpr char kReducedTimeoutsConfig[] = R"YAML( - timer: HTTP_DOWNSTREAM_CONNECTION_IDLE min_timeout: 2s - timer: HTTP_DOWNSTREAM_STREAM_IDLE - min_scale: { value: 10 } + min_scale: { value: 10 } # percent + - timer: TRANSPORT_SOCKET_CONNECT + min_scale: { value: 40 } # percent triggers: - name: "envoy.resource_monitors.fake_resource1" scaled: @@ -507,6 +510,7 @@ constexpr std::pair kReducedTimeoutsMinimu {TimerType::HttpDownstreamIdleConnectionTimeout, Event::AbsoluteMinimum(std::chrono::seconds(2))}, {TimerType::HttpDownstreamIdleStreamTimeout, Event::ScaledMinimum(UnitFloat(0.1))}, + {TimerType::TransportSocketConnectTimeout, Event::ScaledMinimum(UnitFloat(0.4))}, }; TEST_F(OverloadManagerImplTest, CreateScaledTimerManager) { auto manager(createOverloadManager(kReducedTimeoutsConfig));