Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/root/version_history/current.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ Bug Fixes

* access log: fix ``%UPSTREAM_CLUSTER%`` when used in http upstream access logs. Previously, it was always logging as an unset value.
* access log: fix ``%UPSTREAM_CLUSTER%`` when used in http upstream access logs. Previously, it was always logging as an unset value.
* active health checks: health checks using a TLS transport socket and secrets delivered via :ref:`SDS <config_secret_discovery_service>` will now wait until secrets are loaded before the first health check attempt. This should improve startup times by not having to wait for the :ref:`no_traffic_interval <envoy_v3_api_field_config.core.v3.HealthCheck.no_traffic_interval>` until the next attempt.
* aws request signer: fix the AWS Request Signer extension to correctly normalize the path and query string to be signed according to AWS' guidelines, so that the hash on the server side matches. See `AWS SigV4 documentaion <https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html>`_.
* cluster: delete pools when they're idle to fix unbounded memory use when using PROXY protocol upstream with tcp_proxy. This behavior can be temporarily reverted by setting the ``envoy.reloadable_features.conn_pool_delete_when_idle`` runtime guard to false.
* ext_authz: fix the ext_authz filter to correctly merge multiple same headers using the ',' as separator in the check request to the external authorization service.
Expand Down
7 changes: 7 additions & 0 deletions envoy/network/transport_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,13 @@ class TransportSocketFactory {
* negotiation.
*/
virtual bool supportsAlpn() const { return false; }

/**
* @param callback supplies a callback to be invoked when the secrets required by the created
* transport sockets are ready. Will be invoked immediately if no secrets are required or if they
* are already loaded.
*/
virtual void addReadyCb(std::function<void()> callback) PURE;
};

using TransportSocketFactoryPtr = std::unique_ptr<TransportSocketFactory>;
Expand Down
10 changes: 10 additions & 0 deletions envoy/upstream/upstream.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,16 @@ class Host : virtual public HostDescription {
Network::TransportSocketOptionsConstSharedPtr transport_socket_options,
const envoy::config::core::v3::Metadata* metadata) const PURE;

/**
* Register a callback to be invoked when secrets are ready for the health
* checking transport socket that corresponds to the provided metadata.
* @param callback supplies the callback to be invoked.
* @param metadata supplies the metadata to be used for resolving transport socket matches.
*/
virtual void
addHealthCheckingReadyCb(std::function<void()> callback,
const envoy::config::core::v3::Metadata* metadata) const PURE;

/**
* @return host specific gauges.
*/
Expand Down
1 change: 1 addition & 0 deletions source/common/network/raw_buffer_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class RawBufferSocketFactory : public TransportSocketFactory {
createTransportSocket(TransportSocketOptionsConstSharedPtr options) const override;
bool implementsSecureTransport() const override;
bool usesProxyProtocolOptions() const override { return false; }
void addReadyCb(std::function<void()> callback) override { callback(); }
};

} // namespace Network
Expand Down
1 change: 1 addition & 0 deletions source/common/quic/quic_transport_socket_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class QuicTransportSocketFactoryBase : public Network::TransportSocketFactory,
bool implementsSecureTransport() const override { return true; }
bool usesProxyProtocolOptions() const override { return false; }
bool supportsAlpn() const override { return true; }
void addReadyCb(std::function<void()> callback) override { callback(); }

protected:
virtual void onSecretUpdated() PURE;
Expand Down
19 changes: 18 additions & 1 deletion source/common/upstream/health_checker_base_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,8 @@ HealthCheckerImplBase::ActiveHealthCheckSession::ActiveHealthCheckSession(
HealthCheckerImplBase& parent, HostSharedPtr host)
: host_(host), parent_(parent),
interval_timer_(parent.dispatcher_.createTimer([this]() -> void { onIntervalBase(); })),
timeout_timer_(parent.dispatcher_.createTimer([this]() -> void { onTimeoutBase(); })) {
timeout_timer_(parent.dispatcher_.createTimer([this]() -> void { onTimeoutBase(); })),
lifetime_guard_(std::make_shared<uint32_t>(1)) {

if (!host->healthFlagGet(Host::HealthFlag::FAILED_ACTIVE_HC)) {
parent.incHealthy();
Expand Down Expand Up @@ -411,6 +412,22 @@ void HealthCheckerImplBase::ActiveHealthCheckSession::onTimeoutBase() {
handleFailure(envoy::data::core::v3::NETWORK_TIMEOUT);
}

void HealthCheckerImplBase::ActiveHealthCheckSession::start() {
// Start health checks only after secrets are ready for the transport socket that health checks
// will be performed on. If health checks start immediately, they may fail with "network" errors
// due to TLS credentials not yet being loaded, which can result in long startup times. The
// callback needs to make sure this ActiveHealthCheckSession wasn't deleted before starting the
// health check loop in case it takes a while for the socket to become ready.
std::weak_ptr<uint32_t> lifetime_guard = lifetime_guard_;
host_->addHealthCheckingReadyCb(
[this, lifetime_guard] {
if (lifetime_guard.lock()) {
onInitialInterval();
}
},
parent_.transportSocketMatchMetadata().get());
}

void HealthCheckerImplBase::ActiveHealthCheckSession::onInitialInterval() {
if (parent_.initial_jitter_.count() == 0) {
onIntervalBase();
Expand Down
6 changes: 5 additions & 1 deletion source/common/upstream/health_checker_base_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class HealthCheckerImplBase : public HealthChecker,
~ActiveHealthCheckSession() override;
HealthTransition setUnhealthy(envoy::data::core::v3::HealthCheckFailureType type);
void onDeferredDeleteBase();
void start() { onInitialInterval(); }
void start();

protected:
ActiveHealthCheckSession(HealthCheckerImplBase& parent, HostSharedPtr host);
Expand Down Expand Up @@ -107,6 +107,10 @@ class HealthCheckerImplBase : public HealthChecker,
uint32_t num_unhealthy_{};
uint32_t num_healthy_{};
bool first_check_{true};

// lifetime_guard_ is used to ensure health checks are not started via a callback after this
// ActiveHealthCheckSession has been deleted.
std::shared_ptr<uint32_t> lifetime_guard_{};
};

using ActiveHealthCheckSessionPtr = std::unique_ptr<ActiveHealthCheckSession>;
Expand Down
8 changes: 8 additions & 0 deletions source/common/upstream/upstream_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,14 @@ Network::ClientConnectionPtr HostImpl::createConnection(
return connection;
}

void HostImpl::addHealthCheckingReadyCb(std::function<void()> callback,
const envoy::config::core::v3::Metadata* metadata) const {
Network::TransportSocketFactory& factory =
(metadata != nullptr) ? resolveTransportSocketFactory(healthCheckAddress(), metadata)
: transportSocketFactory();
factory.addReadyCb(callback);
}

void HostImpl::weight(uint32_t new_weight) { weight_ = std::max(1U, new_weight); }

std::vector<HostsPerLocalityConstSharedPtr> HostsPerLocalityImpl::filter(
Expand Down
3 changes: 3 additions & 0 deletions source/common/upstream/upstream_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,9 @@ class HostImpl : public HostDescriptionImpl,
Network::TransportSocketOptionsConstSharedPtr transport_socket_options,
const envoy::config::core::v3::Metadata* metadata) const override;

void addHealthCheckingReadyCb(std::function<void()> callback,
const envoy::config::core::v3::Metadata* metadata) const override;

std::vector<std::pair<absl::string_view, Stats::PrimitiveGaugeReference>>
gauges() const override {
return stats().gauges();
Expand Down
2 changes: 2 additions & 0 deletions source/extensions/transport_sockets/alts/tsi_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ class TsiSocketFactory : public Network::TransportSocketFactory {
Network::TransportSocketPtr
createTransportSocket(Network::TransportSocketOptionsConstSharedPtr options) const override;

void addReadyCb(std::function<void()> callback) override { callback(); };

private:
HandshakerFactory handshaker_factory_;
HandshakeValidator handshake_validator_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class UpstreamProxyProtocolSocketFactory : public Network::TransportSocketFactor
createTransportSocket(Network::TransportSocketOptionsConstSharedPtr options) const override;
bool implementsSecureTransport() const override;
bool usesProxyProtocolOptions() const override { return true; }
void addReadyCb(std::function<void()> callback) override { callback(); };

private:
Network::TransportSocketFactoryPtr transport_socket_factory_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ class StartTlsSocketFactory : public Network::TransportSocketFactory,
bool implementsSecureTransport() const override { return false; }
bool usesProxyProtocolOptions() const override { return false; }

// TODO(mpuncel) only invoke callback() once secrets are ready.
void addReadyCb(std::function<void()> callback) override { callback(); }

private:
Network::TransportSocketFactoryPtr raw_socket_factory_;
Network::TransportSocketFactoryPtr tls_socket_factory_;
Expand Down
3 changes: 3 additions & 0 deletions source/extensions/transport_sockets/tap/tap.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class TapSocketFactory : public Network::TransportSocketFactory,
bool implementsSecureTransport() const override;
bool usesProxyProtocolOptions() const override;

// TODO(mpuncel) only invoke callback() once secrets are ready.
void addReadyCb(std::function<void()> callback) override { callback(); };

private:
Network::TransportSocketFactoryPtr transport_socket_factory_;
};
Expand Down
57 changes: 57 additions & 0 deletions source/extensions/transport_sockets/tls/ssl_socket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -379,13 +379,41 @@ bool ClientSslSocketFactory::implementsSecureTransport() const { return true; }

void ClientSslSocketFactory::onAddOrUpdateSecret() {
ENVOY_LOG(debug, "Secret is updated.");
bool should_run_callbacks = false;
{
absl::WriterMutexLock l(&ssl_ctx_mu_);
ssl_ctx_ = manager_.createSslClientContext(stats_scope_, *config_, ssl_ctx_);
if (ssl_ctx_) {
should_run_callbacks = true;
}
}

if (should_run_callbacks) {
absl::WriterMutexLock m(&secrets_ready_callbacks_mu_);
for (const auto& cb : secrets_ready_callbacks_) {
cb();
}
secrets_ready_callbacks_.clear();
}
stats_.ssl_context_update_by_sds_.inc();
}

void ClientSslSocketFactory::addReadyCb(std::function<void()> callback) {
bool immediately_run_callback = false;
{
absl::ReaderMutexLock l(&ssl_ctx_mu_);
if (ssl_ctx_) {
immediately_run_callback = true;
} else {
absl::WriterMutexLock m(&secrets_ready_callbacks_mu_);
secrets_ready_callbacks_.push_back(callback);
}
}
if (immediately_run_callback) {
callback();
}
}
Comment on lines +401 to +415
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider this change here and in ServerSslSocketFactory::addReadyCb(). This way the ssl_ctx lock could be released earlier.

Suggested change
void ClientSslSocketFactory::addReadyCb(std::function<void()> callback) {
bool immediately_run_callback = false;
{
absl::ReaderMutexLock l(&ssl_ctx_mu_);
if (ssl_ctx_) {
immediately_run_callback = true;
} else {
absl::WriterMutexLock m(&secrets_ready_callbacks_mu_);
secrets_ready_callbacks_.push_back(callback);
}
}
if (immediately_run_callback) {
callback();
}
}
void ClientSslSocketFactory::addReadyCb(std::function<void()> callback) {
bool immediately_run_callback = false;
{
absl::ReaderMutexLock l(&ssl_ctx_mu_);
if (ssl_ctx_) {
immediately_run_callback = true;
}
}
if (immediately_run_callback) {
callback();
} else {
absl::WriterMutexLock m(&secrets_ready_callbacks_mu_);
secrets_ready_callbacks_.push_back(callback);
}
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't I need to hold both locks when adding to the callbacks list? otherwise another thread could set ssl_ctx_mu and then not see this callback should be run

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are the implications if the missed callback runs next time onAddOrUpdateSecret() is called?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the next onAddOrUpdateSecret() could potentially be hours or days later, e.g. served by an SDS server with a low certificate refresh rate. that would mean health checks don't start for that long and Envoy is stuck in a warming state

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, SGTM.


ServerSslSocketFactory::ServerSslSocketFactory(Envoy::Ssl::ServerContextConfigPtr config,
Envoy::Ssl::ContextManager& manager,
Stats::Scope& stats_scope,
Expand Down Expand Up @@ -425,13 +453,42 @@ bool ServerSslSocketFactory::implementsSecureTransport() const { return true; }

void ServerSslSocketFactory::onAddOrUpdateSecret() {
ENVOY_LOG(debug, "Secret is updated.");
bool should_run_callbacks = false;
{
absl::WriterMutexLock l(&ssl_ctx_mu_);
ssl_ctx_ = manager_.createSslServerContext(stats_scope_, *config_, server_names_, ssl_ctx_);

if (ssl_ctx_) {
should_run_callbacks = true;
}
}

if (should_run_callbacks) {
absl::WriterMutexLock l(&secrets_ready_callbacks_mu_);
for (const auto& cb : secrets_ready_callbacks_) {
cb();
}
secrets_ready_callbacks_.clear();
}
stats_.ssl_context_update_by_sds_.inc();
}

void ServerSslSocketFactory::addReadyCb(std::function<void()> callback) {
bool immediately_run_callback = false;
{
absl::ReaderMutexLock l(&ssl_ctx_mu_);
if (ssl_ctx_) {
immediately_run_callback = true;
} else {
absl::WriterMutexLock m(&secrets_ready_callbacks_mu_);
secrets_ready_callbacks_.push_back(callback);
}
}
if (immediately_run_callback) {
callback();
}
}

} // namespace Tls
} // namespace TransportSockets
} // namespace Extensions
Expand Down
10 changes: 10 additions & 0 deletions source/extensions/transport_sockets/tls/ssl_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ class ClientSslSocketFactory : public Network::TransportSocketFactory,
bool usesProxyProtocolOptions() const override { return false; }
bool supportsAlpn() const override { return true; }

void addReadyCb(std::function<void()> callback) override;

// Secret::SecretCallbacks
void onAddOrUpdateSecret() override;

Expand All @@ -126,6 +128,9 @@ class ClientSslSocketFactory : public Network::TransportSocketFactory,
Envoy::Ssl::ClientContextConfigPtr config_;
mutable absl::Mutex ssl_ctx_mu_;
Envoy::Ssl::ClientContextSharedPtr ssl_ctx_ ABSL_GUARDED_BY(ssl_ctx_mu_);
mutable absl::Mutex secrets_ready_callbacks_mu_;
std::list<std::function<void()>>
secrets_ready_callbacks_ ABSL_GUARDED_BY(secrets_ready_callbacks_mu_);
};

class ServerSslSocketFactory : public Network::TransportSocketFactory,
Expand All @@ -141,6 +146,8 @@ class ServerSslSocketFactory : public Network::TransportSocketFactory,
bool implementsSecureTransport() const override;
bool usesProxyProtocolOptions() const override { return false; }

void addReadyCb(std::function<void()> callback) override;

// Secret::SecretCallbacks
void onAddOrUpdateSecret() override;

Expand All @@ -152,6 +159,9 @@ class ServerSslSocketFactory : public Network::TransportSocketFactory,
const std::vector<std::string> server_names_;
mutable absl::Mutex ssl_ctx_mu_;
Envoy::Ssl::ServerContextSharedPtr ssl_ctx_ ABSL_GUARDED_BY(ssl_ctx_mu_);
mutable absl::Mutex secrets_ready_callbacks_mu_;
std::list<std::function<void()>>
secrets_ready_callbacks_ ABSL_GUARDED_BY(secrets_ready_callbacks_mu_);
};

} // namespace Tls
Expand Down
57 changes: 51 additions & 6 deletions test/common/upstream/health_checker_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,8 @@ TEST_F(HttpHealthCheckerImplTest, TlsOptions) {
Network::TransportSocketFactoryPtr(socket_factory));
cluster_->info_->transport_socket_matcher_.reset(transport_socket_match);

EXPECT_CALL(*socket_factory, addReadyCb(_))
.WillOnce(Invoke([&](std::function<void()> callback) -> void { callback(); }));
EXPECT_CALL(*socket_factory, createTransportSocket(ApplicationProtocolListEq("http1")));

allocHealthChecker(yaml);
Expand Down Expand Up @@ -2582,13 +2584,19 @@ TEST_F(HttpHealthCheckerImplTest, TransportSocketMatchCriteria) {
ALL_TRANSPORT_SOCKET_MATCH_STATS(POOL_COUNTER_PREFIX(stats_store, "test"))};
auto health_check_only_socket_factory = std::make_unique<Network::MockTransportSocketFactory>();

// We expect resolve() to be called twice, once for endpoint socket matching (with no metadata in
// this test) and once for health check socket matching. In the latter we expect metadata that
// matches the above object.
// We expect resolve() to be called 3 times, once for endpoint socket matching (with no metadata
// in this test) and twice for health check socket matching (once for checking if secrets are
// ready on the transport socket, and again for actually getting the health check transport socket
// to create a connection). In the latter 2 calls, we expect metadata that matches the above
// object.
EXPECT_CALL(*transport_socket_match, resolve(nullptr));
EXPECT_CALL(*transport_socket_match, resolve(MetadataEq(metadata)))
.WillOnce(Return(TransportSocketMatcher::MatchData(
*health_check_only_socket_factory, health_transport_socket_stats, "health_check_only")));
.Times(2)
.WillRepeatedly(Return(TransportSocketMatcher::MatchData(
*health_check_only_socket_factory, health_transport_socket_stats, "health_check_only")))
.RetiresOnSaturation();
EXPECT_CALL(*health_check_only_socket_factory, addReadyCb(_))
.WillOnce(Invoke([&](std::function<void()> callback) -> void { callback(); }));
// The health_check_only_socket_factory should be used to create a transport socket for the health
// check connection.
EXPECT_CALL(*health_check_only_socket_factory, createTransportSocket(_));
Expand All @@ -2604,7 +2612,11 @@ TEST_F(HttpHealthCheckerImplTest, TransportSocketMatchCriteria) {
expectStreamCreate(0);
EXPECT_CALL(*test_sessions_[0]->timeout_timer_, enableTimer(_, _));
health_checker_->start();
EXPECT_EQ(health_transport_socket_stats.total_match_count_.value(), 1);

// We expect 2 transport socket matches: one for when
// addHealthCheckingReadyCb() evaluates the match to register a callback on
// the socket, and once when the health checks are actually performed.
EXPECT_EQ(health_transport_socket_stats.total_match_count_.value(), 2);
}

TEST_F(HttpHealthCheckerImplTest, NoTransportSocketMatchCriteria) {
Expand All @@ -2624,6 +2636,9 @@ TEST_F(HttpHealthCheckerImplTest, NoTransportSocketMatchCriteria) {
)EOF";

auto default_socket_factory = std::make_unique<Network::MockTransportSocketFactory>();

EXPECT_CALL(*default_socket_factory, addReadyCb(_))
.WillOnce(Invoke([&](std::function<void()> callback) -> void { callback(); }));
// The default_socket_factory should be used to create a transport socket for the health check
// connection.
EXPECT_CALL(*default_socket_factory, createTransportSocket(_));
Expand Down Expand Up @@ -3116,6 +3131,36 @@ TEST_F(HttpHealthCheckerImplTest, ServiceNameMismatch) {
cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health());
}

// Test that the underlying transport socket becoming ready after the health check session
// is destroyed doesn't attempt to start health checks.
TEST_F(HttpHealthCheckerImplTest, NoHealthCheckAfterSessionDestroyed) {
auto default_socket_factory = std::make_unique<Network::MockTransportSocketFactory>();
std::function<void()> callback = nullptr;

// Capture the callback to addReadyCb. We will invoke it later once the session is destroyed.
EXPECT_CALL(*default_socket_factory, addReadyCb(_))
.WillOnce(Invoke([&](std::function<void()> cb) -> void { callback = cb; }));

auto transport_socket_match =
std::make_unique<Upstream::MockTransportSocketMatcher>(std::move(default_socket_factory));
EXPECT_CALL(*transport_socket_match, resolve(nullptr));
cluster_->info_->transport_socket_matcher_ = std::move(transport_socket_match);

setupNoServiceValidationHC();

cluster_->prioritySet().getMockHostSet(0)->hosts_ = {
makeTestHost(cluster_->info_, "tcp://127.0.0.1:80", simTime())};

health_checker_->start();

// Destroy the health checker object.
health_checker_.reset();

// Call the callback that would have started health checks had the health checker object not
// destroyed. This should not segfault or otherwise attempt to start health checking.
callback();
}

TEST_F(ProdHttpHealthCheckerTest, ProdHttpHealthCheckerH2HealthChecking) {
setupNoServiceValidationHCWithHttp2();
EXPECT_EQ(Http::CodecType::HTTP2,
Expand Down
Loading