Skip to content
Closed
Show file tree
Hide file tree
Changes from 43 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
b882bde
cluster manager: avoid immediate activation for dynamic inserted clus…
Shikugawa Aug 22, 2020
7e9332d
update count
Shikugawa Sep 13, 2020
8e99e81
cleanup
Shikugawa Sep 30, 2020
478f578
sds: keep warming when dynamic inserted cluster can't be extracted se…
Shikugawa Oct 1, 2020
b4af995
resolve conflict
Shikugawa Oct 1, 2020
8cda02f
add unit test
Shikugawa Oct 1, 2020
5841c9e
typo
Shikugawa Oct 1, 2020
99276bc
fix build
Shikugawa Oct 1, 2020
6e95a07
test coverage
Shikugawa Oct 2, 2020
3c600ec
Kick CI
Shikugawa Oct 2, 2020
fa718a9
resolve conflict
Shikugawa Oct 8, 2020
cb5a373
add comment
Shikugawa Oct 8, 2020
456b5b9
build fix
Shikugawa Oct 8, 2020
b77dae0
merge
Shikugawa Oct 16, 2020
c24b729
Merge branch 'master' of https://github.com/envoyproxy/envoy into fix…
Shikugawa Oct 17, 2020
476d706
transport socket abstraction
Shikugawa Oct 18, 2020
96a71c4
coverage
Shikugawa Oct 19, 2020
3a2cfea
simpler
Shikugawa Oct 21, 2020
34ea49c
coverage
Shikugawa Oct 22, 2020
b2b99be
cleanup and coverage
Shikugawa Oct 22, 2020
5bbae5f
Kick CI
Shikugawa Oct 22, 2020
1e4f950
follow up
Shikugawa Oct 22, 2020
31354e4
ci
Shikugawa Oct 22, 2020
39606e6
Merge branch 'master' of https://github.com/envoyproxy/envoy into fix…
Shikugawa Oct 22, 2020
0ad2773
reorder
Shikugawa Oct 22, 2020
186c305
add runtime feature flag
Shikugawa Oct 26, 2020
5f83864
typo
Shikugawa Oct 26, 2020
1cdfbe6
follow up
Shikugawa Oct 26, 2020
0e237a1
fix nullptr reference
Shikugawa Nov 4, 2020
2a46a40
Merge branch 'master' of https://github.com/envoyproxy/envoy into fix…
Shikugawa Nov 4, 2020
231bfff
fix
Shikugawa Nov 4, 2020
3c6902f
fix
Shikugawa Nov 4, 2020
d05a1a3
Merge remote-tracking branch 'upstream/master' into sds_fix
lizan Nov 6, 2020
a4fcf00
fix transport socket match and optimize flow
lizan Nov 6, 2020
1575c36
fix test
lizan Nov 10, 2020
a18c4a6
style fix
lizan Nov 10, 2020
9fb4528
Merge remote-tracking branch 'upstream/master' into sds_fix
lizan Nov 10, 2020
fb056ad
coverage
lizan Nov 10, 2020
31bfe83
Kick CI
Shikugawa Nov 11, 2020
6ff7ae2
check all transport socket factories
lizan Nov 12, 2020
d1cf6a2
Merge branch 'fix-sds-activate-timing' of https://github.com/Shikugaw…
lizan Nov 12, 2020
37a272f
fix wording
lizan Nov 12, 2020
c83b7c8
fix test
lizan Nov 12, 2020
85639eb
update comment
lizan Nov 13, 2020
edead46
spell
lizan Nov 13, 2020
150a789
fix comments
Shikugawa Nov 14, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions include/envoy/network/transport_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,11 @@ class TransportSocketFactory {
createTransportSocket(TransportSocketOptionsSharedPtr options) const PURE;

/**
* Check whether matched transport socket which required to use secret information is available.
*/
virtual bool isReady() const PURE;

/*
* @return bool whether the transport socket will use proxy protocol options.
*/
virtual bool usesProxyProtocolOptions() const PURE;
Expand Down
5 changes: 5 additions & 0 deletions include/envoy/ssl/context_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@ class ClientContextConfig : public virtual ContextConfig {
* for names.
*/
virtual const std::string& signingAlgorithmsForTest() const PURE;

/**
* Check whether TLS certificate entity and certificate validation context entity is available
*/
virtual bool isSecretReady() const PURE;
};

using ClientContextConfigPtr = std::unique_ptr<ClientContextConfig>;
Expand Down
5 changes: 5 additions & 0 deletions include/envoy/upstream/host_description.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,11 @@ class TransportSocketMatcher {
* @return the match information of the transport socket selected.
*/
virtual MatchData resolve(const envoy::config::core::v3::Metadata* metadata) const PURE;

/**
* Check if all transport socket factories are ready.
*/
virtual bool factoriesReady() const PURE;
};

using TransportSocketMatcherPtr = std::unique_ptr<TransportSocketMatcher>;
Expand Down
2 changes: 2 additions & 0 deletions source/common/network/raw_buffer_socket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,5 +93,7 @@ RawBufferSocketFactory::createTransportSocket(TransportSocketOptionsSharedPtr) c
}

bool RawBufferSocketFactory::implementsSecureTransport() const { return false; }

bool RawBufferSocketFactory::isReady() const { return true; }
} // namespace Network
} // namespace Envoy
1 change: 1 addition & 0 deletions source/common/network/raw_buffer_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class RawBufferSocketFactory : public TransportSocketFactory {
// Network::TransportSocketFactory
TransportSocketPtr createTransportSocket(TransportSocketOptionsSharedPtr options) const override;
bool implementsSecureTransport() const override;
bool isReady() const override;
bool usesProxyProtocolOptions() const override { return false; }
};

Expand Down
3 changes: 2 additions & 1 deletion source/common/runtime/runtime_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ constexpr const char* disabled_runtime_features[] = {
"envoy.reloadable_features.test_feature_false",
// gRPC Timeout header is missing (#13580)
"envoy.reloadable_features.ext_authz_measure_timeout_on_check_created",

// When a cluster can't extract secret entity by SDS, keep it warming and never activate.
"envoy.reloadable_features.cluster_keep_warming_no_secret_entity",
};

RuntimeFeatures::RuntimeFeatures() {
Expand Down
13 changes: 13 additions & 0 deletions source/common/upstream/cluster_manager_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,19 @@ void ClusterManagerImpl::onClusterInit(Cluster& cluster) {
// We have a situation that clusters will be immediately active, such as static and primary
// cluster. So we must have this prevention logic here.
if (cluster_data != warming_clusters_.end()) {
// If there is no secret entity, currently supports only TLS Certificate and Validation
// Context, when it failed to extract them via SDS, it will fail to change cluster status from
// warming to active. In current implementation, there is no strategy to activate clusters
// which failed to initialize at once.
// TODO(shikugawa): To implement to be available by keeping warming after no-available secret
// entity behavior occurred. And remove
// `envoy.reloadable_features.cluster_keep_warming_no_secret_entity` runtime feature flag.
if (Runtime::runtimeFeatureEnabled(
"envoy.reloadable_features.cluster_keep_warming_no_secret_entity") &&
!cluster.info()->transportSocketMatcher().factoriesReady()) {
ENVOY_LOG(warn, "Failed to activate {}", cluster.info()->name());
return;

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does the cluster ever activate? I think this function never gets called again?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is keep the cluster warming unless it has the a valid secret entity since it can't be used in meaningful way, because the cluster will end up using NotReadySslSocket and return 503s always. That's what the TODO above is about (#13952) and the long term fix #13952

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But how can the cluster ever leave warming? It needs to get updated again by the management server?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, until #13777 (that's the TODO in the previous comment but I pasted wrong)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But stepping back, what does this change give us? If the cluster stays in warming and the server starts up, we will still get 503s? Or if a cluster gets stuck in warming even if the user sets a timeout, it seems like it would be extremely difficult to debug this?

What is the exact scenario we are trying to protect against? I assume the server came up and SDS is not ready during a CDS update? Because I don't think this woulds block initial server startup?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK that's fine, but I don't really understand what the implications of this are and whether it's going to cause more confusing behavior that is also hard to debug. Can you add more comments and we can discuss? Do we need stats for this also? I just don't know what we are dealing with here honestly.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO this won't cause more confusing behavior. While I agree that the current behavior is still confusing because the cluster with not ready ssl will fail when it gets traffic. The problem that this PR fix is that Envoy won't advertise itself is ready in that case.

The runtime flag is off by default and will be removed when we have long term fix. I don't think we need a stats for this given we have a warn level log and the flag is off by default.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK can you add more comments and we can go from there? I think we are going in circles as I'm unclear on what the old/new behavior is. :)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mattklein123 reworded the comments.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The other issue, which I am not sure if it is addressed here or another PR, is that occasionally we see that Envoy fails to actually send an SDS request to the control plane, so the cluster never actually gets ready. This is exacerbated by the previous issue, because not only will we never be able to handle (tls) traffic, we will also mark ourselves as ready.

@howardjohn I have seen such an issue as well. Was this root caused to a bug in envoy?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still don't understand how this blocks initialization. Unless I am missing something, the ClusterManagerInitHelper will still complete initialization with this early return. This will cause workers to start, etc., leading to 503s. If this is not the case can you update the comments?

This will block warming -> active in a subsequent CDS update, which is marginally better, but I don't think it fixes the problem of server init?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add the additional comment from the offline:
server init determine decide if all clusters are initialized by number of elements in secondary_init_clusters_ and primary_init_clusters_
This early return doesn't skip removing that cluster from these two lists.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. That's what I thought.

}
clusterWarmingToActive(cluster.info()->name());
updateClusterCounts();
}
Expand Down
9 changes: 9 additions & 0 deletions source/common/upstream/transport_socket_match_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,14 @@ TransportSocketMatcherImpl::resolve(const envoy::config::core::v3::Metadata* met
return MatchData(*default_match_.factory, default_match_.stats, default_match_.name);
}

bool TransportSocketMatcherImpl::factoriesReady() const {
for (const auto& match : matches_) {
if (!match.factory->isReady()) {
return false;
}
}
return default_match_.factory->isReady();
}

} // namespace Upstream
} // namespace Envoy
1 change: 1 addition & 0 deletions source/common/upstream/transport_socket_match_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class TransportSocketMatcherImpl : public Logger::Loggable<Logger::Id::upstream>
Network::TransportSocketFactoryPtr& default_factory, Stats::Scope& stats_scope);

MatchData resolve(const envoy::config::core::v3::Metadata* metadata) const override;
bool factoriesReady() const override;

protected:
TransportSocketMatchStats generateStats(const std::string& prefix);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class QuicTransportSocketFactoryBase : public Network::TransportSocketFactory {
NOT_REACHED_GCOVR_EXCL_LINE;
}
bool implementsSecureTransport() const override { return true; }
bool isReady() const override { return true; }
bool usesProxyProtocolOptions() const override { return false; }
};

Expand Down
1 change: 1 addition & 0 deletions source/extensions/transport_sockets/alts/tsi_socket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ TsiSocketFactory::createTransportSocket(Network::TransportSocketOptionsSharedPtr
return std::make_unique<TsiSocket>(handshaker_factory_, handshake_validator_);
}

bool TsiSocketFactory::isReady() const { return true; }
} // namespace Alts
} // namespace TransportSockets
} // namespace Extensions
Expand Down
1 change: 1 addition & 0 deletions source/extensions/transport_sockets/alts/tsi_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ class TsiSocketFactory : public Network::TransportSocketFactory {
bool usesProxyProtocolOptions() const override { return false; }
Network::TransportSocketPtr
createTransportSocket(Network::TransportSocketOptionsSharedPtr options) const override;
bool isReady() const override;

private:
HandshakerFactory handshaker_factory_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,11 @@ bool UpstreamProxyProtocolSocketFactory::implementsSecureTransport() const {
return transport_socket_factory_->implementsSecureTransport();
}

bool UpstreamProxyProtocolSocketFactory::isReady() const {
return transport_socket_factory_->isReady();
}

} // namespace ProxyProtocol
} // namespace TransportSockets
} // namespace Extensions
} // namespace Envoy
} // namespace Envoy
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class UpstreamProxyProtocolSocketFactory : public Network::TransportSocketFactor
Network::TransportSocketPtr
createTransportSocket(Network::TransportSocketOptionsSharedPtr options) const override;
bool implementsSecureTransport() const override;
bool isReady() const override;
bool usesProxyProtocolOptions() const override { return true; }

private:
Expand Down
2 changes: 2 additions & 0 deletions source/extensions/transport_sockets/tap/tap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ bool TapSocketFactory::implementsSecureTransport() const {
return transport_socket_factory_->implementsSecureTransport();
}

bool TapSocketFactory::isReady() const { return transport_socket_factory_->isReady(); }

bool TapSocketFactory::usesProxyProtocolOptions() const {
return transport_socket_factory_->usesProxyProtocolOptions();
}
Expand Down
1 change: 1 addition & 0 deletions source/extensions/transport_sockets/tap/tap.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class TapSocketFactory : public Network::TransportSocketFactory,
Network::TransportSocketPtr
createTransportSocket(Network::TransportSocketOptionsSharedPtr options) const override;
bool implementsSecureTransport() const override;
bool isReady() const override;
bool usesProxyProtocolOptions() const override;

private:
Expand Down
16 changes: 13 additions & 3 deletions source/extensions/transport_sockets/tls/context_config_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -169,14 +169,14 @@ ContextConfigImpl::ContextConfigImpl(
const std::string& default_cipher_suites, const std::string& default_curves,
Server::Configuration::TransportSocketFactoryContext& factory_context)
: api_(factory_context.api()),
tls_certificate_providers_(getTlsCertificateConfigProviders(config, factory_context)),
certificate_validation_context_provider_(
getCertificateValidationContextConfigProvider(config, factory_context, &default_cvc_)),
alpn_protocols_(RepeatedPtrUtil::join(config.alpn_protocols(), ",")),
cipher_suites_(StringUtil::nonEmptyStringOrDefault(
RepeatedPtrUtil::join(config.tls_params().cipher_suites(), ":"), default_cipher_suites)),
ecdh_curves_(StringUtil::nonEmptyStringOrDefault(
RepeatedPtrUtil::join(config.tls_params().ecdh_curves(), ":"), default_curves)),
tls_certificate_providers_(getTlsCertificateConfigProviders(config, factory_context)),
certificate_validation_context_provider_(
getCertificateValidationContextConfigProvider(config, factory_context, &default_cvc_)),
min_protocol_version_(tlsVersionFromProto(config.tls_params().tls_minimum_protocol_version(),
default_min_protocol_version)),
max_protocol_version_(tlsVersionFromProto(config.tls_params().tls_maximum_protocol_version(),
Expand Down Expand Up @@ -367,6 +367,16 @@ ClientContextConfigImpl::ClientContextConfigImpl(
}
}

bool ClientContextConfigImpl::isSecretReady() const {
for (const auto& provider : tls_certificate_providers_) {
if (provider == nullptr || provider->secret() == nullptr) {
return false;
}
}
return certificate_validation_context_provider_ == nullptr ||
certificate_validation_context_provider_->secret() != nullptr;
}

const unsigned ServerContextConfigImpl::DEFAULT_MIN_VERSION = TLS1_VERSION;
const unsigned ServerContextConfigImpl::DEFAULT_MAX_VERSION = TLS1_3_VERSION;

Expand Down
17 changes: 9 additions & 8 deletions source/extensions/transport_sockets/tls/context_config_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ class ContextConfigImpl : public virtual Ssl::ContextConfig {
const std::string& default_cipher_suites, const std::string& default_curves,
Server::Configuration::TransportSocketFactoryContext& factory_context);
Api::Api& api_;
// If certificate validation context type is combined_validation_context. default_cvc_
// holds a copy of CombinedCertificateValidationContext::default_validation_context.
// Otherwise, default_cvc_ is nullptr.
std::unique_ptr<envoy::extensions::transport_sockets::tls::v3::CertificateValidationContext>
default_cvc_;
std::vector<Secret::TlsCertificateConfigProviderSharedPtr> tls_certificate_providers_;
Secret::CertificateValidationContextConfigProviderSharedPtr
certificate_validation_context_provider_;

private:
static unsigned tlsVersionFromProto(
Expand All @@ -81,16 +89,8 @@ class ContextConfigImpl : public virtual Ssl::ContextConfig {

std::vector<Ssl::TlsCertificateConfigImpl> tls_certificate_configs_;
Ssl::CertificateValidationContextConfigPtr validation_context_config_;
// If certificate validation context type is combined_validation_context. default_cvc_
// holds a copy of CombinedCertificateValidationContext::default_validation_context.
// Otherwise, default_cvc_ is nullptr.
std::unique_ptr<envoy::extensions::transport_sockets::tls::v3::CertificateValidationContext>
default_cvc_;
std::vector<Secret::TlsCertificateConfigProviderSharedPtr> tls_certificate_providers_;
// Handle for TLS certificate dynamic secret callback.
Envoy::Common::CallbackHandle* tc_update_callback_handle_{};
Secret::CertificateValidationContextConfigProviderSharedPtr
certificate_validation_context_provider_;
// Handle for certificate validation context dynamic secret callback.
Envoy::Common::CallbackHandle* cvc_update_callback_handle_{};
Envoy::Common::CallbackHandle* cvc_validation_callback_handle_{};
Expand Down Expand Up @@ -120,6 +120,7 @@ class ClientContextConfigImpl : public ContextConfigImpl, public Envoy::Ssl::Cli
bool allowRenegotiation() const override { return allow_renegotiation_; }
size_t maxSessionKeys() const override { return max_session_keys_; }
const std::string& signingAlgorithmsForTest() const override { return sigalgs_; }
bool isSecretReady() const override;

private:
static const unsigned DEFAULT_MIN_VERSION;
Expand Down
4 changes: 4 additions & 0 deletions source/extensions/transport_sockets/tls/ssl_socket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,8 @@ void ClientSslSocketFactory::onAddOrUpdateSecret() {
stats_.ssl_context_update_by_sds_.inc();
}

bool ClientSslSocketFactory::isReady() const { return config_->isSecretReady(); }

ServerSslSocketFactory::ServerSslSocketFactory(Envoy::Ssl::ServerContextConfigPtr config,
Envoy::Ssl::ContextManager& manager,
Stats::Scope& stats_scope,
Expand Down Expand Up @@ -424,6 +426,8 @@ void ServerSslSocketFactory::onAddOrUpdateSecret() {
stats_.ssl_context_update_by_sds_.inc();
}

bool ServerSslSocketFactory::isReady() const { return true; }

} // namespace Tls
} // namespace TransportSockets
} // namespace Extensions
Expand Down
3 changes: 3 additions & 0 deletions source/extensions/transport_sockets/tls/ssl_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,11 @@ class ClientSslSocketFactory : public Network::TransportSocketFactory,
ClientSslSocketFactory(Envoy::Ssl::ClientContextConfigPtr config,
Envoy::Ssl::ContextManager& manager, Stats::Scope& stats_scope);

// Network::TransportSocketFactory
Network::TransportSocketPtr
createTransportSocket(Network::TransportSocketOptionsSharedPtr options) const override;
bool implementsSecureTransport() const override;
bool isReady() const override;
bool usesProxyProtocolOptions() const override { return false; }

// Secret::SecretCallbacks
Expand All @@ -134,6 +136,7 @@ class ServerSslSocketFactory : public Network::TransportSocketFactory,
Network::TransportSocketPtr
createTransportSocket(Network::TransportSocketOptionsSharedPtr options) const override;
bool implementsSecureTransport() const override;
bool isReady() const override;
bool usesProxyProtocolOptions() const override { return false; }

// Secret::SecretCallbacks
Expand Down
2 changes: 2 additions & 0 deletions test/common/upstream/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@ envoy_cc_test(
"//test/mocks/upstream:health_checker_mocks",
"//test/mocks/upstream:load_balancer_context_mock",
"//test/mocks/upstream:thread_aware_load_balancer_mocks",
"//test/test_common:test_runtime_lib",
"@envoy_api//envoy/admin/v3:pkg_cc_proto",
"@envoy_api//envoy/config/bootstrap/v3:pkg_cc_proto",
"@envoy_api//envoy/config/cluster/v3:pkg_cc_proto",
"@envoy_api//envoy/config/core/v3:pkg_cc_proto",
"@envoy_api//envoy/extensions/transport_sockets/tls/v3:pkg_cc_proto",
],
)

Expand Down
Loading