-
Notifications
You must be signed in to change notification settings - Fork 5.4k
add all host map to priority set for fast host searching #17290
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
0738957
ea819e5
0f36195
62a06c9
c4115c0
67438ee
dbc35f4
45b4817
5497817
043dc35
d5f9c2f
74b4b43
ae250b8
eef4254
d8dd287
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -978,35 +978,37 @@ void ClusterManagerImpl::postThreadLocalClusterUpdate(ClusterManagerCluster& cm_ | |
| per_priority.overprovisioning_factor_ = host_set->overprovisioningFactor(); | ||
| } | ||
|
|
||
| tls_.runOnAllThreads( | ||
| [info = cm_cluster.cluster().info(), params = std::move(params), add_or_update_cluster, | ||
| load_balancer_factory](OptRef<ThreadLocalClusterManagerImpl> cluster_manager) { | ||
| ThreadLocalClusterManagerImpl::ClusterEntry* new_cluster = nullptr; | ||
| if (add_or_update_cluster) { | ||
| if (cluster_manager->thread_local_clusters_.count(info->name()) > 0) { | ||
| ENVOY_LOG(debug, "updating TLS cluster {}", info->name()); | ||
| } else { | ||
| ENVOY_LOG(debug, "adding TLS cluster {}", info->name()); | ||
| } | ||
| HostMapConstSharedPtr host_map = cm_cluster.cluster().prioritySet().crossPriorityHostMap(); | ||
|
|
||
| tls_.runOnAllThreads([info = cm_cluster.cluster().info(), params = std::move(params), | ||
| add_or_update_cluster, load_balancer_factory, map = std::move(host_map)]( | ||
| OptRef<ThreadLocalClusterManagerImpl> cluster_manager) { | ||
| ThreadLocalClusterManagerImpl::ClusterEntry* new_cluster = nullptr; | ||
| if (add_or_update_cluster) { | ||
| if (cluster_manager->thread_local_clusters_.count(info->name()) > 0) { | ||
| ENVOY_LOG(debug, "updating TLS cluster {}", info->name()); | ||
| } else { | ||
| ENVOY_LOG(debug, "adding TLS cluster {}", info->name()); | ||
| } | ||
|
|
||
| new_cluster = new ThreadLocalClusterManagerImpl::ClusterEntry(*cluster_manager, info, | ||
| load_balancer_factory); | ||
| cluster_manager->thread_local_clusters_[info->name()].reset(new_cluster); | ||
| } | ||
| new_cluster = new ThreadLocalClusterManagerImpl::ClusterEntry(*cluster_manager, info, | ||
| load_balancer_factory); | ||
| cluster_manager->thread_local_clusters_[info->name()].reset(new_cluster); | ||
| } | ||
|
|
||
| for (const auto& per_priority : params.per_priority_update_params_) { | ||
| cluster_manager->updateClusterMembership( | ||
| info->name(), per_priority.priority_, per_priority.update_hosts_params_, | ||
| per_priority.locality_weights_, per_priority.hosts_added_, | ||
| per_priority.hosts_removed_, per_priority.overprovisioning_factor_); | ||
| } | ||
| for (const auto& per_priority : params.per_priority_update_params_) { | ||
| cluster_manager->updateClusterMembership( | ||
| info->name(), per_priority.priority_, per_priority.update_hosts_params_, | ||
| per_priority.locality_weights_, per_priority.hosts_added_, per_priority.hosts_removed_, | ||
| per_priority.overprovisioning_factor_, map); | ||
| } | ||
|
|
||
| if (new_cluster != nullptr) { | ||
| for (auto& cb : cluster_manager->update_callbacks_) { | ||
| cb->onClusterAddOrUpdate(*new_cluster); | ||
| } | ||
| } | ||
| }); | ||
| if (new_cluster != nullptr) { | ||
| for (auto& cb : cluster_manager->update_callbacks_) { | ||
| cb->onClusterAddOrUpdate(*new_cluster); | ||
| } | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| void ClusterManagerImpl::postThreadLocalHealthFailure(const HostSharedPtr& host) { | ||
|
|
@@ -1243,14 +1245,15 @@ void ClusterManagerImpl::ThreadLocalClusterManagerImpl::removeHosts( | |
| void ClusterManagerImpl::ThreadLocalClusterManagerImpl::updateClusterMembership( | ||
| const std::string& name, uint32_t priority, PrioritySet::UpdateHostsParams update_hosts_params, | ||
| LocalityWeightsConstSharedPtr locality_weights, const HostVector& hosts_added, | ||
| const HostVector& hosts_removed, uint64_t overprovisioning_factor) { | ||
| const HostVector& hosts_removed, uint64_t overprovisioning_factor, | ||
| const HostMapConstSharedPtr& cross_priority_host_map) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think usually we'd pass a shared ptr by value and `std::move` it.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Got it. |
||
| ASSERT(thread_local_clusters_.find(name) != thread_local_clusters_.end()); | ||
| const auto& cluster_entry = thread_local_clusters_[name]; | ||
| ENVOY_LOG(debug, "membership update for TLS cluster {} added {} removed {}", name, | ||
| hosts_added.size(), hosts_removed.size()); | ||
| cluster_entry->priority_set_.updateHosts(priority, std::move(update_hosts_params), | ||
| std::move(locality_weights), hosts_added, hosts_removed, | ||
| overprovisioning_factor); | ||
| overprovisioning_factor, cross_priority_host_map); | ||
|
|
||
| // If an LB is thread aware, create a new worker local LB on membership changes. | ||
| if (cluster_entry->lb_factory_ != nullptr) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,7 +47,6 @@ EdsClusterImpl::EdsClusterImpl( | |
| void EdsClusterImpl::startPreInit() { subscription_->start({cluster_name_}); } | ||
|
|
||
| void EdsClusterImpl::BatchUpdateHelper::batchUpdate(PrioritySet::HostUpdateCb& host_update_cb) { | ||
| absl::flat_hash_map<std::string, HostSharedPtr> updated_hosts; | ||
| absl::flat_hash_set<std::string> all_new_hosts; | ||
| PriorityStateManager priority_state_manager(parent_, parent_.local_info_, &host_update_cb); | ||
| for (const auto& locality_lb_endpoint : cluster_load_assignment_.endpoints()) { | ||
|
|
@@ -57,6 +56,11 @@ void EdsClusterImpl::BatchUpdateHelper::batchUpdate(PrioritySet::HostUpdateCb& h | |
|
|
||
| for (const auto& lb_endpoint : locality_lb_endpoint.lb_endpoints()) { | ||
| auto address = parent_.resolveProtoAddress(lb_endpoint.endpoint().address()); | ||
| // When the configuration contains duplicate hosts, only the first one will be retained. | ||
| if (all_new_hosts.count(address->asString()) > 0) { | ||
| continue; | ||
| } | ||
|
Comment on lines
+58
to
+61
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this an unrelated bug fix? Or is this somehow required with this new code? |
||
|
|
||
| priority_state_manager.registerHostForPriority(lb_endpoint.endpoint().hostname(), address, | ||
| locality_lb_endpoint, lb_endpoint, | ||
| parent_.time_source_); | ||
|
|
@@ -67,6 +71,11 @@ void EdsClusterImpl::BatchUpdateHelper::batchUpdate(PrioritySet::HostUpdateCb& h | |
| // Track whether we rebuilt any LB structures. | ||
| bool cluster_rebuilt = false; | ||
|
|
||
| // Get the map of all the latest existing hosts, which is used to filter out the existing | ||
| // hosts in the process of updating cluster memberships. | ||
| HostMapConstSharedPtr all_hosts = parent_.prioritySet().crossPriorityHostMap(); | ||
| ASSERT(all_hosts != nullptr); | ||
|
|
||
| const uint32_t overprovisioning_factor = PROTOBUF_GET_WRAPPED_OR_DEFAULT( | ||
| cluster_load_assignment_.policy(), overprovisioning_factor, kDefaultOverProvisioningFactor); | ||
|
|
||
|
|
@@ -81,13 +90,13 @@ void EdsClusterImpl::BatchUpdateHelper::batchUpdate(PrioritySet::HostUpdateCb& h | |
| if (priority_state[i].first != nullptr) { | ||
| cluster_rebuilt |= parent_.updateHostsPerLocality( | ||
| i, overprovisioning_factor, *priority_state[i].first, parent_.locality_weights_map_[i], | ||
| priority_state[i].second, priority_state_manager, updated_hosts, all_new_hosts); | ||
| priority_state[i].second, priority_state_manager, *all_hosts, all_new_hosts); | ||
| } else { | ||
| // If the new update contains a priority with no hosts, call the update function with an empty | ||
| // set of hosts. | ||
| cluster_rebuilt |= parent_.updateHostsPerLocality( | ||
| i, overprovisioning_factor, {}, parent_.locality_weights_map_[i], empty_locality_map, | ||
| priority_state_manager, updated_hosts, all_new_hosts); | ||
| priority_state_manager, *all_hosts, all_new_hosts); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -100,11 +109,9 @@ void EdsClusterImpl::BatchUpdateHelper::batchUpdate(PrioritySet::HostUpdateCb& h | |
| } | ||
| cluster_rebuilt |= parent_.updateHostsPerLocality( | ||
| i, overprovisioning_factor, {}, parent_.locality_weights_map_[i], empty_locality_map, | ||
| priority_state_manager, updated_hosts, all_new_hosts); | ||
| priority_state_manager, *all_hosts, all_new_hosts); | ||
| } | ||
|
|
||
| parent_.all_hosts_ = std::move(updated_hosts); | ||
|
|
||
| if (!cluster_rebuilt) { | ||
| parent_.info_->stats().update_no_rebuild_.inc(); | ||
| } | ||
|
|
@@ -227,18 +234,12 @@ void EdsClusterImpl::reloadHealthyHostsHelper(const HostSharedPtr& host) { | |
| HostSetImpl::partitionHosts(hosts_copy, hosts_per_locality_copy), | ||
| host_set->localityWeights(), {}, hosts_to_remove, absl::nullopt); | ||
| } | ||
|
|
||
| if (host_to_exclude != nullptr) { | ||
| ASSERT(all_hosts_.find(host_to_exclude->address()->asString()) != all_hosts_.end()); | ||
| all_hosts_.erase(host_to_exclude->address()->asString()); | ||
| } | ||
| } | ||
|
|
||
| bool EdsClusterImpl::updateHostsPerLocality( | ||
| const uint32_t priority, const uint32_t overprovisioning_factor, const HostVector& new_hosts, | ||
| LocalityWeightsMap& locality_weights_map, LocalityWeightsMap& new_locality_weights_map, | ||
| PriorityStateManager& priority_state_manager, | ||
| absl::flat_hash_map<std::string, HostSharedPtr>& updated_hosts, | ||
| PriorityStateManager& priority_state_manager, const HostMap& all_hosts, | ||
| const absl::flat_hash_set<std::string>& all_new_hosts) { | ||
| const auto& host_set = priority_set_.getOrCreateHostSet(priority, overprovisioning_factor); | ||
| HostVectorSharedPtr current_hosts_copy(new HostVector(host_set.hosts())); | ||
|
|
@@ -255,9 +256,8 @@ bool EdsClusterImpl::updateHostsPerLocality( | |
| // performance implications, since this has the knock on effect that we rebuild the load balancers | ||
| // and locality scheduler. See the comment in BaseDynamicClusterImpl::updateDynamicHostList | ||
| // about this. In the future we may need to do better here. | ||
| const bool hosts_updated = | ||
| updateDynamicHostList(new_hosts, *current_hosts_copy, hosts_added, hosts_removed, | ||
| updated_hosts, all_hosts_, all_new_hosts); | ||
| const bool hosts_updated = updateDynamicHostList(new_hosts, *current_hosts_copy, hosts_added, | ||
| hosts_removed, all_hosts, all_new_hosts); | ||
| if (hosts_updated || host_set.overprovisioningFactor() != overprovisioning_factor || | ||
| locality_weights_map != new_locality_weights_map) { | ||
| ASSERT(std::all_of(current_hosts_copy->begin(), current_hosts_copy->end(), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -117,7 +117,6 @@ void StrictDnsClusterImpl::ResolveTarget::startResolve() { | |
| if (status == Network::DnsResolver::ResolutionStatus::Success) { | ||
| parent_.info_->stats().update_success_.inc(); | ||
|
|
||
| HostMap updated_hosts; | ||
| HostVector new_hosts; | ||
| std::chrono::seconds ttl_refresh_rate = std::chrono::seconds::max(); | ||
| absl::flat_hash_set<std::string> all_new_hosts; | ||
|
|
@@ -127,33 +126,44 @@ void StrictDnsClusterImpl::ResolveTarget::startResolve() { | |
| // potentially move port handling into the DNS interface itself, which would work better | ||
| // for SRV. | ||
| ASSERT(resp.address_ != nullptr); | ||
| auto address = Network::Utility::getAddressWithPort(*(resp.address_), port_); | ||
| if (all_new_hosts.count(address->asString()) > 0) { | ||
| continue; | ||
| } | ||
|
Comment on lines
+130
to
+132
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same question? |
||
|
|
||
| new_hosts.emplace_back(new HostImpl( | ||
| parent_.info_, hostname_, | ||
| Network::Utility::getAddressWithPort(*(resp.address_), port_), | ||
| parent_.info_, hostname_, address, | ||
| // TODO(zyfjeff): Created through metadata shared pool | ||
| std::make_shared<const envoy::config::core::v3::Metadata>(lb_endpoint_.metadata()), | ||
| lb_endpoint_.load_balancing_weight().value(), locality_lb_endpoints_.locality(), | ||
| lb_endpoint_.endpoint().health_check_config(), locality_lb_endpoints_.priority(), | ||
| lb_endpoint_.health_status(), parent_.time_source_)); | ||
| all_new_hosts.emplace(new_hosts.back()->address()->asString()); | ||
| all_new_hosts.emplace(address->asString()); | ||
| ttl_refresh_rate = min(ttl_refresh_rate, resp.ttl_); | ||
| } | ||
|
|
||
| HostVector hosts_added; | ||
| HostVector hosts_removed; | ||
| if (parent_.updateDynamicHostList(new_hosts, hosts_, hosts_added, hosts_removed, | ||
| updated_hosts, all_hosts_, all_new_hosts)) { | ||
| all_hosts_, all_new_hosts)) { | ||
| ENVOY_LOG(debug, "DNS hosts have changed for {}", dns_address_); | ||
| ASSERT(std::all_of(hosts_.begin(), hosts_.end(), [&](const auto& host) { | ||
| return host->priority() == locality_lb_endpoints_.priority(); | ||
| })); | ||
|
|
||
| // Update host map for current resolve target. | ||
| for (const auto& host : hosts_removed) { | ||
| all_hosts_.erase(host->address()->asString()); | ||
| } | ||
| for (const auto& host : hosts_added) { | ||
| all_hosts_.insert({host->address()->asString(), host}); | ||
| } | ||
|
|
||
| parent_.updateAllHosts(hosts_added, hosts_removed, locality_lb_endpoints_.priority()); | ||
| } else { | ||
| parent_.info_->stats().update_no_rebuild_.inc(); | ||
| } | ||
|
|
||
| all_hosts_ = std::move(updated_hosts); | ||
|
|
||
| // reset failure backoff strategy because there was a success. | ||
| parent_.failure_backoff_strategy_->reset(); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,6 +41,10 @@ class StrictDnsClusterImpl : public BaseDynamicClusterImpl { | |
| const uint32_t port_; | ||
| const Event::TimerPtr resolve_timer_; | ||
| HostVector hosts_; | ||
|
|
||
| // All host map for current resolve target. When we have multiple resolve targets, multiple | ||
| // targets may contain two different host objects with the same address. This host map cannot be | ||
| // replaced by the read only all host map in the priority set. | ||
| HostMap all_hosts_; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there anything that would prevent someone from using the shared map for strict DNS clusters? Seems like it would not be valid?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The shared map still works for strict DNS clusters. However, we cannot use the shared map to replace this `all_hosts_`, because a strict DNS cluster only removes duplicate hosts within a single resolve target. If multiple resolve targets contain hosts with the same address, these hosts will exist at the same time. As a global host map, the shared map cannot hold two different hosts with the same address for two different resolve targets, nor can it keep the logic of the strict DNS cluster unchanged.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess my concern would be around people trying to implement sticky LB for strict DNS but running into weird edge cases: suppose they use regular LB to select an endpoint from the STRICT_DNS cluster and want to pin this LB selection. This works fine if there is only one host per IP within the cluster, but if this is configured with multiple resolve targets that resolve to the same IP, some of which have different […]. I don't think we need to fix this, but perhaps some documentation around this limitation with the strict DNS cluster would be good?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, this is a known limitation. I will add some new comments to illustrate this problem. |
||
| }; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Maybe call it host_map within the lambda as well?
`map` seems a bit too generic.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got it.