envoyproxy · mattklein123 · Jul 31, 2018 · Jul 23, 2018 · Jul 24, 2018 · Jul 24, 2018
diff --git a/api/envoy/api/v2/cds.proto b/api/envoy/api/v2/cds.proto
@@ -432,6 +432,12 @@ message Cluster {
       ZoneAwareLbConfig zone_aware_lb_config = 2;
       LocalityWeightedLbConfig locality_weighted_lb_config = 3;
     }
+    // If set, membership and healthcheck updates that happen within this duration will be coalesced
+    // and delivered in one shot when the duration expires. The start of the duration is when the
+    // first update happens. This is useful for big clusters, with potentially noisy deploys that
+    // might trigger excessive CPU usage due to a constant stream of healthcheck state changes or
+    // membership updates.
+    google.protobuf.Duration time_between_updates = 4;
   }
 
   // Common configuration for all load balancer implementations.

diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst
@@ -48,6 +48,8 @@ Version history
 * upstream: added configuration option to the subset load balancer to take locality weights into account when
   selecting a host from a subset.
 * access log: added RESPONSE_DURATION and RESPONSE_TX_DURATION.
+* cluster: added :ref:`option <envoy_api_field_Cluster.CommonLbConfig.time_between_updates>` to coalesce updates
+  within the given duration.
 
 1.7.0
 ===============

diff --git a/source/common/upstream/cluster_manager_impl.cc b/source/common/upstream/cluster_manager_impl.cc
@@ -180,7 +180,7 @@ ClusterManagerImpl::ClusterManagerImpl(const envoy::config::bootstrap::v2::Boots
       init_helper_([this](Cluster& cluster) { onClusterInit(cluster); }),
       config_tracker_entry_(
           admin.getConfigTracker().add("clusters", [this] { return dumpClusterConfigs(); })),
-      system_time_source_(system_time_source) {
+      system_time_source_(system_time_source), dispatcher_(main_thread_dispatcher) {
   async_client_manager_ = std::make_unique<Grpc::AsyncClientManagerImpl>(*this, tls);
   const auto& cm_config = bootstrap.cluster_manager();
   if (cm_config.has_outlier_detection()) {
@@ -330,7 +330,13 @@ void ClusterManagerImpl::onClusterInit(Cluster& cluster) {
                                                            const HostVector& hosts_removed) {
     // This fires when a cluster is about to have an updated member set. We need to send this
     // out to all of the thread local configurations.
-    postThreadLocalClusterUpdate(cluster, priority, hosts_added, hosts_removed);
+
+    // Should we coalesce updates?
+    if (cluster.info()->lbConfig().has_time_between_updates()) {
+      scheduleUpdate(cluster, priority, hosts_added, hosts_removed);
+    } else {
+      postThreadLocalClusterUpdate(cluster, priority, hosts_added, hosts_removed);
+    }
   });
 
   // Finally, if the cluster has any hosts, post updates cross-thread so the per-thread load
@@ -343,6 +349,61 @@ void ClusterManagerImpl::onClusterInit(Cluster& cluster) {
   }
 }
 
+// Buffers a membership change for the given cluster priority instead of posting it right away,
+// so that it is delivered together with any other change that arrives before the cluster's
+// time_between_updates duration (measured from the first buffered change) expires.
+//
+// @param cluster the cluster whose host set changed; its lbConfig() must have
+//        time_between_updates set, since that value is used as the timer duration.
+// @param priority the priority level the change applies to.
+// @param hosts_added hosts added by this change.
+// @param hosts_removed hosts removed by this change.
+void ClusterManagerImpl::scheduleUpdate(const Cluster& cluster, uint32_t priority,
+                                        const HostVector& hosts_added,
+                                        const HostVector& hosts_removed) {
+  // Find, or lazily create, the pending updates for this cluster and priority. operator[]
+  // default-inserts a null pointer for a missing key, which is then replaced below.
+  PendingUpdatesByPriorityMapPtr& updates_by_prio = updates_map_[cluster.info()->name()];
+  if (updates_by_prio == nullptr) {
+    updates_by_prio = std::make_shared<PendingUpdatesByPriorityMap>();
+  }
+  PendingUpdatesPtr& updates = (*updates_by_prio)[priority];
+  if (updates == nullptr) {
+    updates = std::make_shared<PendingUpdates>();
+  }
+
+  // Record the net change that should be applied when the timer fires. A host that is added and
+  // later removed (or removed and later added) within the same window cancels out, so it is
+  // never reported as both added and removed in a single delivery.
+  for (const auto& host : hosts_added) {
+    if (updates->removed.erase(host) == 0) {
+      updates->added.insert(host);
+    }
+  }
+  for (const auto& host : hosts_removed) {
+    if (updates->added.erase(host) == 0) {
+      updates->removed.insert(host);
+    }
+  }
+
+  // If there's no timer, create one. The callback keeps only a weak reference to the pending
+  // updates: a by-reference capture of a shared_ptr local to this function would dangle by the
+  // time the timer fires, and a by-value capture would create an ownership cycle because the
+  // pending updates own the timer that owns the callback.
+  // NOTE(review): `cluster` is still captured by reference and nothing visible here cancels the
+  // timer when a cluster is removed -- confirm the cluster always outlives a pending timer.
+  if (updates->timer == nullptr) {
+    const std::weak_ptr<PendingUpdates> weak_updates = updates;
+    updates->timer = dispatcher_.createTimer([this, &cluster, priority, weak_updates]() -> void {
+      // Hold a strong reference for the duration of the call: applyUpdates() resets the timer,
+      // which destroys this callback together with its captures.
+      if (const PendingUpdatesPtr pending = weak_updates.lock()) {
+        applyUpdates(cluster, priority, pending);
+      }
+    });
+    const auto& time_between_updates = cluster.info()->lbConfig().time_between_updates();
+    const auto timeout = DurationUtil::durationToMilliseconds(time_between_updates);
+    updates->timer->enableTimer(std::chrono::milliseconds(timeout));
+  }
+}
+
+// Delivers the updates accumulated by scheduleUpdate() for the given cluster priority and resets
+// the pending state so that the next change starts a new coalescing window.
+//
+// @param cluster the cluster the pending updates belong to.
+// @param priority the priority level the pending updates belong to.
+// @param updates the pending updates to flush; taken by value so that the state stays alive
+//        while its timer is destroyed below.
+void ClusterManagerImpl::applyUpdates(const Cluster& cluster, uint32_t priority,
+                                      PendingUpdatesPtr updates) {
+  // Merge pending updates & deliver. The vectors are owned copies of the pending sets (plain
+  // values rather than references bound to temporaries).
+  const HostVector hosts_added(updates->added.begin(), updates->added.end());
+  const HostVector hosts_removed(updates->removed.begin(), updates->removed.end());
+
+  postThreadLocalClusterUpdate(cluster, priority, hosts_added, hosts_removed);
+
+  cm_stats_.coalesced_updates_.inc();
+
+  // Reset everything. Clearing the timer is what lets scheduleUpdate() arm a new one. When this
+  // runs from the timer callback it also destroys that callback, so nothing after this point
+  // may rely on the callback's captures.
+  updates->timer = nullptr;
+  updates->added.clear();
+  updates->removed.clear();
+}
+
 bool ClusterManagerImpl::addOrUpdateCluster(const envoy::api::v2::Cluster& cluster,
                                             const std::string& version_info) {
   // First we need to see if this new config is new or an update to an existing dynamic cluster.

diff --git a/source/common/upstream/cluster_manager_impl.h b/source/common/upstream/cluster_manager_impl.h
@@ -138,6 +138,7 @@ class ClusterManagerInitHelper : Logger::Loggable<Logger::Id::upstream> {
   COUNTER(cluster_added)                                                                           \
   COUNTER(cluster_modified)                                                                        \
   COUNTER(cluster_removed)                                                                         \
+  COUNTER(coalesced_updates)                                                                       \
   GAUGE  (active_clusters)                                                                         \
   GAUGE  (warming_clusters)
 // clang-format on
@@ -361,6 +362,20 @@ class ClusterManagerImpl : public ClusterManager, Logger::Loggable<Logger::Id::u
   // This map is ordered so that config dumping is consistent.
   typedef std::map<std::string, ClusterDataPtr> ClusterMap;
 
+  // Host changes accumulated for a single cluster priority while a coalescing timer is pending.
+  // No user-provided constructor is declared: the members are default-initialized, which leaves
+  // both sets empty (the timer is expected to start out null -- TimerPtr is presumably a smart
+  // pointer; confirm against Event::TimerPtr).
+  struct PendingUpdates {
+    // Timer that triggers delivery of the pending changes; null when no delivery is scheduled.
+    Event::TimerPtr timer;
+    // Hosts reported as added since the last delivery.
+    std::unordered_set<HostSharedPtr> added;
+    // Hosts reported as removed since the last delivery.
+    std::unordered_set<HostSharedPtr> removed;
+  };
+  typedef std::shared_ptr<PendingUpdates> PendingUpdatesPtr;
+  // Pending updates of one cluster, keyed by priority.
+  typedef std::unordered_map<uint32_t, PendingUpdatesPtr> PendingUpdatesByPriorityMap;
+  typedef std::shared_ptr<PendingUpdatesByPriorityMap> PendingUpdatesByPriorityMapPtr;
+  // Per-priority pending updates of all clusters, keyed by cluster name.
+  typedef std::unordered_map<std::string, PendingUpdatesByPriorityMapPtr> ClusterUpdatesMap;
+
+  // Posts the pending added/removed hosts of the given cluster priority to the thread local
+  // clusters, increments the coalesced_updates counter and clears the pending state (including
+  // its timer).
+  void applyUpdates(const Cluster& cluster, uint32_t priority, PendingUpdatesPtr updates);
+  // Records hosts_added/hosts_removed as pending for the given cluster priority and, if no timer
+  // is pending for it, starts one with the cluster's time_between_updates duration that calls
+  // applyUpdates() when it fires.
+  void scheduleUpdate(const Cluster& cluster, uint32_t priority, const HostVector& hosts_added,
+                      const HostVector& hosts_removed);
   void createOrUpdateThreadLocalCluster(ClusterData& cluster);
   ProtobufTypes::MessagePtr dumpClusterConfigs();
   static ClusterManagerStats generateStats(Stats::Scope& scope);
@@ -394,6 +409,8 @@ class ClusterManagerImpl : public ClusterManager, Logger::Loggable<Logger::Id::u
   Grpc::AsyncClientManagerPtr async_client_manager_;
   Server::ConfigTracker::EntryOwnerPtr config_tracker_entry_;
   SystemTimeSource& system_time_source_;
+  // Membership updates waiting to be delivered, keyed by cluster name and then by priority.
+  ClusterUpdatesMap updates_map_;
+  // Main thread dispatcher passed to the constructor; used to create the coalescing timers.
+  Event::Dispatcher& dispatcher_;
 };
 
 } // namespace Upstream

diff --git a/test/common/upstream/cluster_manager_impl_test.cc b/test/common/upstream/cluster_manager_impl_test.cc
@@ -1620,6 +1620,63 @@ TEST_F(ClusterManagerImplTest, OriginalDstInitialization) {
   factory_.tls_.shutdownThread();
 }
 
+// Verifies that membership updates of a cluster configured with time_between_updates are held
+// back until the coalescing timer fires, and that each expired window is counted exactly once in
+// cluster_manager.coalesced_updates.
+TEST_F(ClusterManagerImplTest, CoalescedUpdates) {
+  const std::string yaml = R"EOF(
+  static_resources:
+    clusters:
+    - name: cluster_1
+      connect_timeout: 0.250s
+      type: STATIC
+      lb_policy: ROUND_ROBIN
+      hosts:
+      - socket_address:
+          address: "127.0.0.1"
+          port_value: 11001
+      - socket_address:
+          address: "127.0.0.1"
+          port_value: 11002
+      common_lb_config:
+        time_between_updates: 3s
+  )EOF";
+
+  create(parseBootstrapFromV2Yaml(yaml));
+  EXPECT_FALSE(cluster_manager_->get("cluster_1")->info()->addedViaApi());
+
+  // Set up the mock timer for the first coalescing window. It is presumably handed out by the
+  // mock dispatcher on the next createTimer() call and then owned by the cluster manager --
+  // confirm against Event::MockTimer. Its callback_ is invoked below to simulate expiry.
+  Event::MockTimer* timer = new NiceMock<Event::MockTimer>(&factory_.dispatcher_);
+
+  const Cluster& cluster = cluster_manager_->clusters().begin()->second;
+
+  HostVectorSharedPtr hosts(
+      new HostVector(cluster.prioritySet().hostSetsPerPriority()[0]->hosts()));
+  HostsPerLocalitySharedPtr hosts_per_locality = std::make_shared<HostsPerLocalityImpl>();
+  HostVector hosts_added{};
+  HostVector hosts_removed_0{(*hosts)[0]};
+  HostVector hosts_removed_1{(*hosts)[1]};
+
+  // Remove each host, sequentially. The host lists passed in are left unchanged; only the
+  // reported added/removed deltas differ between the two updates.
+  cluster.prioritySet().hostSetsPerPriority()[0]->updateHosts(
+      hosts, hosts, hosts_per_locality, hosts_per_locality, {}, hosts_added, hosts_removed_0);
+  cluster.prioritySet().hostSetsPerPriority()[0]->updateHosts(
+      hosts, hosts, hosts_per_locality, hosts_per_locality, {}, hosts_added, hosts_removed_1);
+
+  // Nothing may be delivered before the timer fires.
+  EXPECT_EQ(0, factory_.stats_.counter("cluster_manager.coalesced_updates").value());
+
+  // Ensure the coalesced updates were applied.
+  timer->callback_();
+  EXPECT_EQ(1, factory_.stats_.counter("cluster_manager.coalesced_updates").value());
+
+  // Prepare a new timer: the previous one was released when the pending updates were applied.
+  timer = new NiceMock<Event::MockTimer>(&factory_.dispatcher_);
+
+  // Add them back.
+  cluster.prioritySet().hostSetsPerPriority()[0]->updateHosts(
+      hosts, hosts, hosts_per_locality, hosts_per_locality, {}, hosts_removed_0, hosts_added);
+  cluster.prioritySet().hostSetsPerPriority()[0]->updateHosts(
+      hosts, hosts, hosts_per_locality, hosts_per_locality, {}, hosts_removed_1, hosts_added);
+
+  // The second window must not be delivered before its timer fires either.
+  EXPECT_EQ(1, factory_.stats_.counter("cluster_manager.coalesced_updates").value());
+
+  // Ensure the coalesced updates were applied again.
+  timer->callback_();
+  EXPECT_EQ(2, factory_.stats_.counter("cluster_manager.coalesced_updates").value());
+}
+
 class ClusterManagerInitHelperTest : public testing::Test {
 public:
   MOCK_METHOD1(onClusterInit, void(Cluster& cluster));