From 6e72690015fdf278b11d9b566f6912e494f13076 Mon Sep 17 00:00:00 2001 From: Christoph Pakulski Date: Mon, 17 Sep 2018 12:21:08 -0400 Subject: [PATCH 1/7] Changed how panic mode is turned on when LoadBalancer has several priority sets. Panic checking is suppressed if there is enough healthy hosts across all priority levels, even when a particular priority set should enter panic mode because its number of healthy nodes is low. Signed-off-by: Christoph Pakulski --- source/common/upstream/load_balancer_impl.cc | 20 +++++- source/common/upstream/load_balancer_impl.h | 3 + .../upstream/load_balancer_impl_test.cc | 65 +++++++++++++++++-- test/mocks/upstream/mocks.h | 1 + 4 files changed, 83 insertions(+), 6 deletions(-) diff --git a/source/common/upstream/load_balancer_impl.cc b/source/common/upstream/load_balancer_impl.cc index 94cea9fcf9a13..29562f5aad0b8 100644 --- a/source/common/upstream/load_balancer_impl.cc +++ b/source/common/upstream/load_balancer_impl.cc @@ -55,6 +55,7 @@ LoadBalancerBase::LoadBalancerBase(const PrioritySet& priority_set, ClusterStats void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority) { per_priority_load_.resize(priority_set_.hostSetsPerPriority().size()); per_priority_health_.resize(priority_set_.hostSetsPerPriority().size()); + per_priority_panic_.resize(priority_set_.hostSetsPerPriority().size()); // Determine the health of the newly modified priority level. // Health ranges from 0-100, and is the ratio of healthy hosts to total hosts, modified by the @@ -67,8 +68,17 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority) { host_set.healthyHosts().size() / host_set.hosts().size())); } - // Now that we've updated health for the changed priority level, we need to caculate percentage + // Now that we've updated health for the changed priority level, we need to calculate percentage // load for all priority levels. + + // The following cases are handled here; + // - Total health is >= 100. 
It means there are enough healthy hosts to handle the load. + // Do not enter panic mode, even if a specific priority has low number of healthy hosts. + // - Total health is < 100. There is not enough healthy hosts to handle the load. Continue + // distibuting the load among priority sets, but turn on Panic mode if # of healthy hosts is + // low. + // - Total health is 0. All hosts are down. Redirect 100% of traffic to P=0 and enable PanicMode. + // // First, determine if the load needs to be scaled relative to health. For example if there are // 3 host sets with 20% / 20% / 10% health they will get 40% / 40% / 20% load to ensure total load @@ -79,6 +89,7 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority) { // Everything is terrible. Send all load to P=0. // In this one case sumEntries(per_priority_load_) != 100 since we sinkhole all traffic in P=0. per_priority_load_[0] = 100; + per_priority_panic_[0] = true; return; } size_t total_load = 100; @@ -88,6 +99,11 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority) { per_priority_load_[i] = std::min(total_load, per_priority_health_[i] * 100 / total_health); total_load -= per_priority_load_[i]; + + // For each level check if it should run in Panic mode. Never set Panic mode if the total health + // is 100%, even when individual priority level has very low # of healthy hosts. + HostSet& priority_host_set = *priority_set_.hostSetsPerPriority()[i]; + per_priority_panic_[i] = (total_health == 100 ? false : isGlobalPanic(priority_host_set)); } if (total_load != 0) { // Account for rounding errors. @@ -378,7 +394,7 @@ ZoneAwareLoadBalancerBase::hostSourceToUse(LoadBalancerContext* context) { hosts_source.priority_ = host_set.priority(); // If the selected host set has insufficient healthy hosts, return all hosts. 
- if (isGlobalPanic(host_set)) { + if (per_priority_panic_[hosts_source.priority_]) { stats_.lb_healthy_panic_.inc(); hosts_source.source_type_ = HostsSource::SourceType::AllHosts; return hosts_source; diff --git a/source/common/upstream/load_balancer_impl.h b/source/common/upstream/load_balancer_impl.h index cfbf20120f748..488389557af2f 100644 --- a/source/common/upstream/load_balancer_impl.h +++ b/source/common/upstream/load_balancer_impl.h @@ -62,6 +62,7 @@ class LoadBalancerBase : public LoadBalancer { HostSet& chooseHostSet(LoadBalancerContext* context); uint32_t percentageLoad(uint32_t priority) const { return per_priority_load_[priority]; } + bool isInPanic(uint32_t priority) const { return per_priority_panic_[priority]; } ClusterStats& stats_; Runtime::Loader& runtime_; @@ -79,6 +80,8 @@ class LoadBalancerBase : public LoadBalancer { std::vector per_priority_load_; // The health (0-100) for each priority level. std::vector per_priority_health_; + // Levels which are in panic + std::vector per_priority_panic_; }; class LoadBalancerContextBase : public LoadBalancerContext { diff --git a/test/common/upstream/load_balancer_impl_test.cc b/test/common/upstream/load_balancer_impl_test.cc index 19ea7e8e3e0f7..2789f7ac3f136 100644 --- a/test/common/upstream/load_balancer_impl_test.cc +++ b/test/common/upstream/load_balancer_impl_test.cc @@ -48,6 +48,7 @@ class TestLb : public LoadBalancerBase { const envoy::api::v2::Cluster::CommonLbConfig& common_config) : LoadBalancerBase(priority_set, stats, runtime, random, common_config) {} using LoadBalancerBase::chooseHostSet; + using LoadBalancerBase::isInPanic; using LoadBalancerBase::percentageLoad; HostConstSharedPtr chooseHostOnce(LoadBalancerContext*) override { @@ -71,14 +72,25 @@ class LoadBalancerBaseTest : public LoadBalancerTestBase { host_set.runCallbacks({}, {}); } - std::vector getLoadPercentage() { - std::vector ret; + template + std::vector aggregatePrioritySetsValues(TestLb& lb, FUNC func) { + std::vector 
ret; + for (size_t i = 0; i < priority_set_.host_sets_.size(); ++i) { - ret.push_back(lb_.percentageLoad(i)); + ret.push_back((lb.*func)(i)); } + return ret; } + std::vector getLoadPercentage() { + return aggregatePrioritySetsValues(lb_, &TestLb::percentageLoad); + } + + std::vector getPanic() { + return aggregatePrioritySetsValues(lb_, &TestLb::isInPanic); + } + envoy::api::v2::Cluster::CommonLbConfig common_config_; TestLb lb_{priority_set_, stats_, runtime_, random_, common_config_}; }; @@ -157,25 +169,34 @@ TEST_P(LoadBalancerBaseTest, OverProvisioningFactor) { TEST_P(LoadBalancerBaseTest, GentleFailover) { // With 100% of P=0 hosts healthy, P=0 gets all the load. + // None of the levels is in Panic mode updateHostSet(host_set_, 1, 1); updateHostSet(failover_host_set_, 1, 1); ASSERT_THAT(getLoadPercentage(), ElementsAre(100, 0)); + ASSERT_THAT(getPanic(), ElementsAre(false, false)); // Health P=0 == 50*1.4 == 70 + // Total health = 70 + 70 >= 100%. None of the levels should be in panic mode. updateHostSet(host_set_, 2 /* num_hosts */, 1 /* num_healthy_hosts */); updateHostSet(failover_host_set_, 2 /* num_hosts */, 1 /* num_healthy_hosts */); ASSERT_THAT(getLoadPercentage(), ElementsAre(70, 30)); + ASSERT_THAT(getPanic(), ElementsAre(false, false)); // Health P=0 == 25*1.4 == 35 P=1 is healthy so takes all spillover. + // Total health = 35+100 >= 100%. P=0 is below Panic level but it is ignored, because + // Total health >= 100%. updateHostSet(host_set_, 4 /* num_hosts */, 1 /* num_healthy_hosts */); updateHostSet(failover_host_set_, 2 /* num_hosts */, 2 /* num_healthy_hosts */); ASSERT_THAT(getLoadPercentage(), ElementsAre(35, 65)); + ASSERT_THAT(getPanic(), ElementsAre(false, false)); // Health P=0 == 25*1.4 == 35 P=1 == 35 // Health is then scaled up by (100 / (35 + 35) == 50) + // Total health = 35% + 35% is less than 100%. Panic levels per priority kick in. 
updateHostSet(host_set_, 4 /* num_hosts */, 1 /* num_healthy_hosts */); updateHostSet(failover_host_set_, 4 /* num_hosts */, 1 /* num_healthy_hosts */); ASSERT_THAT(getLoadPercentage(), ElementsAre(50, 50)); + ASSERT_THAT(getPanic(), ElementsAre(true, true)); } TEST_P(LoadBalancerBaseTest, GentleFailoverWithExtraLevels) { @@ -185,6 +206,7 @@ TEST_P(LoadBalancerBaseTest, GentleFailoverWithExtraLevels) { updateHostSet(failover_host_set_, 1, 1); updateHostSet(tertiary_host_set_, 1, 1); ASSERT_THAT(getLoadPercentage(), ElementsAre(100, 0, 0)); + ASSERT_THAT(getPanic(), ElementsAre(false, false, false)); // Health P=0 == 50*1.4 == 70 // Health P=0 == 50, so can take the 30% spillover. @@ -219,6 +241,42 @@ TEST_P(LoadBalancerBaseTest, GentleFailoverWithExtraLevels) { updateHostSet(failover_host_set_, 5 /* num_hosts */, 1 /* num_healthy_hosts */); updateHostSet(tertiary_host_set_, 5 /* num_hosts */, 1 /* num_healthy_hosts */); ASSERT_THAT(getLoadPercentage(), ElementsAre(34, 33, 33)); + ASSERT_THAT(getPanic(), ElementsAre(true, true, true)); + + // Levels P=0 and P=1 are totally down. P=2 is totally healthy. + // 100% of the traffic should go to P=2 and P=0 and P=1 should + // not be in panic mode. + updateHostSet(host_set_, 5 /* num_hosts */, 0 /* num_healthy_hosts */); + updateHostSet(failover_host_set_, 5 /* num_hosts */, 0 /* num_healthy_hosts */); + updateHostSet(tertiary_host_set_, 5 /* num_hosts */, 5 /* num_healthy_hosts */); + ASSERT_THAT(getLoadPercentage(), ElementsAre(0, 0, 100)); + ASSERT_THAT(getPanic(), ElementsAre(false, false, false)); + + // Levels P=0 and P=1 are totally down. P=2 is 80*1.4 >= 100% healthy. + // 100% of the traffic should go to P=2 and P=0 and P=1 should + // not be in panic mode. 
+ updateHostSet(host_set_, 5 /* num_hosts */, 0 /* num_healthy_hosts */); + updateHostSet(failover_host_set_, 5 /* num_hosts */, 0 /* num_healthy_hosts */); + updateHostSet(tertiary_host_set_, 5 /* num_hosts */, 4 /* num_healthy_hosts */); + ASSERT_THAT(getLoadPercentage(), ElementsAre(0, 0, 100)); + ASSERT_THAT(getPanic(), ElementsAre(false, false, false)); + + // Levels P=0 and P=1 are totally down. P=2 is 40*1.4=56%% healthy. + // 100% of the traffic should go to P=2. All levels P=0, P=1 and P=2 should + // be in panic mode even though P=0 and P=1 do not receive any load. + updateHostSet(host_set_, 5 /* num_hosts */, 0 /* num_healthy_hosts */); + updateHostSet(failover_host_set_, 5 /* num_hosts */, 0 /* num_healthy_hosts */); + updateHostSet(tertiary_host_set_, 5 /* num_hosts */, 2 /* num_healthy_hosts */); + ASSERT_THAT(getLoadPercentage(), ElementsAre(0, 0, 100)); + ASSERT_THAT(getPanic(), ElementsAre(true, true, true)); + + // All levels are completely down. 100% of traffic should go to P=0 + // and P=0 should be in panic mode + updateHostSet(host_set_, 5 /* num_hosts */, 0 /* num_healthy_hosts */); + updateHostSet(failover_host_set_, 5 /* num_hosts */, 0 /* num_healthy_hosts */); + updateHostSet(tertiary_host_set_, 5 /* num_hosts */, 0 /* num_healthy_hosts */); + ASSERT_THAT(getLoadPercentage(), ElementsAre(100, _, _)); + ASSERT_THAT(getPanic(), ElementsAre(true, _, _)); } TEST_P(LoadBalancerBaseTest, BoundaryConditions) { @@ -846,7 +904,6 @@ TEST_P(RoundRobinLoadBalancerTest, NoZoneAwareRoutingLocalEmpty) { HostsPerLocalitySharedPtr local_hosts_per_locality = makeHostsPerLocality({{}, {}}); EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) - .WillOnce(Return(50)) .WillOnce(Return(50)); EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) .WillOnce(Return(true)); diff --git a/test/mocks/upstream/mocks.h b/test/mocks/upstream/mocks.h index fb0c132421ee6..635b598f59386 100644 --- 
a/test/mocks/upstream/mocks.h +++ b/test/mocks/upstream/mocks.h @@ -74,6 +74,7 @@ class MockHostSet : public HostSet { Common::CallbackManager member_update_cb_helper_; uint32_t priority_{}; uint32_t overprovisioning_factor_{}; + bool run_in_panic_mode_ = false; }; class MockPrioritySet : public PrioritySet { From 748b7a9111ffa3ecab5717d0421d425729846407 Mon Sep 17 00:00:00 2001 From: Christoph Pakulski Date: Fri, 21 Sep 2018 11:47:28 -0400 Subject: [PATCH 2/7] Changed how panic mode is turned on and used when ThreadAwareLoadBalancer has several priority sets. Panic checking is suppressed if there are enough healthy hosts across all priority levels, even when a particular priority set should enter panic mode because the number of healthy nodes is low. Signed-off-by: Christoph Pakulski --- source/common/upstream/maglev_lb.h | 4 ++-- source/common/upstream/ring_hash_lb.h | 4 ++-- source/common/upstream/thread_aware_lb_impl.cc | 8 ++++++-- source/common/upstream/thread_aware_lb_impl.h | 3 ++- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/source/common/upstream/maglev_lb.h b/source/common/upstream/maglev_lb.h index 27ef4db632a80..746256e69e5e4 100644 --- a/source/common/upstream/maglev_lb.h +++ b/source/common/upstream/maglev_lb.h @@ -58,13 +58,13 @@ class MaglevLoadBalancer : public ThreadAwareLoadBalancerBase { private: // ThreadAwareLoadBalancerBase - HashingLoadBalancerSharedPtr createLoadBalancer(const HostSet& host_set) override { + HashingLoadBalancerSharedPtr createLoadBalancer(const HostSet& host_set, bool inPanic) override { // Note that we only compute global panic on host set refresh. Given that the runtime setting // will rarely change, this is a reasonable compromise to avoid creating extra LBs when we only // need to create one per priority level. 
const bool has_locality = host_set.localityWeights() != nullptr && !host_set.localityWeights()->empty(); - if (isGlobalPanic(host_set)) { + if (inPanic) { if (!has_locality) { return std::make_shared(HostsPerLocalityImpl(host_set.hosts(), false), nullptr, table_size_); diff --git a/source/common/upstream/ring_hash_lb.h b/source/common/upstream/ring_hash_lb.h index 7d9eff87e7515..9214e6bb23461 100644 --- a/source/common/upstream/ring_hash_lb.h +++ b/source/common/upstream/ring_hash_lb.h @@ -45,11 +45,11 @@ class RingHashLoadBalancer : public ThreadAwareLoadBalancerBase, typedef std::shared_ptr RingConstSharedPtr; // ThreadAwareLoadBalancerBase - HashingLoadBalancerSharedPtr createLoadBalancer(const HostSet& host_set) override { + HashingLoadBalancerSharedPtr createLoadBalancer(const HostSet& host_set, bool inPanic) override { // Note that we only compute global panic on host set refresh. Given that the runtime setting // will rarely change, this is a reasonable compromise to avoid creating extra LBs when we only // need to create one per priority level. - if (isGlobalPanic(host_set)) { + if (inPanic) { return std::make_shared(config_, host_set.hosts()); } else { return std::make_shared(config_, host_set.healthyHosts()); diff --git a/source/common/upstream/thread_aware_lb_impl.cc b/source/common/upstream/thread_aware_lb_impl.cc index 6bc4d08bd7838..dc5d4a7a86b3d 100644 --- a/source/common/upstream/thread_aware_lb_impl.cc +++ b/source/common/upstream/thread_aware_lb_impl.cc @@ -25,8 +25,11 @@ void ThreadAwareLoadBalancerBase::refresh() { const uint32_t priority = host_set->priority(); (*per_priority_state_vector)[priority].reset(new PerPriorityState); const auto& per_priority_state = (*per_priority_state_vector)[priority]; - per_priority_state->current_lb_ = createLoadBalancer(*host_set); - per_priority_state->global_panic_ = isGlobalPanic(*host_set); + // Copy panic flag from LoadBalancerBase. 
It is calculated when there is a change + in hosts set or hosts' health. + per_priority_state->global_panic_ = per_priority_panic_[priority]; + per_priority_state->current_lb_ = + createLoadBalancer(*host_set, per_priority_state->global_panic_); } { @@ -42,6 +45,7 @@ ThreadAwareLoadBalancerBase::LoadBalancerImpl::chooseHost(LoadBalancerContext* c if (per_priority_state_ == nullptr) { return nullptr; } + // If there is no hash in the context, just choose a random value (this effectively becomes // the random LB but it won't crash if someone configures it this way). // computeHashKey() may be computed on demand, so get it only once. diff --git a/source/common/upstream/thread_aware_lb_impl.h b/source/common/upstream/thread_aware_lb_impl.h index 5bbc46c212bfd..8233d5e3dfc24 100644 --- a/source/common/upstream/thread_aware_lb_impl.h +++ b/source/common/upstream/thread_aware_lb_impl.h @@ -78,7 +78,8 @@ class ThreadAwareLoadBalancerBase : public LoadBalancerBase, public ThreadAwareL std::shared_ptr> per_priority_load_; }; - virtual HashingLoadBalancerSharedPtr createLoadBalancer(const HostSet& host_set) PURE; + virtual HashingLoadBalancerSharedPtr createLoadBalancer(const HostSet& host_set, + bool inPanic) PURE; void refresh(); std::shared_ptr factory_; From 5175c9a9343eb34b9d04d79f9f72d3364eb73e1f Mon Sep 17 00:00:00 2001 From: Christoph Pakulski Date: Tue, 2 Oct 2018 11:48:39 -0400 Subject: [PATCH 3/7] Small changes after code review: renamed variable names to match coding style. 
Signed-off-by: Christoph Pakulski --- source/common/upstream/load_balancer_impl.cc | 8 ++++---- source/common/upstream/maglev_lb.h | 4 ++-- source/common/upstream/ring_hash_lb.h | 4 ++-- source/common/upstream/thread_aware_lb_impl.h | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/source/common/upstream/load_balancer_impl.cc b/source/common/upstream/load_balancer_impl.cc index 29562f5aad0b8..03e65543c5447 100644 --- a/source/common/upstream/load_balancer_impl.cc +++ b/source/common/upstream/load_balancer_impl.cc @@ -72,11 +72,11 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority) { // load for all priority levels. // The following cases are handled here; - // - Total health is >= 100. It means there are enough healthy hosts to handle the load. + // - Total health is = 100. It means there are enough healthy hosts to handle the load. // Do not enter panic mode, even if a specific priority has low number of healthy hosts. // - Total health is < 100. There is not enough healthy hosts to handle the load. Continue - // distibuting the load among priority sets, but turn on Panic mode if # of healthy hosts is - // low. + // distibuting the load among priority sets, but turn on Panic mode if # of healthy hosts + // in priority set is low. // - Total health is 0. All hosts are down. Redirect 100% of traffic to P=0 and enable PanicMode. // @@ -102,7 +102,7 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority) { // For each level check if it should run in Panic mode. Never set Panic mode if the total health // is 100%, even when individual priority level has very low # of healthy hosts. - HostSet& priority_host_set = *priority_set_.hostSetsPerPriority()[i]; + const HostSet& priority_host_set = *priority_set_.hostSetsPerPriority()[i]; per_priority_panic_[i] = (total_health == 100 ? 
false : isGlobalPanic(priority_host_set)); } if (total_load != 0) { diff --git a/source/common/upstream/maglev_lb.h b/source/common/upstream/maglev_lb.h index 746256e69e5e4..58ab7e714bd67 100644 --- a/source/common/upstream/maglev_lb.h +++ b/source/common/upstream/maglev_lb.h @@ -58,13 +58,13 @@ class MaglevLoadBalancer : public ThreadAwareLoadBalancerBase { private: // ThreadAwareLoadBalancerBase - HashingLoadBalancerSharedPtr createLoadBalancer(const HostSet& host_set, bool inPanic) override { + HashingLoadBalancerSharedPtr createLoadBalancer(const HostSet& host_set, bool in_panic) override { // Note that we only compute global panic on host set refresh. Given that the runtime setting // will rarely change, this is a reasonable compromise to avoid creating extra LBs when we only // need to create one per priority level. const bool has_locality = host_set.localityWeights() != nullptr && !host_set.localityWeights()->empty(); - if (inPanic) { + if (in_panic) { if (!has_locality) { return std::make_shared(HostsPerLocalityImpl(host_set.hosts(), false), nullptr, table_size_); diff --git a/source/common/upstream/ring_hash_lb.h b/source/common/upstream/ring_hash_lb.h index 9214e6bb23461..0de89f105db6f 100644 --- a/source/common/upstream/ring_hash_lb.h +++ b/source/common/upstream/ring_hash_lb.h @@ -45,11 +45,11 @@ class RingHashLoadBalancer : public ThreadAwareLoadBalancerBase, typedef std::shared_ptr RingConstSharedPtr; // ThreadAwareLoadBalancerBase - HashingLoadBalancerSharedPtr createLoadBalancer(const HostSet& host_set, bool inPanic) override { + HashingLoadBalancerSharedPtr createLoadBalancer(const HostSet& host_set, bool in_panic) override { // Note that we only compute global panic on host set refresh. Given that the runtime setting // will rarely change, this is a reasonable compromise to avoid creating extra LBs when we only // need to create one per priority level. 
- if (inPanic) { + if (in_panic) { return std::make_shared(config_, host_set.hosts()); } else { return std::make_shared(config_, host_set.healthyHosts()); diff --git a/source/common/upstream/thread_aware_lb_impl.h b/source/common/upstream/thread_aware_lb_impl.h index 8233d5e3dfc24..e9fdeb8e6cb74 100644 --- a/source/common/upstream/thread_aware_lb_impl.h +++ b/source/common/upstream/thread_aware_lb_impl.h @@ -79,7 +79,7 @@ class ThreadAwareLoadBalancerBase : public LoadBalancerBase, public ThreadAwareL }; virtual HashingLoadBalancerSharedPtr createLoadBalancer(const HostSet& host_set, - bool inPanic) PURE; + bool in_panic) PURE; void refresh(); std::shared_ptr factory_; From 510fbc25bf70290bc76b26f87e7ac7c8ef530736 Mon Sep 17 00:00:00 2001 From: Christoph Pakulski Date: Thu, 4 Oct 2018 13:57:07 -0400 Subject: [PATCH 4/7] Small code changes after code review and updated doc with examples how priority levels and panic threshold interact. Signed-off-by: Christoph Pakulski --- .../intro/arch_overview/load_balancing.rst | 86 ++++++++++++++----- source/common/upstream/load_balancer_impl.cc | 19 ++-- 2 files changed, 77 insertions(+), 28 deletions(-) diff --git a/docs/root/intro/arch_overview/load_balancing.rst b/docs/root/intro/arch_overview/load_balancing.rst index 2052e79691ad8..d3a8f436c80b7 100644 --- a/docs/root/intro/arch_overview/load_balancing.rst +++ b/docs/root/intro/arch_overview/load_balancing.rst @@ -143,26 +143,6 @@ percentage of healthy hosts multiplied by the overprovisioning factor drops below 100. The default value is 1.4, so a priority level or locality will not be considered unhealthy until the percentage of healthy endpoints goes below 72%. -.. _arch_overview_load_balancing_panic_threshold: - -Panic threshold ---------------- - -During load balancing, Envoy will generally only consider healthy hosts in an upstream cluster. 
-However, if the percentage of healthy hosts in the cluster becomes too low, Envoy will disregard -health status and balance amongst all hosts. This is known as the *panic threshold*. The default -panic threshold is 50%. This is :ref:`configurable ` via -runtime as well as in the :ref:`cluster configuration -`. The panic threshold -is used to avoid a situation in which host failures cascade throughout the cluster as load -increases. - -Note that panic thresholds are *per-priority*. This means that if the percentage of healthy nodes -in a single priority goes below the threshold, that priority will enter panic mode. In general -it is discouraged to use panic thresholds in conjunction with priorities, as by the time enough -nodes are unhealthy to trigger the panic threshold most of the traffic should already have spilled -over to the next priority level. - .. _arch_overview_load_balancing_priority_levels: Priority levels @@ -247,6 +227,72 @@ To sum this up in pseudo algorithms: total_health = min(100, Σ(health(P_0)...health(P_X)) load to P_X = 100 - Σ(percent_load(P_0)..percent_load(P_X-1)) +.. _arch_overview_load_balancing_panic_threshold: + +Panic threshold +--------------- + +During load balancing, Envoy will generally only consider healthy hosts in an upstream cluster. +However, if the percentage of healthy hosts in the cluster becomes too low, Envoy will disregard +health status and balance amongst all hosts. This is known as the *panic threshold*. The default +panic threshold is 50%. This is :ref:`configurable ` via +runtime as well as in the :ref:`cluster configuration +`. The panic threshold +is used to avoid a situation in which host failures cascade throughout the cluster as load +increases. + +Panic thresholds work in conjunction with priorities. If number of healthy hosts in given priority +goes down, Envoy will try try shift some traffic to lower priorities. 
If it succeeds finding enough +healthy hosts in lower priorities, Envoy will disregard panic thresholds. In mathematical terms, +if total health across all priority levels is 100%, Envoy disregards panic thresholds but continues to +distribute traffic load across priorities according to algorithm described :ref:`here ( + // Sum of priority levels' health values may exceed 100, so it is capped at 100 and referred as + // normalized total health. + const uint32_t normalized_total_health = std::min( std::accumulate(per_priority_health_.begin(), per_priority_health_.end(), 0), 100); - if (total_health == 0) { + if (normalized_total_health == 0) { // Everything is terrible. Send all load to P=0. // In this one case sumEntries(per_priority_load_) != 100 since we sinkhole all traffic in P=0. per_priority_load_[0] = 100; @@ -97,13 +99,14 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority) { // Now assign as much load as possible to the high priority levels and cease assigning load when // total_load runs out. per_priority_load_[i] = - std::min(total_load, per_priority_health_[i] * 100 / total_health); + std::min(total_load, per_priority_health_[i] * 100 / normalized_total_health); total_load -= per_priority_load_[i]; // For each level check if it should run in Panic mode. Never set Panic mode if the total health // is 100%, even when individual priority level has very low # of healthy hosts. const HostSet& priority_host_set = *priority_set_.hostSetsPerPriority()[i]; - per_priority_panic_[i] = (total_health == 100 ? false : isGlobalPanic(priority_host_set)); + per_priority_panic_[i] = + (normalized_total_health == 100 ? false : isGlobalPanic(priority_host_set)); } if (total_load != 0) { // Account for rounding errors. From ec08cc3278f3befb8cfa23a081253b5073d76036 Mon Sep 17 00:00:00 2001 From: Christoph Pakulski Date: Fri, 5 Oct 2018 14:20:47 -0400 Subject: [PATCH 5/7] Fixed broken link in documentation. 
Signed-off-by: Christoph Pakulski --- docs/root/intro/arch_overview/load_balancing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/root/intro/arch_overview/load_balancing.rst b/docs/root/intro/arch_overview/load_balancing.rst index 92d627ff23565..3b7dae70cdf6d 100644 --- a/docs/root/intro/arch_overview/load_balancing.rst +++ b/docs/root/intro/arch_overview/load_balancing.rst @@ -245,7 +245,7 @@ Panic thresholds work in conjunction with priorities. If number of healthy hosts goes down, Envoy will try try shift some traffic to lower priorities. If it succeeds finding enough healthy hosts in lower priorities, Envoy will disregard panic thresholds. In mathematical terms, if total health across all priority levels is 100%, Envoy disregards panic thresholds but continues to -distribute traffic load across priorities according to algorithm described :ref:`here `. The following examples explain relationship between total health and panic threshold. It is assumed that default value of 50% is used for panic threshold. From 68f9cd57e4db902edb2bc6b396fbd6300519fbcc Mon Sep 17 00:00:00 2001 From: Christoph Pakulski Date: Tue, 9 Oct 2018 16:27:09 -0400 Subject: [PATCH 6/7] Added method to LoadBalancer to calculate normalized total health. Expanded priority_levels and panic _threshold sections in arch_overview docs. Signed-off-by: Christoph Pakulski --- .../intro/arch_overview/load_balancing.rst | 162 ++++++++++-------- source/common/upstream/load_balancer_impl.cc | 27 +-- source/common/upstream/load_balancer_impl.h | 10 ++ 3 files changed, 118 insertions(+), 81 deletions(-) diff --git a/docs/root/intro/arch_overview/load_balancing.rst b/docs/root/intro/arch_overview/load_balancing.rst index 3b7dae70cdf6d..0bd2e55a2ba53 100644 --- a/docs/root/intro/arch_overview/load_balancing.rst +++ b/docs/root/intro/arch_overview/load_balancing.rst @@ -161,43 +161,58 @@ healthy because 80*1.4 > 100. As the number of healthy endpoints dips below 72%, goes below 100. 
At that point the percent of traffic equivalent to the health of P=0 will go to P=0 and remaining traffic will flow to P=1. +It is important to understand how Envoy evaluates priority levels' health. Each priority level is assigned +a health value which basically is a percentage of healthy hosts in relation to total number of hosts in a given +priority level multiplied by overprovisioning factor of 1.4. +A priority level's health is integer value between 0% and 100%. When there are more than one priority levels +in a cluster, Envoy adds all priority levels' health values and caps it at 100%. This is called normalized total health. Value 0% of +normalized total health means that all hosts in all priority levels are unhealthy. Value 100% of normalized total health may +describe many situations: all levels have health of 100% or 4 levels have health value of 30% each. +When normalized total health value is 100%, Envoy assumes that there are enough healthy hosts in all priority +levels to handle the load. Not all hosts need to be in one priority as Envoy distributes traffic across priority +levels based on each priority level's health value. +In order for load distribution algorithm and normalized total health calculation work properly, the number of hosts +in each priority level should be close. Envoy assumes that for example 100% healthy priority level P=1 is able to take +entire traffic from P=0 should all its hosts become unhealthy. If P=0 has 10 hosts and P=1 has only 2 hosts, P=1 may be unable +to take entire load from P=0, even though P=1 health is 100%. + Assume a simple set-up with 2 priority levels, P=1 100% healthy. 
-+----------------------------+---------------------------+----------------------------+ -| P=0 healthy endpoints | Percent of traffic to P=0 | Percent of traffic to P=1 | -+============================+===========================+============================+ -| 100% | 100% | 0% | -+----------------------------+---------------------------+----------------------------+ -| 72% | 100% | 0% | -+----------------------------+---------------------------+----------------------------+ -| 71% | 99% | 1% | -+----------------------------+---------------------------+----------------------------+ -| 50% | 70% | 30% | -+----------------------------+---------------------------+----------------------------+ -| 25% | 35% | 65% | -+----------------------------+---------------------------+----------------------------+ -| 0% | 0% | 100% | -+----------------------------+---------------------------+----------------------------+ ++----------------------------+----------------+-----------------+ +| P=0 healthy endpoints | Traffic to P=0 | Traffic to P=1 | ++============================+================+=================+ +| 100% | 100% | 0% | ++----------------------------+----------------+-----------------+ +| 72% | 100% | 0% | ++----------------------------+----------------+-----------------+ +| 71% | 99% | 1% | ++----------------------------+----------------+-----------------+ +| 50% | 70% | 30% | ++----------------------------+----------------+-----------------+ +| 25% | 35% | 65% | ++----------------------------+----------------+-----------------+ +| 0% | 0% | 100% | ++----------------------------+----------------+-----------------+ If P=1 becomes unhealthy, it will continue to take spilled load from P=0 until the sum of the health P=0 + P=1 goes below 100. At this point the healths will be scaled up to an "effective" health of 100%. 
-+------------------------+-------------------------+-----------------+-----------------+ -| P=0 healthy endpoints | P=1 healthy endpoints | Traffic to P=0 | Traffic to P=1 | -+========================+=========================+=================+=================+ -| 100% | 100% | 100% | 0% | -+------------------------+-------------------------+-----------------+-----------------+ -| 72% | 72% | 100% | 0% | -+------------------------+-------------------------+-----------------+-----------------+ -| 71% | 71% | 99% | 1% | -+------------------------+-------------------------+-----------------+-----------------+ -| 50% | 50% | 70% | 30% | -+------------------------+-------------------------+-----------------+-----------------+ -| 25% | 100% | 35% | 65% | -+------------------------+-------------------------+-----------------+-----------------+ -| 25% | 25% | 50% | 50% | -+------------------------+-------------------------+-----------------+-----------------+ ++------------------------+-------------------------+-----------------+----------------+ +| P=0 healthy endpoints | P=1 healthy endpoints | Traffic to P=0 | Traffic to P=1 | ++========================+=========================+=================+================+ +| 100% | 100% | 100% | 0% | ++------------------------+-------------------------+-----------------+----------------+ +| 72% | 72% | 100% | 0% | ++------------------------+-------------------------+-----------------+----------------+ +| 71% | 71% | 99% | 1% | ++------------------------+-------------------------+-----------------+----------------+ +| 50% | 50% | 70% | 30% | ++------------------------+-------------------------+-----------------+----------------+ +| 25% | 100% | 35% | 65% | ++------------------------+-------------------------+-----------------+----------------+ +| 25% | 25% | 50% | 50% | ++------------------------+-------------------------+-----------------+----------------+ As more priorities are added, each level consumes load equal to its 
"scaled" effective health, so P=2 would only receive traffic if the combined health of P=0 + P=1 was less than 100. @@ -222,9 +237,9 @@ To sum this up in pseudo algorithms: :: - load to P_0 = min(100, health(P_0) * 100 / total_health) + load to P_0 = min(100, health(P_0) * 100 / normalized_total_health) health(P_X) = 140 * healthy_P_X_backends / total_P_X_backends - total_health = min(100, Σ(health(P_0)...health(P_X)) + normalized_total_health = min(100, Σ(health(P_0)...health(P_X))) load to P_X = 100 - Σ(percent_load(P_0)..percent_load(P_X-1)) .. _arch_overview_load_balancing_panic_threshold: @@ -241,55 +256,60 @@ runtime as well as in the :ref:`cluster configuration is used to avoid a situation in which host failures cascade throughout the cluster as load increases. -Panic thresholds work in conjunction with priorities. If number of healthy hosts in given priority -goes down, Envoy will try try shift some traffic to lower priorities. If it succeeds finding enough +Panic thresholds work in conjunction with priorities. If the number of healthy hosts in a given priority +goes down, Envoy will try to shift some traffic to lower priorities. If it succeeds in finding enough healthy hosts in lower priorities, Envoy will disregard panic thresholds. In mathematical terms, -if total health across all priority levels is 100%, Envoy disregards panic thresholds but continues to +if normalized total health across all priority levels is 100%, Envoy disregards panic thresholds but continues to distribute traffic load across priorities according to algorithm described :ref:`here `. +However, when the value of normalized total health drops below 100%, Envoy assumes that there are not enough healthy +hosts across all priority levels. It continues to distribute traffic load across priorities, but if a specific priority level's +health is below the panic threshold, traffic will go to all hosts in that priority regardless of their health.
-The following examples explain relationship between total health and panic threshold. It is +The following examples explain relationship between normalized total health and panic threshold. It is assumed that default value of 50% is used for panic threshold. Assume a simple set-up with 2 priority levels, P=1 100% healthy. In this scenario -total health is always 100% and P=0 never enters panic mode and Envoy is able to shift entire traffic to P=1. - -+----------------------------+--------------------+ -| P=0 healthy endpoints | P=0 in panic | -+============================+====================+ -| 100% | NO | -+----------------------------+--------------------+ -| 72% | NO | -+----------------------------+--------------------+ -| 71% | NO | -+----------------------------+--------------------+ -| 50% | NO | -+----------------------------+--------------------+ -| 25% | NO | -+----------------------------+--------------------+ -| 0% | NO | -+----------------------------+--------------------+ +normalized total health is always 100% and P=0 never enters panic mode and Envoy is able to shift entire traffic to P=1. 
+ ++-------------+------------+--------------+------------+--------------+--------------+ +| P=0 healthy | Traffic | P=0 in panic | Ttraffic | P=1 in panic | normalized | +| endpoints | to P=0 | | to P=1 | | total health | ++=============+============+==============+============+==============+==============+ +| 100% | 100% | NO | 0% | NO | 100% | ++-------------+------------+--------------+------------+--------------+--------------+ +| 72% | 100% | NO | 0% | NO | 100% | ++-------------+------------+--------------+------------+--------------+--------------+ +| 71% | 100% | NO | 0% | NO | 100% | ++-------------+------------+--------------+------------+--------------+--------------+ +| 50% | 100% | NO | 0% | NO | 100% | ++-------------+------------+--------------+------------+--------------+--------------+ +| 25% | 100% | NO | 0% | NO | 100% | ++-------------+------------+--------------+------------+--------------+--------------+ +| 0% | 100% | NO | 0% | NO | 100% | ++-------------+------------+--------------+------------+--------------+--------------+ If P=1 becomes unhealthy, panic threshold continues to be disregarded until the sum of the health -P=0 + P=1 goes below 100. At this point Envoy starts checking panic threshold value for each +P=0 + P=1 goes below 100%. At this point Envoy starts checking panic threshold value for each priority. 
-+------------------------+-------------------------+-----------------+-----------------+-----------------+ -| P=0 healthy endpoints | P=1 healthy endpoints | Total health | P=0 in panic | P=1 in panic | -+========================+=========================+=================+=================+=================+ -| 100% | 100% | 100% | NO | NO | -+------------------------+-------------------------+-----------------+-----------------+-----------------+ -| 72% | 72% | 100% | NO | NO | -+------------------------+-------------------------+-----------------+-----------------+-----------------+ -| 71% | 71% | 100% | NO | NO | -+------------------------+-------------------------+-----------------+-----------------+-----------------+ -| 50% | 50% | 100% | NO | NO | -+------------------------+-------------------------+-----------------+-----------------+-----------------+ -| 25% | 100% | 100% | NO | NO | -+------------------------+-------------------------+-----------------+-----------------+-----------------+ -| 25% | 25% | 70% | YES | YES | -+------------------------+-------------------------+-----------------+-----------------+-----------------+ -| 5% | 65% | 98% | YES | NO | -+------------------------+-------------------------+-----------------+-----------------+-----------------+ ++-------------+-------------+----------+--------------+----------+--------------+-------------+ +| P=0 healthy | P=1 healthy | Traffic | P=0 in panic | Traffic | P=1 in panic | normalized | +| endpoints | endpoints | to P=0 | | to P=1 | | total health| ++=============+=============+==========+==============+==========+==============+=============+ +| 100% | 100% | 100% | NO | 0% | NO | 100% | ++-------------+-------------+----------+--------------+----------+--------------+-------------+ +| 72% | 72% | 100% | NO | 0% | NO | 100% | ++-------------+-------------+----------+--------------+----------+--------------+-------------+ +| 71% | 71% | 99% | NO | 1% | NO | 100% | 
++-------------+-------------+----------+--------------+----------+--------------+-------------+ +| 50% | 60% | 50% | NO | 50% | NO | 100% | ++-------------+-------------+----------+--------------+----------+--------------+-------------+ +| 25% | 100% | 25% | NO | 75% | NO | 100% | ++-------------+-------------+----------+--------------+----------+--------------+-------------+ +| 25% | 25% | 50% | YES | 50% | YES | 70% | ++-------------+-------------+----------+--------------+----------+--------------+-------------+ +| 5% | 65% | 7% | YES | 93% | NO | 98% | ++-------------+-------------+----------+--------------+----------+--------------+-------------+ Note that panic thresholds can be configured *per-priority*. diff --git a/source/common/upstream/load_balancer_impl.cc b/source/common/upstream/load_balancer_impl.cc index 131ea07bd1da3..1e7980828cad1 100644 --- a/source/common/upstream/load_balancer_impl.cc +++ b/source/common/upstream/load_balancer_impl.cc @@ -62,13 +62,16 @@ LoadBalancerBase::LoadBalancerBase(const PrioritySet& priority_set, ClusterStats } // The following cases are handled by -// recalculatePerPriorityState and recalculatePerPriorityPanic methods; -// - Total health is = 100. It means there are enough healthy hosts to handle the load. +// recalculatePerPriorityState and recalculatePerPriorityPanic methods (normalized total health is +// sum of all priorities' health values and capped at 100). +// - normalized total health is = 100%. It means there are enough healthy hosts to handle the load. // Do not enter panic mode, even if a specific priority has low number of healthy hosts. -// - Total health is < 100. There are not enough healthy hosts to handle the load. Continue +// - normalized total health is < 100%. There are not enough healthy hosts to handle the load. +// Continue // distibuting the load among priority sets, but turn on panic mode for a given priority // if # of healthy hosts in priority set is low. -// - Total health is 0. 
All hosts are down. Redirect 100% of traffic to P=0 and enable panic mode. +// - normalized total health is 0%. All hosts are down. Redirect 100% of traffic to P=0 and enable +// panic mode. void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority, const PrioritySet& priority_set, @@ -83,6 +86,11 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority, HostSet& host_set = *priority_set.hostSetsPerPriority()[priority]; per_priority_health[priority] = 0; if (host_set.hosts().size() > 0) { + // Each priority level's health is ratio of healthy hosts to total number of hosts in a priority + // multiplied by overprovisioning factor of 1.4 and capped at 100%. It means that if all + // hosts are healthy that priority's health is 100%*1.4=140% and is capped at 100% which results + // in 100%. If 80% of hosts are healthy, that priority's health is still 100% (80%*1.4=112% and + // capped at 100%). per_priority_health[priority] = std::min(100, (host_set.overprovisioning_factor() * host_set.healthyHosts().size() / host_set.hosts().size())); @@ -97,8 +105,7 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority, // adds up to 100. // Sum of priority levels' health values may exceed 100, so it is capped at 100 and referred as // normalized total health. - const uint32_t normalized_total_health = std::min( - std::accumulate(per_priority_health.begin(), per_priority_health.end(), 0), 100); + const uint32_t normalized_total_health = calcNormalizedTotalHealth(per_priority_health); if (normalized_total_health == 0) { // Everything is terrible. Send all load to P=0. // In this one case sumEntries(per_priority_load) != 100 since we sinkhole all traffic in P=0.
@@ -131,8 +138,7 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority, void LoadBalancerBase::recalculatePerPriorityPanic() { per_priority_panic_.resize(priority_set_.hostSetsPerPriority().size()); - const uint32_t normalized_total_health = std::min( - std::accumulate(per_priority_health_.begin(), per_priority_health_.end(), 0), 100); + const uint32_t normalized_total_health = calcNormalizedTotalHealth(per_priority_health_); if (normalized_total_health == 0) { // Everything is terrible. All load should be to P=0. Turn on panic mode. @@ -142,8 +148,9 @@ void LoadBalancerBase::recalculatePerPriorityPanic() { } for (size_t i = 0; i < per_priority_health_.size(); ++i) { - // For each level check if it should run in panic mode. Never set panic mode if the total health - // is 100%, even when individual priority level has very low # of healthy hosts. + // For each level check if it should run in panic mode. Never set panic mode if + // normalized total health is 100%, even when individual priority level has very low # of + // healthy hosts. const HostSet& priority_host_set = *priority_set_.hostSetsPerPriority()[i]; per_priority_panic_[i] = (normalized_total_health == 100 ? false : isGlobalPanic(priority_host_set)); diff --git a/source/common/upstream/load_balancer_impl.h b/source/common/upstream/load_balancer_impl.h index 306fa050af555..ffec62688ee9a 100644 --- a/source/common/upstream/load_balancer_impl.h +++ b/source/common/upstream/load_balancer_impl.h @@ -81,6 +81,16 @@ class LoadBalancerBase : public LoadBalancer { void recalculatePerPriorityPanic(); protected: + // Method calculates normalized total health. Each priority level's health is ratio of + // healthy hosts to total number of hosts in a priority multiplied by overprovisioning factor + // of 1.4 and capped at 100%. Effectively each priority's health is a value between 0-100%. + // Calculating normalized total health starts with summarizing all priorities' health values. 
+ // It can exceed 100%. For example if there are three priorities and each is 100% healthy, the + // total of all priorities is 300%. Normalized total health is then capped at 100%. + static uint32_t calcNormalizedTotalHealth(std::vector<uint32_t>& per_priority_health) { + return std::min( + std::accumulate(per_priority_health.begin(), per_priority_health.end(), 0), 100); + } // The percentage load (0-100) for each priority level std::vector<uint32_t> per_priority_load_; // The health (0-100) for each priority level. From 0a918949ebc7125ba71d69e9454ab7f2366defc2 Mon Sep 17 00:00:00 2001 From: Christoph Pakulski Date: Wed, 10 Oct 2018 11:09:18 -0400 Subject: [PATCH 7/7] Added release notes. Corrected spelling mistakes in arch overview. Signed-off-by: Christoph Pakulski --- docs/root/intro/arch_overview/load_balancing.rst | 8 ++++---- docs/root/intro/version_history.rst | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/root/intro/arch_overview/load_balancing.rst b/docs/root/intro/arch_overview/load_balancing.rst index 0bd2e55a2ba53..547962dc086ce 100644 --- a/docs/root/intro/arch_overview/load_balancing.rst +++ b/docs/root/intro/arch_overview/load_balancing.rst @@ -171,10 +171,10 @@ describe many situations: all levels have health of 100% or 4 levels have health When normalized total health value is 100%, Envoy assumes that there are enough healthy hosts in all priority levels to handle the load. Not all hosts need to be in one priority as Envoy distributes traffic across priority levels based on each priority level's health value. -In order for load distribution algorithm and normalized total health calculation work properly, the number of hosts in each priority level should be close. Envoy assumes that for example 100% healthy priority level P=1 is able to take -entire traffic from P=0 should all its hosts become unhealthy. +the entire traffic from P=0 should all its hosts become unhealthy.
If P=0 has 10 hosts and P=1 has only 2 hosts, P=1 may be unable -to take entire load from P=0, even though P=1 health is 100%. +the entire traffic from P=0 should all its hosts become unhealthy. If P=0 has 10 hosts and P=1 has only 2 hosts, P=1 may be unable +to take the entire load from P=0, even though P=1 health is 100%. Assume a simple set-up with 2 priority levels, P=1 100% healthy. @@ -272,7 +272,7 @@ Assume a simple set-up with 2 priority levels, P=1 100% healthy. In this scenari normalized total health is always 100% and P=0 never enters panic mode and Envoy is able to shift entire traffic to P=1. +-------------+------------+--------------+------------+--------------+--------------+ -| P=0 healthy | Traffic | P=0 in panic | Ttraffic | P=1 in panic | normalized | +| P=0 healthy | Traffic | P=0 in panic | Traffic | P=1 in panic | normalized | | endpoints | to P=0 | | to P=1 | | total health | +=============+============+==============+============+==============+==============+ | 100% | 100% | NO | 0% | NO | 100% | diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst index 1927b261d03da..6ca70d4b0cd05 100644 --- a/docs/root/intro/version_history.rst +++ b/docs/root/intro/version_history.rst @@ -13,6 +13,7 @@ Version history * stats: added :ref:`stats_matcher ` to the bootstrap config for granular control of stat instantiation. * stream: renamed the `RequestInfo` namespace to `StreamInfo` to better match its behaviour within TCP and HTTP implementations. +* upstream: changed how load calculation for :ref:`priority levels <arch_overview_load_balancing_priority_levels>` and :ref:`panic thresholds <arch_overview_load_balancing_panic_threshold>` interact. As long as normalized total health is 100%, panic thresholds are disregarded. 1.8.0 (Oct 4, 2018) ===================