-
Notifications
You must be signed in to change notification settings - Fork 5.3k
retry extensions: implement "other priority" extension #4529
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f91c5ce
f52c9dc
6b6c025
15967bc
2274a5a
158d084
f619696
16be1ce
60c4fa6
6d446f5
978f3f7
e5b3b36
48c0963
83166de
580cb5c
b8abcd3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| licenses(["notice"]) # Apache 2 | ||
|
|
||
| load("//bazel:api_build_system.bzl", "api_proto_library_internal") | ||
|
|
||
| api_proto_library_internal( | ||
| name = "other_priority", | ||
| srcs = ["other_priority_config.proto"], | ||
| deps = [ | ||
| "//envoy/api/v2/core:base", | ||
| ], | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| syntax = "proto3"; | ||
|
|
||
| package envoy.config.retry.other_priority; | ||
|
|
||
| // A retry host selector that attempts to spread retries between priorities, even if certain | ||
| // priorities would not normally be attempted due to higher priorities being available. | ||
| // | ||
| // As priorities get excluded, load will be distributed amongst the remaining healthy priorities | ||
| // based on the relative health of the priorities, matching how load is distributed during regular | ||
| // host selection. For example, given priority healths of {100, 50, 50}, the original load will be | ||
| // {100, 0, 0} (since P0 has capacity to handle 100% of the traffic). If P0 is excluded, the load | ||
| // changes to {0, 50, 50}, because P1 is only able to handle 50% of the traffic, causing the | ||
| // remaining to spill over to P2. | ||
| // | ||
| // Each priority attempted will be excluded until there are no healthy priorities left, at which | ||
| // point the list of attempted priorities will be reset, essentially starting from the beginning. | ||
| // For example, given three priorities P0, P1, P2 with healthy % of 100, 0 and 50 respectively, the | ||
| // following sequence of priorities would be selected (assuming update_frequency = 1): | ||
| // Attempt 1: P0 (P0 is 100% healthy) | ||
| // Attempt 2: P2 (P0 already attempted, P2 only healthy priority) | ||
| // Attempt 3: P0 (no healthy priorities, reset) | ||
| // Attempt 4: P2 | ||
| // | ||
| // Using this PriorityFilter requires rebuilding the priority load, which runs in O(# of | ||
| // priorities), which might incur significant overhead for clusters with many priorities. | ||
| message OtherPriorityConfig { | ||
| // How often the priority load should be updated based on previously attempted priorities. Useful | ||
| // to allow each priority to receive more than one request before being excluded or to reduce | ||
| // the number of times that the priority load has to be recomputed. | ||
| // | ||
| // For example, by setting this to 2, then the first two attempts (initial attempt and first | ||
| // retry) will use the unmodified priority load. The third and fourth attempt will use priority | ||
| // load which excludes the priorities routed to with the first two attempts, and the fifth and | ||
| // sixth attempt will use the priority load excluding the priorities used for the first four | ||
| // attempts. | ||
| int32 update_frequency = 1; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There might be some context I'm missing here, but it's not obvious to me what the units of this frequency are.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The unit here is "number of attempts". I'll try to clarify the comment.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The comment definitely helps, but reading it I misread and thought this was number_of_requests_using_default_priority. Can we extend it just a bit to say that the fourth and fifth will then use the priority load excluding priorities for the first 4 attempts? We should also comment on what happens when we run out of priority levels. |
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,25 +44,29 @@ LoadBalancerBase::LoadBalancerBase(const PrioritySet& priority_set, ClusterStats | |
| common_config, healthy_panic_threshold, 100, 50)), | ||
| priority_set_(priority_set) { | ||
| for (auto& host_set : priority_set_.hostSetsPerPriority()) { | ||
| recalculatePerPriorityState(host_set->priority()); | ||
| recalculatePerPriorityState(host_set->priority(), priority_set_, per_priority_load_, | ||
| per_priority_health_); | ||
| } | ||
| priority_set_.addMemberUpdateCb( | ||
| [this](uint32_t priority, const HostVector&, const HostVector&) -> void { | ||
| recalculatePerPriorityState(priority); | ||
| }); | ||
| priority_set_.addMemberUpdateCb([this](uint32_t priority, const HostVector&, | ||
| const HostVector&) -> void { | ||
| recalculatePerPriorityState(priority, priority_set_, per_priority_load_, per_priority_health_); | ||
| }); | ||
| } | ||
|
|
||
| void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority) { | ||
| per_priority_load_.resize(priority_set_.hostSetsPerPriority().size()); | ||
| per_priority_health_.resize(priority_set_.hostSetsPerPriority().size()); | ||
| void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority, | ||
| const PrioritySet& priority_set, | ||
| PriorityLoad& per_priority_load, | ||
| std::vector<uint32_t>& per_priority_health) { | ||
| per_priority_load.resize(priority_set.hostSetsPerPriority().size()); | ||
| per_priority_health.resize(priority_set.hostSetsPerPriority().size()); | ||
|
|
||
| // Determine the health of the newly modified priority level. | ||
| // Health ranges from 0-100, and is the ratio of healthy hosts to total hosts, modified by the | ||
| // overprovisioning factor. | ||
| HostSet& host_set = *priority_set_.hostSetsPerPriority()[priority]; | ||
| per_priority_health_[priority] = 0; | ||
| HostSet& host_set = *priority_set.hostSetsPerPriority()[priority]; | ||
| per_priority_health[priority] = 0; | ||
| if (host_set.hosts().size() > 0) { | ||
| per_priority_health_[priority] = | ||
| per_priority_health[priority] = | ||
| std::min<uint32_t>(100, (host_set.overprovisioning_factor() * | ||
| host_set.healthyHosts().size() / host_set.hosts().size())); | ||
| } | ||
|
|
@@ -74,32 +78,32 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority) { | |
| // 3 host sets with 20% / 20% / 10% health they will get 40% / 40% / 20% load to ensure total load | ||
| // adds up to 100. | ||
| const uint32_t total_health = std::min<uint32_t>( | ||
| std::accumulate(per_priority_health_.begin(), per_priority_health_.end(), 0), 100); | ||
| std::accumulate(per_priority_health.begin(), per_priority_health.end(), 0), 100); | ||
| if (total_health == 0) { | ||
| // Everything is terrible. Send all load to P=0. | ||
| // In this one case sumEntries(per_priority_load_) != 100 since we sinkhole all traffic in P=0. | ||
| per_priority_load_[0] = 100; | ||
| per_priority_load[0] = 100; | ||
| return; | ||
| } | ||
|
|
||
| size_t total_load = 100; | ||
| int32_t first_healthy_priority = -1; | ||
| for (size_t i = 0; i < per_priority_health_.size(); ++i) { | ||
| if (first_healthy_priority < 0 && per_priority_health_[i] > 0) { | ||
| for (size_t i = 0; i < per_priority_health.size(); ++i) { | ||
| if (first_healthy_priority < 0 && per_priority_health[i] > 0) { | ||
| first_healthy_priority = i; | ||
| } | ||
| // Now assign as much load as possible to the high priority levels and cease assigning load | ||
| // when total_load runs out. | ||
| per_priority_load_[i] = | ||
| std::min<uint32_t>(total_load, per_priority_health_[i] * 100 / total_health); | ||
| total_load -= per_priority_load_[i]; | ||
| per_priority_load[i] = | ||
| std::min<uint32_t>(total_load, per_priority_health[i] * 100 / total_health); | ||
| total_load -= per_priority_load[i]; | ||
| } | ||
|
|
||
| if (total_load != 0) { | ||
| ASSERT(first_healthy_priority != -1); | ||
| // Account for rounding errors by assigning it to the first healthy priority. | ||
|
||
| ASSERT(total_load < per_priority_load_.size()); | ||
| per_priority_load_[first_healthy_priority] += total_load; | ||
| ASSERT(total_load < per_priority_load.size()); | ||
| per_priority_load[first_healthy_priority] += total_load; | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,11 +70,15 @@ class LoadBalancerBase : public LoadBalancer { | |
| // The priority-ordered set of hosts to use for load balancing. | ||
| const PrioritySet& priority_set_; | ||
|
|
||
| public: | ||
| // Called when a host set at the given priority level is updated. This updates | ||
| // per_priority_health_ for that priority level, and may update per_priority_load_ for all | ||
| // per_priority_health for that priority level, and may update per_priority_load for all | ||
| // priority levels. | ||
| void recalculatePerPriorityState(uint32_t priority); | ||
| void static recalculatePerPriorityState(uint32_t priority, const PrioritySet& priority_set, | ||
| PriorityLoad& priority_load, | ||
| std::vector<uint32_t>& per_priority_health); | ||
|
|
||
| protected: | ||
| // The percentage load (0-100) for each priority level | ||
| std::vector<uint32_t> per_priority_load_; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did you mean to make these vectors public?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, I'll fix. |
||
| // The health (0-100) for each priority level. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| licenses(["notice"]) # Apache 2 | ||
|
|
||
| load( | ||
| "//bazel:envoy_build_system.bzl", | ||
| "envoy_cc_library", | ||
| "envoy_package", | ||
| ) | ||
|
|
||
| envoy_package() | ||
|
|
||
| envoy_cc_library( | ||
| name = "well_known_names", | ||
| hdrs = ["well_known_names.h"], | ||
| deps = [ | ||
| "//source/common/singleton:const_singleton", | ||
| ], | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| licenses(["notice"]) # Apache 2 | ||
|
|
||
| load( | ||
| "//bazel:envoy_build_system.bzl", | ||
| "envoy_cc_library", | ||
| "envoy_package", | ||
| ) | ||
|
|
||
| envoy_package() | ||
|
|
||
| envoy_cc_library( | ||
| name = "other_priority_lib", | ||
| srcs = ["other_priority.cc"], | ||
| hdrs = ["other_priority.h"], | ||
| deps = [ | ||
| "//include/envoy/upstream:retry_interface", | ||
| "//source/common/upstream:load_balancer_lib", | ||
| ], | ||
| ) | ||
|
|
||
| envoy_cc_library( | ||
| name = "config", | ||
| srcs = ["config.cc"], | ||
| hdrs = ["config.h"], | ||
| deps = [ | ||
| ":other_priority_lib", | ||
| "//include/envoy/registry", | ||
| "//include/envoy/upstream:retry_interface", | ||
| "//source/common/protobuf", | ||
| "//source/extensions/retry/priority:well_known_names", | ||
| "@envoy_api//envoy/config/retry/other_priority:other_priority_cc", | ||
| ], | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| #include "extensions/retry/priority/other_priority/config.h" | ||
|
|
||
| #include "envoy/config/retry/other_priority/other_priority_config.pb.validate.h" | ||
| #include "envoy/registry/registry.h" | ||
| #include "envoy/upstream/retry.h" | ||
|
|
||
| namespace Envoy { | ||
| namespace Extensions { | ||
| namespace Retry { | ||
| namespace Priority { | ||
|
|
||
| void OtherPriorityRetryPriorityFactory::createRetryPriority( | ||
| Upstream::RetryPriorityFactoryCallbacks& callbacks, const Protobuf::Message& config, | ||
| uint32_t max_retries) { | ||
| callbacks.addRetryPriority(std::make_shared<OtherPriorityRetryPriority>( | ||
| MessageUtil::downcastAndValidate< | ||
| const envoy::config::retry::other_priority::OtherPriorityConfig&>(config) | ||
| .update_frequency(), | ||
| max_retries)); | ||
| } | ||
|
|
||
| static Registry::RegisterFactory<OtherPriorityRetryPriorityFactory, Upstream::RetryPriorityFactory> | ||
| register_; | ||
|
|
||
| } // namespace Priority | ||
| } // namespace Retry | ||
| } // namespace Extensions | ||
| } // namespace Envoy |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| #pragma once | ||
|
|
||
| #include "envoy/upstream/retry.h" | ||
|
|
||
| #include "common/protobuf/protobuf.h" | ||
|
|
||
| #include "extensions/retry/priority/other_priority/other_priority.h" | ||
| #include "extensions/retry/priority/well_known_names.h" | ||
|
|
||
| namespace Envoy { | ||
| namespace Extensions { | ||
| namespace Retry { | ||
| namespace Priority { | ||
|
|
||
| class OtherPriorityRetryPriorityFactory : public Upstream::RetryPriorityFactory { | ||
| public: | ||
| void createRetryPriority(Upstream::RetryPriorityFactoryCallbacks& callbacks, | ||
| const Protobuf::Message& config, uint32_t max_retries) override; | ||
|
|
||
| std::string name() const override { | ||
| return RetryPriorityValues::get().PreviousPrioritiesRetryPriority; | ||
| } | ||
|
|
||
| ProtobufTypes::MessagePtr createEmptyConfigProto() override { | ||
| return ProtobufTypes::MessagePtr(new ::Envoy::ProtobufWkt::Empty()); | ||
| } | ||
| }; | ||
|
|
||
| } // namespace Priority | ||
| } // namespace Retry | ||
| } // namespace Extensions | ||
| } // namespace Envoy |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is fantastic. 5***** would read again :-)