-
Notifications
You must be signed in to change notification settings - Fork 153
Introduce bootstrap scaling strategies #449
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
openshift-merge-robot
merged 2 commits into
openshift:master
from
ironcladlou:adaptive-bootstrap-ha
Dec 6, 2020
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,164 @@ | ||
| package ceohelpers | ||
|
|
||
| import ( | ||
| "fmt" | ||
|
|
||
| "github.com/openshift/library-go/pkg/operator/v1helpers" | ||
| "k8s.io/apimachinery/pkg/api/errors" | ||
| corev1listers "k8s.io/client-go/listers/core/v1" | ||
| "k8s.io/klog/v2" | ||
|
|
||
| "github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient" | ||
| ) | ||
|
|
||
| // BootstrapScalingStrategy describes the invariants which will be enforced when | ||
| // scaling the etcd cluster. | ||
| type BootstrapScalingStrategy string | ||
|
|
||
| const ( | ||
| // HAScalingStrategy means the etcd cluster will only be scaled up when at least | ||
| // 3 nodes are available so that HA is enforced at all times. This rule applies | ||
| // during bootstrapping and the steady state. | ||
| // | ||
| // This is the default strategy. | ||
| HAScalingStrategy BootstrapScalingStrategy = "HAScalingStrategy" | ||
|
|
||
| // DelayedHAScalingStrategy means that during bootstrapping, the etcd cluster will | ||
| // be allowed to scale when at least 2 nodes are available (which is not HA), | ||
| // but after bootstrapping any further scaling will require 3 nodes in the same | ||
| // way as HAScalingStrategy. | ||
| // | ||
| // This strategy is selected by adding the `openshift.io/delayed-ha-bootstrap` | ||
| // annotation to the openshift-etcd namespace. | ||
| DelayedHAScalingStrategy BootstrapScalingStrategy = "DelayedHAScalingStrategy" | ||
|
|
||
| // UnsafeScalingStrategy means scaling will occur without regard to nodes and | ||
| // any effect on quorum. Use of this strategy isn't officially tested or supported, | ||
| // but is made available for ad-hoc use. | ||
| // | ||
| // This strategy is selected by setting unsupportedConfigOverrides on the | ||
| // operator config. | ||
| UnsafeScalingStrategy BootstrapScalingStrategy = "UnsafeScalingStrategy" | ||
| ) | ||
|
|
||
| const ( | ||
| // DelayedHABootstrapScalingStrategyAnnotation is an annotation on the openshift-etcd | ||
| // namespace which if present indicates the DelayedHAScalingStrategy strategy | ||
| // should be used. | ||
| DelayedHABootstrapScalingStrategyAnnotation = "openshift.io/delayed-ha-bootstrap" | ||
| ) | ||
|
|
||
| // GetBootstrapScalingStrategy determines the scaling strategy to use. | ||
| func GetBootstrapScalingStrategy(staticPodClient v1helpers.StaticPodOperatorClient, namespaceLister corev1listers.NamespaceLister) (BootstrapScalingStrategy, error) { | ||
| var strategy BootstrapScalingStrategy | ||
|
|
||
| operatorSpec, _, _, err := staticPodClient.GetStaticPodOperatorState() | ||
| if err != nil { | ||
| return strategy, fmt.Errorf("failed to get operator state: %w", err) | ||
| } | ||
|
|
||
| isUnsupportedUnsafeEtcd, err := isUnsupportedUnsafeEtcd(operatorSpec) | ||
ironcladlou marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if err != nil { | ||
| return strategy, fmt.Errorf("couldn't determine etcd unsupported override status, assuming default HA scaling strategy: %w", err) | ||
| } | ||
|
|
||
| etcdNamespace, err := namespaceLister.Get(operatorclient.TargetNamespace) | ||
| if err != nil { | ||
| return strategy, fmt.Errorf("failed to get %s namespace: %w", operatorclient.TargetNamespace, err) | ||
| } | ||
| _, hasDelayedHAAnnotation := etcdNamespace.Annotations[DelayedHABootstrapScalingStrategyAnnotation] | ||
|
|
||
| switch { | ||
| case isUnsupportedUnsafeEtcd: | ||
| strategy = UnsafeScalingStrategy | ||
| case hasDelayedHAAnnotation: | ||
| strategy = DelayedHAScalingStrategy | ||
| default: | ||
| strategy = HAScalingStrategy | ||
| } | ||
| return strategy, nil | ||
| } | ||
|
|
||
| // CheckSafeToScaleCluster is used to implement the bootstrap scaling strategy invariants. | ||
| // This function returns nil if cluster conditions are such that it's safe to scale | ||
| // the etcd cluster based on the scaling strategy in use, and otherwise will return | ||
| // an error explaining why it's unsafe to scale. | ||
| func CheckSafeToScaleCluster(configmapLister corev1listers.ConfigMapLister, staticPodClient v1helpers.StaticPodOperatorClient, namespaceLister corev1listers.NamespaceLister) error { | ||
| bootstrapComplete, err := IsBootstrapComplete(configmapLister, staticPodClient) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to determine bootstrap status: %w", err) | ||
| } | ||
|
|
||
| _, operatorStatus, _, err := staticPodClient.GetStaticPodOperatorState() | ||
| if err != nil { | ||
| return fmt.Errorf("failed to get operator state: %w", err) | ||
| } | ||
|
|
||
| scalingStrategy, err := GetBootstrapScalingStrategy(staticPodClient, namespaceLister) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to get bootstrap scaling strategy: %w", err) | ||
| } | ||
|
|
||
| var minimumNodes int | ||
| switch scalingStrategy { | ||
ironcladlou marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| case HAScalingStrategy: | ||
| minimumNodes = 3 | ||
| case UnsafeScalingStrategy: | ||
| minimumNodes = 1 | ||
| case DelayedHAScalingStrategy: | ||
| if bootstrapComplete { | ||
| minimumNodes = 3 | ||
| } else { | ||
| minimumNodes = 2 | ||
| } | ||
| default: | ||
| return fmt.Errorf("unrecognized scaling strategy %q", scalingStrategy) | ||
| } | ||
|
|
||
| nodeCount := len(operatorStatus.NodeStatuses) | ||
| if nodeCount < minimumNodes { | ||
| return fmt.Errorf("%d nodes are required, but only %d are available", minimumNodes, nodeCount) | ||
| } | ||
|
|
||
| klog.V(4).Infof("node count %d satisfies minimum of %d required by the %s bootstrap scaling strategy", nodeCount, minimumNodes, scalingStrategy) | ||
| return nil | ||
| } | ||
|
|
||
| // IsBootstrapComplete returns true if bootstrap has completed. | ||
| func IsBootstrapComplete(configMapClient corev1listers.ConfigMapLister, staticPodClient v1helpers.StaticPodOperatorClient) (bool, error) { | ||
| // do a cheap check to see if the annotation is already gone. | ||
| // check to see if bootstrapping is complete | ||
| bootstrapFinishedConfigMap, err := configMapClient.ConfigMaps("kube-system").Get("bootstrap") | ||
| if err != nil { | ||
| if errors.IsNotFound(err) { | ||
| // If the resource was deleted (e.g. by an admin) after bootstrap is actually complete, | ||
| // this is a false negative. | ||
| klog.V(4).Infof("bootstrap considered incomplete because the kube-system/bootstrap configmap wasn't found") | ||
| return false, nil | ||
| } | ||
| // We don't know, give up quickly. | ||
| return false, fmt.Errorf("failed to get configmap %s/%s: %w", "kube-system", "bootstrap", err) | ||
| } | ||
|
|
||
| if status, ok := bootstrapFinishedConfigMap.Data["status"]; !ok || status != "complete" { | ||
| // do nothing, not torn down | ||
| klog.V(4).Infof("bootstrap considered incomplete because status is %q", status) | ||
| return false, nil | ||
| } | ||
|
|
||
| // now run check to stability of revisions | ||
| _, status, _, err := staticPodClient.GetStaticPodOperatorState() | ||
| if err != nil { | ||
| return false, fmt.Errorf("failed to get static pod operator state: %w", err) | ||
| } | ||
| if status.LatestAvailableRevision == 0 { | ||
| return false, nil | ||
| } | ||
| for _, curr := range status.NodeStatuses { | ||
| if curr.CurrentRevision != status.LatestAvailableRevision { | ||
| klog.V(4).Infof("bootstrap considered incomplete because revision %d is still in progress", status.LatestAvailableRevision) | ||
| return false, nil | ||
| } | ||
| } | ||
| return true, nil | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.