diff --git a/pkg/config/clusterstatus/clusterstatus.go b/pkg/config/clusterstatus/clusterstatus.go new file mode 100644 index 0000000000..dbffe62376 --- /dev/null +++ b/pkg/config/clusterstatus/clusterstatus.go @@ -0,0 +1,29 @@ +package clusterstatus + +import ( + "context" + "fmt" + + configv1 "github.com/openshift/api/config/v1" + openshiftcorev1 "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "k8s.io/client-go/rest" +) + +const infraResourceName = "cluster" + +func GetClusterInfraStatus(ctx context.Context, restClient *rest.Config) (*configv1.InfrastructureStatus, error) { + client, err := openshiftcorev1.NewForConfig(restClient) + if err != nil { + return nil, err + } + infra, err := client.Infrastructures().Get(ctx, infraResourceName, metav1.GetOptions{}) + if err != nil { + return nil, err + } + if infra == nil { + return nil, fmt.Errorf("getting resource Infrastructure (name: %s) succeeded but object was nil", infraResourceName) + } + return &infra.Status, nil +} diff --git a/pkg/config/leaderelection/leaderelection.go b/pkg/config/leaderelection/leaderelection.go index fa5d8d79f7..5cec68257b 100644 --- a/pkg/config/leaderelection/leaderelection.go +++ b/pkg/config/leaderelection/leaderelection.go @@ -124,3 +124,31 @@ func LeaderElectionDefaulting(config configv1.LeaderElection, defaultNamespace, } return ret } + +// LeaderElectionSNOConfig uses the formula derived in LeaderElectionDefaulting with increased +// retry period and lease duration for SNO clusters that have limited resources. +// This method does not respect the passed in LeaderElection config and the returned object will have values +// that are overridden with SNO environments in mind. +// This method should only be called when running in an SNO Cluster. 
+func LeaderElectionSNOConfig(config configv1.LeaderElection) configv1.LeaderElection { + + // We want to make sure we respect a 30s clock skew as well as a 4 retry attempt without making + // leader election ineffectual while still having some small performance gain by limiting calls against + // the api server. + + // 1. clock skew tolerance is leaseDuration-renewDeadline == 30s + // 2. kube-apiserver downtime tolerance is == 180s + // lastRetry=floor(renewDeadline/retryPeriod)*retryPeriod == 240 + // downtimeTolerance = lastRetry-retryPeriod == 180s + // 3. worst non-graceful lease acquisition is leaseDuration+retryPeriod == 330s + // 4. worst graceful lease acquisition is retryPeriod == 60s + + ret := *(&config).DeepCopy() + // 270-240 = 30s of clock skew tolerance + ret.LeaseDuration.Duration = 270 * time.Second + // 240/60 = 4 retry attempts before leader is lost. + ret.RenewDeadline.Duration = 240 * time.Second + // With 60s retry config we aim to maintain 30s of clock skew as well as 4 retry attempts. 
+ ret.RetryPeriod.Duration = 60 * time.Second + return ret +} diff --git a/pkg/config/leaderelection/leaderelection_test.go b/pkg/config/leaderelection/leaderelection_test.go new file mode 100644 index 0000000000..84a80fcd1f --- /dev/null +++ b/pkg/config/leaderelection/leaderelection_test.go @@ -0,0 +1,178 @@ +package leaderelection + +import ( + "reflect" + "testing" + "time" + + configv1 "github.com/openshift/api/config/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestLeaderElectionSNOConfig(t *testing.T) { + testCases := []struct { + desc string + inputConfig configv1.LeaderElection + expectedConfig configv1.LeaderElection + }{ + { + desc: "should not alter disable flag when true", + inputConfig: configv1.LeaderElection{ + Disable: true, + }, + expectedConfig: configv1.LeaderElection{ + Disable: true, + LeaseDuration: metav1.Duration{Duration: 270 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 240 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 60 * time.Second}, + }, + }, + { + desc: "should not alter disable flag when false", + inputConfig: configv1.LeaderElection{ + Disable: false, + }, + expectedConfig: configv1.LeaderElection{ + Disable: false, + LeaseDuration: metav1.Duration{Duration: 270 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 240 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 60 * time.Second}, + }, + }, + { + desc: "should change durations when none are provided", + inputConfig: configv1.LeaderElection{}, + expectedConfig: configv1.LeaderElection{ + LeaseDuration: metav1.Duration{Duration: 270 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 240 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 60 * time.Second}, + }, + }, + { + desc: "should change durations for sno configs", + inputConfig: configv1.LeaderElection{ + LeaseDuration: metav1.Duration{Duration: 60 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 40 * time.Second}, + RetryPeriod: 
metav1.Duration{Duration: 20 * time.Second}, + }, + expectedConfig: configv1.LeaderElection{ + LeaseDuration: metav1.Duration{Duration: 270 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 240 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 60 * time.Second}, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + resultConfig := LeaderElectionSNOConfig(tc.inputConfig) + if !reflect.DeepEqual(tc.expectedConfig, resultConfig) { + t.Errorf("expected %#v, got %#v", tc.expectedConfig, resultConfig) + } + }) + } +} + +func TestLeaderElectionDefaulting(t *testing.T) { + testCases := []struct { + desc string + defaultNamespace string + defaultName string + inputConfig configv1.LeaderElection + expectedConfig configv1.LeaderElection + }{ + { + desc: "should not alter disable flag when true", + inputConfig: configv1.LeaderElection{ + Disable: true, + }, + expectedConfig: configv1.LeaderElection{ + Disable: true, + LeaseDuration: metav1.Duration{Duration: 137 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 107 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 26 * time.Second}, + }, + }, + { + desc: "should not alter disable flag when false", + inputConfig: configv1.LeaderElection{ + Disable: false, + }, + expectedConfig: configv1.LeaderElection{ + Disable: false, + LeaseDuration: metav1.Duration{Duration: 137 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 107 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 26 * time.Second}, + }, + }, + { + desc: "should change durations when none are provided", + inputConfig: configv1.LeaderElection{}, + expectedConfig: configv1.LeaderElection{ + LeaseDuration: metav1.Duration{Duration: 137 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 107 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 26 * time.Second}, + }, + }, + { + desc: "should use default name and namespace when none is provided", + inputConfig: configv1.LeaderElection{}, 
+ defaultName: "new-default-name", + defaultNamespace: "new-default-namespace", + expectedConfig: configv1.LeaderElection{ + LeaseDuration: metav1.Duration{Duration: 137 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 107 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 26 * time.Second}, + Name: "new-default-name", + Namespace: "new-default-namespace", + }, + }, + { + desc: "should not alter durations when values are provided", + inputConfig: configv1.LeaderElection{ + LeaseDuration: metav1.Duration{Duration: 60 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 40 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 20 * time.Second}, + }, + expectedConfig: configv1.LeaderElection{ + LeaseDuration: metav1.Duration{Duration: 60 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 40 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 20 * time.Second}, + }, + }, + { + desc: "should not alter when durations, name and namespace are provided", + defaultName: "new-default-name", + defaultNamespace: "new-default-namespace", + inputConfig: configv1.LeaderElection{ + Name: "original-name", + Namespace: "original-namespace", + LeaseDuration: metav1.Duration{Duration: 60 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 40 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 20 * time.Second}, + }, + expectedConfig: configv1.LeaderElection{ + Name: "original-name", + Namespace: "original-namespace", + LeaseDuration: metav1.Duration{Duration: 60 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 40 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 20 * time.Second}, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + resultConfig := LeaderElectionDefaulting(tc.inputConfig, tc.defaultNamespace, tc.defaultName) + + // When testing in clusters, namespace is read from /var/run/secrets/kubernetes.io/serviceaccount/namespace in LeaderElectionDefaulting if none is provided. 
+ // We configure expectedConfig.Namespace to equal resultConfig.Namespace if no default or input namespace is provided + // so we use the dynamic namespace read in from the environment + if tc.defaultNamespace == "" && tc.inputConfig.Namespace == "" { + tc.expectedConfig.Namespace = resultConfig.Namespace + } + + if !reflect.DeepEqual(tc.expectedConfig, resultConfig) { + t.Errorf("expected %#v, got %#v", tc.expectedConfig, resultConfig) + } + }) + } +} diff --git a/pkg/controller/controllercmd/builder.go b/pkg/controller/controllercmd/builder.go index 08c829c22b..1e70728723 100644 --- a/pkg/controller/controllercmd/builder.go +++ b/pkg/controller/controllercmd/builder.go @@ -13,6 +13,7 @@ import ( operatorv1alpha1 "github.com/openshift/api/operator/v1alpha1" "github.com/openshift/library-go/pkg/authorization/hardcodedauthorizer" "github.com/openshift/library-go/pkg/config/client" + "github.com/openshift/library-go/pkg/config/clusterstatus" "github.com/openshift/library-go/pkg/config/configdefaults" leaderelectionconverter "github.com/openshift/library-go/pkg/config/leaderelection" "github.com/openshift/library-go/pkg/config/serving" @@ -89,6 +90,10 @@ type ControllerBuilder struct { // This stub exists for unit test where we can check if the graceful termination work properly. // Default function will klog.Warning(args) and os.Exit(1). 
nonZeroExitFn func(args ...interface{}) + + // Keep track if we defaulted leader election, used to make sure we don't stomp on the user's intent for leader election + // We use this flag to determine at runtime if we can alter leader election for SNO configurations + userExplicitlySetLeaderElectionValues bool } // NewController returns a builder struct for constructing the command you want to run @@ -142,6 +147,11 @@ func (b *ControllerBuilder) WithLeaderElection(leaderElection configv1.LeaderEle return b } + // Record whether the user explicitly provided leader election timing configs; SNO leader election overrides are only applied when they did not + b.userExplicitlySetLeaderElectionValues = leaderElection.LeaseDuration.Duration != 0 || + leaderElection.RenewDeadline.Duration != 0 || + leaderElection.RetryPeriod.Duration != 0 + defaulted := leaderelectionconverter.LeaderElectionDefaulting(leaderElection, defaultNamespace, defaultName) b.leaderElection = &defaulted return b @@ -304,6 +314,17 @@ func (b *ControllerBuilder) Run(ctx context.Context, config *unstructured.Unstru return nil } + if !b.userExplicitlySetLeaderElectionValues { + infraStatus, err := clusterstatus.GetClusterInfraStatus(ctx, clientConfig) + if err != nil || infraStatus == nil { + eventRecorder.Warningf("ClusterInfrastructureStatus", "unable to get cluster infrastructure status, using HA cluster values for leader election: %v", err) + klog.Warningf("unable to get cluster infrastructure status, using HA cluster values for leader election: %v", err) + } else { + snoLeaderElection := infraStatusTopologyLeaderElection(infraStatus, *b.leaderElection) + b.leaderElection = &snoLeaderElection + } + } + // ensure blocking TCP connections don't block the leader election leaderConfig := rest.CopyConfig(protoConfig) leaderConfig.Timeout = b.leaderElection.RenewDeadline.Duration @@ -370,3 +391,17 @@ func (b *ControllerBuilder) getClientConfig() (*rest.Config, error) { return client.GetKubeConfigOrInClusterConfig(kubeconfig, b.clientOverrides) } + +func 
infraStatusTopologyLeaderElection(infraStatus *configv1.InfrastructureStatus, original configv1.LeaderElection) configv1.LeaderElection { + // if we can't determine the infra topology, return original + if infraStatus == nil { + return original + } + + // If we are running in a SingleReplicaTopologyMode and leader election is not disabled, configure leader election for SNO Topology + if infraStatus.ControlPlaneTopology == configv1.SingleReplicaTopologyMode && !original.Disable { + klog.Info("detected SingleReplicaTopologyMode, the original leader election has been altered for the default SingleReplicaToplogy") + return leaderelectionconverter.LeaderElectionSNOConfig(original) + } + return original +} diff --git a/pkg/controller/controllercmd/builder_test.go b/pkg/controller/controllercmd/builder_test.go index d180d35e0b..64ab488e29 100644 --- a/pkg/controller/controllercmd/builder_test.go +++ b/pkg/controller/controllercmd/builder_test.go @@ -3,11 +3,14 @@ package controllercmd import ( "context" "fmt" + "reflect" "strings" "testing" "time" + configv1 "github.com/openshift/api/config/v1" "github.com/openshift/library-go/pkg/operator/events/eventstesting" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func TestControllerBuilder_getOnStartedLeadingFunc(t *testing.T) { @@ -184,3 +187,67 @@ func TestControllerBuilder_OnLeadingFunc_NonZeroExit(t *testing.T) { t.Fatal("unexpected timeout while terminating") } } + +func TestInfraStatusTopologyLeaderElection(t *testing.T) { + testCases := []struct { + desc string + infra *configv1.InfrastructureStatus + original configv1.LeaderElection + expected configv1.LeaderElection + }{ + { + desc: "should not set SNO config when infra is nil", + infra: nil, + original: configv1.LeaderElection{ + LeaseDuration: metav1.Duration{Duration: 60 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 40 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 20 * time.Second}, + }, + expected: configv1.LeaderElection{ + LeaseDuration: 
metav1.Duration{Duration: 60 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 40 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 20 * time.Second}, + }, + }, + { + desc: "should not set SNO config when infra is HighlyAvailableTopologyMode", + infra: &configv1.InfrastructureStatus{ + ControlPlaneTopology: configv1.HighlyAvailableTopologyMode, + }, + original: configv1.LeaderElection{ + LeaseDuration: metav1.Duration{Duration: 60 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 40 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 20 * time.Second}, + }, + expected: configv1.LeaderElection{ + LeaseDuration: metav1.Duration{Duration: 60 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 40 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 20 * time.Second}, + }, + }, + { + desc: "should set SNO leader election config when SingleReplicaToplogy Controlplane", + infra: &configv1.InfrastructureStatus{ + ControlPlaneTopology: configv1.SingleReplicaTopologyMode, + }, + original: configv1.LeaderElection{ + LeaseDuration: metav1.Duration{Duration: 60 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 40 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 20 * time.Second}, + }, + expected: configv1.LeaderElection{ + LeaseDuration: metav1.Duration{Duration: 270 * time.Second}, + RenewDeadline: metav1.Duration{Duration: 240 * time.Second}, + RetryPeriod: metav1.Duration{Duration: 60 * time.Second}, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + leConfig := infraStatusTopologyLeaderElection(tc.infra, tc.original) + if !reflect.DeepEqual(tc.expected, leConfig) { + t.Errorf("expected %#v, got %#v", tc.expected, leConfig) + } + }) + } +}