12 changes: 8 additions & 4 deletions cmd/package-server-manager/main.go
@@ -17,6 +17,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/manager"

"github.com/openshift/operator-framework-olm/pkg/leaderelection"
controllers "github.com/openshift/operator-framework-olm/pkg/package-server-manager"
//+kubebuilder:scaffold:imports
)
@@ -59,17 +60,20 @@ func run(cmd *cobra.Command, args []string) error {
ctrl.SetLogger(zap.New(zap.UseDevMode(true)))
setupLog := ctrl.Log.WithName("setup")

+ restConfig := ctrl.GetConfigOrDie()
+ le := leaderelection.GetLeaderElectionConfig(setupLog, restConfig, !disableLeaderElection)
+
packageserverCSVFields := fields.Set{"metadata.name": name}
- mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), manager.Options{
+ mgr, err := ctrl.NewManager(restConfig, manager.Options{
Scheme: setupScheme(),
Namespace: namespace,
MetricsBindAddress: defaultMetricsPort,
LeaderElection: !disableLeaderElection,
LeaderElectionNamespace: namespace,
LeaderElectionID: leaderElectionConfigmapName,
- RetryPeriod: timeDurationPtr(defaultRetryPeriod),
- RenewDeadline: timeDurationPtr(defaultRenewDeadline),
- LeaseDuration: timeDurationPtr(defaultLeaseDuration),
+ LeaseDuration: &le.LeaseDuration.Duration,
+ RenewDeadline: &le.RenewDeadline.Duration,
+ RetryPeriod: &le.RetryPeriod.Duration,
HealthProbeBindAddress: healthCheckAddr,
NewCache: cache.BuilderWithOptions(cache.Options{
SelectorsByObject: cache.SelectorsByObject{
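
For context, a minimal sketch (not part of this PR) of how another controller-runtime binary could consume the extracted helper the same way main.go now does; the lock name "example-controller-lock" and the hard-coded enabled flag are illustrative placeholders.

package main

import (
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/manager"

	"github.com/openshift/operator-framework-olm/pkg/leaderelection"
)

func main() {
	log := ctrl.Log.WithName("setup")
	restConfig := ctrl.GetConfigOrDie()

	// Resolve topology-aware timings once, then feed them to the manager.
	le := leaderelection.GetLeaderElectionConfig(log, restConfig, true)

	mgr, err := ctrl.NewManager(restConfig, manager.Options{
		LeaderElection:   !le.Disable,
		LeaderElectionID: "example-controller-lock",
		LeaseDuration:    &le.LeaseDuration.Duration,
		RenewDeadline:    &le.RenewDeadline.Duration,
		RetryPeriod:      &le.RetryPeriod.Duration,
	})
	if err != nil {
		log.Error(err, "failed to construct manager")
		return
	}
	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
		log.Error(err, "manager exited")
	}
}
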
16 changes: 0 additions & 16 deletions cmd/package-server-manager/util.go
@@ -1,8 +1,6 @@
package main

import (
"time"

configv1 "github.com/openshift/api/config/v1"
olmv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"

@@ -11,20 +9,6 @@ import (
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
)

- const (
- // Note: In order for SNO to GA, controllers need to handle ~60s of API server
- // disruptions when attempting to get and sustain leader election:
- // - https://github.com/openshift/library-go/pull/1104#discussion_r649313822
- // - https://bugzilla.redhat.com/show_bug.cgi?id=1985697
- defaultRetryPeriod = 30 * time.Second
- defaultRenewDeadline = 60 * time.Second
- defaultLeaseDuration = 90 * time.Second
- )
-
- func timeDurationPtr(t time.Duration) *time.Duration {
- return &t
- }
-
func setupScheme() *runtime.Scheme {
scheme := runtime.NewScheme()
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
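
The deleted timeDurationPtr helper is no longer needed because manager.Options takes *time.Duration values and the address of a metav1.Duration's embedded field can be taken directly. A minimal sketch of the pattern, assuming the configv1 and metav1 types used elsewhere in this PR:

package main

import (
	"fmt"
	"time"

	configv1 "github.com/openshift/api/config/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	le := configv1.LeaderElection{
		LeaseDuration: metav1.Duration{Duration: 137 * time.Second},
	}
	// Taking the field's address yields the *time.Duration that
	// manager.Options expects, with no helper function.
	d := &le.LeaseDuration.Duration
	fmt.Println(*d) // 2m17s
}
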
87 changes: 87 additions & 0 deletions pkg/leaderelection/leaderelection.go
@@ -0,0 +1,87 @@
package leaderelection

import (
"context"
"time"

"github.com/go-logr/logr"

configv1 "github.com/openshift/api/config/v1"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
infraResourceName = "cluster"

// Defaults follow conventions
// https://github.com/openshift/enhancements/blob/master/CONVENTIONS.md#high-availability
// Impl Calculations: https://github.com/openshift/library-go/commit/7e7d216ed91c3119800219c9194e5e57113d059a
defaultLeaseDuration = 137 * time.Second
defaultRenewDeadline = 107 * time.Second
defaultRetryPeriod = 26 * time.Second

// Default leader election for SNO environments
// Impl Calculations:
// https://github.com/openshift/library-go/commit/2612981f3019479805ac8448b997266fc07a236a#diff-61dd95c7fd45fa18038e825205fbfab8a803f1970068157608b6b1e9e6c27248R127
defaultSingleNodeLeaseDuration = 270 * time.Second
defaultSingleNodeRenewDeadline = 240 * time.Second
defaultSingleNodeRetryPeriod = 60 * time.Second
)

var (
defaultLeaderElectionConfig = configv1.LeaderElection{
LeaseDuration: metav1.Duration{Duration: defaultLeaseDuration},
RenewDeadline: metav1.Duration{Duration: defaultRenewDeadline},
RetryPeriod: metav1.Duration{Duration: defaultRetryPeriod},
}
)

func GetLeaderElectionConfig(log logr.Logger, restConfig *rest.Config, enabled bool) (defaultConfig configv1.LeaderElection) {
client, err := client.New(restConfig, client.Options{})
if err != nil {
log.Error(err, "unable to create client, using HA cluster values for leader election")
return defaultLeaderElectionConfig
}
configv1.AddToScheme(client.Scheme())
return getLeaderElectionConfig(log, client, enabled)
}

func getLeaderElectionConfig(log logr.Logger, client client.Client, enabled bool) (config configv1.LeaderElection) {
config = defaultLeaderElectionConfig
config.Disable = !enabled
if enabled {
ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(time.Second*3))
defer cancel()
infra, err := getClusterInfraStatus(ctx, client)
if err != nil {
log.Error(err, "unable to get cluster infrastructure status, using HA cluster values for leader election")
return
}
if infra != nil && infra.ControlPlaneTopology == configv1.SingleReplicaTopologyMode {
return leaderElectionSNOConfig(config)
}
}
return
}

func leaderElectionSNOConfig(config configv1.LeaderElection) configv1.LeaderElection {
ret := *(&config).DeepCopy()
ret.LeaseDuration.Duration = defaultSingleNodeLeaseDuration
ret.RenewDeadline.Duration = defaultSingleNodeRenewDeadline
ret.RetryPeriod.Duration = defaultSingleNodeRetryPeriod
return ret
}

// Retrieve the cluster status, used to determine if we should use different leader election.
func getClusterInfraStatus(ctx context.Context, client client.Client) (*configv1.InfrastructureStatus, error) {
infra := &configv1.Infrastructure{}
err := client.Get(ctx, types.NamespacedName{Name: infraResourceName}, infra)
if err != nil {
return nil, err
}
return &infra.Status, nil
}
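
As a sanity check (not in the PR), both value sets satisfy the constraints that k8s.io/client-go/tools/leaderelection enforces when constructing a LeaderElector: LeaseDuration must exceed RenewDeadline, and RenewDeadline must exceed JitterFactor (1.2) times RetryPeriod. A short sketch verifying the arithmetic:

package main

import (
	"fmt"
	"time"
)

// jitterFactor mirrors the JitterFactor constant in
// k8s.io/client-go/tools/leaderelection.
const jitterFactor = 1.2

func valid(lease, renew, retry time.Duration) bool {
	return lease > renew && renew > time.Duration(jitterFactor*float64(retry))
}

func main() {
	// HA defaults: 137s > 107s, and 107s > 1.2*26s = 31.2s
	fmt.Println("HA:", valid(137*time.Second, 107*time.Second, 26*time.Second))
	// SNO defaults: 270s > 240s, and 240s > 1.2*60s = 72s
	fmt.Println("SNO:", valid(270*time.Second, 240*time.Second, 60*time.Second))
}

Both print true, so neither configuration would be rejected at startup.
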
102 changes: 102 additions & 0 deletions pkg/leaderelection/leaderelection_test.go
@@ -0,0 +1,102 @@
package leaderelection

import (
"reflect"
"testing"

configv1 "github.com/openshift/api/config/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"

ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

func TestGetLeaderElectionConfig(t *testing.T) {
sch := runtime.NewScheme()
configv1.AddToScheme(sch)
testCases := []struct {
desc string
enabled bool
clusterInfra configv1.Infrastructure
expected configv1.LeaderElection
}{
{
desc: "single node leader election values when ControlPlaneTopology is SingleReplicaTopologyMode",
enabled: true,
clusterInfra: configv1.Infrastructure{
ObjectMeta: metav1.ObjectMeta{Name: infraResourceName},
Status: configv1.InfrastructureStatus{
ControlPlaneTopology: configv1.SingleReplicaTopologyMode,
}},
expected: configv1.LeaderElection{
Disable: false,
LeaseDuration: metav1.Duration{
Duration: defaultSingleNodeLeaseDuration,
},
RenewDeadline: metav1.Duration{
Duration: defaultSingleNodeRenewDeadline,
},
RetryPeriod: metav1.Duration{
Duration: defaultSingleNodeRetryPeriod,
},
},
},
{
desc: "ha leader election values when ControlPlaneTopology is HighlyAvailableTopologyMode",
enabled: true,
clusterInfra: configv1.Infrastructure{
ObjectMeta: metav1.ObjectMeta{Name: infraResourceName},
Status: configv1.InfrastructureStatus{
ControlPlaneTopology: configv1.HighlyAvailableTopologyMode,
}},
expected: configv1.LeaderElection{
Disable: false,
LeaseDuration: metav1.Duration{
Duration: defaultLeaseDuration,
},
RenewDeadline: metav1.Duration{
Duration: defaultRenewDeadline,
},
RetryPeriod: metav1.Duration{
Duration: defaultRetryPeriod,
},
},
},
{
desc: "when disabled the default HA values should be returned",
enabled: false,
clusterInfra: configv1.Infrastructure{
ObjectMeta: metav1.ObjectMeta{Name: infraResourceName},
Status: configv1.InfrastructureStatus{
ControlPlaneTopology: configv1.SingleReplicaTopologyMode,
}},
expected: configv1.LeaderElection{
Disable: true,
LeaseDuration: metav1.Duration{
Duration: defaultLeaseDuration,
},
RenewDeadline: metav1.Duration{
Duration: defaultRenewDeadline,
},
RetryPeriod: metav1.Duration{
Duration: defaultRetryPeriod,
},
},
},
}

for _, tC := range testCases {
t.Run(tC.desc, func(t *testing.T) {
client := fake.NewClientBuilder().
WithRuntimeObjects(&tC.clusterInfra).WithScheme(sch).Build()

setupLog := ctrl.Log.WithName("leaderelection_config_testing")

result := getLeaderElectionConfig(setupLog, client, tC.enabled)
if !reflect.DeepEqual(result, tC.expected) {
t.Errorf("expected %+v but got %+v", tC.expected, result)
}
})
}
}
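
The table-driven cases above run against a fake client, so they need no live cluster; assuming a standard Go toolchain, they can be exercised in isolation with:

go test ./pkg/leaderelection/... -run TestGetLeaderElectionConfig -v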