diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go
index 7e3ecf268695..d87c2d9fa617 100644
--- a/pkg/cluster/cluster.go
+++ b/pkg/cluster/cluster.go
@@ -5,6 +5,7 @@ import (
 	"net/url"
 	"runtime"
 	"strings"
+	"time"
 
 	"github.com/k3s-io/k3s/pkg/clientaccess"
 	"github.com/k3s-io/k3s/pkg/cluster/managed"
@@ -13,6 +14,7 @@ import (
 	"github.com/k3s-io/kine/pkg/endpoint"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
+	"k8s.io/apimachinery/pkg/util/wait"
 	utilsnet "k8s.io/utils/net"
 )
 
@@ -107,11 +109,14 @@ func (c *Cluster) Start(ctx context.Context) (<-chan struct{}, error) {
 				}
 
 				if !c.config.EtcdDisableSnapshots {
-					if err := c.managedDB.ReconcileSnapshotData(ctx); err != nil {
-						logrus.Errorf("Failed to record snapshots for cluster: %v", err)
-					}
+					wait.PollImmediateUntilWithContext(ctx, time.Second, func(ctx context.Context) (bool, error) {
+						err := c.managedDB.ReconcileSnapshotData(ctx)
+						if err != nil {
+							logrus.Errorf("Failed to record snapshots for cluster: %v", err)
+						}
+						return err == nil, nil
+					})
 				}
-
 				return
 			default:
 				runtime.Gosched()
diff --git a/pkg/etcd/snapshot_controller.go b/pkg/etcd/snapshot_controller.go
index 295f0117e43e..d4b22755e03f 100644
--- a/pkg/etcd/snapshot_controller.go
+++ b/pkg/etcd/snapshot_controller.go
@@ -33,6 +33,13 @@ const (
 var (
 	snapshotConfigMapName = version.Program + "-etcd-snapshots"
 	errNotReconciled      = errors.New("no nodes have reconciled ETCDSnapshotFile resources")
+	reconcileBackoff      = wait.Backoff{
+		Steps:    9,
+		Duration: 10 * time.Millisecond,
+		Factor:   3.0,
+		Jitter:   0.1,
+		Cap:      30 * time.Second,
+	}
 )
 
 type etcdSnapshotHandler struct {
@@ -62,7 +69,7 @@ func (e *etcdSnapshotHandler) sync(key string, esf *apisv1.ETCDSnapshotFile) (*a
 		err := e.reconcile()
 		if err == errNotReconciled {
 			logrus.Debugf("Failed to reconcile snapshot ConfigMap: %v, requeuing", err)
-			e.snapshots.Enqueue(key)
+			e.snapshots.EnqueueAfter(key, reconcileBackoff.Step())
 			return nil, nil
 		}
 		return nil, err