diff --git a/controller/appcontroller.go b/controller/appcontroller.go index 44d1a49398cf1..27a7e911d6d12 100644 --- a/controller/appcontroller.go +++ b/controller/appcontroller.go @@ -2103,9 +2103,7 @@ func (ctrl *ApplicationController) autoSync(app *appv1.Application, syncStatus * InitiatedBy: appv1.OperationInitiator{Automated: true}, Retry: appv1.RetryStrategy{Limit: 5}, } - if app.Status.OperationState != nil && app.Status.OperationState.Operation.Sync != nil { - op.Sync.SelfHealAttemptsCount = app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount - } + if app.Spec.SyncPolicy.Retry != nil { op.Retry = *app.Spec.SyncPolicy.Retry } @@ -2121,8 +2119,18 @@ func (ctrl *ApplicationController) autoSync(app *appv1.Application, syncStatus * } logCtx.Infof("Skipping auto-sync: most recent sync already to %s", desiredCommitSHA) return nil, 0 - } else if alreadyAttempted && selfHeal { - if shouldSelfHeal, retryAfter := ctrl.shouldSelfHeal(app); shouldSelfHeal { + } else if selfHeal { + shouldSelfHeal, retryAfter := ctrl.shouldSelfHeal(app, alreadyAttempted) + if app.Status.OperationState != nil && app.Status.OperationState.Operation.Sync != nil { + op.Sync.SelfHealAttemptsCount = app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount + } + + if alreadyAttempted { + if !shouldSelfHeal { + logCtx.Infof("Skipping auto-sync: already attempted sync to %s with timeout %v (retrying in %v)", desiredCommitSHA, ctrl.selfHealTimeout, retryAfter) + ctrl.requestAppRefresh(app.QualifiedName(), CompareWithLatest.Pointer(), &retryAfter) + return nil, 0 + } op.Sync.SelfHealAttemptsCount++ for _, resource := range resources { if resource.Status != appv1.SyncStatusCodeSynced { @@ -2133,10 +2141,6 @@ func (ctrl *ApplicationController) autoSync(app *appv1.Application, syncStatus * }) } } - } else { - logCtx.Infof("Skipping auto-sync: already attempted sync to %s with timeout %v (retrying in %v)", desiredCommitSHA, ctrl.selfHealTimeout, retryAfter) - ctrl.requestAppRefresh(app.QualifiedName(), CompareWithLatest.Pointer(), &retryAfter) - return nil, 0 } } ts.AddCheckpoint("already_attempted_check_ms") @@ -2220,11 +2224,16 @@ func alreadyAttemptedSync(app *appv1.Application, commitSHA string, commitSHAsMS } } -func (ctrl *ApplicationController) shouldSelfHeal(app *appv1.Application) (bool, time.Duration) { +func (ctrl *ApplicationController) shouldSelfHeal(app *appv1.Application, alreadyAttempted bool) (bool, time.Duration) { if app.Status.OperationState == nil { return true, time.Duration(0) } + // Reset counter if the prior sync was successful OR if the revision has changed + if !alreadyAttempted || app.Status.Sync.Status == appv1.SyncStatusCodeSynced { + app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount = 0 + } + var retryAfter time.Duration if ctrl.selfHealBackOff == nil { if app.Status.OperationState.FinishedAt == nil { diff --git a/controller/appcontroller_test.go b/controller/appcontroller_test.go index e627f37bd82a8..ecf5c3f4fe6e3 100644 --- a/controller/appcontroller_test.go +++ b/controller/appcontroller_test.go @@ -2548,29 +2548,84 @@ func TestSelfHealExponentialBackoff(t *testing.T) { testCases := []struct { attempts int64 + expectedAttempts int64 finishedAt *metav1.Time expectedDuration time.Duration shouldSelfHeal bool + alreadyAttempted bool + syncStatus v1alpha1.SyncStatusCode }{{ attempts: 0, finishedAt: ptr.To(metav1.Now()), expectedDuration: 0, shouldSelfHeal: true, + alreadyAttempted: true, + expectedAttempts: 0, + syncStatus: v1alpha1.SyncStatusCodeOutOfSync, }, { attempts: 1, finishedAt: ptr.To(metav1.Now()), expectedDuration: 2 * time.Second, shouldSelfHeal: false, + alreadyAttempted: true, + expectedAttempts: 1, + syncStatus: v1alpha1.SyncStatusCodeOutOfSync, }, { attempts: 2, finishedAt: ptr.To(metav1.Now()), expectedDuration: 6 * time.Second, shouldSelfHeal: false, + alreadyAttempted: true, + expectedAttempts: 2, + syncStatus: v1alpha1.SyncStatusCodeOutOfSync, }, { attempts: 3, finishedAt: nil, expectedDuration: 18 * time.Second, shouldSelfHeal: false, + alreadyAttempted: true, + expectedAttempts: 3, + syncStatus: v1alpha1.SyncStatusCodeOutOfSync, + }, { + attempts: 4, + finishedAt: nil, + expectedDuration: 54 * time.Second, + shouldSelfHeal: false, + alreadyAttempted: true, + expectedAttempts: 4, + syncStatus: v1alpha1.SyncStatusCodeOutOfSync, + }, { + attempts: 5, + finishedAt: nil, + expectedDuration: 120 * time.Second, + shouldSelfHeal: false, + alreadyAttempted: true, + expectedAttempts: 5, + syncStatus: v1alpha1.SyncStatusCodeOutOfSync, + }, { + attempts: 6, + finishedAt: nil, + expectedDuration: 120 * time.Second, + shouldSelfHeal: false, + alreadyAttempted: true, + expectedAttempts: 6, + syncStatus: v1alpha1.SyncStatusCodeOutOfSync, + }, { + attempts: 6, + finishedAt: nil, + expectedDuration: 0, + shouldSelfHeal: true, + alreadyAttempted: false, + expectedAttempts: 0, + syncStatus: v1alpha1.SyncStatusCodeOutOfSync, + }, { + attempts: 6, + finishedAt: nil, + expectedDuration: 0, + shouldSelfHeal: true, + alreadyAttempted: true, + expectedAttempts: 0, + syncStatus: v1alpha1.SyncStatusCodeSynced, }} for i := range testCases { @@ -2578,8 +2633,10 @@ func TestSelfHealExponentialBackoff(t *testing.T) { t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount = tc.attempts app.Status.OperationState.FinishedAt = tc.finishedAt - ok, duration := ctrl.shouldSelfHeal(app) + app.Status.Sync.Status = tc.syncStatus + ok, duration := ctrl.shouldSelfHeal(app, tc.alreadyAttempted) require.Equal(t, ok, tc.shouldSelfHeal) + require.Equal(t, tc.expectedAttempts, app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount) assertDurationAround(t, tc.expectedDuration, duration) }) }