Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ func NewCommand() *cobra.Command {
selfHealBackoffTimeoutSeconds int
selfHealBackoffFactor int
selfHealBackoffCapSeconds int
selfHealBackoffCooldownSeconds int
syncTimeout int
statusProcessors int
operationProcessors int
Expand Down Expand Up @@ -201,6 +202,7 @@ func NewCommand() *cobra.Command {
time.Duration(appResyncJitter)*time.Second,
time.Duration(selfHealTimeoutSeconds)*time.Second,
selfHealBackoff,
time.Duration(selfHealBackoffCooldownSeconds)*time.Second,
time.Duration(syncTimeout)*time.Second,
time.Duration(repoErrorGracePeriod)*time.Second,
metricsPort,
Expand Down Expand Up @@ -272,6 +274,7 @@ func NewCommand() *cobra.Command {
command.Flags().IntVar(&selfHealBackoffTimeoutSeconds, "self-heal-backoff-timeout-seconds", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_TIMEOUT_SECONDS", 2, 0, math.MaxInt32), "Specifies initial timeout of exponential backoff between self heal attempts")
command.Flags().IntVar(&selfHealBackoffFactor, "self-heal-backoff-factor", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_FACTOR", 3, 0, math.MaxInt32), "Specifies factor of exponential timeout between application self heal attempts")
command.Flags().IntVar(&selfHealBackoffCapSeconds, "self-heal-backoff-cap-seconds", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_CAP_SECONDS", 300, 0, math.MaxInt32), "Specifies max timeout of exponential backoff between application self heal attempts")
command.Flags().IntVar(&selfHealBackoffCooldownSeconds, "self-heal-backoff-cooldown-seconds", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_COOLDOWN_SECONDS", 330, 0, math.MaxInt32), "Specifies period of time the app needs to stay synced before the self heal backoff can reset")
command.Flags().IntVar(&syncTimeout, "sync-timeout", env.ParseNumFromEnv("ARGOCD_APPLICATION_CONTROLLER_SYNC_TIMEOUT", 0, 0, math.MaxInt32), "Specifies the timeout after which a sync would be terminated. 0 means no timeout (default 0).")
command.Flags().Int64Var(&kubectlParallelismLimit, "kubectl-parallelism-limit", env.ParseInt64FromEnv("ARGOCD_APPLICATION_CONTROLLER_KUBECTL_PARALLELISM_LIMIT", 20, 0, math.MaxInt64), "Number of allowed concurrent kubectl fork/execs. Any value less than 1 means no limit.")
command.Flags().BoolVar(&repoServerPlaintext, "repo-server-plaintext", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER_PLAINTEXT", false), "Disable TLS on connections to repo server")
Expand Down
22 changes: 16 additions & 6 deletions controller/appcontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
"k8s.io/utils/ptr"

commitclient "github.com/argoproj/argo-cd/v3/commitserver/apiclient"
"github.com/argoproj/argo-cd/v3/common"
Expand Down Expand Up @@ -133,6 +134,7 @@ type ApplicationController struct {
statusRefreshJitter time.Duration
selfHealTimeout time.Duration
selfHealBackOff *wait.Backoff
selfHealBackoffCooldown time.Duration
syncTimeout time.Duration
db db.ArgoDB
settingsMgr *settings_util.SettingsManager
Expand Down Expand Up @@ -168,6 +170,7 @@ func NewApplicationController(
appResyncJitter time.Duration,
selfHealTimeout time.Duration,
selfHealBackoff *wait.Backoff,
selfHealBackoffCooldown time.Duration,
syncTimeout time.Duration,
repoErrorGracePeriod time.Duration,
metricsPort int,
Expand Down Expand Up @@ -214,6 +217,7 @@ func NewApplicationController(
settingsMgr: settingsMgr,
selfHealTimeout: selfHealTimeout,
selfHealBackOff: selfHealBackoff,
selfHealBackoffCooldown: selfHealBackoffCooldown,
syncTimeout: syncTimeout,
clusterSharding: clusterSharding,
projByNameCache: sync.Map{},
Expand Down Expand Up @@ -2245,17 +2249,22 @@ func (ctrl *ApplicationController) shouldSelfHeal(app *appv1.Application, alread
return true, time.Duration(0)
}

// Reset counter if the prior sync was successful OR if the revision has changed
if !alreadyAttempted || app.Status.Sync.Status == appv1.SyncStatusCodeSynced {
var timeSinceOperation *time.Duration
if app.Status.OperationState.FinishedAt != nil {
timeSinceOperation = ptr.To(time.Since(app.Status.OperationState.FinishedAt.Time))
}

// Reset counter if the prior sync was successful and the cooldown period is over OR if the revision has changed
if !alreadyAttempted || (timeSinceOperation != nil && *timeSinceOperation >= ctrl.selfHealBackoffCooldown && app.Status.Sync.Status == appv1.SyncStatusCodeSynced) {
app.Status.OperationState.Operation.Sync.SelfHealAttemptsCount = 0
}

var retryAfter time.Duration
if ctrl.selfHealBackOff == nil {
if app.Status.OperationState.FinishedAt == nil {
if timeSinceOperation == nil {
retryAfter = ctrl.selfHealTimeout
} else {
retryAfter = ctrl.selfHealTimeout - time.Since(app.Status.OperationState.FinishedAt.Time)
retryAfter = ctrl.selfHealTimeout - *timeSinceOperation
}
} else {
backOff := *ctrl.selfHealBackOff
Expand All @@ -2265,10 +2274,11 @@ func (ctrl *ApplicationController) shouldSelfHeal(app *appv1.Application, alread
for i := 0; i < steps; i++ {
delay = backOff.Step()
}
if app.Status.OperationState.FinishedAt == nil {

if timeSinceOperation == nil {
retryAfter = delay
} else {
retryAfter = delay - time.Since(app.Status.OperationState.FinishedAt.Time)
retryAfter = delay - *timeSinceOperation
}
}
return retryAfter <= 0, retryAfter
Expand Down
15 changes: 12 additions & 3 deletions controller/appcontroller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ func newFakeControllerWithResync(data *fakeData, appResyncPeriod time.Duration,
time.Second,
time.Minute,
nil,
time.Minute,
0,
time.Second*10,
common.DefaultPortArgoCDMetrics,
Expand Down Expand Up @@ -2607,10 +2608,18 @@ func TestSelfHealExponentialBackoff(t *testing.T) {
alreadyAttempted: false,
expectedAttempts: 0,
syncStatus: v1alpha1.SyncStatusCodeOutOfSync,
}, {
}, { // backoff will not reset as finished tme isn't >= cooldown
attempts: 6,
finishedAt: nil,
expectedDuration: 0,
finishedAt: ptr.To(metav1.Now()),
expectedDuration: 120 * time.Second,
shouldSelfHeal: false,
alreadyAttempted: true,
expectedAttempts: 6,
syncStatus: v1alpha1.SyncStatusCodeSynced,
}, { // backoff will reset as finished time is >= cooldown
attempts: 40,
finishedAt: &metav1.Time{Time: time.Now().Add(-(1 * time.Minute))},
expectedDuration: -60 * time.Second,
shouldSelfHeal: true,
alreadyAttempted: true,
expectedAttempts: 0,
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ spec:
name: argocd-cmd-params-cm
key: controller.self.heal.backoff.cap.seconds
optional: true
- name: ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_COOLDOWN_SECONDS
valueFrom:
configMapKeyRef:
name: argocd-cmd-params-cm
key: controller.self.heal.backoff.cooldown.seconds
optional: true
- name: ARGOCD_APPLICATION_CONTROLLER_SYNC_TIMEOUT
valueFrom:
configMapKeyRef:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,12 @@ spec:
name: argocd-cmd-params-cm
key: controller.self.heal.backoff.cap.seconds
optional: true
- name: ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_COOLDOWN_SECONDS
valueFrom:
configMapKeyRef:
name: argocd-cmd-params-cm
key: controller.self.heal.backoff.cooldown.seconds
optional: true
- name: ARGOCD_APPLICATION_CONTROLLER_SYNC_TIMEOUT
valueFrom:
configMapKeyRef:
Expand Down
6 changes: 6 additions & 0 deletions manifests/core-install-with-hydrator.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions manifests/core-install.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions manifests/ha/install-with-hydrator.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions manifests/ha/install.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions manifests/ha/namespace-install-with-hydrator.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions manifests/ha/namespace-install.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions manifests/install-with-hydrator.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions manifests/install.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions manifests/namespace-install-with-hydrator.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions manifests/namespace-install.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading