From aca04ff42ced8e0e9af2e5faeb174c8e69f7d970 Mon Sep 17 00:00:00 2001 From: Alan Clucas Date: Wed, 25 Feb 2026 16:55:32 +0000 Subject: [PATCH] fix: remove 1s sleep and INFORMER_WRITE_BACK from persistUpdates The INFORMER_WRITE_BACK env var controlled whether to write back to the informer cache or sleep for 1 second after persisting workflow updates. Alternative mechanisms now prevent reprocessing of stale workflow state, making both the sleep and the write-back unnecessary. https://claude.ai/code/session_0141wLcUV65wmraQEVWnLkv6 Signed-off-by: Alan Clucas --- docs/environment-variables.md | 1 - docs/upgrading.md | 9 +++++++++ workflow/controller/operator.go | 25 ------------------------- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 7c7744f2ed65..428df6f061d0 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -32,7 +32,6 @@ This document outlines environment variables that can be used to customize behav | `EXPRESSION_TEMPLATES` | `bool` | `true` | Escape hatch to disable expression templates. | | `EVENT_AGGREGATION_WITH_ANNOTATIONS` | `bool` | `false` | Whether event annotations will be used when aggregating events. | | `GZIP_IMPLEMENTATION` | `string` | `PGZip` | The implementation of compression/decompression. Currently only "`PGZip`" and "`GZip`" are supported. | -| `INFORMER_WRITE_BACK` | `bool` | `false` | Whether to write back to informer instead of catching up. | | `HEALTHZ_AGE` | `time.Duration` | `5m` | How old a un-reconciled workflow is to report unhealthy. | | `INDEX_WORKFLOW_SEMAPHORE_KEYS` | `bool` | `true` | Whether or not to index semaphores. | | `LEADER_ELECTION_IDENTITY` | `string` | Controller's `metadata.name` | The ID used for workflow controllers to elect a leader. 
| diff --git a/docs/upgrading.md b/docs/upgrading.md index 69a50806c3ed..7b9775db0200 100644 --- a/docs/upgrading.md +++ b/docs/upgrading.md @@ -5,6 +5,15 @@ For the upgrading guide to a specific version of workflows change the documentat Breaking changes typically (sometimes we don't realise they are breaking) have "!" in the commit message, as per the [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/#summary). +## Upgrading to v4.1 + +### INFORMER_WRITE_BACK environment variable removed + +The `INFORMER_WRITE_BACK` environment variable has been removed. +This variable controlled whether to write workflow updates back to the informer cache (`true`) or sleep for 1 second (`false`, the default) after persisting updates. +Alternative mechanisms now prevent reprocessing of stale workflow state, making both behaviors unnecessary. +If you have this variable set, you can safely remove it from your configuration. + ## Upgrading to v4.0 ### Deprecations diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go index 5f8c74f5f941..a1b7c2d94dbd 100644 --- a/workflow/controller/operator.go +++ b/workflow/controller/operator.go @@ -816,19 +816,6 @@ func (woc *wfOperationCtx) persistUpdates(ctx context.Context) { woc.log.WithFields(logging.Fields{"resourceVersion": woc.wf.ResourceVersion, "phase": woc.wf.Status.Phase}).Info(ctx, "Workflow update successful") - switch os.Getenv("INFORMER_WRITE_BACK") { - // this does not reduce errors, but does reduce - // conflicts and therefore we log fewer warning messages. - case "true": - if err := woc.writeBackToInformer(); err != nil { - _ = woc.markWorkflowError(ctx, err) - return - } - // no longer write back to informer cache as default (as per v4.0) - case "", "false": - time.Sleep(1 * time.Second) - } - // Make sure the workflow completed. 
if woc.wf.Status.Fulfilled() { woc.controller.metrics.CompleteRealtimeMetricsForWfUID(string(woc.wf.GetUID())) @@ -868,18 +855,6 @@ func (woc *wfOperationCtx) deleteTaskResults(ctx context.Context) error { ) } -func (woc *wfOperationCtx) writeBackToInformer() error { - un, err := wfutil.ToUnstructured(woc.wf) - if err != nil { - return fmt.Errorf("failed to convert workflow to unstructured: %w", err) - } - err = woc.controller.wfInformer.GetStore().Update(un) - if err != nil { - return fmt.Errorf("failed to update informer store: %w", err) - } - return nil -} - // persistWorkflowSizeLimitErr will fail a the workflow with an error when we hit the resource size limit // See https://github.com/argoproj/argo-workflows/issues/913 func (woc *wfOperationCtx) persistWorkflowSizeLimitErr(ctx context.Context, wfClient v1alpha1.WorkflowInterface, err error) {