From 103fb074fa22c18bf2b63fa4a68c588ef863d0ed Mon Sep 17 00:00:00 2001 From: Diana Tuck Date: Fri, 6 Feb 2026 09:22:56 -0800 Subject: [PATCH] fix(google): do not retry HTTP 409 Conflict errors HTTP 409 (Conflict) errors from Google Cloud DNS indicate permanent state conflicts (e.g., resource already exists) that won't resolve by retrying. Previously, all API errors were wrapped as SoftErrors, causing the controller to retry indefinitely. This change adds detection for 409 errors and returns them as regular errors (not SoftErrors), preventing unnecessary retry loops while maintaining retry behavior for other transient errors. Co-Authored-By: Claude Sonnet 4.5 --- provider/google/google.go | 19 ++++++++++++ provider/google/google_test.go | 55 ++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/provider/google/google.go b/provider/google/google.go index 80514a6cc8..9ea3f88423 100644 --- a/provider/google/google.go +++ b/provider/google/google.go @@ -18,7 +18,9 @@ package google import ( "context" + "errors" "fmt" + "net/http" "sort" "time" @@ -40,6 +42,17 @@ const ( defaultTTL = 300 ) +// is409Error checks if an error is an HTTP 409 Conflict error from the Google API. +// 409 Conflict errors typically indicate a resource already exists or a state +// conflict that won't be resolved by retrying. +func is409Error(err error) bool { + var apiErr *googleapi.Error + if errors.As(err, &apiErr) { + return apiErr.Code == http.StatusConflict + } + return false +} + type managedZonesCreateCallInterface interface { Do(opts ...googleapi.CallOption) (*dns.ManagedZone, error) } @@ -297,6 +310,12 @@ func (p *GoogleProvider) submitChange(ctx context.Context, change *dns.Change) e } if _, err := p.changesClient.Create(p.project, zone, c).Do(); err != nil { + // Don't retry HTTP 409 (Conflict) errors as they indicate a permanent + // state conflict (e.g., resource already exists) that won't resolve by retrying + if is409Error(err) { + log.Infof("Received HTTP 409 Conflict error, not retrying: %v", err) + return fmt.Errorf("failed to create changes: %w", err) + } return provider.NewSoftErrorf("failed to create changes: %w", err) } diff --git a/provider/google/google_test.go b/provider/google/google_test.go index 1ea5696e48..09d6174b81 100644 --- a/provider/google/google_test.go +++ b/provider/google/google_test.go @@ -172,6 +172,18 @@ func (m *mockChangesClient) Create(project string, managedZone string, change *d return &mockChangesCreateCall{project: project, managedZone: managedZone, change: change} } +type mockChangesCreateCall409 struct{} + +func (m *mockChangesCreateCall409) Do(_ ...googleapi.CallOption) (*dns.Change, error) { + return nil, &googleapi.Error{Code: http.StatusConflict} +} + +type mockChangesClient409 struct{} + +func (m *mockChangesClient409) Create(project string, managedZone string, change *dns.Change) changesCreateCallInterface { + return &mockChangesCreateCall409{} +} + func zoneKey(project, zoneName string) string { return project + "/" + zoneName } @@ -632,6 +644,49 @@ func TestSoftErrListRecordsConflict(t *testing.T) { require.Empty(t, records) } +func TestApplyChanges409ErrorNotRetried(t *testing.T) { + // Create a provider with a changes client that returns 409 errors + p := &GoogleProvider{ + project: "zalando-external-dns-test", + dryRun: false, + domainFilter: endpoint.NewDomainFilter([]string{"ext-dns-test-2.gcp.zalan.do."}), + zoneIDFilter: provider.NewZoneIDFilter([]string{""}), + resourceRecordSetsClient: &mockResourceRecordSetsClient{}, + managedZonesClient: &mockManagedZonesClient{}, + changesClient: &mockChangesClient409{}, + ctx: context.Background(), + } + + // Create a test zone + createZone(t, p, &dns.ManagedZone{ + Name: "zone-1-ext-dns-test-2-gcp-zalan-do", + DnsName: "zone-1.ext-dns-test-2.gcp.zalan.do.", + }) + + // Temporarily use the normal changes client to set up the zone properly + p.changesClient = &mockChangesClient{} + // Now switch back to the 409-returning client + p.changesClient = &mockChangesClient409{} + + // Try to apply changes + changes := &plan.Changes{ + Create: []*endpoint.Endpoint{ + endpoint.NewEndpoint("test.zone-1.ext-dns-test-2.gcp.zalan.do", endpoint.RecordTypeA, "1.2.3.4"), + }, + } + + err := p.ApplyChanges(context.Background(), changes) + + // Verify that we got an error + require.Error(t, err) + + // Verify that the error is NOT a SoftError (i.e., it won't be retried) + require.NotErrorIs(t, err, provider.SoftError) + + // Verify that the error contains information about the 409 + require.Contains(t, err.Error(), "failed to create changes") +} + func sortChangesByName(cs *dns.Change) { sort.SliceStable(cs.Additions, func(i, j int) bool { return cs.Additions[i].Name < cs.Additions[j].Name