From 12253b5c271d02dc7af7bab293ef9374d1f235f8 Mon Sep 17 00:00:00 2001 From: Damiano Donati Date: Wed, 18 Jun 2025 14:58:57 +0200 Subject: [PATCH] fix: controllers: guard on empty .status.authoritativeAPI --- pkg/controller/machine/controller.go | 96 +++++++++++----- .../machine/machine_controller_test.go | 39 +++++-- pkg/controller/machineset/controller.go | 108 ++++++++++-------- .../machineset/machineset_controller_test.go | 47 +++++--- pkg/util/conditions/gettersetter.go | 14 +++ 5 files changed, 200 insertions(+), 104 deletions(-) diff --git a/pkg/controller/machine/controller.go b/pkg/controller/machine/controller.go index 0bb0b3d03..51f430262 100644 --- a/pkg/controller/machine/controller.go +++ b/pkg/controller/machine/controller.go @@ -177,43 +177,60 @@ func (r *ReconcileMachine) Reconcile(ctx context.Context, request reconcile.Requ originalConditions := conditions.DeepCopyConditions(m.Status.Conditions) if r.gate.Enabled(featuregate.Feature(openshiftfeatures.FeatureGateMachineAPIMigration)) { - // Check Status.AuthoritativeAPI - // If not MachineAPI. Set the paused condition true and return early. - // - // Once we have a webhook, we want to remove the check that the AuthoritativeAPI - // field is populated. - if m.Status.AuthoritativeAPI != "" && - m.Status.AuthoritativeAPI != machinev1.MachineAuthorityMachineAPI { - conditions.Set(m, conditions.TrueConditionWithReason( - PausedCondition, - PausedConditionReason, - "The AuthoritativeAPI is set to %s", string(m.Status.AuthoritativeAPI), - )) - if patchErr := r.updateStatus(ctx, m, ptr.Deref(m.Status.Phase, ""), nil, originalConditions); patchErr != nil { - klog.Errorf("%v: error patching status: %v", machineName, patchErr) + switch m.Status.AuthoritativeAPI { + case "": + // An empty .status.authoritativeAPI normally means the resource has not yet been reconciled + // by the migration controller, which has the responsibility for propagating .spec.authoritativeAPI to the status. + // Pause the resource and take no further action but return until that field is populated. + desiredCondition := conditions.TrueConditionWithReason( + PausedCondition, PausedConditionReason, + "The AuthoritativeAPI status is not yet set", + ) + + if _, err := r.ensureUpdatedPausedCondition(ctx, m, desiredCondition, + fmt.Sprintf("%v: machine .status.authoritativeAPI is not yet set, ensuring machine is paused", machineName)); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure paused condition: %w", err) } klog.Infof("%v: machine is paused, taking no further action", machineName) + return reconcile.Result{}, nil - } - var pausedFalseReason string - if m.Status.AuthoritativeAPI != "" { - pausedFalseReason = fmt.Sprintf("The AuthoritativeAPI is set to %s", string(m.Status.AuthoritativeAPI)) - } else { - pausedFalseReason = "The AuthoritativeAPI is not set" - } + case machinev1.MachineAuthorityClusterAPI, machinev1.MachineAuthorityMigrating: + // In cases when .status.authoritativeAPI is set to machinev1.MachineAuthorityClusterAPI, machinev1.MachineAuthorityMigrating + // the resource should be paused and not reconciled further. + desiredCondition := conditions.TrueConditionWithReason( + PausedCondition, PausedConditionReason, + "The AuthoritativeAPI status is set to '%s'", string(m.Status.AuthoritativeAPI), + ) + + if _, err := r.ensureUpdatedPausedCondition(ctx, m, desiredCondition, + fmt.Sprintf("%v: machine .status.authoritativeAPI is set to '%s', ensuring machine is paused", machineName, m.Status.AuthoritativeAPI)); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure paused condition: %w", err) + } - // Set the paused condition to false, continue reconciliation - conditions.Set(m, conditions.FalseCondition( - PausedCondition, - NotPausedConditionReason, - machinev1.ConditionSeverityInfo, - "%s", - pausedFalseReason, - )) - if patchErr := r.updateStatus(ctx, m, ptr.Deref(m.Status.Phase, ""), nil, originalConditions); patchErr != nil { - klog.Errorf("%v: error patching status: %v", machineName, patchErr) + klog.Infof("%v: machine is paused, taking no further action", machineName) + + return reconcile.Result{}, nil + + case machinev1.MachineAuthorityMachineAPI: + // The authority is MachineAPI and the resource should not be paused. + desiredCondition := conditions.FalseCondition( + PausedCondition, NotPausedConditionReason, machinev1.ConditionSeverityInfo, "%s", + fmt.Sprintf("The AuthoritativeAPI status is set to '%s'", string(m.Status.AuthoritativeAPI)), + ) + + if updated, err := r.ensureUpdatedPausedCondition(ctx, m, desiredCondition, + fmt.Sprintf("%v: machine .status.authoritativeAPI is set to '%s', unpausing machine", machineName, m.Status.AuthoritativeAPI)); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure paused condition: %w", err) + } else if updated { + klog.Infof("%v: setting machine paused condition to false", machineName) + } + + // Fallthrough and continue reconcilation. + default: + klog.Errorf("%v: invalid .status.authoritativeAPI '%s'", machineName, m.Status.AuthoritativeAPI) + return reconcile.Result{}, nil // Do not return an error to avoid immediate requeue. } } @@ -433,6 +450,23 @@ func (r *ReconcileMachine) deleteNode(ctx context.Context, name string) error { return r.Client.Delete(ctx, &node) } +// ensureUpdatedPausedCondition updates the paused condition if needed. +func (r *ReconcileMachine) ensureUpdatedPausedCondition(ctx context.Context, m *machinev1.Machine, desiredCondition *machinev1.Condition, logMessage string) (bool, error) { + oldM := m.DeepCopy() + if !conditions.IsEquivalentTo(conditions.Get(m, PausedCondition), desiredCondition) { + klog.Info(logMessage) + conditions.Set(m, desiredCondition) + if err := r.updateStatus(ctx, m, ptr.Deref(m.Status.Phase, ""), nil, oldM.Status.Conditions); err != nil { + klog.Errorf("%v: error updating status: %v", oldM.Name, err) + return false, fmt.Errorf("error updating status for machine %s: %w", oldM.Name, err) + } + + return true, nil + } + + return false, nil +} + func delayIfRequeueAfterError(err error) (reconcile.Result, error) { var requeueAfterError *RequeueAfterError if errors.As(err, &requeueAfterError) { diff --git a/pkg/controller/machine/machine_controller_test.go b/pkg/controller/machine/machine_controller_test.go index cae799958..186df8a09 100644 --- a/pkg/controller/machine/machine_controller_test.go +++ b/pkg/controller/machine/machine_controller_test.go @@ -135,7 +135,7 @@ var _ = Describe("Machine Reconciler", func() { It("Should set the Paused condition appropriately", func() { - By("Creating the Machine") + By("Creating the Machine (empty status.authoritativeAPI)") Expect(k8sClient.Create(ctx, instance)).To(Succeed()) By("Setting the AuthoritativeAPI to ClusterAPI") @@ -143,11 +143,12 @@ var _ = Describe("Machine Reconciler", func() { instance.Status.AuthoritativeAPI = machinev1.MachineAuthorityClusterAPI })).Should(Succeed()) - By("Verifying the paused condition is approproately set to true") + By("Verifying the paused condition is appropriately set to true") Eventually(k.Object(instance), timeout).Should(SatisfyAll( HaveField("Status.Conditions", ContainElement(SatisfyAll( HaveField("Type", Equal(PausedCondition)), HaveField("Status", Equal(corev1.ConditionTrue)), + HaveField("Message", Equal("The AuthoritativeAPI status is set to 'ClusterAPI'")), ))), HaveField("Status.AuthoritativeAPI", Equal(machinev1.MachineAuthorityClusterAPI)), )) @@ -189,36 +190,50 @@ var _ = Describe("Machine Reconciler", func() { }) }() - By("Transitioning the AuthoritativeAPI though 'Migrating' to MachineAPI") + By("Changing status.authoritativeAPI from ClusterAPI to Migrating") Eventually(k.UpdateStatus(instance, func() { instance.Status.AuthoritativeAPI = machinev1.MachineAuthorityMigrating })).Should(Succeed()) - By("Updating the AuthoritativeAPI from Migrating to MachineAPI") + By("Verifying the paused condition is appropriately set to true") + Eventually(k.Object(instance), timeout).Should(SatisfyAll( + HaveField("Status.Conditions", ContainElement(SatisfyAll( + HaveField("Type", Equal(PausedCondition)), + HaveField("Status", Equal(corev1.ConditionTrue)), + HaveField("Message", Equal("The AuthoritativeAPI status is set to 'Migrating'")), + ))), + HaveField("Status.AuthoritativeAPI", Equal(machinev1.MachineAuthorityMigrating)), + )) + + By("Changing status.authoritativeAPI from Migrating to MachineAPI") Eventually(k.UpdateStatus(instance, func() { instance.Status.AuthoritativeAPI = machinev1.MachineAuthorityMachineAPI })).Should(Succeed()) - By("Verifying the paused condition is approproately set to false") + By("Verifying the paused condition is appropriately set to false") Eventually(k.Object(instance), timeout).Should(SatisfyAll( HaveField("Status.Conditions", ContainElement(SatisfyAll( HaveField("Type", Equal(PausedCondition)), HaveField("Status", Equal(corev1.ConditionFalse)), + HaveField("Message", Equal("The AuthoritativeAPI status is set to 'MachineAPI'")), ))), HaveField("Status.AuthoritativeAPI", Equal(machinev1.MachineAuthorityMachineAPI)), )) - By("Unsetting the AuthoritativeAPI field in the status") + By("Changing status.authoritativeAPI from MachineAPI to empty") Eventually(k.UpdateStatus(instance, func() { instance.Status.AuthoritativeAPI = "" })).Should(Succeed()) - By("Verifying the paused condition is still approproately set to false") - Eventually(k.Object(instance), timeout).Should(HaveField("Status.Conditions", ContainElement(SatisfyAll( - HaveField("Type", Equal(PausedCondition)), - HaveField("Status", Equal(corev1.ConditionFalse)), - HaveField("Message", Equal("The AuthoritativeAPI is not set")), - )))) + By("Verifying the paused condition is still appropriately set to true when empty status.authoritativeAPI") + Eventually(k.Object(instance), timeout).Should(SatisfyAll( + HaveField("Status.Conditions", ContainElement(SatisfyAll( + HaveField("Type", Equal(PausedCondition)), + HaveField("Status", Equal(corev1.ConditionTrue)), + HaveField("Message", Equal("The AuthoritativeAPI status is not yet set")), + ))), + HaveField("Status.AuthoritativeAPI", BeEquivalentTo("")), + )) }) }) }) diff --git a/pkg/controller/machineset/controller.go b/pkg/controller/machineset/controller.go index 4329f85b7..e0922d86f 100644 --- a/pkg/controller/machineset/controller.go +++ b/pkg/controller/machineset/controller.go @@ -174,59 +174,60 @@ func (r *ReconcileMachineSet) Reconcile(ctx context.Context, request reconcile.R } if r.gate.Enabled(featuregate.Feature(openshiftfeatures.FeatureGateMachineAPIMigration)) { - machineSetName := machineSet.GetName() - machineSetCopy := machineSet.DeepCopy() - // Check Status.AuthoritativeAPI. If it's not set to MachineAPI. Set the - // paused condition true and return early. - // - // Once we have a webhook, we want to remove the check that the AuthoritativeAPI - // field is populated. - if machineSet.Status.AuthoritativeAPI != "" && - machineSet.Status.AuthoritativeAPI != machinev1.MachineAuthorityMachineAPI { - conditions.Set(machineSetCopy, conditions.TrueConditionWithReason( - machine.PausedCondition, - machine.PausedConditionReason, - "The AuthoritativeAPI is set to %s", string(machineSet.Status.AuthoritativeAPI), - )) - - _, err := updateMachineSetStatus(r.Client, machineSet, machineSetCopy.Status) - if err != nil && !apierrors.IsNotFound(err) { - klog.Errorf("%v: error updating status: %v", machineSetName, err) - return reconcile.Result{}, fmt.Errorf("error updating status: %w", err) - } else if apierrors.IsNotFound(err) { - return reconcile.Result{}, nil + switch machineSet.Status.AuthoritativeAPI { + case "": + // An empty .status.authoritativeAPI normally means the resource has not yet been reconciled + // by the migration controller, which has the responsibility for propagating .spec.authoritativeAPI to the status. + // Pause the resource and take no further action but return until that field is populated. + desiredCondition := conditions.TrueConditionWithReason( + machine.PausedCondition, machine.PausedConditionReason, + "The AuthoritativeAPI status is not yet set", + ) + + if _, err := r.ensureUpdatedPausedCondition(machineSet, desiredCondition, + fmt.Sprintf("%v: machine set .status.authoritativeAPI is not yet set, ensuring machine set is paused", machineSet.Name)); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure paused condition: %w", err) } + klog.Infof("%v: machine set is paused, taking no further action", machineSet.Name) - klog.Infof("%v: machine set is paused, taking no further action", machineSetName) return reconcile.Result{}, nil - } - var pausedFalseReason string - if machineSet.Status.AuthoritativeAPI != "" { - pausedFalseReason = fmt.Sprintf("The AuthoritativeAPI is set to %s", string(machineSet.Status.AuthoritativeAPI)) - } else { - pausedFalseReason = "The AuthoritativeAPI is not set" - } + case machinev1.MachineAuthorityClusterAPI, machinev1.MachineAuthorityMigrating: + // In case .status.authoritativeAPI is set to machinev1.MachineAuthorityClusterAPI, machinev1.MachineAuthorityMigrating + // the resource should be paused and not reconciled further. + desiredCondition := conditions.TrueConditionWithReason( + machine.PausedCondition, machine.PausedConditionReason, + "The AuthoritativeAPI status is set to '%s'", string(machineSet.Status.AuthoritativeAPI), + ) + + if _, err := r.ensureUpdatedPausedCondition(machineSet, desiredCondition, + fmt.Sprintf("%v: machine set .status.authoritativeAPI is set to '%s', ensuring machine set is paused", machineSet.Name, machineSet.Status.AuthoritativeAPI)); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure paused condition: %w", err) + } - // Set the paused condition to false, continue reconciliation - conditions.Set(machineSetCopy, conditions.FalseCondition( - machine.PausedCondition, - machine.NotPausedConditionReason, - machinev1.ConditionSeverityInfo, - "%s", - pausedFalseReason, - )) + klog.Infof("%v: machine set is paused, taking no further action", machineSet.Name) - var err error - machineSet, err = updateMachineSetStatus(r.Client, machineSet, machineSetCopy.Status) - if err != nil && !apierrors.IsNotFound(err) { - klog.Errorf("%v: error updating status: %v", machineSetName, err) - return reconcile.Result{}, fmt.Errorf("error updating status: %w", err) - } else if apierrors.IsNotFound(err) { return reconcile.Result{}, nil - } - klog.Infof("%v: setting paused to false and continuing reconcile", machineSetName) + case machinev1.MachineAuthorityMachineAPI: + // The authority is MachineAPI and the resource should not be paused. + desiredCondition := conditions.FalseCondition( + machine.PausedCondition, machine.NotPausedConditionReason, machinev1.ConditionSeverityInfo, "%s", + fmt.Sprintf("The AuthoritativeAPI status is set to '%s'", string(machineSet.Status.AuthoritativeAPI)), + ) + + if updated, err := r.ensureUpdatedPausedCondition(machineSet, desiredCondition, + fmt.Sprintf("%v: machine set .status.authoritativeAPI is set to '%s', unpausing machine set", machineSet.Name, machineSet.Status.AuthoritativeAPI)); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure paused condition: %w", err) + } else if updated { + klog.Infof("%v: setting machine set paused condition to false", machineSet.Name) + } + + // Fallthrough and continue reconciliation. + default: + klog.Errorf("%v: invalid .status.authoritativeAPI '%s'", machineSet.Name, machineSet.Status.AuthoritativeAPI) + return reconcile.Result{}, nil // Do not return an error to avoid immediate requeue. + } } result, err := r.reconcile(ctx, machineSet) @@ -329,6 +330,23 @@ func (r *ReconcileMachineSet) reconcile(ctx context.Context, machineSet *machine return reconcile.Result{}, nil } +// ensureUpdatedPausedCondition updates the paused condition if needed. +func (r *ReconcileMachineSet) ensureUpdatedPausedCondition(ms *machinev1.MachineSet, desiredCondition *machinev1.Condition, logMessage string) (bool, error) { + newMs := ms.DeepCopy() + if !conditions.IsEquivalentTo(conditions.Get(ms, machine.PausedCondition), desiredCondition) { + klog.Info(logMessage) + conditions.Set(newMs, desiredCondition) + if _, err := updateMachineSetStatus(r.Client, ms, newMs.Status); err != nil { + klog.Errorf("%v: error updating status: %v", newMs.Name, err) + return false, fmt.Errorf("error updating status for machine set %s: %w", newMs.Name, err) + } + + return true, nil + } + + return false, nil +} + // syncReplicas essentially scales machine resources up and down. func (r *ReconcileMachineSet) syncReplicas(ms *machinev1.MachineSet, machines []*machinev1.Machine) error { if ms.Spec.Replicas == nil { diff --git a/pkg/controller/machineset/machineset_controller_test.go b/pkg/controller/machineset/machineset_controller_test.go index 99d04e8f0..43444d02f 100644 --- a/pkg/controller/machineset/machineset_controller_test.go +++ b/pkg/controller/machineset/machineset_controller_test.go @@ -110,10 +110,10 @@ var _ = Describe("MachineSet Reconciler", func() { It("Should reconcile a MachineSet", func() { instance := machineSetBuilder.Build() - By("Creating the MachineSet") + By("Creating the MachineSet (empty status.authoritativeAPI)") Expect(k8sClient.Create(ctx, instance)).To(Succeed()) - By("Setting the AuthoritativeAPI to MachineAPI") + By("Setting status.authoritativeAPI to MachineAPI") Eventually(k.UpdateStatus(instance, func() { instance.Status.AuthoritativeAPI = machinev1.MachineAuthorityMachineAPI })).Should(Succeed()) @@ -157,19 +157,20 @@ var _ = Describe("MachineSet Reconciler", func() { }) It("Should set the Paused condition appropriately", func() { - By("Creating the MachineSet") + By("Creating the MachineSet (empty status.authoritativeAPI)") Expect(k8sClient.Create(ctx, instance)).To(Succeed()) - By("Setting the AuthoritativeAPI to ClusterAPI") + By("Setting status.authoritativeAPI to ClusterAPI") Eventually(k.UpdateStatus(instance, func() { instance.Status.AuthoritativeAPI = machinev1.MachineAuthorityClusterAPI })).Should(Succeed()) - By("Verifying the paused condition is approproately set to true") + By("Verifying the paused condition is appropriately set to true") Eventually(k.Object(instance), timeout).Should(SatisfyAll( HaveField("Status.Conditions", ContainElement(SatisfyAll( HaveField("Type", Equal(machine.PausedCondition)), HaveField("Status", Equal(corev1.ConditionTrue)), + HaveField("Message", Equal("The AuthoritativeAPI status is set to 'ClusterAPI'")), ))), HaveField("Status.AuthoritativeAPI", Equal(machinev1.MachineAuthorityClusterAPI)), )) @@ -200,7 +201,7 @@ var _ = Describe("MachineSet Reconciler", func() { }, // Condition / until function: until we observe the MachineAuthority being MAPI func(_ context.Context, g framework.GomegaAssertions) bool { - By("Checking that the AuthoritativeAPI is not MachineAPI") + By("Checking that status.authoritativeAPI is not MachineAPI") localInstance := instanceCopy.DeepCopy() if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(localInstance), localInstance); err != nil { @@ -211,36 +212,50 @@ var _ = Describe("MachineSet Reconciler", func() { }) }() - By("Transitioning the AuthoritativeAPI though Migrating") + By("Changing status.authoritativeAPI from ClusterAPI to Migrating") Eventually(k.UpdateStatus(instance, func() { instance.Status.AuthoritativeAPI = machinev1.MachineAuthorityMigrating })).Should(Succeed()) - By("Updating the AuthoritativeAPI from Migrating to MachineAPI") + By("Verifying the paused condition is appropriately set to true") + Eventually(k.Object(instance), timeout).Should(SatisfyAll( + HaveField("Status.Conditions", ContainElement(SatisfyAll( + HaveField("Type", Equal(machine.PausedCondition)), + HaveField("Status", Equal(corev1.ConditionTrue)), + HaveField("Message", Equal("The AuthoritativeAPI status is set to 'Migrating'")), + ))), + HaveField("Status.AuthoritativeAPI", Equal(machinev1.MachineAuthorityMigrating)), + )) + + By("Changing status.authoritativeAPI from Migrating to MachineAPI") Eventually(k.UpdateStatus(instance, func() { instance.Status.AuthoritativeAPI = machinev1.MachineAuthorityMachineAPI })).Should(Succeed()) - By("Verifying the paused condition is approproately set to false") + By("Verifying the paused condition is appropriately set to false") Eventually(k.Object(instance), timeout).Should(SatisfyAll( HaveField("Status.Conditions", ContainElement(SatisfyAll( HaveField("Type", Equal(machine.PausedCondition)), HaveField("Status", Equal(corev1.ConditionFalse)), + HaveField("Message", Equal("The AuthoritativeAPI status is set to 'MachineAPI'")), ))), HaveField("Status.AuthoritativeAPI", Equal(machinev1.MachineAuthorityMachineAPI)), )) - By("Unsetting the AuthoritativeAPI field in the status") + By("Changing status.authoritativeAPI from MachineAPI to empty") Eventually(k.UpdateStatus(instance, func() { instance.Status.AuthoritativeAPI = "" })).Should(Succeed()) - By("Verifying the paused condition is still approproately set to false") - Eventually(k.Object(instance), timeout).Should(HaveField("Status.Conditions", ContainElement(SatisfyAll( - HaveField("Type", Equal(machine.PausedCondition)), - HaveField("Status", Equal(corev1.ConditionFalse)), - HaveField("Message", Equal("The AuthoritativeAPI is not set")), - )))) + By("Verifying the paused condition is still appropriately set to true when empty status.authoritativeAPI") + Eventually(k.Object(instance), timeout).Should(SatisfyAll( + HaveField("Status.Conditions", ContainElement(SatisfyAll( + HaveField("Type", Equal(machine.PausedCondition)), + HaveField("Status", Equal(corev1.ConditionTrue)), + HaveField("Message", Equal("The AuthoritativeAPI status is not yet set")), + ))), + HaveField("Status.AuthoritativeAPI", BeEquivalentTo("")), + )) }) }) }) diff --git a/pkg/util/conditions/gettersetter.go b/pkg/util/conditions/gettersetter.go index d2a3eafb4..9a6fda8c7 100644 --- a/pkg/util/conditions/gettersetter.go +++ b/pkg/util/conditions/gettersetter.go @@ -151,6 +151,20 @@ func IsFalse(from interface{}, t machinev1.ConditionType) bool { return false } +// IsEquivalentTo returns true if condition a is equivalent to condition b, +// by checking for equality of the following fields: Type, Status, Reason, Severity and Message (it excludes LastTransitionTime). +func IsEquivalentTo(a, b *machinev1.Condition) bool { + if a == nil && b == nil { + return true + } else if a == nil { + return false + } else if b == nil { + return false + } + + return hasSameState(a, b) +} + // lexicographicLess returns true if a condition is less than another with regards to the // to order of conditions designed for convenience of the consumer, i.e. kubectl. func lexicographicLess(i, j *machinev1.Condition) bool {