-
Notifications
You must be signed in to change notification settings - Fork 310
Do retries with backoff in ValidateManagementAccess, Inspect, and Deprovision #749
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
562b6ac
84ca573
2d02462
7791d21
d7d38ee
7c3daef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -240,13 +240,15 @@ func (hsm *hostStateMachine) handleRegistering(info *reconcileInfo) actionResult | |
| } else { | ||
| hsm.NextState = metal3v1alpha1.StateInspecting | ||
| } | ||
| hsm.Host.Status.ErrorCount = 0 | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably it could make sense to bind the error count reset to the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It was nice having it be magic, but there's no current place to hang it - actionComplete doesn't work for the steady states.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Agree that there's probably something to be reviewed in the steady states, as in the current implementation the ErrorCount is cleared when:
But at least for point 1, having a utility function that sets the actionComplete and resets the ErrorCount could help reduce the scattering. |
||
| return actionComplete{} | ||
| } | ||
|
|
||
| func (hsm *hostStateMachine) handleInspecting(info *reconcileInfo) actionResult { | ||
| actResult := hsm.Reconciler.actionInspecting(hsm.Provisioner, info) | ||
| if _, complete := actResult.(actionComplete); complete { | ||
| hsm.NextState = metal3v1alpha1.StateMatchProfile | ||
| hsm.Host.Status.ErrorCount = 0 | ||
| } | ||
| return actResult | ||
| } | ||
|
|
@@ -255,12 +257,14 @@ func (hsm *hostStateMachine) handleMatchProfile(info *reconcileInfo) actionResul | |
| actResult := hsm.Reconciler.actionMatchProfile(hsm.Provisioner, info) | ||
| if _, complete := actResult.(actionComplete); complete { | ||
| hsm.NextState = metal3v1alpha1.StateReady | ||
| hsm.Host.Status.ErrorCount = 0 | ||
| } | ||
| return actResult | ||
| } | ||
|
|
||
| func (hsm *hostStateMachine) handleExternallyProvisioned(info *reconcileInfo) actionResult { | ||
| if hsm.Host.Spec.ExternallyProvisioned { | ||
| // ErrorCount is cleared when appropriate inside actionManageSteadyState | ||
| return hsm.Reconciler.actionManageSteadyState(hsm.Provisioner, info) | ||
| } | ||
|
|
||
|
|
@@ -281,8 +285,8 @@ func (hsm *hostStateMachine) handleReady(info *reconcileInfo) actionResult { | |
| return actionComplete{} | ||
| } | ||
|
|
||
| // ErrorCount is cleared when appropriate inside actionManageReady | ||
| actResult := hsm.Reconciler.actionManageReady(hsm.Provisioner, info) | ||
|
|
||
| if _, complete := actResult.(actionComplete); complete { | ||
| hsm.NextState = metal3v1alpha1.StateProvisioning | ||
| } | ||
|
|
@@ -317,6 +321,7 @@ func (hsm *hostStateMachine) handleProvisioning(info *reconcileInfo) actionResul | |
| actResult := hsm.Reconciler.actionProvisioning(hsm.Provisioner, info) | ||
| if _, complete := actResult.(actionComplete); complete { | ||
| hsm.NextState = metal3v1alpha1.StateProvisioned | ||
| hsm.Host.Status.ErrorCount = 0 | ||
| } | ||
| return actResult | ||
| } | ||
|
|
@@ -327,6 +332,7 @@ func (hsm *hostStateMachine) handleProvisioned(info *reconcileInfo) actionResult | |
| return actionComplete{} | ||
| } | ||
|
|
||
| // ErrorCount is cleared when appropriate inside actionManageSteadyState | ||
| return hsm.Reconciler.actionManageSteadyState(hsm.Provisioner, info) | ||
| } | ||
|
|
||
|
|
@@ -336,6 +342,7 @@ func (hsm *hostStateMachine) handleDeprovisioning(info *reconcileInfo) actionRes | |
| if hsm.Host.DeletionTimestamp.IsZero() { | ||
| if _, complete := actResult.(actionComplete); complete { | ||
| hsm.NextState = metal3v1alpha1.StateReady | ||
| hsm.Host.Status.ErrorCount = 0 | ||
| } | ||
| } else { | ||
| skipToDelete := func() actionResult { | ||
|
|
@@ -347,13 +354,13 @@ func (hsm *hostStateMachine) handleDeprovisioning(info *reconcileInfo) actionRes | |
| switch r := actResult.(type) { | ||
| case actionComplete: | ||
| hsm.NextState = metal3v1alpha1.StateDeleting | ||
| hsm.Host.Status.ErrorCount = 0 | ||
| case actionFailed: | ||
| // If the provisioner gives up deprovisioning and | ||
| // deletion has been requested, continue to delete. | ||
| // Note that this is entirely theoretical, as the | ||
| // Ironic provisioner currently never gives up | ||
| // trying to deprovision. | ||
| return skipToDelete() | ||
| if hsm.Host.Status.ErrorCount > 3 { | ||
| return skipToDelete() | ||
| } | ||
| case actionError: | ||
| if r.NeedsRegistration() && !hsm.haveCreds { | ||
| // If the host is not registered as a node in Ironic and we | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor: what about pushing this check directly into the related provisioner method (ValidateManagementAccess in this case)? It would help keep such logic within the provisioner code, and at the same time would minimize the impact on the interface.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The controller (not the provisioner) owns setting the errors and knowing the types, so conceptually I don't feel like this belongs in the provisioner.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The current implementation seems pretty fixed for every case, i.e.
Adopt checks always for RegistrationError