Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions apis/metal3.io/v1alpha1/baremetalhost_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,10 @@ const (
// learn about the hardware components available there
StateInspecting ProvisioningState = "inspecting"

// StatePoweringOffBeforeDelete means we are in the process of
// powering off the node before it's deleted.
StatePoweringOffBeforeDelete ProvisioningState = "powering off before delete"

// StateDeleting means we are in the process of cleaning up the host
// ready for deletion
StateDeleting ProvisioningState = "deleting"
Expand Down
25 changes: 25 additions & 0 deletions controllers/metal3.io/baremetalhost_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,31 @@ func setErrorMessage(host *metal3api.BareMetalHost, errType metal3api.ErrorType,
host.Status.ErrorCount++
}

// actionPowerOffBeforeDeleting asks the provisioner to hard power off the
// host as the step that precedes deletion.
//
// The second argument handed to PowerOff reports whether the host's current
// error type is PowerManagementError.
//
// Results:
//   - actionError when the provisioner call itself returns an error;
//   - the result of recordActionFailure when the provisioner reports an
//     error message;
//   - actionContinue (wrapped in actionUpdate when clearError reports that it
//     changed the host) while the provisioner result is still dirty;
//   - actionComplete otherwise.
func (r *BareMetalHostReconciler) actionPowerOffBeforeDeleting(prov provisioner.Provisioner, info *reconcileInfo) actionResult {
	info.log.Info("host ready to be powered off")

	lastErrWasPower := info.host.Status.ErrorType == metal3api.PowerManagementError
	outcome, err := prov.PowerOff(metal3api.RebootModeHard, lastErrWasPower)

	switch {
	case err != nil:
		return actionError{errors.Wrap(err, "failed to power off before deleting node")}
	case outcome.ErrorMessage != "":
		return recordActionFailure(info, metal3api.PowerManagementError, outcome.ErrorMessage)
	case !outcome.Dirty:
		// Nothing left in flight: the power-off step is finished.
		return actionComplete{}
	}

	// The provisioner is still working; come back after the delay it asked for.
	requeue := actionContinue{outcome.RequeueAfter}
	if !clearError(info.host) {
		return requeue
	}
	// clearError modified the host, so the change must be saved as well.
	return actionUpdate{requeue}
}

// Manage deletion of the host
func (r *BareMetalHostReconciler) actionDeleting(prov provisioner.Provisioner, info *reconcileInfo) actionResult {
info.log.Info(
Expand Down
66 changes: 51 additions & 15 deletions controllers/metal3.io/host_state_machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,20 @@ type stateHandler func(*reconcileInfo) actionResult

// handlers returns the dispatch table of the host state machine: a map from
// each provisioning state to the hostStateMachine method that handles it.
// StateAvailable and StateReady share the handleAvailable handler.
func (hsm *hostStateMachine) handlers() map[metal3api.ProvisioningState]stateHandler {
return map[metal3api.ProvisioningState]stateHandler{
// NOTE(review): the entries below are listed twice; this looks like the
// before and after sides of a diff hunk shown together — confirm. Only the
// second run contains StatePoweringOffBeforeDelete.
metal3api.StateNone: hsm.handleNone,
metal3api.StateUnmanaged: hsm.handleUnmanaged,
metal3api.StateRegistering: hsm.handleRegistering,
metal3api.StateInspecting: hsm.handleInspecting,
metal3api.StateExternallyProvisioned: hsm.handleExternallyProvisioned,
metal3api.StateMatchProfile: hsm.handleMatchProfile, // Backward compatibility, remove eventually
metal3api.StatePreparing: hsm.handlePreparing,
metal3api.StateAvailable: hsm.handleAvailable,
metal3api.StateReady: hsm.handleAvailable,
metal3api.StateProvisioning: hsm.handleProvisioning,
metal3api.StateProvisioned: hsm.handleProvisioned,
metal3api.StateDeprovisioning: hsm.handleDeprovisioning,
metal3api.StateDeleting: hsm.handleDeleting,
metal3api.StateNone: hsm.handleNone,
metal3api.StateUnmanaged: hsm.handleUnmanaged,
metal3api.StateRegistering: hsm.handleRegistering,
metal3api.StateInspecting: hsm.handleInspecting,
metal3api.StateExternallyProvisioned: hsm.handleExternallyProvisioned,
metal3api.StateMatchProfile: hsm.handleMatchProfile, // Backward compatibility, remove eventually
metal3api.StatePreparing: hsm.handlePreparing,
metal3api.StateAvailable: hsm.handleAvailable,
metal3api.StateReady: hsm.handleAvailable,
metal3api.StateProvisioning: hsm.handleProvisioning,
metal3api.StateProvisioned: hsm.handleProvisioned,
metal3api.StateDeprovisioning: hsm.handleDeprovisioning,
metal3api.StatePoweringOffBeforeDelete: hsm.handlePoweringOffBeforeDelete,
metal3api.StateDeleting: hsm.handleDeleting,
}
}

Expand Down Expand Up @@ -223,14 +224,15 @@ func (hsm *hostStateMachine) checkInitiateDelete(log logr.Logger) bool {

switch hsm.NextState {
default:
hsm.NextState = metal3api.StateDeleting
hsm.NextState = metal3api.StatePoweringOffBeforeDelete
case metal3api.StateProvisioning, metal3api.StateProvisioned:
if hsm.Host.OperationalStatus() == metal3api.OperationalStatusDetached {
if delayDeleteForDetachedHost(hsm.Host) {
log.Info("Delaying detached host deletion")
deleteDelayedForDetached.Inc()
return false
}
// We cannot power off a detached host. Skip to delete.
hsm.NextState = metal3api.StateDeleting
} else {
hsm.NextState = metal3api.StateDeprovisioning
Expand All @@ -241,6 +243,9 @@ func (hsm *hostStateMachine) checkInitiateDelete(log logr.Logger) bool {
case metal3api.StateDeleting:
// Already in deleting state. Allow state machine to run.
return false
case metal3api.StatePoweringOffBeforeDelete:
// Already in powering off state. Allow state machine to run.
return false
}
return true
}
Expand Down Expand Up @@ -322,7 +327,7 @@ func (hsm *hostStateMachine) ensureRegistered(info *reconcileInfo) (result actio
case metal3api.StateMatchProfile:
// Backward compatibility, remove eventually
return
case metal3api.StateDeleting:
case metal3api.StateDeleting, metal3api.StatePoweringOffBeforeDelete:
// In the deleting state the whole idea is to de-register the host
return
case metal3api.StateRegistering:
Expand Down Expand Up @@ -561,6 +566,37 @@ func (hsm *hostStateMachine) handleDeprovisioning(info *reconcileInfo) actionRes
return actResult
}

// handlePoweringOffBeforeDelete runs the power-off action for a host that is
// being deleted and decides when the state machine may move on to
// StateDeleting: when the power-off completes, when it has failed with an
// error count above 3, or when the action error reports that registration is
// needed and no credentials are available. In every other case the result of
// the power-off action is returned unchanged and the state is left as is.
func (hsm *hostStateMachine) handlePoweringOffBeforeDelete(info *reconcileInfo) actionResult {
actResult := hsm.Reconciler.actionPowerOffBeforeDeleting(hsm.Provisioner, info)
// skipToDelete abandons the power-off attempt: it moves the host to
// StateDeleting, queues an increment of the deleteWithoutPowerOff counter
// as a post-save callback, and reports the step as complete.
skipToDelete := func() actionResult {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: most of this function is repeating handleDeprovisioning. It would be great to refactor them.

hsm.NextState = metal3api.StateDeleting
info.postSaveCallbacks = append(info.postSaveCallbacks, deleteWithoutPowerOff.Inc)
return actionComplete{}
}

switch r := actResult.(type) {
case actionComplete:
// Power-off finished: proceed to deletion, reset the error counter and
// record the host as powered off.
hsm.NextState = metal3api.StateDeleting
hsm.Host.Status.ErrorCount = 0
hsm.Host.Status.PoweredOn = false
case actionFailed:
// If powering off keeps failing (error count above 3) and
// deletion has been requested, give up and continue to delete.
if hsm.Host.Status.ErrorCount > 3 {
info.log.Info("Giving up on host power off after 3 attempts.")
return skipToDelete()
}
case actionError:
if r.NeedsRegistration() && !hsm.haveCreds {
// If the host is not registered as a node in Ironic and we
// lack the credentials to power it off, just continue to
// delete.
return skipToDelete()
}
}
// Any other outcome is passed back to the caller untouched.
return actResult
}

// handleDeleting is the state handler for StateDeleting. It delegates to the
// reconciler's actionDeleting with the state machine's provisioner and
// returns that action's result unchanged.
func (hsm *hostStateMachine) handleDeleting(info *reconcileInfo) actionResult {
	reconciler := hsm.Reconciler
	return reconciler.actionDeleting(hsm.Provisioner, info)
}
5 changes: 5 additions & 0 deletions controllers/metal3.io/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ var deleteWithoutDeprov = prometheus.NewCounter(prometheus.CounterOpts{
Help: "Number of times a host is deleted despite deprovisioning failing",
})

// deleteWithoutPowerOff is the Prometheus counter exported as
// metal3_delete_without_powering_off_total. Its Inc method is what the host
// state machine schedules when it skips ahead to deletion after giving up on
// powering the host off.
var deleteWithoutPowerOff = prometheus.NewCounter(
	prometheus.CounterOpts{
		Name: "metal3_delete_without_powering_off_total",
		Help: "Number of times a host is deleted despite powering off failing",
	},
)

var provisionerNotReady = prometheus.NewCounter(prometheus.CounterOpts{
Name: "metal3_provisioner_not_ready_total",
Help: "Number of times a host is not provision ready",
Expand Down