Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions controllers/metal3.io/baremetalhost_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,10 +370,6 @@ func clearError(host *metal3v1alpha1.BareMetalHost) (dirty bool) {
host.Status.ErrorMessage = ""
dirty = true
}
if host.Status.ErrorCount != 0 {
host.Status.ErrorCount = 0
dirty = true
}
return dirty
}

Expand Down Expand Up @@ -438,7 +434,7 @@ func (r *BareMetalHostReconciler) actionRegistering(prov provisioner.Provisioner
info.postSaveCallbacks = append(info.postSaveCallbacks, updatedCredentials.Inc)
}

provResult, err := prov.ValidateManagementAccess(credsChanged)
provResult, err := prov.ValidateManagementAccess(credsChanged, info.host.Status.ErrorType == metal3v1alpha1.RegistrationError)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: what about pushing this check directly within the related provisioner method (ValidateManagementAccess in this case)? It will help in keeping such logic within the provisioner code, and at the same time will minimize the impacts on the interface

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The controller (not the provisioner) owns setting the errors and knowing the types, so conceptually I don't feel like this belongs in the provisioner.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current implementation seems pretty fixed for every case, ie Adopt checks always for RegistrationError

if err != nil {
noManagementAccess.Inc()
return actionError{errors.Wrap(err, "failed to validate BMC access")}
Expand Down Expand Up @@ -476,7 +472,7 @@ func (r *BareMetalHostReconciler) actionRegistering(prov provisioner.Provisioner
func (r *BareMetalHostReconciler) actionInspecting(prov provisioner.Provisioner, info *reconcileInfo) actionResult {
info.log.Info("inspecting hardware")

provResult, details, err := prov.InspectHardware()
provResult, details, err := prov.InspectHardware(info.host.Status.ErrorType == metal3v1alpha1.InspectionError)
if err != nil {
return actionError{errors.Wrap(err, "hardware inspection failed")}
}
Expand Down Expand Up @@ -608,7 +604,7 @@ func (r *BareMetalHostReconciler) actionDeprovisioning(prov provisioner.Provisio

info.log.Info("deprovisioning")

provResult, err = prov.Deprovision()
provResult, err = prov.Deprovision(info.host.Status.ErrorType == metal3v1alpha1.ProvisioningError)
if err != nil {
return actionError{errors.Wrap(err, "failed to deprovision")}
}
Expand Down Expand Up @@ -720,6 +716,7 @@ func (r *BareMetalHostReconciler) manageHostPower(prov provisioner.Provisioner,
// state and there were no errors, so reflect the new state in the
// host status field.
info.host.Status.PoweredOn = info.host.Spec.Online
info.host.Status.ErrorCount = 0
return steadyStateResult
}

Expand Down
24 changes: 0 additions & 24 deletions controllers/metal3.io/baremetalhost_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1310,27 +1310,3 @@ func TestErrorCountIncrementsAlways(t *testing.T) {
setErrorMessage(b, metal3v1alpha1.InspectionError, "Another error message")
assert.Equal(t, b.Status.ErrorCount, 2)
}

func TestClearErrorCount(t *testing.T) {

b := &metal3v1alpha1.BareMetalHost{
Status: metal3v1alpha1.BareMetalHostStatus{
ErrorCount: 5,
},
}

assert.True(t, clearError(b))
assert.Equal(t, 0, b.Status.ErrorCount)
}

func TestClearErrorCountOnlyIfNotZero(t *testing.T) {

b := &metal3v1alpha1.BareMetalHost{
Status: metal3v1alpha1.BareMetalHostStatus{
ErrorCount: 5,
},
}

assert.True(t, clearError(b))
assert.False(t, clearError(b))
}
17 changes: 12 additions & 5 deletions controllers/metal3.io/host_state_machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,15 @@ func (hsm *hostStateMachine) handleRegistering(info *reconcileInfo) actionResult
} else {
hsm.NextState = metal3v1alpha1.StateInspecting
}
hsm.Host.Status.ErrorCount = 0
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably it could make sense to bind the error count reset to the actionComplete (maybe with an utility method like recordActionComplete), otherwise my feeling is that it could be easily missed next time some new code will be added handling a completed action.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was nice having it be magic, but there's no current place to hang it - actionComplete doesn't work for the steady states.
The fact that we can't move the code into the state machine for the steady states indicates a design problem, and maybe once that is resolved we'll have a convenient place to tie it.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree that there's probably something to be reviewed in the steady states, as in the current implementation the ErrorCount is cleared when:

  1. When completing an action and moving from/to:
  • Registering -> Inspecting/Ext.Prov
  • Inspecting -> Match Profile
  • Provisioning -> Provisioned
  • Deprovisioning -> Deleting / Ready
  1. Due the power management within the steady states (Ext.Prov.//Provisioned ) and Ready

But at least for point 1 having an utility function that set the actionComplete and resets the ErrorCount could help in reducing the scattering.

return actionComplete{}
}

func (hsm *hostStateMachine) handleInspecting(info *reconcileInfo) actionResult {
actResult := hsm.Reconciler.actionInspecting(hsm.Provisioner, info)
if _, complete := actResult.(actionComplete); complete {
hsm.NextState = metal3v1alpha1.StateMatchProfile
hsm.Host.Status.ErrorCount = 0
}
return actResult
}
Expand All @@ -255,12 +257,14 @@ func (hsm *hostStateMachine) handleMatchProfile(info *reconcileInfo) actionResul
actResult := hsm.Reconciler.actionMatchProfile(hsm.Provisioner, info)
if _, complete := actResult.(actionComplete); complete {
hsm.NextState = metal3v1alpha1.StateReady
hsm.Host.Status.ErrorCount = 0
}
return actResult
}

func (hsm *hostStateMachine) handleExternallyProvisioned(info *reconcileInfo) actionResult {
if hsm.Host.Spec.ExternallyProvisioned {
// ErrorCount is cleared when appropriate inside actionManageSteadyState
return hsm.Reconciler.actionManageSteadyState(hsm.Provisioner, info)
}

Expand All @@ -281,8 +285,8 @@ func (hsm *hostStateMachine) handleReady(info *reconcileInfo) actionResult {
return actionComplete{}
}

// ErrorCount is cleared when appropriate inside actionManageReady
actResult := hsm.Reconciler.actionManageReady(hsm.Provisioner, info)

if _, complete := actResult.(actionComplete); complete {
hsm.NextState = metal3v1alpha1.StateProvisioning
}
Expand Down Expand Up @@ -317,6 +321,7 @@ func (hsm *hostStateMachine) handleProvisioning(info *reconcileInfo) actionResul
actResult := hsm.Reconciler.actionProvisioning(hsm.Provisioner, info)
if _, complete := actResult.(actionComplete); complete {
hsm.NextState = metal3v1alpha1.StateProvisioned
hsm.Host.Status.ErrorCount = 0
}
return actResult
}
Expand All @@ -327,6 +332,7 @@ func (hsm *hostStateMachine) handleProvisioned(info *reconcileInfo) actionResult
return actionComplete{}
}

// ErrorCount is cleared when appropriate inside actionManageSteadyState
return hsm.Reconciler.actionManageSteadyState(hsm.Provisioner, info)
}

Expand All @@ -336,6 +342,7 @@ func (hsm *hostStateMachine) handleDeprovisioning(info *reconcileInfo) actionRes
if hsm.Host.DeletionTimestamp.IsZero() {
if _, complete := actResult.(actionComplete); complete {
hsm.NextState = metal3v1alpha1.StateReady
hsm.Host.Status.ErrorCount = 0
}
} else {
skipToDelete := func() actionResult {
Expand All @@ -347,13 +354,13 @@ func (hsm *hostStateMachine) handleDeprovisioning(info *reconcileInfo) actionRes
switch r := actResult.(type) {
case actionComplete:
hsm.NextState = metal3v1alpha1.StateDeleting
hsm.Host.Status.ErrorCount = 0
case actionFailed:
// If the provisioner gives up deprovisioning and
// deletion has been requested, continue to delete.
// Note that this is entirely theoretical, as the
// Ironic provisioner currently never gives up
// trying to deprovision.
return skipToDelete()
if hsm.Host.Status.ErrorCount > 3 {
return skipToDelete()
}
case actionError:
if r.NeedsRegistration() && !hsm.haveCreds {
// If the host is not registered as a node in Ironic and we
Expand Down
32 changes: 22 additions & 10 deletions controllers/metal3.io/host_state_machine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,9 @@ func TestErrorCountIncreasedWhenRegistrationFails(t *testing.T) {
func TestErrorCountCleared(t *testing.T) {

tests := []struct {
Scenario string
Host *metal3v1alpha1.BareMetalHost
Scenario string
Host *metal3v1alpha1.BareMetalHost
PreserveErrorCountOnComplete bool
}{
{
Scenario: "registering",
Expand All @@ -246,8 +247,9 @@ func TestErrorCountCleared(t *testing.T) {
Host: host(metal3v1alpha1.StateInspecting).build(),
},
{
Scenario: "ready",
Host: host(metal3v1alpha1.StateReady).build(),
Scenario: "ready",
Host: host(metal3v1alpha1.StateReady).build(),
PreserveErrorCountOnComplete: true,
},
{
Scenario: "deprovisioning",
Expand All @@ -258,8 +260,9 @@ func TestErrorCountCleared(t *testing.T) {
Host: host(metal3v1alpha1.StateProvisioning).SetImageURL("imageSpecUrl").build(),
},
{
Scenario: "externallyProvisioned",
Host: host(metal3v1alpha1.StateExternallyProvisioned).SetExternallyProvisioned().build(),
Scenario: "externallyProvisioned",
Host: host(metal3v1alpha1.StateExternallyProvisioned).SetExternallyProvisioned().build(),
PreserveErrorCountOnComplete: true,
},
}
for _, tt := range tests {
Expand All @@ -272,8 +275,16 @@ func TestErrorCountCleared(t *testing.T) {
prov.setNextResult(true)
result := hsm.ReconcileState(info)

assert.Equal(t, tt.Host.Status.ErrorCount, 0)
assert.Equal(t, 1, tt.Host.Status.ErrorCount)
assert.True(t, result.Dirty())

prov.setNextResult(false)
hsm.ReconcileState(info)
if tt.PreserveErrorCountOnComplete {
assert.Equal(t, 1, tt.Host.Status.ErrorCount)
} else {
assert.Equal(t, 0, tt.Host.Status.ErrorCount)
}
})
}
}
Expand Down Expand Up @@ -355,11 +366,12 @@ func (m *mockProvisioner) setNextResult(dirty bool) {
}
}

func (m *mockProvisioner) ValidateManagementAccess(credentialsChanged bool) (result provisioner.Result, err error) {
func (m *mockProvisioner) ValidateManagementAccess(credentialsChanged, force bool) (result provisioner.Result, err error) {
return m.nextResult, err
}

func (m *mockProvisioner) InspectHardware() (result provisioner.Result, details *metal3v1alpha1.HardwareDetails, err error) {
func (m *mockProvisioner) InspectHardware(force bool) (result provisioner.Result, details *metal3v1alpha1.HardwareDetails, err error) {
details = &metal3v1alpha1.HardwareDetails{}
return m.nextResult, details, err
}

Expand All @@ -375,7 +387,7 @@ func (m *mockProvisioner) Provision(configData provisioner.HostConfigData) (resu
return m.nextResult, err
}

func (m *mockProvisioner) Deprovision() (result provisioner.Result, err error) {
func (m *mockProvisioner) Deprovision(force bool) (result provisioner.Result, err error) {
return m.nextResult, err
}

Expand Down
6 changes: 3 additions & 3 deletions pkg/provisioner/demo/demo.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func New(host *metal3v1alpha1.BareMetalHost, bmcCreds bmc.Credentials, publisher

// ValidateManagementAccess tests the connection information for the
// host to verify that the location and credentials work.
func (p *demoProvisioner) ValidateManagementAccess(credentialsChanged bool) (result provisioner.Result, err error) {
func (p *demoProvisioner) ValidateManagementAccess(credentialsChanged, force bool) (result provisioner.Result, err error) {
p.log.Info("testing management access")

hostName := p.host.ObjectMeta.Name
Expand Down Expand Up @@ -102,7 +102,7 @@ func (p *demoProvisioner) ValidateManagementAccess(credentialsChanged bool) (res
// details of devices discovered on the hardware. It may be called
// multiple times, and should return true for its dirty flag until the
// inspection is completed.
func (p *demoProvisioner) InspectHardware() (result provisioner.Result, details *metal3v1alpha1.HardwareDetails, err error) {
func (p *demoProvisioner) InspectHardware(force bool) (result provisioner.Result, details *metal3v1alpha1.HardwareDetails, err error) {
p.log.Info("inspecting hardware", "status", p.host.OperationalStatus())

hostName := p.host.ObjectMeta.Name
Expand Down Expand Up @@ -218,7 +218,7 @@ func (p *demoProvisioner) Provision(hostConf provisioner.HostConfigData) (result
// Deprovision removes the host from the image. It may be called
// multiple times, and should return true for its dirty flag until the
// deprovisioning operation is completed.
func (p *demoProvisioner) Deprovision() (result provisioner.Result, err error) {
func (p *demoProvisioner) Deprovision(force bool) (result provisioner.Result, err error) {

hostName := p.host.ObjectMeta.Name
switch hostName {
Expand Down
6 changes: 3 additions & 3 deletions pkg/provisioner/empty/empty.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ func New(host *metal3v1alpha1.BareMetalHost, bmcCreds bmc.Credentials, publisher

// ValidateManagementAccess tests the connection information for the
// host to verify that the location and credentials work.
func (p *emptyProvisioner) ValidateManagementAccess(credentialsChanged bool) (provisioner.Result, error) {
func (p *emptyProvisioner) ValidateManagementAccess(credentialsChanged, force bool) (provisioner.Result, error) {
return provisioner.Result{}, nil
}

// InspectHardware updates the HardwareDetails field of the host with
// details of devices discovered on the hardware. It may be called
// multiple times, and should return true for its dirty flag until the
// inspection is completed.
func (p *emptyProvisioner) InspectHardware() (provisioner.Result, *metal3v1alpha1.HardwareDetails, error) {
func (p *emptyProvisioner) InspectHardware(force bool) (provisioner.Result, *metal3v1alpha1.HardwareDetails, error) {
return provisioner.Result{}, nil, nil
}

Expand Down Expand Up @@ -57,7 +57,7 @@ func (p *emptyProvisioner) Provision(hostConf provisioner.HostConfigData) (provi
// Deprovision removes the host from the image. It may be called
// multiple times, and should return true for its dirty flag until the
// deprovisioning operation is completed.
func (p *emptyProvisioner) Deprovision() (provisioner.Result, error) {
func (p *emptyProvisioner) Deprovision(force bool) (provisioner.Result, error) {
return provisioner.Result{}, nil
}

Expand Down
6 changes: 3 additions & 3 deletions pkg/provisioner/fixture/fixture.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func NewMock(host *metal3v1alpha1.BareMetalHost, bmcCreds bmc.Credentials, publi

// ValidateManagementAccess tests the connection information for the
// host to verify that the location and credentials work.
func (p *fixtureProvisioner) ValidateManagementAccess(credentialsChanged bool) (result provisioner.Result, err error) {
func (p *fixtureProvisioner) ValidateManagementAccess(credentialsChanged, force bool) (result provisioner.Result, err error) {
p.log.Info("testing management access")

// Fill in the ID of the host in the provisioning system
Expand All @@ -98,7 +98,7 @@ func (p *fixtureProvisioner) ValidateManagementAccess(credentialsChanged bool) (
// details of devices discovered on the hardware. It may be called
// multiple times, and should return true for its dirty flag until the
// inspection is completed.
func (p *fixtureProvisioner) InspectHardware() (result provisioner.Result, details *metal3v1alpha1.HardwareDetails, err error) {
func (p *fixtureProvisioner) InspectHardware(force bool) (result provisioner.Result, details *metal3v1alpha1.HardwareDetails, err error) {
p.log.Info("inspecting hardware", "status", p.host.OperationalStatus())

// The inspection is ongoing. We'll need to check the fixture
Expand Down Expand Up @@ -201,7 +201,7 @@ func (p *fixtureProvisioner) Provision(hostConf provisioner.HostConfigData) (res
// Deprovision removes the host from the image. It may be called
// multiple times, and should return true for its dirty flag until the
// deprovisioning operation is completed.
func (p *fixtureProvisioner) Deprovision() (result provisioner.Result, err error) {
func (p *fixtureProvisioner) Deprovision(force bool) (result provisioner.Result, err error) {
p.log.Info("ensuring host is deprovisioned")

result.RequeueAfter = deprovisionRequeueDelay
Expand Down
2 changes: 1 addition & 1 deletion pkg/provisioner/ironic/inspecthardware_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ func TestInspectHardware(t *testing.T) {
}

prov.status.ID = nodeUUID
result, details, err := prov.InspectHardware()
result, details, err := prov.InspectHardware(false)

assert.Equal(t, tc.expectedDirty, result.Dirty)
assert.Equal(t, time.Second*time.Duration(tc.expectedRequestAfter), result.RequeueAfter)
Expand Down
Loading