diff --git a/controllers/metal3.io/baremetalhost_controller.go b/controllers/metal3.io/baremetalhost_controller.go index 5d25bd9e80..30c06ab5d9 100644 --- a/controllers/metal3.io/baremetalhost_controller.go +++ b/controllers/metal3.io/baremetalhost_controller.go @@ -1005,9 +1005,12 @@ func (r *BareMetalHostReconciler) manageHostPower(prov provisioner.Provisioner, "reboot process", desiredPowerOnState != info.host.Spec.Online) if desiredPowerOnState { - provResult, err = prov.PowerOn() + provResult, err = prov.PowerOn(info.host.Status.ErrorType == metal3v1alpha1.PowerManagementError) } else { - provResult, err = prov.PowerOff(desiredRebootMode) + if info.host.Status.ErrorCount > 0 { + desiredRebootMode = metal3v1alpha1.RebootModeHard + } + provResult, err = prov.PowerOff(desiredRebootMode, info.host.Status.ErrorType == metal3v1alpha1.PowerManagementError) } if err != nil { return actionError{errors.Wrap(err, "failed to manage power state of host")} diff --git a/controllers/metal3.io/host_state_machine_test.go b/controllers/metal3.io/host_state_machine_test.go index acab71fbf4..4982fbd7aa 100644 --- a/controllers/metal3.io/host_state_machine_test.go +++ b/controllers/metal3.io/host_state_machine_test.go @@ -1172,11 +1172,11 @@ func (m *mockProvisioner) Detach() (result provisioner.Result, err error) { return res, err } -func (m *mockProvisioner) PowerOn() (result provisioner.Result, err error) { +func (m *mockProvisioner) PowerOn(force bool) (result provisioner.Result, err error) { return m.getNextResultByMethod("PowerOn"), err } -func (m *mockProvisioner) PowerOff(rebootMode metal3v1alpha1.RebootMode) (result provisioner.Result, err error) { +func (m *mockProvisioner) PowerOff(rebootMode metal3v1alpha1.RebootMode, force bool) (result provisioner.Result, err error) { return m.getNextResultByMethod("PowerOff"), err } diff --git a/pkg/provisioner/demo/demo.go b/pkg/provisioner/demo/demo.go index 00741b33c4..be79a6f464 100644 --- a/pkg/provisioner/demo/demo.go +++ b/pkg/provisioner/demo/demo.go @@ -305,7 +305,7 @@ func (p *demoProvisioner) Detach() (result provisioner.Result, err error) { // PowerOn ensures the server is powered on independently of any image // provisioning operation. -func (p *demoProvisioner) PowerOn() (result provisioner.Result, err error) { +func (p *demoProvisioner) PowerOn(force bool) (result provisioner.Result, err error) { hostName := p.objectMeta.Name switch hostName { @@ -326,7 +326,7 @@ func (p *demoProvisioner) PowerOn() (result provisioner.Result, err error) { // PowerOff ensures the server is powered off independently of any image // provisioning operation. -func (p *demoProvisioner) PowerOff(rebootMode metal3v1alpha1.RebootMode) (result provisioner.Result, err error) { +func (p *demoProvisioner) PowerOff(rebootMode metal3v1alpha1.RebootMode, force bool) (result provisioner.Result, err error) { hostName := p.objectMeta.Name switch hostName { diff --git a/pkg/provisioner/fixture/fixture.go b/pkg/provisioner/fixture/fixture.go index ab0b2e21e5..b3eee1cf4b 100644 --- a/pkg/provisioner/fixture/fixture.go +++ b/pkg/provisioner/fixture/fixture.go @@ -291,7 +291,7 @@ func (p *fixtureProvisioner) Detach() (result provisioner.Result, err error) { // PowerOn ensures the server is powered on independently of any image // provisioning operation. -func (p *fixtureProvisioner) PowerOn() (result provisioner.Result, err error) { +func (p *fixtureProvisioner) PowerOn(force bool) (result provisioner.Result, err error) { p.log.Info("ensuring host is powered on") if !p.state.poweredOn { @@ -307,7 +307,7 @@ func (p *fixtureProvisioner) PowerOn() (result provisioner.Result, err error) { // PowerOff ensures the server is powered off independently of any image // provisioning operation. -func (p *fixtureProvisioner) PowerOff(rebootMode metal3v1alpha1.RebootMode) (result provisioner.Result, err error) { +func (p *fixtureProvisioner) PowerOff(rebootMode metal3v1alpha1.RebootMode, force bool) (result provisioner.Result, err error) { p.log.Info("ensuring host is powered off") if p.state.poweredOn { diff --git a/pkg/provisioner/ironic/errors.go b/pkg/provisioner/ironic/errors.go index c3384bcdc4..8a69c0cb9b 100644 --- a/pkg/provisioner/ironic/errors.go +++ b/pkg/provisioner/ironic/errors.go @@ -9,15 +9,6 @@ func (e SoftPowerOffUnsupportedError) Error() string { return "soft power off is unsupported on BMC" } -// SoftPowerOffFailed is returned when the soft power off command -// finishes with failure. -type SoftPowerOffFailed struct { -} - -func (e SoftPowerOffFailed) Error() string { - return "Soft power off has failed on BMC" -} - // HostLockedError is returned when the BMC host is // locked. type HostLockedError struct { diff --git a/pkg/provisioner/ironic/ironic.go b/pkg/provisioner/ironic/ironic.go index 7d4a35809e..eb42929c5b 100644 --- a/pkg/provisioner/ironic/ironic.go +++ b/pkg/provisioner/ironic/ironic.go @@ -1519,7 +1519,7 @@ func (p *ironicProvisioner) changePower(ironicNode *nodes.Node, target nodes.Tar // PowerOn ensures the server is powered on independently of any image // provisioning operation. -func (p *ironicProvisioner) PowerOn() (result provisioner.Result, err error) { +func (p *ironicProvisioner) PowerOn(force bool) (result provisioner.Result, err error) { p.log.Info("ensuring host is powered on") ironicNode, err := p.getNode() @@ -1535,46 +1535,39 @@ func (p *ironicProvisioner) PowerOn() (result provisioner.Result, err error) { p.log.Info("waiting for power status to change") return operationContinuing(powerRequeueDelay) } - result, err = p.changePower(ironicNode, nodes.PowerOn) + if ironicNode.LastError != "" && !force { + p.log.Info("PowerOn operation failed", "msg", ironicNode.LastError) + return operationFailed(fmt.Sprintf("PowerOn operation failed: %s", + ironicNode.LastError)) + } + if result, err = p.changePower(ironicNode, nodes.PowerOn); err == nil { + p.publisher("PowerOn", "Host powered on") + return result, nil + } switch err.(type) { - case nil: case HostLockedError: + return retryAfterDelay(powerRequeueDelay) default: - return transientError(errors.Wrap(err, "failed to power on host")) + return transientError(errors.Wrap(err, "failed to PowerOn node")) } - p.publisher("PowerOn", "Host powered on") } - return result, nil } // PowerOff ensures the server is powered off independently of any image // provisioning operation. -func (p *ironicProvisioner) PowerOff(rebootMode metal3v1alpha1.RebootMode) (result provisioner.Result, err error) { +func (p *ironicProvisioner) PowerOff(rebootMode metal3v1alpha1.RebootMode, force bool) (result provisioner.Result, err error) { p.log.Info(fmt.Sprintf("ensuring host is powered off (mode: %s)", rebootMode)) - if rebootMode == metal3v1alpha1.RebootModeHard { - result, err = p.hardPowerOff() - } else { - result, err = p.softPowerOff() + if rebootMode == metal3v1alpha1.RebootModeSoft { + return p.softPowerOff() } - if err != nil { - switch err.(type) { - // In case of soft power off is unsupported or has failed, - // we activate hard power off. - case SoftPowerOffUnsupportedError, SoftPowerOffFailed: - return p.hardPowerOff() - case HostLockedError: - return retryAfterDelay(powerRequeueDelay) - default: - return transientError(err) - } - } - return result, nil + // Reboot mode is hard or force flag is set + return p.hardPowerOff(force) } // hardPowerOff sends 'power off' request to BM node and waits for the result -func (p *ironicProvisioner) hardPowerOff() (result provisioner.Result, err error) { +func (p *ironicProvisioner) hardPowerOff(force bool) (result provisioner.Result, err error) { p.log.Info("ensuring host is powered off by \"hard power off\" command") ironicNode, err := p.getNode() @@ -1583,13 +1576,22 @@ func (p *ironicProvisioner) hardPowerOff() (result provisioner.Result, err error } if ironicNode.PowerState != powerOff { + if ironicNode.LastError != "" && !force { + p.log.Info("hard power off error", "msg", ironicNode.LastError) + return operationFailed(ironicNode.LastError) + } if ironicNode.TargetPowerState == powerOff { p.log.Info("waiting for power status to change") return operationContinuing(powerRequeueDelay) } result, err = p.changePower(ironicNode, nodes.PowerOff) if err != nil { - return transientError(errors.Wrap(err, "failed to power off host")) + switch err.(type) { + case HostLockedError: + return retryAfterDelay(powerRequeueDelay) + default: + return transientError(errors.Wrap(err, "failed to power off host")) + } } p.publisher("PowerOff", "Host powered off") return result, err @@ -1621,11 +1623,17 @@ func (p *ironicProvisioner) softPowerOff() (result provisioner.Result, err error // If the target state is unset while the last error is set, // then the last execution of soft power off has failed. if targetState == "" && ironicNode.LastError != "" { - return result, SoftPowerOffFailed{} + p.log.Info("soft power off error", "msg", ironicNode.LastError) + return operationFailed(ironicNode.LastError) } result, err = p.changePower(ironicNode, nodes.SoftPowerOff) if err != nil { - return transientError(err) + switch err.(type) { + case HostLockedError: + return retryAfterDelay(powerRequeueDelay) + default: + return transientError(errors.Wrap(err, "failed to power off host")) + } } p.publisher("PowerOff", "Host soft powered off") } diff --git a/pkg/provisioner/ironic/power_test.go b/pkg/provisioner/ironic/power_test.go index 2db3357069..0a899d5f46 100644 --- a/pkg/provisioner/ironic/power_test.go +++ b/pkg/provisioner/ironic/power_test.go @@ -20,11 +20,13 @@ func TestPowerOn(t *testing.T) { nodeUUID := "33ce8659-7400-4c68-9535-d10766f07a58" cases := []struct { name string + force bool ironic *testserver.IronicMock expectedDirty bool expectedError bool expectedRequestAfter int + expectedErrorResult bool }{ { name: "node-already-power-on", @@ -75,6 +77,30 @@ func TestPowerOn(t *testing.T) { expectedRequestAfter: 10, expectedDirty: true, }, + { + name: "power-on with LastError", + ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ + PowerState: powerOff, + TargetPowerState: powerOff, + UUID: nodeUUID, + LastError: "power on failed", + }), + expectedRequestAfter: 0, + expectedDirty: false, + expectedErrorResult: true, + }, + { + name: "power-on with LastError", + force: true, + ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ + PowerState: powerOff, + TargetPowerState: powerOff, + UUID: nodeUUID, + LastError: "power on failed", + }), + expectedError: true, + expectedErrorResult: false, + }, } for _, tc := range cases { @@ -101,7 +127,7 @@ func TestPowerOn(t *testing.T) { t.Fatalf("could not create provisioner: %s", err) } - result, err := prov.PowerOn() + result, err := prov.PowerOn(tc.force) assert.Equal(t, tc.expectedDirty, result.Dirty) assert.Equal(t, time.Second*time.Duration(tc.expectedRequestAfter), result.RequeueAfter) @@ -110,6 +136,9 @@ func TestPowerOn(t *testing.T) { } else { assert.Error(t, err) } + if tc.expectedErrorResult { + assert.Contains(t, result.ErrorMessage, "PowerOn operation failed") + } }) } } @@ -120,10 +149,12 @@ func TestPowerOff(t *testing.T) { cases := []struct { name string ironic *testserver.IronicMock + force bool expectedDirty bool expectedError bool expectedRequestAfter int + expectedErrorResult bool rebootMode metal3v1alpha1.RebootMode }{ { @@ -187,6 +218,44 @@ func TestPowerOff(t *testing.T) { expectedRequestAfter: 10, expectedDirty: true, }, + { + name: "power-off soft with force", + ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{ + PowerState: powerOn, + TargetPowerState: powerOn, + TargetProvisionState: "", + UUID: nodeUUID, + }), + rebootMode: metal3v1alpha1.RebootModeSoft, + force: true, + expectedDirty: true, + }, + { + name: "power-off hard with LastError", + ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{ + PowerState: powerOn, + TargetPowerState: powerOn, + TargetProvisionState: "", + UUID: nodeUUID, + LastError: "hard power off failed", + }), + rebootMode: metal3v1alpha1.RebootModeHard, + expectedDirty: false, + expectedErrorResult: true, + }, + { + name: "power-off hard with force", + ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{ + PowerState: powerOn, + TargetPowerState: powerOn, + TargetProvisionState: "", + UUID: nodeUUID, + LastError: "hard power off failed", + }), + rebootMode: metal3v1alpha1.RebootModeHard, + force: true, + expectedDirty: true, + }, } for _, tc := range cases { @@ -214,7 +283,7 @@ func TestPowerOff(t *testing.T) { } // We pass the RebootMode type here to define the reboot action - result, err := prov.PowerOff(tc.rebootMode) + result, err := prov.PowerOff(tc.rebootMode, tc.force) assert.Equal(t, tc.expectedDirty, result.Dirty) assert.Equal(t, time.Second*time.Duration(tc.expectedRequestAfter), result.RequeueAfter) @@ -223,6 +292,10 @@ func TestPowerOff(t *testing.T) { } else { assert.Error(t, err) } + if tc.expectedErrorResult { + assert.Contains(t, result.ErrorMessage, "hard power off failed") + } + }) } } diff --git a/pkg/provisioner/provisioner.go b/pkg/provisioner/provisioner.go index 39d241b8e1..a579e44c1d 100644 --- a/pkg/provisioner/provisioner.go +++ b/pkg/provisioner/provisioner.go @@ -144,12 +144,12 @@ type Provisioner interface { // PowerOn ensures the server is powered on independently of any image // provisioning operation. - PowerOn() (result Result, err error) + PowerOn(force bool) (result Result, err error) // PowerOff ensures the server is powered off independently of any image // provisioning operation. The boolean argument may be used to specify // if a hard reboot (force power off) is required - true if so. - PowerOff(rebootMode metal3v1alpha1.RebootMode) (result Result, err error) + PowerOff(rebootMode metal3v1alpha1.RebootMode, force bool) (result Result, err error) // IsReady checks if the provisioning backend is available to accept // all the incoming requests.