From 4364f519e0d0bce1eaa58235324b0e8959665bd9 Mon Sep 17 00:00:00 2001 From: Zane Bitter Date: Mon, 20 Sep 2021 11:28:29 -0400 Subject: [PATCH 1/2] ironic: Refactor PowerOff() The code to check whether a previous PowerOff command succeeded is essentially the same between the hard and soft power off modes, so check it once at the start instead of duplicating it. (cherry picked from commit 9ba2fd27887a6197a98d1efc86e992de06c6d8c7) --- pkg/provisioner/ironic/ironic.go | 93 ++++++++++++---------------- pkg/provisioner/ironic/power_test.go | 48 ++++++++------ 2 files changed, 67 insertions(+), 74 deletions(-) diff --git a/pkg/provisioner/ironic/ironic.go b/pkg/provisioner/ironic/ironic.go index 4974b5bdd2..99b8a14681 100644 --- a/pkg/provisioner/ironic/ironic.go +++ b/pkg/provisioner/ironic/ironic.go @@ -1569,85 +1569,70 @@ func (p *ironicProvisioner) PowerOn(force bool) (result provisioner.Result, err func (p *ironicProvisioner) PowerOff(rebootMode metal3v1alpha1.RebootMode, force bool) (result provisioner.Result, err error) { p.log.Info(fmt.Sprintf("ensuring host is powered off (mode: %s)", rebootMode)) - if rebootMode == metal3v1alpha1.RebootModeSoft { - return p.softPowerOff() - } - // Reboot mode is hard or force flag is set - return p.hardPowerOff(force) -} - -// hardPowerOff sends 'power off' request to BM node and waits for the result -func (p *ironicProvisioner) hardPowerOff(force bool) (result provisioner.Result, err error) { - p.log.Info("ensuring host is powered off by \"hard power off\" command") - ironicNode, err := p.getNode() if err != nil { return transientError(err) } if ironicNode.PowerState != powerOff { - if ironicNode.LastError != "" && !force { - p.log.Info("hard power off error", "msg", ironicNode.LastError) - return operationFailed(ironicNode.LastError) - } - if ironicNode.TargetPowerState == powerOff { + targetState := ironicNode.TargetPowerState + // If the target state is either powerOff or softPowerOff, then we should wait + if targetState == powerOff || targetState == softPowerOff { p.log.Info("waiting for power status to change") return operationContinuing(powerRequeueDelay) } - result, err = p.changePower(ironicNode, nodes.PowerOff) - if err != nil { - switch err.(type) { - case HostLockedError: - return retryAfterDelay(powerRequeueDelay) - default: - return transientError(errors.Wrap(err, "failed to power off host")) - } + // If the target state is unset while the last error is set, + // then the last execution of power off has failed. + if targetState == "" && ironicNode.LastError != "" && !force { + p.log.Info("power off error", "msg", ironicNode.LastError) + return operationFailed(ironicNode.LastError) } - p.publisher("PowerOff", "Host powered off") - return result, err + + if rebootMode == metal3v1alpha1.RebootModeSoft && !force { + return p.softPowerOff(ironicNode) + } + // Reboot mode is hard or force flag is set + return p.hardPowerOff(ironicNode) } return operationComplete() } +// hardPowerOff sends 'power off' request to BM node and waits for the result +func (p *ironicProvisioner) hardPowerOff(ironicNode *nodes.Node) (result provisioner.Result, err error) { + p.log.Info("ensuring host is powered off by \"hard power off\" command") + + result, err = p.changePower(ironicNode, nodes.PowerOff) + if err != nil { + switch err.(type) { + case HostLockedError: + return retryAfterDelay(powerRequeueDelay) + default: + return transientError(errors.Wrap(err, "failed to power off host")) + } + } + p.publisher("PowerOff", "Host powered off") + return result, err +} + // softPowerOff sends 'soft power off' request to BM node. // If soft power off is not supported, the request ends with an error. // Otherwise the request ends with no error and the result should be // checked later via node fields "power_state", "target_power_state" // and "last_error". -func (p *ironicProvisioner) softPowerOff() (result provisioner.Result, err error) { +func (p *ironicProvisioner) softPowerOff(ironicNode *nodes.Node) (result provisioner.Result, err error) { p.log.Info("ensuring host is powered off by \"soft power off\" command") - ironicNode, err := p.getNode() + result, err = p.changePower(ironicNode, nodes.SoftPowerOff) if err != nil { - return transientError(err) - } - - if ironicNode.PowerState != powerOff { - targetState := ironicNode.TargetPowerState - // If the target state is either powerOff or softPowerOff, then we should wait - if targetState == powerOff || targetState == softPowerOff { - p.log.Info("waiting for power status to change") - return operationContinuing(powerRequeueDelay) - } - // If the target state is unset while the last error is set, - // then the last execution of soft power off has failed. - if targetState == "" && ironicNode.LastError != "" { - p.log.Info("soft power off error", "msg", ironicNode.LastError) - return operationFailed(ironicNode.LastError) - } - result, err = p.changePower(ironicNode, nodes.SoftPowerOff) - if err != nil { - switch err.(type) { - case HostLockedError: - return retryAfterDelay(powerRequeueDelay) - default: - return transientError(errors.Wrap(err, "failed to power off host")) - } + switch err.(type) { + case HostLockedError: + return retryAfterDelay(powerRequeueDelay) + default: + return transientError(errors.Wrap(err, "failed to power off host")) } - p.publisher("PowerOff", "Host soft powered off") } - + p.publisher("PowerOff", "Host soft powered off") return result, nil } diff --git a/pkg/provisioner/ironic/power_test.go b/pkg/provisioner/ironic/power_test.go index 0a899d5f46..fc5f52d491 100644 --- a/pkg/provisioner/ironic/power_test.go +++ b/pkg/provisioner/ironic/power_test.go @@ -144,7 +144,8 @@ func TestPowerOn(t *testing.T) { } func TestPowerOff(t *testing.T) { - + hardPowerOffReason := "Host powered off" + softPowerOffReason := "Host soft powered off" nodeUUID := "33ce8659-7400-4c68-9535-d10766f07a58" cases := []struct { name string @@ -155,6 +156,7 @@ func TestPowerOff(t *testing.T) { expectedError bool expectedRequestAfter int expectedErrorResult bool + expectedReason string rebootMode metal3v1alpha1.RebootMode }{ { @@ -178,40 +180,39 @@ func TestPowerOff(t *testing.T) { name: "power-off normal", ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{ PowerState: powerOn, - TargetPowerState: powerOn, TargetProvisionState: "", UUID: nodeUUID, }), - expectedDirty: true, - rebootMode: metal3v1alpha1.RebootModeSoft, + rebootMode: metal3v1alpha1.RebootModeSoft, + expectedDirty: true, + expectedReason: softPowerOffReason, }, { name: "power-off hard", ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{ PowerState: powerOn, - TargetPowerState: powerOn, TargetProvisionState: "", UUID: nodeUUID, }), - expectedDirty: true, - rebootMode: metal3v1alpha1.RebootModeHard, + expectedDirty: true, + rebootMode: metal3v1alpha1.RebootModeHard, + expectedReason: hardPowerOffReason, }, { name: "power-off wait for Provisioning state", ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ PowerState: powerOn, - TargetPowerState: powerOn, TargetProvisionState: string(nodes.TargetDeleted), UUID: nodeUUID, }), expectedRequestAfter: 10, expectedDirty: true, + expectedReason: hardPowerOffReason, }, { name: "power-off wait for locked host", ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ PowerState: powerOn, - TargetPowerState: powerOn, TargetProvisionState: "", UUID: nodeUUID, }).WithNodeStatesPower(nodeUUID, http.StatusConflict).WithNodeStatesPowerUpdate(nodeUUID, http.StatusConflict), @@ -222,19 +223,18 @@ func TestPowerOff(t *testing.T) { name: "power-off soft with force", ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{ PowerState: powerOn, - TargetPowerState: powerOn, TargetProvisionState: "", UUID: nodeUUID, }), - rebootMode: metal3v1alpha1.RebootModeSoft, - force: true, - expectedDirty: true, + rebootMode: metal3v1alpha1.RebootModeSoft, + force: true, + expectedDirty: true, + expectedReason: hardPowerOffReason, }, { name: "power-off hard with LastError", ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{ PowerState: powerOn, - TargetPowerState: powerOn, TargetProvisionState: "", UUID: nodeUUID, LastError: "hard power off failed", @@ -247,14 +247,14 @@ func TestPowerOff(t *testing.T) { name: "power-off hard with force", ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{ PowerState: powerOn, - TargetPowerState: powerOn, TargetProvisionState: "", UUID: nodeUUID, LastError: "hard power off failed", }), - rebootMode: metal3v1alpha1.RebootModeHard, - force: true, - expectedDirty: true, + rebootMode: metal3v1alpha1.RebootModeHard, + force: true, + expectedDirty: true, + expectedReason: hardPowerOffReason, }, } @@ -273,7 +273,10 @@ func TestPowerOff(t *testing.T) { host := makeHost() host.Status.Provisioning.ID = nodeUUID - publisher := func(reason, message string) {} + var eventReasons []string + publisher := func(reason, message string) { + eventReasons = append(eventReasons, message) + } auth := clients.AuthConfig{Type: clients.NoAuth} prov, err := newProvisionerWithSettings(host, bmc.Credentials{}, publisher, tc.ironic.Endpoint(), auth, inspector.Endpoint(), auth, @@ -295,7 +298,12 @@ func TestPowerOff(t *testing.T) { if tc.expectedErrorResult { assert.Contains(t, result.ErrorMessage, "hard power off failed") } - + if tc.expectedReason != "" { + assert.Len(t, eventReasons, 1) + assert.Contains(t, eventReasons, tc.expectedReason) + } else { + assert.Empty(t, eventReasons) + } }) } } From 249598e09445a683267a156058d6d4edd0413fad Mon Sep 17 00:00:00 2001 From: Zane Bitter Date: Mon, 20 Sep 2021 11:29:53 -0400 Subject: [PATCH 2/2] ironic: Restore check for unsupported soft power off If the ironic driver does not support soft power off, we need to fall back to hard power off. Due to an oversight in 67a27dc85996462b8f08348480072eec224e26e6 we stopped checking for this and just returned a transient error in this case. While returning a failure would have resulted in retrying with a hard power off (albeit after reporting an error to the user), returning a transient error means that it will retry the soft power off forever. Restore the fallback and add tests to cover this case. Because we can't set the dummy ironic server to return different responses on subsequent calls we will always get an error, so check that it is from the fallback hard power off. (cherry picked from commit f75c6b7f0e0f4f65bacf5b98b2da7186001557c2) --- pkg/provisioner/ironic/ironic.go | 18 +++++++----- pkg/provisioner/ironic/power_test.go | 41 ++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/pkg/provisioner/ironic/ironic.go b/pkg/provisioner/ironic/ironic.go index 99b8a14681..a32eded7bf 100644 --- a/pkg/provisioner/ironic/ironic.go +++ b/pkg/provisioner/ironic/ironic.go @@ -1519,12 +1519,13 @@ func (p *ironicProvisioner) changePower(ironicNode *nodes.Node, target nodes.Tar return result, HostLockedError{} case gophercloud.ErrDefault400: // Error 400 Bad Request means target power state is not supported by vendor driver - p.log.Info("power change error", "message", changeResult.Err) - return result, SoftPowerOffUnsupportedError{} - default: - p.log.Info("power change error", "message", changeResult.Err) - return transientError(errors.Wrap(changeResult.Err, "failed to change power state")) + if target == nodes.SoftPowerOff { + p.log.Info("power change error", "message", changeResult.Err) + return result, SoftPowerOffUnsupportedError{} + } } + p.log.Info("power change error", "message", changeResult.Err) + return transientError(errors.Wrap(changeResult.Err, "failed to change power state")) } // PowerOn ensures the server is powered on independently of any image @@ -1589,9 +1590,12 @@ func (p *ironicProvisioner) PowerOff(rebootMode metal3v1alpha1.RebootMode, force } if rebootMode == metal3v1alpha1.RebootModeSoft && !force { - return p.softPowerOff(ironicNode) + result, err := p.softPowerOff(ironicNode) + if !errors.As(err, &SoftPowerOffUnsupportedError{}) { + return result, err + } } - // Reboot mode is hard or force flag is set + // Reboot mode is hard, force flag is set, or soft power off is not supported return p.hardPowerOff(ironicNode) } diff --git a/pkg/provisioner/ironic/power_test.go b/pkg/provisioner/ironic/power_test.go index fc5f52d491..541d08934c 100644 --- a/pkg/provisioner/ironic/power_test.go +++ b/pkg/provisioner/ironic/power_test.go @@ -1,6 +1,7 @@ package ironic import ( + "errors" "net/http" "testing" "time" @@ -307,3 +308,43 @@ func TestPowerOff(t *testing.T) { }) } } + +func TestSoftPowerOffFallback(t *testing.T) { + nodeUUID := "33ce8659-7400-4c68-9535-d10766f07a58" + node := nodes.Node{ + PowerState: powerOn, + UUID: nodeUUID, + } + ironic := testserver.NewIronic(t).Ready().Node(node).WithNodeStatesPowerUpdate(nodeUUID, http.StatusBadRequest) + ironic.Start() + defer ironic.Stop() + inspector := testserver.NewInspector(t).Ready() + inspector.Start() + defer inspector.Stop() + + host := makeHost() + host.Status.Provisioning.ID = nodeUUID + var eventReasons []string + publisher := func(reason, message string) { + eventReasons = append(eventReasons, message) + } + auth := clients.AuthConfig{Type: clients.NoAuth} + prov, err := newProvisionerWithSettings(host, bmc.Credentials{}, publisher, + ironic.Endpoint(), auth, inspector.Endpoint(), auth, + ) + if err != nil { + t.Fatalf("could not create provisioner: %s", err) + } + + _, err = prov.PowerOff(metal3v1alpha1.RebootModeSoft, false) + assert.Error(t, err) + assert.False(t, errors.As(err, &SoftPowerOffUnsupportedError{})) + + _, err = prov.changePower(&node, nodes.PowerOff) + assert.Error(t, err) + assert.False(t, errors.As(err, &SoftPowerOffUnsupportedError{})) + + _, err = prov.changePower(&node, nodes.SoftPowerOff) + assert.Error(t, err) + assert.True(t, errors.As(err, &SoftPowerOffUnsupportedError{})) +}