Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/BaremetalHost_ProvisioningState.dot
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
digraph BaremetalHost {
Created [shape=house]
Created -> Discovered [label="BMC.* == \"\""]
Created -> Unmanaged [label="BMC.* == \"\""]
Created -> Registering [label="BMC.* != \"\""]

Discovered [shape=doublecircle]
Discovered -> Registering [label="BMC.* != \"\""]
Discovered -> Deleting1 [label="!DeletionTimestamp.IsZero()"]
Unmanaged [shape=doublecircle]
Unmanaged -> Registering [label="BMC.* != \"\""]
Unmanaged -> Deleting1 [label="!DeletionTimestamp.IsZero()"]

Deleting1 [shape=point]

Expand Down
Binary file modified docs/BaremetalHost_ProvisioningState.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 3 additions & 3 deletions docs/baremetalhost-states.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ Newly created hosts move immediately to Discovered or Registering. No
host stays in the Created state while the operator is working
properly.

## Discovered
## Unmanaged

A Discovered host is missing either the BMC address or credentials
secret name, and does not have enough information to access the BMC
An Unmanaged host is missing both the BMC address and credentials
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it need to be both? Could it be either?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Working assumption:

  • If you didn't provide either, you meant to create an unmanaged host
  • If you provided one but not the other, linked to a missing credential secret, or linked to an incorrectly formatted secret, you screwed up and should see big error messages in the UI.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Come to think of it, it's academic anyway because both are required fields within the BMC struct. So you can only specify both or neither.

secret name, and does not have any information to access the BMC
for registration.

## Externally Provisioned
Expand Down
9 changes: 9 additions & 0 deletions pkg/apis/metal3/v1alpha1/baremetalhost_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ const (
// StateNone means the state is unknown
StateNone ProvisioningState = ""

// StateUnmanaged means there is insufficient information available to
// register the host
StateUnmanaged ProvisioningState = "unmanaged"

// StateRegistrationError means there was an error registering the
// host with the backend
StateRegistrationError ProvisioningState = "registration error"
Expand Down Expand Up @@ -637,6 +641,11 @@ func (host *BareMetalHost) getLabel(name string) string {
return host.Labels[name]
}

// HasBMCDetails returns true if the BMC details are set
func (host *BareMetalHost) HasBMCDetails() bool {
return host.Spec.BMC.Address != "" || host.Spec.BMC.CredentialsName != ""
Copy link
Copy Markdown
Member

@n1r1 n1r1 Jun 28, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like this assumes that host.spec.BMC exists. is this a valid assumption?

}

// NeedsHardwareProfile returns true if the profile is not set
func (host *BareMetalHost) NeedsHardwareProfile() bool {
return host.Status.HardwareProfile == ""
Expand Down
63 changes: 28 additions & 35 deletions pkg/controller/baremetalhost/baremetalhost_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
const (
hostErrorRetryDelay = time.Second * 10
pauseRetryDelay = time.Second * 30
unmanagedRetryDelay = time.Minute * 10
rebootAnnotationPrefix = "reboot.metal3.io"
)

Expand Down Expand Up @@ -247,14 +248,21 @@ func (r *ReconcileBareMetalHost) Reconcile(request reconcile.Request) (result re
// Retrieve the BMC details from the host spec and validate host
// BMC details and build the credentials for talking to the
// management controller.
bmcCreds, bmcCredsSecret, err := r.buildAndValidateBMCCredentials(request, host)
if err != nil || bmcCreds == nil {
if !host.DeletionTimestamp.IsZero() {
// If we are in the process of deletion, try with empty credentials
bmcCreds = &bmc.Credentials{}
bmcCredsSecret = &corev1.Secret{}
} else {
return r.credentialsErrorResult(err, request, host)
var bmcCreds *bmc.Credentials
var bmcCredsSecret *corev1.Secret
switch host.Status.Provisioning.State {
case metal3v1alpha1.StateNone, metal3v1alpha1.StateUnmanaged:
bmcCreds = &bmc.Credentials{}
Comment thread
zaneb marked this conversation as resolved.
default:
bmcCreds, bmcCredsSecret, err = r.buildAndValidateBMCCredentials(request, host)
if err != nil || bmcCreds == nil {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could we have a default username without a password? does it make sense to check if there are no credentials or if user or password are empty?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's what buildAndValidateBMCCredentials() does.

if !host.DeletionTimestamp.IsZero() {
// If we are in the process of deletion, try with empty credentials
bmcCreds = &bmc.Credentials{}
bmcCredsSecret = &corev1.Secret{}
} else {
return r.credentialsErrorResult(err, request, host)
}
}
}

Expand Down Expand Up @@ -340,30 +348,6 @@ func recordActionFailure(info *reconcileInfo, errorType metal3v1alpha1.ErrorType

func (r *ReconcileBareMetalHost) credentialsErrorResult(err error, request reconcile.Request, host *metal3v1alpha1.BareMetalHost) (reconcile.Result, error) {
switch err.(type) {
// We treat an empty bmc address and empty bmc credentials fields as a
// trigger the host needs to be put into a discovered status. We also set
// an error message (but not an error state) on the host so we can understand
// what we may be waiting on. Editing the host to set these values will
// cause the host to be reconciled again so we do not Requeue.
case *EmptyBMCAddressError, *EmptyBMCSecretError:
credentialsInvalid.Inc()
dirty := host.SetOperationalStatus(metal3v1alpha1.OperationalStatusDiscovered)
if dirty {
// Set the host error message directly
// as we cannot use SetErrorCondition which
// overwrites our discovered state
host.Status.ErrorMessage = err.Error()
host.Status.ErrorType = ""
saveErr := r.saveHostStatus(host)
if saveErr != nil {
return reconcile.Result{Requeue: true}, saveErr
}
// Only publish the event if we do not have an error
// after saving so that we only publish one time.
r.publishEvent(request,
host.NewEvent("Discovered", fmt.Sprintf("Discovered host with unusable BMC details: %s", err.Error())))
}
return reconcile.Result{}, nil
// In the event a credential secret is defined, but we cannot find it
// we requeue the host as we will not know if they create the secret
// at some point in the future.
Expand All @@ -379,12 +363,14 @@ func (r *ReconcileBareMetalHost) credentialsErrorResult(err error, request recon
r.publishEvent(request, host.NewEvent("BMCCredentialError", err.Error()))
}
return reconcile.Result{Requeue: true, RequeueAfter: hostErrorRetryDelay}, nil
// If we have found the secret but it is missing the required fields
// or the BMC address is defined but malformed we set the
// If a managed Host is missing a BMC address or secret, or
// we have found the secret but it is missing the required fields,
// or the BMC address is defined but malformed, we set the
// host into an error state but we do not Requeue it
// as fixing the secret or the host BMC info will trigger
// the host to be reconciled again
case *bmc.CredentialsValidationError, *bmc.UnknownBMCTypeError:
case *EmptyBMCAddressError, *EmptyBMCSecretError,
*bmc.CredentialsValidationError, *bmc.UnknownBMCTypeError:
credentialsInvalid.Inc()
_, saveErr := r.setErrorCondition(request, host, metal3v1alpha1.RegistrationError, err.Error())
if saveErr != nil {
Expand Down Expand Up @@ -460,6 +446,13 @@ func (r *ReconcileBareMetalHost) actionDeleting(prov provisioner.Provisioner, in
return deleteComplete{}
}

func (r *ReconcileBareMetalHost) actionUnmanaged(prov provisioner.Provisioner, info *reconcileInfo) actionResult {
if info.host.HasBMCDetails() {
return actionComplete{}
}
return actionContinueNoWrite{actionContinue{unmanagedRetryDelay}}
}

// Test the credentials by connecting to the management controller.
func (r *ReconcileBareMetalHost) actionRegistering(prov provisioner.Provisioner, info *reconcileInfo) actionResult {
info.log.Info("registering and validating access to management controller",
Expand Down
24 changes: 13 additions & 11 deletions pkg/controller/baremetalhost/baremetalhost_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -630,25 +630,21 @@ func TestUpdateGoodCredentialsOnBadSecret(t *testing.T) {
// TestDiscoveredHost ensures that a host without a BMC IP and
// credentials is placed into the "discovered" state.
func TestDiscoveredHost(t *testing.T) {
noAddress := newHost("missing-bmc-address",
&metal3v1alpha1.BareMetalHostSpec{
BMC: metal3v1alpha1.BMCDetails{
Address: "",
CredentialsName: "bmc-creds-valid",
},
})
r := newTestReconciler(noAddress)
waitForStatus(t, r, noAddress, metal3v1alpha1.OperationalStatusDiscovered)

noAddressOrSecret := newHost("missing-bmc-address",
&metal3v1alpha1.BareMetalHostSpec{
BMC: metal3v1alpha1.BMCDetails{
Address: "",
CredentialsName: "",
},
})
r = newTestReconciler(noAddressOrSecret)
r := newTestReconciler(noAddressOrSecret)
waitForStatus(t, r, noAddressOrSecret, metal3v1alpha1.OperationalStatusDiscovered)
if noAddressOrSecret.Status.ErrorType != "" {
t.Errorf("Unexpected error type %s", noAddressOrSecret.Status.ErrorType)
}
if noAddressOrSecret.Status.ErrorMessage != "" {
t.Errorf("Unexpected error message %s", noAddressOrSecret.Status.ErrorMessage)
}
}

// TestMissingBMCParameters ensures that a host that is missing some
Expand Down Expand Up @@ -737,6 +733,12 @@ func TestMissingBMCParameters(t *testing.T) {
t.Run(tc.Scenario, func(t *testing.T) {
r := newTestReconciler(tc.Secret, tc.Host)
waitForError(t, r, tc.Host)
if tc.Host.Status.OperationalStatus != metal3v1alpha1.OperationalStatusError {
t.Errorf("Unexpected operational status %s", tc.Host.Status.OperationalStatus)
}
if tc.Host.Status.ErrorType != metal3v1alpha1.RegistrationError {
t.Errorf("Unexpected error type %s", tc.Host.Status.ErrorType)
}
})
}
}
Expand Down
23 changes: 18 additions & 5 deletions pkg/controller/baremetalhost/host_state_machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ type stateHandler func(*reconcileInfo) actionResult
func (hsm *hostStateMachine) handlers() map[metal3v1alpha1.ProvisioningState]stateHandler {
return map[metal3v1alpha1.ProvisioningState]stateHandler{
metal3v1alpha1.StateNone: hsm.handleNone,
metal3v1alpha1.StateUnmanaged: hsm.handleUnmanaged,
metal3v1alpha1.StateRegistering: hsm.handleRegistering,
metal3v1alpha1.StateRegistrationError: hsm.handleRegistrationError,
metal3v1alpha1.StateInspecting: hsm.handleInspecting,
Expand Down Expand Up @@ -140,7 +141,7 @@ func (hsm *hostStateMachine) shouldInitiateRegister(info *reconcileInfo) bool {
switch hsm.NextState {
default:
changeState = !hsm.Host.Status.GoodCredentials.Match(*info.bmcCredsSecret)
case metal3v1alpha1.StateNone:
case metal3v1alpha1.StateNone, metal3v1alpha1.StateUnmanaged:
case metal3v1alpha1.StateRegistering, metal3v1alpha1.StateRegistrationError:
case metal3v1alpha1.StateDeleting:
}
Expand All @@ -152,13 +153,25 @@ func (hsm *hostStateMachine) shouldInitiateRegister(info *reconcileInfo) bool {
}

func (hsm *hostStateMachine) handleNone(info *reconcileInfo) actionResult {
// Running the state machine at all means we have successfully validated
// the BMC credentials once, so we can move to the Registering state.
hsm.Host.ClearError()
hsm.NextState = metal3v1alpha1.StateRegistering
// No state is set, so immediately move to either Registering or Unmanaged
if hsm.Host.HasBMCDetails() {
hsm.NextState = metal3v1alpha1.StateRegistering
} else {
info.publishEvent("Discovered", "Discovered host with no BMC details")
hsm.Host.SetOperationalStatus(metal3v1alpha1.OperationalStatusDiscovered)
hsm.NextState = metal3v1alpha1.StateUnmanaged
}
return actionComplete{}
}

func (hsm *hostStateMachine) handleUnmanaged(info *reconcileInfo) actionResult {
actResult := hsm.Reconciler.actionUnmanaged(hsm.Provisioner, info)
if _, complete := actResult.(actionComplete); complete {
hsm.NextState = metal3v1alpha1.StateRegistering
}
return actResult
}

func (hsm *hostStateMachine) handleRegistering(info *reconcileInfo) actionResult {
actResult := hsm.Reconciler.actionRegistering(hsm.Provisioner, info)

Expand Down