diff --git a/docs/BaremetalHost_ProvisioningState.dot b/docs/BaremetalHost_ProvisioningState.dot index c7e6f55278..63973a317e 100644 --- a/docs/BaremetalHost_ProvisioningState.dot +++ b/docs/BaremetalHost_ProvisioningState.dot @@ -1,11 +1,11 @@ digraph BaremetalHost { Created [shape=house] - Created -> Discovered [label="BMC.* == \"\""] + Created -> Unmanaged [label="BMC.* == \"\""] Created -> Registering [label="BMC.* != \"\""] - Discovered [shape=doublecircle] - Discovered -> Registering [label="BMC.* != \"\""] - Discovered -> Deleting1 [label="!DeletionTimestamp.IsZero()"] + Unmanaged [shape=doublecircle] + Unmanaged -> Registering [label="BMC.* != \"\""] + Unmanaged -> Deleting1 [label="!DeletionTimestamp.IsZero()"] Deleting1 [shape=point] diff --git a/docs/BaremetalHost_ProvisioningState.png b/docs/BaremetalHost_ProvisioningState.png index 7bd14a9fff..f2424b74fd 100644 Binary files a/docs/BaremetalHost_ProvisioningState.png and b/docs/BaremetalHost_ProvisioningState.png differ diff --git a/docs/baremetalhost-states.md b/docs/baremetalhost-states.md index 639875e963..9cffc2b154 100644 --- a/docs/baremetalhost-states.md +++ b/docs/baremetalhost-states.md @@ -11,10 +11,10 @@ Newly created hosts move immediately to Discovered or Registering. No host stays in the Created state while the operator is working properly. -## Discovered +## Unmanaged -A Discovered host is missing either the BMC address or credentials -secret name, and does not have enough information to access the BMC +An Unmanaged host is missing both the BMC address and credentials +secret name, and does not have any information to access the BMC for registration. ## Externally Provisioned diff --git a/pkg/apis/metal3/v1alpha1/baremetalhost_types.go b/pkg/apis/metal3/v1alpha1/baremetalhost_types.go index e360ed40b5..70522a726f 100644 --- a/pkg/apis/metal3/v1alpha1/baremetalhost_types.go +++ b/pkg/apis/metal3/v1alpha1/baremetalhost_types.go @@ -122,6 +122,10 @@ const ( // StateNone means the state is unknown StateNone ProvisioningState = "" + // StateUnmanaged means there is insufficient information available to + // register the host + StateUnmanaged ProvisioningState = "unmanaged" + // StateRegistrationError means there was an error registering the // host with the backend StateRegistrationError ProvisioningState = "registration error" @@ -637,6 +641,11 @@ func (host *BareMetalHost) getLabel(name string) string { return host.Labels[name] } +// HasBMCDetails returns true if the BMC details are set +func (host *BareMetalHost) HasBMCDetails() bool { + return host.Spec.BMC.Address != "" || host.Spec.BMC.CredentialsName != "" +} + // NeedsHardwareProfile returns true if the profile is not set func (host *BareMetalHost) NeedsHardwareProfile() bool { return host.Status.HardwareProfile == "" diff --git a/pkg/controller/baremetalhost/baremetalhost_controller.go b/pkg/controller/baremetalhost/baremetalhost_controller.go index 101bea8320..cbc94fbd09 100644 --- a/pkg/controller/baremetalhost/baremetalhost_controller.go +++ b/pkg/controller/baremetalhost/baremetalhost_controller.go @@ -43,6 +43,7 @@ import ( const ( hostErrorRetryDelay = time.Second * 10 pauseRetryDelay = time.Second * 30 + unmanagedRetryDelay = time.Minute * 10 rebootAnnotationPrefix = "reboot.metal3.io" ) @@ -247,14 +248,21 @@ func (r *ReconcileBareMetalHost) Reconcile(request reconcile.Request) (result re // Retrieve the BMC details from the host spec and validate host // BMC details and build the credentials for talking to the // management controller. - bmcCreds, bmcCredsSecret, err := r.buildAndValidateBMCCredentials(request, host) - if err != nil || bmcCreds == nil { - if !host.DeletionTimestamp.IsZero() { - // If we are in the process of deletion, try with empty credentials - bmcCreds = &bmc.Credentials{} - bmcCredsSecret = &corev1.Secret{} - } else { - return r.credentialsErrorResult(err, request, host) + var bmcCreds *bmc.Credentials + var bmcCredsSecret *corev1.Secret + switch host.Status.Provisioning.State { + case metal3v1alpha1.StateNone, metal3v1alpha1.StateUnmanaged: + bmcCreds = &bmc.Credentials{} + default: + bmcCreds, bmcCredsSecret, err = r.buildAndValidateBMCCredentials(request, host) + if err != nil || bmcCreds == nil { + if !host.DeletionTimestamp.IsZero() { + // If we are in the process of deletion, try with empty credentials + bmcCreds = &bmc.Credentials{} + bmcCredsSecret = &corev1.Secret{} + } else { + return r.credentialsErrorResult(err, request, host) + } } } @@ -340,30 +348,6 @@ func recordActionFailure(info *reconcileInfo, errorType metal3v1alpha1.ErrorType func (r *ReconcileBareMetalHost) credentialsErrorResult(err error, request reconcile.Request, host *metal3v1alpha1.BareMetalHost) (reconcile.Result, error) { switch err.(type) { - // We treat an empty bmc address and empty bmc credentials fields as a - // trigger the host needs to be put into a discovered status. We also set - // an error message (but not an error state) on the host so we can understand - // what we may be waiting on. Editing the host to set these values will - // cause the host to be reconciled again so we do not Requeue. - case *EmptyBMCAddressError, *EmptyBMCSecretError: - credentialsInvalid.Inc() - dirty := host.SetOperationalStatus(metal3v1alpha1.OperationalStatusDiscovered) - if dirty { - // Set the host error message directly - // as we cannot use SetErrorCondition which - // overwrites our discovered state - host.Status.ErrorMessage = err.Error() - host.Status.ErrorType = "" - saveErr := r.saveHostStatus(host) - if saveErr != nil { - return reconcile.Result{Requeue: true}, saveErr - } - // Only publish the event if we do not have an error - // after saving so that we only publish one time. - r.publishEvent(request, - host.NewEvent("Discovered", fmt.Sprintf("Discovered host with unusable BMC details: %s", err.Error()))) - } - return reconcile.Result{}, nil // In the event a credential secret is defined, but we cannot find it // we requeue the host as we will not know if they create the secret // at some point in the future. @@ -379,12 +363,14 @@ func (r *ReconcileBareMetalHost) credentialsErrorResult(err error, request recon r.publishEvent(request, host.NewEvent("BMCCredentialError", err.Error())) } return reconcile.Result{Requeue: true, RequeueAfter: hostErrorRetryDelay}, nil - // If we have found the secret but it is missing the required fields - // or the BMC address is defined but malformed we set the + // If a managed Host is missing a BMC address or secret, or + // we have found the secret but it is missing the required fields, + // or the BMC address is defined but malformed, we set the // host into an error state but we do not Requeue it // as fixing the secret or the host BMC info will trigger // the host to be reconciled again - case *bmc.CredentialsValidationError, *bmc.UnknownBMCTypeError: + case *EmptyBMCAddressError, *EmptyBMCSecretError, + *bmc.CredentialsValidationError, *bmc.UnknownBMCTypeError: credentialsInvalid.Inc() _, saveErr := r.setErrorCondition(request, host, metal3v1alpha1.RegistrationError, err.Error()) if saveErr != nil { @@ -460,6 +446,13 @@ func (r *ReconcileBareMetalHost) actionDeleting(prov provisioner.Provisioner, in return deleteComplete{} } +func (r *ReconcileBareMetalHost) actionUnmanaged(prov provisioner.Provisioner, info *reconcileInfo) actionResult { + if info.host.HasBMCDetails() { + return actionComplete{} + } + return actionContinueNoWrite{actionContinue{unmanagedRetryDelay}} +} + // Test the credentials by connecting to the management controller. func (r *ReconcileBareMetalHost) actionRegistering(prov provisioner.Provisioner, info *reconcileInfo) actionResult { info.log.Info("registering and validating access to management controller", diff --git a/pkg/controller/baremetalhost/baremetalhost_controller_test.go b/pkg/controller/baremetalhost/baremetalhost_controller_test.go index df47234bf6..ed6ca1a958 100644 --- a/pkg/controller/baremetalhost/baremetalhost_controller_test.go +++ b/pkg/controller/baremetalhost/baremetalhost_controller_test.go @@ -630,16 +630,6 @@ func TestUpdateGoodCredentialsOnBadSecret(t *testing.T) { // TestDiscoveredHost ensures that a host without a BMC IP and // credentials is placed into the "discovered" state. func TestDiscoveredHost(t *testing.T) { - noAddress := newHost("missing-bmc-address", - &metal3v1alpha1.BareMetalHostSpec{ - BMC: metal3v1alpha1.BMCDetails{ - Address: "", - CredentialsName: "bmc-creds-valid", - }, - }) - r := newTestReconciler(noAddress) - waitForStatus(t, r, noAddress, metal3v1alpha1.OperationalStatusDiscovered) - noAddressOrSecret := newHost("missing-bmc-address", &metal3v1alpha1.BareMetalHostSpec{ BMC: metal3v1alpha1.BMCDetails{ @@ -647,8 +637,14 @@ func TestDiscoveredHost(t *testing.T) { CredentialsName: "", }, }) - r = newTestReconciler(noAddressOrSecret) + r := newTestReconciler(noAddressOrSecret) waitForStatus(t, r, noAddressOrSecret, metal3v1alpha1.OperationalStatusDiscovered) + if noAddressOrSecret.Status.ErrorType != "" { + t.Errorf("Unexpected error type %s", noAddressOrSecret.Status.ErrorType) + } + if noAddressOrSecret.Status.ErrorMessage != "" { + t.Errorf("Unexpected error message %s", noAddressOrSecret.Status.ErrorMessage) + } } // TestMissingBMCParameters ensures that a host that is missing some @@ -737,6 +733,12 @@ func TestMissingBMCParameters(t *testing.T) { t.Run(tc.Scenario, func(t *testing.T) { r := newTestReconciler(tc.Secret, tc.Host) waitForError(t, r, tc.Host) + if tc.Host.Status.OperationalStatus != metal3v1alpha1.OperationalStatusError { + t.Errorf("Unexpected operational status %s", tc.Host.Status.OperationalStatus) + } + if tc.Host.Status.ErrorType != metal3v1alpha1.RegistrationError { + t.Errorf("Unexpected error type %s", tc.Host.Status.ErrorType) + } }) } } diff --git a/pkg/controller/baremetalhost/host_state_machine.go b/pkg/controller/baremetalhost/host_state_machine.go index 891ef31f68..6691056105 100644 --- a/pkg/controller/baremetalhost/host_state_machine.go +++ b/pkg/controller/baremetalhost/host_state_machine.go @@ -36,6 +36,7 @@ type stateHandler func(*reconcileInfo) actionResult func (hsm *hostStateMachine) handlers() map[metal3v1alpha1.ProvisioningState]stateHandler { return map[metal3v1alpha1.ProvisioningState]stateHandler{ metal3v1alpha1.StateNone: hsm.handleNone, + metal3v1alpha1.StateUnmanaged: hsm.handleUnmanaged, metal3v1alpha1.StateRegistering: hsm.handleRegistering, metal3v1alpha1.StateRegistrationError: hsm.handleRegistrationError, metal3v1alpha1.StateInspecting: hsm.handleInspecting, @@ -140,7 +141,7 @@ func (hsm *hostStateMachine) shouldInitiateRegister(info *reconcileInfo) bool { switch hsm.NextState { default: changeState = !hsm.Host.Status.GoodCredentials.Match(*info.bmcCredsSecret) - case metal3v1alpha1.StateNone: + case metal3v1alpha1.StateNone, metal3v1alpha1.StateUnmanaged: case metal3v1alpha1.StateRegistering, metal3v1alpha1.StateRegistrationError: case metal3v1alpha1.StateDeleting: } @@ -152,13 +153,25 @@ func (hsm *hostStateMachine) shouldInitiateRegister(info *reconcileInfo) bool { } func (hsm *hostStateMachine) handleNone(info *reconcileInfo) actionResult { - // Running the state machine at all means we have successfully validated - // the BMC credentials once, so we can move to the Registering state. - hsm.Host.ClearError() - hsm.NextState = metal3v1alpha1.StateRegistering + // No state is set, so immediately move to either Registering or Unmanaged + if hsm.Host.HasBMCDetails() { + hsm.NextState = metal3v1alpha1.StateRegistering + } else { + info.publishEvent("Discovered", "Discovered host with no BMC details") + hsm.Host.SetOperationalStatus(metal3v1alpha1.OperationalStatusDiscovered) + hsm.NextState = metal3v1alpha1.StateUnmanaged + } return actionComplete{} } +func (hsm *hostStateMachine) handleUnmanaged(info *reconcileInfo) actionResult { + actResult := hsm.Reconciler.actionUnmanaged(hsm.Provisioner, info) + if _, complete := actResult.(actionComplete); complete { + hsm.NextState = metal3v1alpha1.StateRegistering + } + return actResult +} + func (hsm *hostStateMachine) handleRegistering(info *reconcileInfo) actionResult { actResult := hsm.Reconciler.actionRegistering(hsm.Provisioner, info)