diff --git a/api/v1alpha3/conversion.go b/api/v1alpha3/conversion.go index ee0668b7f8..ce83007284 100644 --- a/api/v1alpha3/conversion.go +++ b/api/v1alpha3/conversion.go @@ -112,6 +112,7 @@ func (src *Metal3MachineTemplate) ConvertTo(dstRaw conversion.Hub) error { dst.Spec.Template.Spec.NetworkData = restored.Spec.Template.Spec.NetworkData dst.Spec.Template.Spec.DataTemplate = restored.Spec.Template.Spec.DataTemplate dst.Spec.Template.Spec.Image = restored.Spec.Template.Spec.Image + dst.Spec.NodeReuse = restored.Spec.NodeReuse return nil } @@ -165,3 +166,11 @@ func Convert_v1alpha4_Image_To_v1alpha3_Image(in *v1alpha4.Image, out *Image, s return nil } + +func Convert_v1alpha4_Metal3MachineTemplateSpec_To_v1alpha3_Metal3MachineTemplateSpec(in *v1alpha4.Metal3MachineTemplateSpec, out *Metal3MachineTemplateSpec, s apiconversion.Scope) error { + if err := autoConvert_v1alpha4_Metal3MachineTemplateSpec_To_v1alpha3_Metal3MachineTemplateSpec(in, out, s); err != nil { + return err + } + + return nil +} diff --git a/api/v1alpha3/zz_generated.conversion.go b/api/v1alpha3/zz_generated.conversion.go index 4692cbfc95..d65298d3d9 100644 --- a/api/v1alpha3/zz_generated.conversion.go +++ b/api/v1alpha3/zz_generated.conversion.go @@ -180,11 +180,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*v1alpha4.Metal3MachineTemplateSpec)(nil), (*Metal3MachineTemplateSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha4_Metal3MachineTemplateSpec_To_v1alpha3_Metal3MachineTemplateSpec(a.(*v1alpha4.Metal3MachineTemplateSpec), b.(*Metal3MachineTemplateSpec), scope) - }); err != nil { - return err - } if err := s.AddConversionFunc((*v1alpha4.Image)(nil), (*Image)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha4_Image_To_v1alpha3_Image(a.(*v1alpha4.Image), b.(*Image), scope) }); err != nil { @@ -200,6 +195,11 @@ func RegisterConversions(s 
*runtime.Scheme) error { }); err != nil { return err } + if err := s.AddConversionFunc((*v1alpha4.Metal3MachineTemplateSpec)(nil), (*Metal3MachineTemplateSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha4_Metal3MachineTemplateSpec_To_v1alpha3_Metal3MachineTemplateSpec(a.(*v1alpha4.Metal3MachineTemplateSpec), b.(*Metal3MachineTemplateSpec), scope) + }); err != nil { + return err + } return nil } @@ -641,10 +641,6 @@ func autoConvert_v1alpha4_Metal3MachineTemplateSpec_To_v1alpha3_Metal3MachineTem if err := Convert_v1alpha4_Metal3MachineTemplateResource_To_v1alpha3_Metal3MachineTemplateResource(&in.Template, &out.Template, s); err != nil { return err } + // WARNING: in.NodeReuse requires manual conversion: does not exist in peer-type return nil } - -// Convert_v1alpha4_Metal3MachineTemplateSpec_To_v1alpha3_Metal3MachineTemplateSpec is an autogenerated conversion function. -func Convert_v1alpha4_Metal3MachineTemplateSpec_To_v1alpha3_Metal3MachineTemplateSpec(in *v1alpha4.Metal3MachineTemplateSpec, out *Metal3MachineTemplateSpec, s conversion.Scope) error { - return autoConvert_v1alpha4_Metal3MachineTemplateSpec_To_v1alpha3_Metal3MachineTemplateSpec(in, out, s) -} diff --git a/api/v1alpha4/metal3machinetemplate_types.go b/api/v1alpha4/metal3machinetemplate_types.go index dd6fd3503f..7f16140fdd 100644 --- a/api/v1alpha4/metal3machinetemplate_types.go +++ b/api/v1alpha4/metal3machinetemplate_types.go @@ -23,6 +23,12 @@ import ( // Metal3MachineTemplateSpec defines the desired state of Metal3MachineTemplate type Metal3MachineTemplateSpec struct { Template Metal3MachineTemplateResource `json:"template"` + + // When set to True, CAPM3 Machine controller will + // pick the same pool of BMHs' that were released during the upgrade operation. 
+ // +kubebuilder:default=false + // +optional + NodeReuse bool `json:"nodeReuse"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/baremetal/metal3data_manager.go b/baremetal/metal3data_manager.go index a9a7ee1386..930b453401 100644 --- a/baremetal/metal3data_manager.go +++ b/baremetal/metal3data_manager.go @@ -270,7 +270,7 @@ func (m *DataManager) createSecrets(ctx context.Context) error { return nil } -// CreateSecrets creates the secret if they do not exist. +// ReleaseLeases releases addresses from pool. func (m *DataManager) ReleaseLeases(ctx context.Context) error { if m.Data.Spec.Template.Name == "" { return nil diff --git a/baremetal/metal3machine_manager.go b/baremetal/metal3machine_manager.go index 8bd6f31676..cd9ff43b70 100644 --- a/baremetal/metal3machine_manager.go +++ b/baremetal/metal3machine_manager.go @@ -43,6 +43,7 @@ import ( "k8s.io/client-go/tools/cache" "k8s.io/utils/pointer" capi "sigs.k8s.io/cluster-api/api/v1alpha3" + ctplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3" capierrors "sigs.k8s.io/cluster-api/errors" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/patch" @@ -50,11 +51,13 @@ import ( ) const ( - // ProviderName is exported + // ProviderName is exported. ProviderName = "metal3" // HostAnnotation is the key for an annotation that should go on a Metal3Machine to // reference what BareMetalHost it corresponds to. - HostAnnotation = "metal3.io/BareMetalHost" + HostAnnotation = "metal3.io/BareMetalHost" + // nodeReuseLabelName is the label set on BMH when node reuse feature is enabled. 
+ nodeReuseLabelName = "infrastructure.cluster.x-k8s.io/node-reuse" requeueAfter = time.Second * 30 bmRoleControlPlane = "control-plane" bmRoleNode = "node" @@ -86,11 +89,13 @@ type MachineManagerInterface interface { type MachineManager struct { client client.Client - Cluster *capi.Cluster - Metal3Cluster *capm3.Metal3Cluster - Machine *capi.Machine - Metal3Machine *capm3.Metal3Machine - Log logr.Logger + Cluster *capi.Cluster + Metal3Cluster *capm3.Metal3Cluster + Machine *capi.Machine + Metal3Machine *capm3.Metal3Machine + Metal3MachineTemplate *capm3.Metal3MachineTemplate + MachineSetList []*capi.MachineSet + Log logr.Logger } // NewMachineManager returns a new helper for managing a machine @@ -110,6 +115,18 @@ func NewMachineManager(client client.Client, }, nil } +func NewMachineSetManager(client client.Client, + machine *capi.Machine, machinesetlist []*capi.MachineSet, + machineset *capi.MachineSet, machineLog logr.Logger) (*MachineManager, error) { + + return &MachineManager{ + client: client, + Machine: machine, + MachineSetList: machinesetlist, + Log: machineLog, + }, nil +} + // SetFinalizer sets finalizer func (m *MachineManager) SetFinalizer() { // If the Metal3Machine doesn't have finalizer, add it. @@ -431,6 +448,7 @@ func (m *MachineManager) getUserData(ctx context.Context, host *bmh.BareMetalHos return nil } +// createSecret creates secret for bootstrap func (m *MachineManager) createSecret(ctx context.Context, name string, namespace string, content map[string][]byte, ) error { @@ -552,6 +570,64 @@ func (m *MachineManager) Delete(ctx context.Context) error { return &RequeueAfterError{RequeueAfter: requeueAfter} } + // Fetch corresponding Metal3MachineTemplate, to see if NodeReuse + // feature is enabled. If set to true, check the machine role. In case + // machine role is ControlPlane, set nodeReuseLabelName to KubeadmControlPlane + // name, otherwise to MachineDeployment name. 
+ m.Log.Info("Getting Metal3MachineTemplate") + m3mt := &capm3.Metal3MachineTemplate{} + if m.Metal3Machine == nil { + return errors.New("Metal3Machine associated with Metal3MachineTemplate is not found") + } + if m.hasTemplateAnnotation() { + m3mtKey := client.ObjectKey{ + Name: m.Metal3Machine.ObjectMeta.GetAnnotations()[capi.TemplateClonedFromNameAnnotation], + Namespace: m.Metal3Machine.Namespace, + } + if err := m.client.Get(ctx, m3mtKey, m3mt); err != nil { + // we are here, because while normal deprovisioning, Metal3MachineTemplate will be deleted first + // and we can't get it even though Metal3Machine has reference to it. We consider it nil and move + // forward with normal deprovisioning. + m3mt = nil + m.Log.Info("Metal3MachineTemplate associated with Metal3Machine is deleted") + } else { + // in case of upgrading, Metal3MachineTemplate will not be deleted and we can fetch it, + // in order to check for node reuse feature in the next step. + m.Log.Info("Found Metal3machineTemplate", "metal3machinetemplate", m3mtKey.Name) + } + } + if m3mt != nil { + if m3mt.Spec.NodeReuse { + if host.Labels == nil { + host.Labels = make(map[string]string) + } + // Check if machine is ControlPlane + if m.isControlPlane() { + // Fetch KubeadmControlPlane name for controlplane machine + m.Log.Info("Fetch KubeadmControlPlane name") + kcpName, err := m.getKubeadmControlPlaneName(ctx) + if err != nil { + return err + } + m.Log.Info("Fetched KubeadmControlPlane name:", "kubeadmcontrolplane", kcpName) + // Set the nodeReuseLabelName to KubeadmControlPlane name on the host + m.Log.Info("Setting nodeReuseLabelName in BaremetalHost to fetched KubeadmControlPlane name") + host.Labels[nodeReuseLabelName] = kcpName + } else { + // Fetch MachineDeployment name for worker machine + m.Log.Info("Fetch MachineDeployment name") + mdName, err := m.getMachineDeploymentName(ctx) + if err != nil { + return err + } + m.Log.Info("Fetched MachineDeployment name:", "machinedeployment", mdName) + // 
Set the nodeReuseLabelName to MachineDeployment name + m.Log.Info("Setting nodeReuseLabelName in BaremetalHost to fetched MachineDeployment name") + host.Labels[nodeReuseLabelName] = mdName + } + } + } + host.Spec.ConsumerRef = nil // Delete created secret, if data was set without DataSecretName but with @@ -775,6 +851,7 @@ func (m *MachineManager) chooseHost(ctx context.Context) (*bmh.BareMetalHost, *p labelSelector = labelSelector.Add(reqs...) availableHosts := []*bmh.BareMetalHost{} + availableHostsWithNodeReuse := []*bmh.BareMetalHost{} for i, host := range hosts.Items { if host.Spec.ConsumerRef != nil && consumerRefMatches(host.Spec.ConsumerRef, m.Metal3Machine) { @@ -782,7 +859,9 @@ func (m *MachineManager) chooseHost(ctx context.Context) (*bmh.BareMetalHost, *p helper, err := patch.NewHelper(&hosts.Items[i], m.client) return &hosts.Items[i], helper, err } - if host.Spec.ConsumerRef != nil { + if host.Spec.ConsumerRef != nil || + (m.nodeReuseLabelExists(ctx, &host) && + !m.nodeReuseLabelMatches(ctx, &host)) { continue } if host.GetDeletionTimestamp() != nil { @@ -809,20 +888,56 @@ func (m *MachineManager) chooseHost(ctx context.Context) (*bmh.BareMetalHost, *p } if labelSelector.Matches(labels.Set(host.ObjectMeta.Labels)) { - m.Log.Info("Host matched hostSelector for Metal3Machine", "host", host.Name) - availableHosts = append(availableHosts, &hosts.Items[i]) + if m.nodeReuseLabelExists(ctx, &host) && m.nodeReuseLabelMatches(ctx, &host) { + m.Log.Info("Found host with matching nodeReuseLabelName", "host", host.Name) + availableHostsWithNodeReuse = append(availableHostsWithNodeReuse, &hosts.Items[i]) + } else if !m.nodeReuseLabelExists(ctx, &host) { + m.Log.Info("Host matched hostSelector for Metal3Machine", "host", host.Name) + availableHosts = append(availableHosts, &hosts.Items[i]) + } } else { m.Log.Info("Host did not match hostSelector for Metal3Machine", "host", host.Name) } } + + m.Log.Info(fmt.Sprintf("%d hosts available with nodeReuseLabelName while 
choosing host for Metal3 machine", len(availableHostsWithNodeReuse))) m.Log.Info(fmt.Sprintf("%d hosts available while choosing host for Metal3 machine", len(availableHosts))) - if len(availableHosts) == 0 { + if len(availableHostsWithNodeReuse) == 0 && len(availableHosts) == 0 { return nil, nil, nil } - // choose a host at random from available hosts + // choose a host rand.Seed(time.Now().Unix()) - chosenHost := availableHosts[rand.Intn(len(availableHosts))] + var chosenHost *bmh.BareMetalHost + + // If there are hosts with nodeReuseLabelName: + if len(availableHostsWithNodeReuse) != 0 { + for _, host := range availableHostsWithNodeReuse { + // Build list of hosts in Ready state with nodeReuseLabelName + hostsInReadyStateWithNodeReuse := []*bmh.BareMetalHost{} + // Build list of hosts in any other state than Ready state with nodeReuseLabelName + hostsInNotReadyStateWithNodeReuse := []*bmh.BareMetalHost{} + if host.Status.Provisioning.State == bmh.StateReady { + hostsInReadyStateWithNodeReuse = append(hostsInReadyStateWithNodeReuse, host) + } else { + hostsInNotReadyStateWithNodeReuse = append(hostsInNotReadyStateWithNodeReuse, host) + } + + // If host is found in `Ready` state, pick it + if len(hostsInReadyStateWithNodeReuse) != 0 { + m.Log.Info(fmt.Sprintf("Found %v host(s) with nodeReuseLabelName in Ready state", len(hostsInReadyStateWithNodeReuse))) + chosenHost = hostsInReadyStateWithNodeReuse[rand.Intn(len(hostsInReadyStateWithNodeReuse))] + } else if len(hostsInNotReadyStateWithNodeReuse) != 0 { + m.Log.Info(fmt.Sprintf("Found %v host(s) with nodeReuseLabelName in other state than Ready, requeuing", len(hostsInNotReadyStateWithNodeReuse))) + return nil, nil, &RequeueAfterError{RequeueAfter: requeueAfter} + } + } + } else { + // If there are no hosts with nodeReuseLabelName, fall back + // to the current flow and select hosts randomly. 
+ m.Log.Info(fmt.Sprintf("%d host(s) available, choosing a random host", len(availableHosts))) + chosenHost = availableHosts[rand.Intn(len(availableHosts))] + } helper, err := patch.NewHelper(chosenHost, m.client) return chosenHost, helper, err @@ -846,6 +961,56 @@ func consumerRefMatches(consumer *corev1.ObjectReference, m3machine *capm3.Metal return true } +// nodeReuseLabelMatches returns true if nodeReuseLabelName matches KubeadmControlPlane or MachineDeployment name on the host +func (m *MachineManager) nodeReuseLabelMatches(ctx context.Context, host *bmh.BareMetalHost) bool { + + if host == nil { + return false + } + if host.Labels == nil { + return false + } + if m.isControlPlane() { + kcp, err := m.getKubeadmControlPlaneName(ctx) + if err != nil { + return false + } + if host.Labels[nodeReuseLabelName] == "" { + return false + } + if host.Labels[nodeReuseLabelName] != kcp { + return false + } + return true + } else { + md, err := m.getMachineDeploymentName(ctx) + if err != nil { + return false + } + if host.Labels[nodeReuseLabelName] == "" { + return false + } + if host.Labels[nodeReuseLabelName] != md { + return false + } + return true + } +} + +// nodeReuseLabelExists returns true if host contains nodeReuseLabelName label +func (m *MachineManager) nodeReuseLabelExists(ctx context.Context, host *bmh.BareMetalHost) bool { + + if host == nil { + return false + } + if host.Labels == nil { + return false + } + _, ok := host.Labels[nodeReuseLabelName] + m.Log.Info("nodeReuseLabelName exists on the host") + return ok +} + // getBMCSecret will return the BMCSecret associated with BMH func (m *MachineManager) getBMCSecret(ctx context.Context, host *bmh.BareMetalHost) (*corev1.Secret, error) { @@ -958,6 +1123,17 @@ func (m *MachineManager) setHostConsumerRef(ctx context.Context, host *bmh.BareM } host.OwnerReferences = hostOwnerReferences + // Delete nodeReuseLabelName from host + m.Log.Info("Deleting nodeReuseLabelName from host, if any") + + labels := 
host.GetLabels() + if labels != nil { + if _, ok := labels[nodeReuseLabelName]; ok { + delete(host.Labels, nodeReuseLabelName) + m.Log.Info("Finished deleting nodeReuseLabelName") + } + } + return nil } @@ -996,6 +1172,17 @@ func (m *MachineManager) HasAnnotation() bool { return ok } +// hasTemplateAnnotation makes sure the metal3 machine has infrastructure machine +// annotation that stores the name of the infrastructure template resource. +func (m *MachineManager) hasTemplateAnnotation() bool { + annotations := m.Metal3Machine.ObjectMeta.GetAnnotations() + if annotations == nil { + return false + } + _, ok := annotations[capi.TemplateClonedFromNameAnnotation] + return ok +} + // SetError sets the ErrorMessage and ErrorReason fields on the machine and logs // the message. It assumes the reason is invalid configuration, since that is // currently the only relevant MachineStatusError choice. @@ -1065,6 +1252,7 @@ func (m *MachineManager) nodeAddresses(host *bmh.BareMetalHost) []capi.MachineAd return addrs } +// GetProviderIDAndBMHID returns providerID and bmhID func (m *MachineManager) GetProviderIDAndBMHID() (string, *string) { providerID := m.Metal3Machine.Spec.ProviderID if providerID == nil { @@ -1193,6 +1381,7 @@ func setOwnerRefInList(refList []metav1.OwnerReference, controller bool, return refList, nil } +// findOwnerRefFromList finds OwnerRef to this Metal3 machine func findOwnerRefFromList(refList []metav1.OwnerReference, objType metav1.TypeMeta, objMeta metav1.ObjectMeta, ) (int, error) { @@ -1381,3 +1570,101 @@ func (m *MachineManager) DissociateM3Metadata(ctx context.Context) error { return deleteObject(m.client, ctx, metal3DataClaim) } + +// getKubeadmControlPlaneName retrieves the KubeadmControlPlane object corresponding to the CAPI machine. 
+func (m *MachineManager) getKubeadmControlPlaneName(ctx context.Context) (string, error) {
+	m.Log.Info("Fetching KubeadmControlPlane name")
+	if m.Machine == nil {
+		return "", errors.New("Could not find corresponding machine object")
+	}
+	if m.Machine.ObjectMeta.OwnerReferences == nil {
+		return "", errors.New("Machine owner reference is not populated")
+	}
+	for _, mOwnerRef := range m.Machine.ObjectMeta.OwnerReferences {
+		if mOwnerRef.Kind != "KubeadmControlPlane" {
+			continue
+		}
+		aGV, err := schema.ParseGroupVersion(mOwnerRef.APIVersion)
+		if err != nil {
+			return "", fmt.Errorf("Failed to parse the group and version: %w", err)
+		}
+		if aGV.Group != ctplanev1.GroupVersion.Group {
+			continue
+		}
+		m.Log.Info("Fetched KubeadmControlPlane name", "kubeadmcontrolplane", mOwnerRef.Name)
+		// The "kcp-" prefix keeps this value distinct from the "md-" prefixed MachineDeployment
+		// name, so a KubeadmControlPlane and a MachineDeployment sharing a name cannot collide.
+		return "kcp-" + mOwnerRef.Name, nil
+	}
+	return "", errors.New("KubeadmControlPlane name is not found")
+}
+
+// getMachineDeploymentName returns the "md-" prefixed name of the MachineDeployment owning the Machine's MachineSet.
+func (m *MachineManager) getMachineDeploymentName(ctx context.Context) (string, error) {
+	m.Log.Info("Fetching MachineDeployment name")
+
+	// The MachineDeployment is looked up through the owner references of the Machine's MachineSet.
+	m.Log.Info("Fetching MachineSet first to find corresponding MachineDeployment later")
+
+	machineSet, err := m.getMachineSet(ctx)
+	if err != nil {
+		return "", err
+	}
+	if machineSet.ObjectMeta.OwnerReferences == nil {
+		return "", errors.New("Machineset owner reference is not populated")
+	}
+	for _, msOwnerRef := range machineSet.ObjectMeta.OwnerReferences {
+		if msOwnerRef.Kind != "MachineDeployment" {
+			continue
+		}
+		aGV, err := schema.ParseGroupVersion(msOwnerRef.APIVersion)
+		if err != nil {
+			return "", fmt.Errorf("Failed to parse the group and version: %w", err)
+		}
+		if aGV.Group != capi.GroupVersion.Group {
+			continue
+		}
+		m.Log.Info("Fetched MachineDeployment name", "machinedeployment", msOwnerRef.Name)
+		// The "md-" prefix keeps this value distinct from the "kcp-" prefixed KubeadmControlPlane
+		// name, so a MachineDeployment and a KubeadmControlPlane sharing a name cannot collide.
+		return "md-" + msOwnerRef.Name, nil
+	}
+	return "", errors.New("MachineDeployment name is not found")
+}
+
+// getMachineSet returns the MachineSet in the Machine's namespace that one of the Machine's owner references points to.
+func (m *MachineManager) getMachineSet(ctx context.Context) (*capi.MachineSet, error) {
+	m.Log.Info("Fetching MachineSet name")
+	// Validate m.Machine first so no helper below can run against a nil Machine.
+	if m.Machine == nil {
+		return nil, errors.New("Could not find corresponding machine object")
+	}
+	if m.isControlPlane() {
+		return nil, errors.New("Machine is controlplane, MachineSet can not be associated with it")
+	}
+	if m.Machine.ObjectMeta.OwnerReferences == nil {
+		return nil, errors.New("Machine owner reference is not populated")
+	}
+	machineSets := &capi.MachineSetList{}
+	if err := m.client.List(ctx, machineSets, client.InNamespace(m.Machine.Namespace)); err != nil {
+		return nil, err
+	}
+
+	// Match an owner reference on Kind, APIVersion and Name. NOTE(review): assumes listed items carry TypeMeta - confirm.
+	for index := range machineSets.Items {
+		machineset := &machineSets.Items[index]
+		for _, mOwnerRef := range m.Machine.ObjectMeta.OwnerReferences {
+			if mOwnerRef.Kind != machineset.Kind {
+				continue
+			}
+			if mOwnerRef.APIVersion != machineset.APIVersion {
+				continue
+			}
+			if mOwnerRef.Name == machineset.Name {
+				m.Log.Info("Found MachineSet corresponding to machine", "machineset", machineset.Name)
+				return machineset, nil
+			}
+		}
+	}
+	return nil, errors.New("MachineSet is not found")
+}
diff --git a/baremetal/metal3machine_manager_test.go b/baremetal/metal3machine_manager_test.go
index 424dedb665..e797bca6c2 100644
--- a/baremetal/metal3machine_manager_test.go
+++ b/baremetal/metal3machine_manager_test.go
@@ -41,6 +41,7 @@ import (
 	"k8s.io/klog/klogr"
 	"k8s.io/utils/pointer"
 	capi "sigs.k8s.io/cluster-api/api/v1alpha3"
+	ctplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3"
 	capierrors "sigs.k8s.io/cluster-api/errors"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake"
@@ -52,6 +53,7 @@ const (
 	testUserDataSecretName    = "worker-user-data"
 	testMetaDataSecretName    = "worker-metadata"
 	testNetworkDataSecretName = "worker-network-data"
+	kcpName                   = "kcp-pool1"
 )
 
var ProviderID = "metal3://12345ID6789" @@ -267,6 +269,59 @@ func m3mObjectMetaNoAnnotations() *metav1.ObjectMeta { } } +func machineOwnerRefToMachineSet() *capi.Machine { + return &capi.Machine{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: capi.GroupVersion.String(), + Kind: "MachineSet", + Name: "test1", + }, + }, + }, + } +} + +func machineSetsList() []*capi.MachineSet { + return []*capi.MachineSet{ + { + TypeMeta: metav1.TypeMeta{ + APIVersion: capi.GroupVersion.String(), + Kind: "MachineSet", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test1", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: capi.GroupVersion.String(), + Kind: "MachineDeployment", + Name: "test1", + }, + }, + }, + }, + { + TypeMeta: metav1.TypeMeta{ + APIVersion: capi.GroupVersion.String(), + Kind: "MachineSet", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test2", + }, + }, + { + TypeMeta: metav1.TypeMeta{ + APIVersion: capi.GroupVersion.String(), + Kind: "MachineSet", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test3", + }, + }, + } +} + func bmhPowerStatus() *bmh.BareMetalHostStatus { return &bmh.BareMetalHostStatus{ Provisioning: bmh.ProvisionStatus{ @@ -503,7 +558,20 @@ var _ = Describe("Metal3Machine manager", func() { Annotations: map[string]string{capm3.UnhealthyAnnotation: "unhealthy"}, }, } - + hostWithNodeReuseLabelSetToKCP := bmh.BareMetalHost{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hostWithNodeReuseLabelSetToKCP", + Namespace: "myns", + Labels: map[string]string{nodeReuseLabelName: "kcp-pool1"}, + }, + } + hostWithNodeReuseLabelSetToMD := bmh.BareMetalHost{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hostWithNodeReuseLabelSetToMD", + Namespace: "myns", + Labels: map[string]string{nodeReuseLabelName: "md-pool1"}, + }, + } m3mconfig, infrastructureRef := newConfig("", map[string]string{}, []capm3.HostSelectorRequirement{}, ) @@ -564,6 +632,18 @@ var _ = Describe("Metal3Machine manager", func() { M3Machine: 
m3mconfig2, ExpectedHostName: host2.Name, }), + Entry("Pick hostWithNodeReuseLabelSetToKCP, which has a matching nodeReuseLabelName", testCaseChooseHost{ + Machine: newMachine("machine1", "", infrastructureRef2), + Hosts: []runtime.Object{&hostWithNodeReuseLabelSetToKCP, &host3, &host2}, + M3Machine: m3mconfig2, + ExpectedHostName: hostWithNodeReuseLabelSetToKCP.Name, + }), + Entry("Pick hostWithNodeReuseLabelSetToMD, which has a matching nodeReuseLabelName", testCaseChooseHost{ + Machine: newMachine("machine1", "", infrastructureRef2), + Hosts: []runtime.Object{&hostWithNodeReuseLabelSetToMD, &host3, &host2}, + M3Machine: m3mconfig2, + ExpectedHostName: hostWithNodeReuseLabelSetToMD.Name, + }), Entry("Ignore discoveredHost and pick host2, which lacks a ConsumerRef", testCaseChooseHost{ Machine: newMachine("machine1", "", infrastructureRef2), @@ -602,6 +682,14 @@ var _ = Describe("Metal3Machine manager", func() { ExpectedHostName: hostWithLabel.Name, }, ), + Entry("Choose hosts with a nodeReuseLabelName set to KCP, even without a label selector", + testCaseChooseHost{ + Machine: newMachine("machine1", "", infrastructureRef), + Hosts: []runtime.Object{&hostWithNodeReuseLabelSetToKCP}, + M3Machine: m3mconfig, + ExpectedHostName: hostWithNodeReuseLabelSetToKCP.Name, + }, + ), Entry("Choose the host with the right label", testCaseChooseHost{ Machine: newMachine("machine1", "", infrastructureRef2), Hosts: []runtime.Object{&hostWithLabel, &host2}, @@ -833,11 +921,12 @@ var _ = Describe("Metal3Machine manager", func() { ) type testCaseSetHostSpec struct { - UserDataNamespace string - ExpectedUserDataNamespace string - Host *bmh.BareMetalHost - ExpectedImage *bmh.Image - ExpectUserData bool + UserDataNamespace string + ExpectedUserDataNamespace string + Host *bmh.BareMetalHost + ExpectedImage *bmh.Image + ExpectUserData bool + expectNodeReuseLabelDeleted bool } DescribeTable("Test SetHostSpec", @@ -956,6 +1045,10 @@ var _ = Describe("Metal3Machine manager", func() { 
Expect(tc.Host.Spec.ConsumerRef.Kind).To(Equal("Metal3Machine")) _, err = machineMgr.FindOwnerRef(tc.Host.OwnerReferences) Expect(err).NotTo(HaveOccurred()) + + if tc.expectNodeReuseLabelDeleted { + Expect(tc.Host.Labels[nodeReuseLabelName]).To(Equal("")) + } }, Entry("User data has explicit alternate namespace", testCaseSetHostSpec{ UserDataNamespace: "otherns", @@ -1347,6 +1440,9 @@ var _ = Describe("Metal3Machine manager", func() { ExpectSecretDeleted bool ExpectClusterLabelDeleted bool ExpectedPausedAnnotationDeleted bool + NodeReuseEnabled bool + MachineIsControlPlane bool + MachineIsNotControlPlane bool } DescribeTable("Test Delete function", @@ -1453,6 +1549,18 @@ var _ = Describe("Metal3Machine manager", func() { Expect(savedHost.Labels["foo"]).To(Equal("bar")) Expect(savedCred.Labels["foo"]).To(Equal("bar")) } + if tc.NodeReuseEnabled { + m3mTemplate := capm3.Metal3MachineTemplate{} + err = c.Get(context.TODO(), + client.ObjectKey{ + Name: tc.M3Machine.ObjectMeta.GetAnnotations()[capi.TemplateClonedFromNameAnnotation], + Namespace: tc.M3Machine.Namespace, + }, + &m3mTemplate, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(m3mTemplate.Spec.NodeReuse).To(BeTrue()) + } }, Entry("Deprovisioning needed", testCaseDelete{ Host: newBareMetalHost("myhost", bmhSpec(), @@ -3201,6 +3309,277 @@ var _ = Describe("Metal3Machine manager", func() { }, }), ) + + type testCaseNodeReuseLabelExists struct { + Host *bmh.BareMetalHost + expectNodeReuseLabel bool + } + DescribeTable("Test NodeReuseLabelExists", + func(tc testCaseNodeReuseLabelExists) { + c := fakeclient.NewFakeClientWithScheme(setupSchemeMm(), tc.Host) + + machineMgr, err := NewMachineManager(c, nil, nil, nil, + nil, klogr.New(), + ) + Expect(err).NotTo(HaveOccurred()) + + check := machineMgr.nodeReuseLabelExists(context.TODO(), tc.Host) + Expect(err).NotTo(HaveOccurred()) + if tc.expectNodeReuseLabel { + Expect(check).To(BeTrue()) + } else { + Expect(check).To(BeFalse()) + } + }, + Entry("Node reuse label 
exists on the host", testCaseNodeReuseLabelExists{ + Host: &bmh.BareMetalHost{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + nodeReuseLabelName: kcpName, + "foo": "bar", + }, + }, + }, + expectNodeReuseLabel: true, + }), + Entry("Node reuse label does not exist on the host", testCaseNodeReuseLabelExists{ + Host: &bmh.BareMetalHost{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{}, + }, + }, + expectNodeReuseLabel: false, + }), + ) + + type testCaseGetKubeadmControlPlaneName struct { + Machine *capi.Machine + expectedKcp bool + expectedKcpName string + expectError bool + } + + DescribeTable("Test getKubeadmControlPlaneName", + func(tc testCaseGetKubeadmControlPlaneName) { + objects := []runtime.Object{} + if tc.Machine != nil { + objects = append(objects, tc.Machine) + } + c := fakeclient.NewFakeClientWithScheme(setupSchemeMm(), objects...) + machineMgr, err := NewMachineManager(c, nil, nil, tc.Machine, + nil, klogr.New(), + ) + Expect(err).NotTo(HaveOccurred()) + + result, err := machineMgr.getKubeadmControlPlaneName(context.TODO()) + if tc.expectError { + Expect(err).To(HaveOccurred()) + } else { + Expect(err).NotTo(HaveOccurred()) + } + + if tc.expectedKcp { + Expect(result).To(Equal(tc.expectedKcpName)) + } + + }, + Entry("Should find the expected kcp", testCaseGetKubeadmControlPlaneName{ + Machine: &capi.Machine{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: ctplanev1.GroupVersion.String(), + Kind: "KubeadmControlPlane", + Name: "test1", + }, + }, + }, + }, + expectError: false, + expectedKcp: true, + expectedKcpName: "kcp-test1", + }), + Entry("Should not find the expected kcp, kind is not correct", testCaseGetKubeadmControlPlaneName{ + Machine: &capi.Machine{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: ctplanev1.GroupVersion.String(), + Kind: "kcp", + Name: "test1", + }, + }, + }, + }, + expectError: true, + }), + Entry("Should 
not find the expected kcp, API version is not correct", testCaseGetKubeadmControlPlaneName{ + Machine: &capi.Machine{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: capm3.GroupVersion.String(), + Kind: "KubeadmControlPlane", + Name: "test1", + }, + }, + }, + }, + expectError: true, + }), + ) + + type testCaseGetMachineDeploymentName struct { + Machine *capi.Machine + MachineSets []*capi.MachineSet + expectedMachineSet *capi.MachineSet + expectedMD bool + expectedMDName string + expectError bool + } + DescribeTable("Test GetMachineDeploymentName", + func(tc testCaseGetMachineDeploymentName) { + objects := []runtime.Object{} + if tc.expectedMachineSet != nil { + objects = append(objects, tc.expectedMachineSet) + } + for _, ms := range tc.MachineSets { + objects = append(objects, ms) + } + c := fakeclient.NewFakeClientWithScheme(setupSchemeMm(), objects...) + machineMgr, err := NewMachineSetManager(c, tc.Machine, + tc.MachineSets, tc.expectedMachineSet, klogr.New(), + ) + Expect(err).NotTo(HaveOccurred()) + + result, err := machineMgr.getMachineDeploymentName(context.TODO()) + if tc.expectError { + Expect(err).To(HaveOccurred()) + } else { + Expect(err).NotTo(HaveOccurred()) + } + machineSetObjects := capi.MachineSetList{} + for ms := range machineSetObjects.Items { + tc.expectedMachineSet = &machineSetObjects.Items[ms] + Expect(result).To(Equal(tc.expectedMachineSet)) + } + if tc.expectedMD { + Expect(result).To(Equal(tc.expectedMDName)) + } + }, + Entry("Should find the expected MachineDeployment name", testCaseGetMachineDeploymentName{ + Machine: machineOwnerRefToMachineSet(), + MachineSets: machineSetsList(), + expectedMachineSet: &capi.MachineSet{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: capi.GroupVersion.String(), + Kind: "MachineDeployment", + Name: "test1", + }, + }, + }, + }, + expectError: false, + expectedMD: true, + expectedMDName: "md-test1", + }), + ) + 
+		// testCaseGetMachineSet is one table entry for the getMachineSet test:
+		// the Machine under test, the MachineSets handed to the fake client and
+		// to the manager, and the expected outcome.
+		type testCaseGetMachineSet struct {
+			Machine     *capi.Machine
+			MachineSets []*capi.MachineSet
+			// expectedMachineSet is the MachineSet getMachineSet should return;
+			// nil when the entry does not expect a particular MachineSet.
+			expectedMachineSet *capi.MachineSet
+			// expectError is true when getMachineSet must return an error.
+			expectError bool
+		}
+		DescribeTable("Test GetMachineSet",
+			func(tc testCaseGetMachineSet) {
+				// Seed the fake client with the MachineSets of this entry.
+				objects := make([]runtime.Object, 0, len(tc.MachineSets))
+				for _, ms := range tc.MachineSets {
+					objects = append(objects, ms)
+				}
+				c := fakeclient.NewFakeClientWithScheme(setupSchemeMm(), objects...)
+				machineMgr, err := NewMachineSetManager(c, tc.Machine,
+					tc.MachineSets, nil, klogr.New(),
+				)
+				Expect(err).NotTo(HaveOccurred())
+
+				result, err := machineMgr.getMachineSet(context.TODO())
+				if tc.expectError {
+					Expect(err).To(HaveOccurred())
+					return
+				}
+				Expect(err).NotTo(HaveOccurred())
+
+				// Previously the result was only checked inside a loop over an
+				// empty, never-populated MachineSetList, so the assertion never
+				// ran and tc.expectedMachineSet would have been overwritten.
+				// Assert on the returned object directly instead.
+				if tc.expectedMachineSet == nil {
+					return
+				}
+				Expect(result).NotTo(BeNil())
+				// Compare by name only: the entries below populate just TypeMeta
+				// and Name in expectedMachineSet.
+				// NOTE(review): assumes machineSetsList() contains a MachineSet
+				// named like expectedMachineSet and that the Machine's owner
+				// reference points at it - confirm against those helpers.
+				Expect(result.Name).To(Equal(tc.expectedMachineSet.Name))
+			},
+			Entry("Should find the expected Machineset", testCaseGetMachineSet{
+				Machine:     machineOwnerRefToMachineSet(),
+				MachineSets: machineSetsList(),
+				expectedMachineSet: &capi.MachineSet{
+					TypeMeta: metav1.TypeMeta{
+						APIVersion: capi.GroupVersion.String(),
+						Kind:       "MachineSet",
+					},
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "test1",
+					},
+				},
+				expectError: false,
+			}),
+			Entry("Should not find the expected Machineset, one of the MachineSets has different API version, second has different name", testCaseGetMachineSet{
+				Machine: machineOwnerRefToMachineSet(),
+				MachineSets: []*capi.MachineSet{
+					{
+						TypeMeta: metav1.TypeMeta{
+							APIVersion: ctplanev1.GroupVersion.String(),
+							Kind:       "MachineSet",
+						},
+						ObjectMeta: metav1.ObjectMeta{
+							Name: "",
+						},
+					},
+					{
+						TypeMeta: metav1.TypeMeta{
+							APIVersion: capi.GroupVersion.String(),
+							Kind:       "MachineSet",
+						},
+						ObjectMeta: metav1.ObjectMeta{
+							Name: "test3",
+						},
+					},
+				},
+				expectedMachineSet: nil,
+				expectError:        true,
+			}),
+			Entry("Should not find the expected Machineset, one of the MachineSets is empty, second has different Kind", testCaseGetMachineSet{
+				Machine: machineOwnerRefToMachineSet(),
+				MachineSets: []*capi.MachineSet{
+					{
+						TypeMeta:   metav1.TypeMeta{},
+						ObjectMeta: metav1.ObjectMeta{},
+					},
+					{
+						TypeMeta: metav1.TypeMeta{
+							APIVersion: capi.GroupVersion.String(),
+							Kind:       "KubeadmControlPlane",
+						},
+						ObjectMeta: metav1.ObjectMeta{
+							Name: "test1",
+						},
+					},
+				},
+				expectedMachineSet: nil,
+				expectError:        true,
+			}),
+		)
+	})
 //----------------- diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_metal3machinetemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_metal3machinetemplates.yaml index 2a17c77aa2..c21b94527e 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_metal3machinetemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_metal3machinetemplates.yaml @@ -143,6 +143,10 @@ spec: spec: description: Metal3MachineTemplateSpec defines the desired state of Metal3MachineTemplate properties: + nodeReuse: + default: false + description: When set to True, CAPM3 Machine controller will pick the same pool of BMHs' that were released during the upgrade operation. 
+ type: boolean template: description: Metal3MachineTemplateResource describes the data needed to create a Metal3Machine from a template properties: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index c41f207ee9..bf090645e0 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -52,6 +52,30 @@ rules: - clusters/status verbs: - get +- apiGroups: + - cluster.x-k8s.io + resources: + - kubeadmcontrolplanes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machinedeployments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - cluster.x-k8s.io resources: @@ -193,6 +217,18 @@ rules: - get - patch - update +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - metal3machinetemplates + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - metal3.io resources: diff --git a/controllers/metal3machine_controller.go b/controllers/metal3machine_controller.go index 3e5ddb2f6b..cf4c7c8ec3 100644 --- a/controllers/metal3machine_controller.go +++ b/controllers/metal3machine_controller.go @@ -57,7 +57,10 @@ type Metal3MachineReconciler struct { // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=metal3dataclaims/status,verbs=get // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=metal3datas,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=metal3datas/status,verbs=get +// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=metal3machinetemplates,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch;create;update;patch;delete +// 
+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=kubeadmcontrolplanes,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update;patch;delete diff --git a/docs/api.md b/docs/api.md index 5ba664cc72..31475cd133 100644 --- a/docs/api.md +++ b/docs/api.md @@ -510,7 +510,20 @@ spec: ## Metal3MachineTemplate -The Metal3MachineTemplate contains the template to create Metal3Machine. +The Metal3MachineTemplate contains the following two specification fields: + +* **nodeReuse**: (true/false) Whether the same pool of BareMetalHosts will be re-used during + upgrade/remediation operations. Defaults to false. If set to true, the CAPM3 Machine controller + will pick, for the next provisioning phase, the same pool of BareMetalHosts that were released + during the upgrade/remediation. +* **template**: a template containing the data needed to create a Metal3Machine. + +### Enabling nodeReuse feature + +This feature is useful in scenarios such as upgrade/remediation, where the root and externally attached disks of the BareMetalHosts need to be left untouched and the same pool of BareMetalHosts must be reused during re-provisioning. To enable it, set the `nodeReuse` field in `Metal3MachineTemplateSpec` to `true`. The CAPM3 Machine controller then: + +* Sets the `infrastructure.cluster.x-k8s.io/node-reuse` label on the BareMetalHost to the name of the corresponding CAPI object (`KubeadmControlPlane` or `MachineDeployment`) during deprovisioning; +* Selects, during the next provisioning, a BareMetalHost whose `infrastructure.cluster.x-k8s.io/node-reuse` label matches the CAPI object name set in the previous step. Example Metal3MachineTemplate : @@ -520,6 +533,7 @@ kind: Metal3MachineTemplate metadata: name: md-0 spec: + nodeReuse: false template: spec: image: