From daaf1d48daaad8db4e8fe95198683bc768a15fcb Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Tue, 30 Nov 2021 17:36:24 +0000 Subject: [PATCH 1/3] Ensure OpenStack resource are immutable actuator.Update() could in certain circumstances cause a server to be recreated. This is always an error and can only result in data loss. This change removes all functionality from CAPO which relates to recreating OpenStack resources. Instead, when we get an Update we simply synchronise the Machine object with the current state of the OpenStack resource. Additionally, we are no longer setting the instance-status annotation on the machine object, as this is no longer relevant. Instead we detect a deleted OpenStack resource by setting the providerID on the machine. This also has the advantage of making nodelink matching more robust. --- pkg/cloud/openstack/machine/actuator.go | 148 +++++------------- pkg/cloud/openstack/machine/instancestatus.go | 142 ----------------- 2 files changed, 36 insertions(+), 254 deletions(-) delete mode 100644 pkg/cloud/openstack/machine/instancestatus.go diff --git a/pkg/cloud/openstack/machine/actuator.go b/pkg/cloud/openstack/machine/actuator.go index be922aca10..3b9a580dfa 100644 --- a/pkg/cloud/openstack/machine/actuator.go +++ b/pkg/cloud/openstack/machine/actuator.go @@ -22,7 +22,6 @@ import ( "fmt" "net" "os" - "reflect" "strconv" "time" @@ -106,15 +105,23 @@ func getTimeout(name string, timeout int) time.Duration { return time.Duration(timeout) } +func (oc *OpenstackClient) getClusterInfraName() (string, error) { + clusterInfra, err := oc.params.ConfigClient.Infrastructures().Get(context.TODO(), "cluster", metav1.GetOptions{}) + if err != nil { + return "", fmt.Errorf("Failed to retrieve cluster Infrastructure object: %v", err) + } + + return clusterInfra.Status.InfrastructureName, nil +} + func (oc *OpenstackClient) Create(ctx context.Context, machine *machinev1.Machine) error { // First check that provided labels are correct // TODO(mfedosin): stop sending the infrastructure request when we start to receive the cluster value - clusterInfra, err := oc.params.ConfigClient.Infrastructures().Get(context.TODO(), "cluster", metav1.GetOptions{}) + clusterInfraName, err := oc.getClusterInfraName() if err != nil { - return fmt.Errorf("Failed to retrieve cluster Infrastructure object: %v", err) + return err } - clusterInfraName := clusterInfra.Status.InfrastructureName clusterNameLabel := machine.Labels["machine.openshift.io/cluster-api-cluster"] if clusterNameLabel != clusterInfraName { @@ -142,21 +149,12 @@ func (oc *OpenstackClient) Create(ctx context.Context, machine *machinev1.Machin return oc.handleMachineError(machine, verr, createEventAction) } - instance, err := oc.instanceExists(machine) - if err != nil { - return err - } - if instance != nil { - klog.Infof("Skipped creating a VM that already exists.\n") - return nil - } - // Here we check whether we want to create a new instance or recreate the destroyed // one. If this is the second case, we have to return an error, because if we just // create an instance with the old name, because the CSR for it will not be approved // automatically. // See https://bugzilla.redhat.com/show_bug.cgi?id=1746369 - if machine.ObjectMeta.Annotations[InstanceStatusAnnotationKey] != "" { + if machine.Spec.ProviderID != nil { klog.Errorf("The instance has been destroyed for the machine %v, cannot recreate it.\n", machine.ObjectMeta.Name) verr := apierrors.InvalidMachineConfiguration("the instance has been destroyed for the machine %v, cannot recreate it.\n", machine.ObjectMeta.Name) @@ -256,7 +254,7 @@ func (oc *OpenstackClient) Create(ctx context.Context, machine *machinev1.Machin } } - instance, err = machineService.InstanceCreate(clusterName, machine.Name, &clusterSpec, providerSpec, userDataRendered, providerSpec.KeyName, oc.params.ConfigClient) + instance, err := machineService.InstanceCreate(clusterName, machine.Name, &clusterSpec, providerSpec, userDataRendered, providerSpec.KeyName, oc.params.ConfigClient) if err != nil { return oc.handleMachineError(machine, apierrors.CreateMachine( @@ -265,7 +263,7 @@ func (oc *OpenstackClient) Create(ctx context.Context, machine *machinev1.Machin instanceCreateTimeout := getTimeout("CLUSTER_API_OPENSTACK_INSTANCE_CREATE_TIMEOUT", TimeoutInstanceCreate) instanceCreateTimeout = instanceCreateTimeout * time.Minute err = util.PollImmediate(RetryIntervalInstanceStatus, instanceCreateTimeout, func() (bool, error) { - instance, err := machineService.GetInstance(instance.ID) + instance, err = machineService.GetInstance(instance.ID) if err != nil { return false, nil } @@ -291,7 +289,7 @@ func (oc *OpenstackClient) Create(ctx context.Context, machine *machinev1.Machin } oc.eventRecorder.Eventf(machine, corev1.EventTypeNormal, "Created", "Created machine %v", machine.Name) - return oc.updateAnnotation(machine, instance.ID, clusterInfraName) + return oc.updateAnnotation(machine, instance, clusterInfraName) } func (oc *OpenstackClient) Delete(ctx context.Context, machine *machinev1.Machine) error { @@ -322,92 +320,16 @@ func (oc *OpenstackClient) Delete(ctx context.Context, machine *machinev1.Machin } func (oc *OpenstackClient) Update(ctx context.Context, machine *machinev1.Machine) error { - if err := oc.validateMachine(machine); err != nil { - verr := &apierrors.MachineError{ - Reason: machinev1.UpdateMachineError, - Message: err.Error(), - } - return oc.handleMachineError(machine, verr, updateEventAction) - } - - clusterInfra, err := oc.params.ConfigClient.Infrastructures().Get(context.TODO(), "cluster", metav1.GetOptions{}) - if err != nil { - return fmt.Errorf("Failed to retrieve cluster Infrastructure object: %v", err) - } - - status, err := oc.instanceStatus(machine) + clusterInfraName, err := oc.getClusterInfraName() if err != nil { return err } - - currentMachine := (*machinev1.Machine)(status) - if currentMachine == nil { - instance, err := oc.instanceExists(machine) - if err != nil { - return err - } - if instance != nil && instance.Status == "ACTIVE" { - klog.Infof("Populating current state for boostrap machine %v", machine.ObjectMeta.Name) - - kubeClient := oc.params.KubeClient - machineService, err := clients.NewInstanceServiceFromMachine(kubeClient, machine) - if err != nil { - return err - } - - err = machineService.SetMachineLabels(machine, instance.ID) - if err != nil { - return nil - } - - return oc.updateAnnotation(machine, instance.ID, clusterInfra.Status.InfrastructureName) - } else { - return fmt.Errorf("Cannot retrieve current state to update machine %v", machine.ObjectMeta.Name) - } - } - - if !oc.requiresUpdate(currentMachine, machine) { - return nil - } - - if _, ok := currentMachine.Labels["node-role.kubernetes.io/master"]; ok { - // In this conditional block, Machine is Control Plane - // TODO: add master inplace - klog.Errorf("master inplace update failed: not supported") - return oc.handleMachineError(machine, apierrors.UpdateMachine( - "master inplace update failed: not supported"), updateEventAction) - } else { - // In this conditional block, Machine is Compute Node - klog.Infof("re-creating machine %s for update.", currentMachine.ObjectMeta.Name) - err = oc.Create(ctx, machine) - if err != nil { - klog.Errorf("create machine %s for update failed: %v", machine.ObjectMeta.Name, err) - return fmt.Errorf("Cannot create machine %s: %v", machine.ObjectMeta.Name, err) - } - - err = oc.Delete(ctx, currentMachine) - if err != nil { - klog.Errorf("delete machine %s for update failed: %v", currentMachine.ObjectMeta.Name, err) - return fmt.Errorf("Cannot delete machine %s: %v", currentMachine.ObjectMeta.Name, err) - } - instanceDeleteTimeout := getTimeout("CLUSTER_API_OPENSTACK_INSTANCE_DELETE_TIMEOUT", TimeoutInstanceDelete) - instanceDeleteTimeout = instanceDeleteTimeout * time.Minute - err = util.PollImmediate(RetryIntervalInstanceStatus, instanceDeleteTimeout, func() (bool, error) { - instance, err := oc.instanceExists(machine) - if err != nil { - return false, nil - } - return instance == nil, nil - }) - if err != nil { - return oc.handleMachineError(machine, apierrors.DeleteMachine( - "error deleting Openstack instance: %v", err), updateEventAction) - } - klog.Infof("Successfully updated machine %s", currentMachine.ObjectMeta.Name) + instance, err := oc.instanceExists(machine) + if err != nil { + return fmt.Errorf("error fetching OpenStack server for machine %s: %w", machine.Name, err) } - oc.eventRecorder.Eventf(currentMachine, corev1.EventTypeNormal, "Updated", "Updated machine %v", currentMachine.ObjectMeta.Name) - return nil + return oc.updateAnnotation(machine, instance, clusterInfraName) } func (oc *OpenstackClient) Exists(ctx context.Context, machine *machinev1.Machine) (bool, error) { @@ -575,14 +497,26 @@ func (oc *OpenstackClient) handleMachineError(machine *machinev1.Machine, err *a return err } -func (oc *OpenstackClient) updateAnnotation(machine *machinev1.Machine, instanceID string, clusterInfraName string) error { +func (oc *OpenstackClient) updateAnnotation(machine *machinev1.Machine, instance *clients.Instance, clusterInfraName string) error { + providerID := fmt.Sprintf("openstack:///%s", instance.ID) + + if machine.Spec.ProviderID != nil { + // We can't recover if the provider ID has changed + if *machine.Spec.ProviderID != providerID { + verr := apierrors.InvalidMachineConfiguration("providerID has changed from %s to %s. This is not supported. "+ + "The recommended action is to delete and recreate this machine.", *machine.Spec.ProviderID, providerID) + return oc.handleMachineError(machine, verr, updateEventAction) + } + } else { + machine.Spec.ProviderID = &providerID + } + statusCopy := *machine.Status.DeepCopy() if machine.ObjectMeta.Annotations == nil { machine.ObjectMeta.Annotations = make(map[string]string) } - machine.ObjectMeta.Annotations[openstack.OpenstackIdAnnotationKey] = instanceID - instance, _ := oc.instanceExists(machine) + machine.ObjectMeta.Annotations[openstack.OpenstackIdAnnotationKey] = instance.ID mapAddr, err := getIPsFromInstance(instance) if err != nil { return err @@ -627,17 +561,7 @@ func (oc *OpenstackClient) updateAnnotation(machine *machinev1.Machine, instance } machine.Status = statusCopy - return oc.updateInstanceStatus(machine) -} - -func (oc *OpenstackClient) requiresUpdate(a *machinev1.Machine, b *machinev1.Machine) bool { - if a == nil || b == nil { - return true - } - // Do not want status changes. Do want changes that impact machine provisioning - return !reflect.DeepEqual(a.Spec.ObjectMeta, b.Spec.ObjectMeta) || - !reflect.DeepEqual(a.Spec.ProviderSpec, b.Spec.ProviderSpec) || - a.ObjectMeta.Name != b.ObjectMeta.Name + return oc.client.Update(context.TODO(), machine) } func (oc *OpenstackClient) instanceExists(machine *machinev1.Machine) (instance *clients.Instance, err error) { diff --git a/pkg/cloud/openstack/machine/instancestatus.go b/pkg/cloud/openstack/machine/instancestatus.go deleted file mode 100644 index 1013d7bffe..0000000000 --- a/pkg/cloud/openstack/machine/instancestatus.go +++ /dev/null @@ -1,142 +0,0 @@ -/* -Copyright 2018 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package machine - -import ( - "bytes" - "context" - "fmt" - - "k8s.io/apimachinery/pkg/runtime/serializer/json" - - machinev1 "github.com/openshift/api/machine/v1beta1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime/schema" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// Long term, we should retrieve the current status by asking k8s, openstack etc. for all the needed info. -// For now, it is stored in the matching CRD under an annotation. This is similar to -// the spec and status concept where the machine CRD is the instance spec and the annotation is the instance status. - -const InstanceStatusAnnotationKey = "instance-status" - -type instanceStatus *machinev1.Machine - -// Get the status of the instance identified by the given machine -func (oc *OpenstackClient) instanceStatus(machine *machinev1.Machine) (instanceStatus, error) { - currentMachine, err := GetMachineIfExists(oc.client, machine.Namespace, machine.Name) - - if err != nil { - return nil, err - } - - if currentMachine == nil { - // The current status no longer exists because the matching CRD has been deleted (or does not exist yet ie. bootstrapping) - return nil, nil - } - return oc.machineInstanceStatus(currentMachine) -} - -// Get a `machinev1.Machine` matching the specified name and namespace. -// -// Same as cluster-api's `util.GetMachineIfExists`, but works with -// `machinev1` instead of `clusterv1`. The latter does not work with -// OpenShift deployments. -func GetMachineIfExists(c client.Client, namespace, name string) (*machinev1.Machine, error) { - if c == nil { - // Being called before k8s is setup as part of control plane VM creation - return nil, nil - } - - machine := &machinev1.Machine{} - key := client.ObjectKey{Namespace: namespace, Name: name} - err := c.Get(context.Background(), key, machine) - - if err != nil { - if errors.IsNotFound(err) { - return nil, nil - } - return nil, err - } - - return machine, nil -} - -// Sets the status of the instance identified by the given machine to the given machine -func (oc *OpenstackClient) updateInstanceStatus(machine *machinev1.Machine) error { - status := instanceStatus(machine) - currentMachine, err := GetMachineIfExists(oc.client, machine.Namespace, machine.Name) - if err != nil { - return err - } - - if currentMachine == nil { - // The current status no longer exists because the matching CRD has been deleted. - return fmt.Errorf("Machine has already been deleted. Cannot update current instance status for machine %v", machine.ObjectMeta.Name) - } - - m, err := oc.setMachineInstanceStatus(currentMachine, status) - if err != nil { - return err - } - - return oc.client.Update(context.TODO(), m) -} - -// Gets the state of the instance stored on the given machine CRD -func (oc *OpenstackClient) machineInstanceStatus(machine *machinev1.Machine) (instanceStatus, error) { - if machine.ObjectMeta.Annotations == nil { - // No state - return nil, nil - } - - a := machine.ObjectMeta.Annotations[InstanceStatusAnnotationKey] - if a == "" { - // No state - return nil, nil - } - - serializer := json.NewSerializer(json.DefaultMetaFactory, oc.scheme, oc.scheme, false) - var status machinev1.Machine - _, _, err := serializer.Decode([]byte(a), &schema.GroupVersionKind{Group: "machine.openshift.io", Version: "v1beta1", Kind: "Machine"}, &status) - if err != nil { - return nil, fmt.Errorf("decoding failure: %v", err) - } - - return instanceStatus(&status), nil -} - -// Applies the state of an instance onto a given machine CRD -func (oc *OpenstackClient) setMachineInstanceStatus(machine *machinev1.Machine, status instanceStatus) (*machinev1.Machine, error) { - // Avoid status within status within status ... - status.ObjectMeta.Annotations[InstanceStatusAnnotationKey] = "" - - serializer := json.NewSerializer(json.DefaultMetaFactory, oc.scheme, oc.scheme, false) - b := []byte{} - buff := bytes.NewBuffer(b) - err := serializer.Encode((*machinev1.Machine)(status), buff) - if err != nil { - return nil, fmt.Errorf("encoding failure: %v", err) - } - - if machine.ObjectMeta.Annotations == nil { - machine.ObjectMeta.Annotations = make(map[string]string) - } - machine.ObjectMeta.Annotations[InstanceStatusAnnotationKey] = buff.String() - return machine, nil -} From 22c143c9e4dee8d8971a05ad742272169078ad88 Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Thu, 2 Dec 2021 12:12:03 +0000 Subject: [PATCH 2/3] Remove the DeploymentClient It was dead code. Move the OpenstackIdAnnotationKey constant to machine, which is the only place it is used. Remove the OpenstackIPAnnotationKey and no longer set the corresponding annotation, because it is no longer used. --- pkg/cloud/openstack/deployer.go | 93 ------------------------- pkg/cloud/openstack/machine/actuator.go | 23 +++--- 2 files changed, 11 insertions(+), 105 deletions(-) delete mode 100644 pkg/cloud/openstack/deployer.go diff --git a/pkg/cloud/openstack/deployer.go b/pkg/cloud/openstack/deployer.go deleted file mode 100644 index fa23e93582..0000000000 --- a/pkg/cloud/openstack/deployer.go +++ /dev/null @@ -1,93 +0,0 @@ -/* -Copyright 2018 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package openstack - -import ( - "errors" - "fmt" - "os" - "os/exec" - "strings" - - machinev1 "github.com/openshift/api/machine/v1beta1" - - "k8s.io/klog/v2" - openstackconfigv1 "sigs.k8s.io/cluster-api-provider-openstack/pkg/apis/openstackproviderconfig/v1alpha1" -) - -const ProviderName = "openstack" -const ( - OpenstackIPAnnotationKey = "openstack-ip-address" - OpenstackIdAnnotationKey = "openstack-resourceId" -) - -type DeploymentClient struct{} - -func NewDeploymentClient() *DeploymentClient { - return &DeploymentClient{} -} - -func (*DeploymentClient) GetIP(machine *machinev1.Machine) (string, error) { - if machine.ObjectMeta.Annotations != nil { - if ip, ok := machine.ObjectMeta.Annotations[OpenstackIPAnnotationKey]; ok { - klog.Infof("Returning IP from machine annotation %s", ip) - return ip, nil - } - } - - return "", errors.New("could not get IP") -} - -// execCommand executes a local command in the current shell. -func execCommand(name string, args ...string) string { - cmdOut, err := exec.Command(name, args...).Output() - if err != nil { - s := strings.Join(append([]string{name}, args...), " ") - klog.Errorf("error executing command %q: %v", s, err) - } - return string(cmdOut) -} - -func (d *DeploymentClient) GetKubeConfig(master *machinev1.Machine) (string, error) { - ip, err := d.GetIP(master) - if err != nil { - return "", err - } - - homeDir, ok := os.LookupEnv("HOME") - if !ok { - return "", fmt.Errorf("unable to use HOME environment variable to find SSH key: %v", err) - } - - machineSpec, err := openstackconfigv1.MachineSpecFromProviderSpec(master.Spec.ProviderSpec) - if err != nil { - return "", err - } - - result := strings.TrimSpace(execCommand( - "ssh", "-i", homeDir+"/.ssh/openstack_tmp", - "-o", "StrictHostKeyChecking no", - "-o", "UserKnownHostsFile /dev/null", - "-o", "BatchMode=yes", - fmt.Sprintf("%s@%s", machineSpec.SshUserName, ip), - "echo STARTFILE; sudo cat /etc/kubernetes/admin.conf")) - parts := strings.Split(result, "STARTFILE") - if len(parts) != 2 { - return "", nil - } - return strings.TrimSpace(parts[1]), nil -} diff --git a/pkg/cloud/openstack/machine/actuator.go b/pkg/cloud/openstack/machine/actuator.go index 3b9a580dfa..f104236bbb 100644 --- a/pkg/cloud/openstack/machine/actuator.go +++ b/pkg/cloud/openstack/machine/actuator.go @@ -67,6 +67,8 @@ const ( // ErrorState is assigned to the machine if its instance has been destroyed ErrorState = "ERROR" + + OpenstackIdAnnotationKey = "openstack-resourceId" ) // Event Action Constants @@ -78,20 +80,18 @@ const ( ) type OpenstackClient struct { - params openstack.ActuatorParams - scheme *runtime.Scheme - client client.Client - *openstack.DeploymentClient + params openstack.ActuatorParams + scheme *runtime.Scheme + client client.Client eventRecorder record.EventRecorder } func NewActuator(params openstack.ActuatorParams) (*OpenstackClient, error) { return &OpenstackClient{ - params: params, - client: params.Client, - scheme: params.Scheme, - DeploymentClient: openstack.NewDeploymentClient(), - eventRecorder: params.EventRecorder, + params: params, + client: params.Client, + scheme: params.Scheme, + eventRecorder: params.EventRecorder, }, nil } @@ -308,7 +308,7 @@ func (oc *OpenstackClient) Delete(ctx context.Context, machine *machinev1.Machin return nil } - id := machine.ObjectMeta.Annotations[openstack.OpenstackIdAnnotationKey] + id := machine.ObjectMeta.Annotations[OpenstackIdAnnotationKey] err = machineService.InstanceDelete(id) if err != nil { return oc.handleMachineError(machine, apierrors.DeleteMachine( @@ -516,7 +516,7 @@ func (oc *OpenstackClient) updateAnnotation(machine *machinev1.Machine, instance if machine.ObjectMeta.Annotations == nil { machine.ObjectMeta.Annotations = make(map[string]string) } - machine.ObjectMeta.Annotations[openstack.OpenstackIdAnnotationKey] = instance.ID + machine.ObjectMeta.Annotations[OpenstackIdAnnotationKey] = instance.ID mapAddr, err := getIPsFromInstance(instance) if err != nil { return err @@ -528,7 +528,6 @@ func (oc *OpenstackClient) updateAnnotation(machine *machinev1.Machine, instance } klog.Infof("Found the primary address for the machine %v: %v", machine.Name, primaryIP) - machine.ObjectMeta.Annotations[openstack.OpenstackIPAnnotationKey] = primaryIP machine.ObjectMeta.Annotations[MachineInstanceStateAnnotationName] = instance.Status if err := oc.client.Update(context.TODO(), machine); err != nil { From 67b4aa8fff7fb95000c21d0d2c5d78484bdcdc71 Mon Sep 17 00:00:00 2001 From: Matthew Booth Date: Thu, 2 Dec 2021 12:41:49 +0000 Subject: [PATCH 3/3] Report all machine addresses We were previously only reporting a single 'primary' address. This would cause CSRs for the machine not to be approved if additional ports were added. This removes the last use of code for determining the 'primary' IP, so we also remove all the dead code. Fixes: rhbz#2022627 --- pkg/cloud/openstack/machine/actuator.go | 173 ++++++------------------ 1 file changed, 40 insertions(+), 133 deletions(-) diff --git a/pkg/cloud/openstack/machine/actuator.go b/pkg/cloud/openstack/machine/actuator.go index f104236bbb..a6f130305f 100644 --- a/pkg/cloud/openstack/machine/actuator.go +++ b/pkg/cloud/openstack/machine/actuator.go @@ -20,7 +20,6 @@ import ( "context" "encoding/json" "fmt" - "net" "os" "strconv" "time" @@ -28,10 +27,6 @@ import ( "k8s.io/apimachinery/pkg/api/equality" "k8s.io/client-go/tools/record" - "github.com/gophercloud/gophercloud" - gophercloudopenstack "github.com/gophercloud/gophercloud/openstack" - "github.com/gophercloud/gophercloud/openstack/networking/v2/networks" - "github.com/gophercloud/gophercloud/openstack/networking/v2/subnets" machinev1 "github.com/openshift/api/machine/v1beta1" apierrors "github.com/openshift/machine-api-operator/pkg/controller/machine" "github.com/openshift/machine-api-operator/pkg/util" @@ -340,132 +335,56 @@ func (oc *OpenstackClient) Exists(ctx context.Context, machine *machinev1.Machin return instance != nil, err } -func getIPsFromInstance(instance *clients.Instance) (map[string]string, error) { - if instance.AccessIPv4 != "" && net.ParseIP(instance.AccessIPv4) != nil { - return map[string]string{ - "": instance.AccessIPv4, - }, nil - } +func getIPsFromInstance(instance *clients.Instance) ([]corev1.NodeAddress, error) { type networkInterface struct { Address string `json:"addr"` Version float64 `json:"version"` Type string `json:"OS-EXT-IPS:type"` } - addrMap := map[string]string{} - for networkName, b := range instance.Addresses { + var nodeAddresses []corev1.NodeAddress + + // This is heavily based on the related upstream code: + // https://github.com/kubernetes-sigs/cluster-api-provider-openstack/blob/244d31b1d583ee9e760d2bc2f18a80e1fc61f5eb/pkg/cloud/services/compute/instance_types.go#L131-L183 + for _, b := range instance.Addresses { list, err := json.Marshal(b) if err != nil { - return nil, fmt.Errorf("extract IP from instance err: %v", err) - } - var networks []interface{} - json.Unmarshal(list, &networks) - for _, network := range networks { - var netInterface networkInterface - b, _ := json.Marshal(network) - json.Unmarshal(b, &netInterface) - if netInterface.Version == 4.0 { - addrMap[networkName] = netInterface.Address - } + return nil, fmt.Errorf("error marshalling addresses for instance %s: %w", instance.ID, err) } - } - if len(addrMap) == 0 { - return nil, fmt.Errorf("extract IP from instance err") - } - - return addrMap, nil -} - -func getNetworkBySubnet(client *gophercloud.ServiceClient, subnetID string) (*networks.Network, error) { - subnet, err := subnets.Get(client, subnetID).Extract() - if err != nil { - return nil, fmt.Errorf("Could not get subnet %s, %v", subnetID, err) - } - - network, err := networks.Get(client, subnet.NetworkID).Extract() - if err != nil { - return nil, fmt.Errorf("Could not get network %s, %v", subnet.NetworkID, err) - } - return network, nil -} - -func getNetworkByPrimaryNetworkTag(client *gophercloud.ServiceClient, primaryNetworkTag string) (*networks.Network, error) { - opts := networks.ListOpts{ - Tags: primaryNetworkTag, - } - - allPages, err := networks.List(client, opts).AllPages() - if err != nil { - return nil, err - } - - allNetworks, err := networks.ExtractNetworks(allPages) - if err != nil { - return nil, err - } - - switch len(allNetworks) { - case 0: - return nil, fmt.Errorf("There are no networks with primary network tag: %v", primaryNetworkTag) - case 1: - return &allNetworks[0], nil - } - return nil, fmt.Errorf("Too many networks with the same primary network tag: %v", primaryNetworkTag) -} - -func (oc *OpenstackClient) getPrimaryMachineIP(mapAddr map[string]string, machine *machinev1.Machine, clusterInfraName string) (string, error) { - // If there is only one network in the list, we consider it as the primary one - if len(mapAddr) == 1 { - for _, addr := range mapAddr { - return addr, nil + var interfaceList []networkInterface + err = json.Unmarshal(list, &interfaceList) + if err != nil { + return nil, fmt.Errorf("error unmarshalling addresses for instance %s: %w", instance.ID, err) } - } - - config, err := openstackconfigv1.MachineSpecFromProviderSpec(machine.Spec.ProviderSpec) - if err != nil { - return "", fmt.Errorf("Invalid provider spec for machine %s", machine.Name) - } - // PrimarySubnet should always be set in the machine api in 4.6 - primarySubnet := config.PrimarySubnet + for i := range interfaceList { + address := &interfaceList[i] - cloud, err := clients.GetCloud(oc.params.KubeClient, machine) - if err != nil { - return "", err - } - provider, err := clients.GetProviderClient(cloud, clients.GetCACertificate(oc.params.KubeClient)) - if err != nil { - return "", err - } - netClient, err := gophercloudopenstack.NewNetworkV2(provider, gophercloud.EndpointOpts{ - Region: cloud.RegionName, - }) - if err != nil { - return "", err - } + // Only consider IPv4 + if address.Version != 4 { + klog.V(6).Info("Ignoring IPv%d address %s: only IPv4 is supported", address.Version, address.Address) + continue + } - var primaryNetwork *networks.Network - if primarySubnet != "" { - primaryNetwork, err = getNetworkBySubnet(netClient, primarySubnet) - if err != nil { - return "", err - } - } else { - // Support legacy versions - primaryNetworkTag := clusterInfraName + "-primaryClusterNetwork" - primaryNetwork, err = getNetworkByPrimaryNetworkTag(netClient, primaryNetworkTag) - if err != nil { - return "", err - } - } + var addressType corev1.NodeAddressType + switch address.Type { + case "floating": + addressType = corev1.NodeExternalIP + case "fixed": + addressType = corev1.NodeInternalIP + default: + klog.V(6).Info("Ignoring address %s with unknown type '%s'", address.Address, address.Type) + continue + } - for networkName, addr := range mapAddr { - if networkName == primaryNetwork.Name { - return addr, nil + nodeAddresses = append(nodeAddresses, corev1.NodeAddress{ + Type: addressType, + Address: address.Address, + }) } } - return "", fmt.Errorf("No primary network was found for the machine %v", machine.Name) + return nodeAddresses, nil } // If the OpenstackClient has a client for updating Machine objects, this will set @@ -517,41 +436,29 @@ func (oc *OpenstackClient) updateAnnotation(machine *machinev1.Machine, instance machine.ObjectMeta.Annotations = make(map[string]string) } machine.ObjectMeta.Annotations[OpenstackIdAnnotationKey] = instance.ID - mapAddr, err := getIPsFromInstance(instance) - if err != nil { - return err - } - - primaryIP, err := oc.getPrimaryMachineIP(mapAddr, machine, clusterInfraName) - if err != nil { - return err - } - klog.Infof("Found the primary address for the machine %v: %v", machine.Name, primaryIP) - machine.ObjectMeta.Annotations[MachineInstanceStateAnnotationName] = instance.Status if err := oc.client.Update(context.TODO(), machine); err != nil { return err } - networkAddresses := []corev1.NodeAddress{} - networkAddresses = append(networkAddresses, corev1.NodeAddress{ - Type: corev1.NodeInternalIP, - Address: primaryIP, - }) + nodeAddresses, err := getIPsFromInstance(instance) + if err != nil { + return err + } - networkAddresses = append(networkAddresses, corev1.NodeAddress{ + nodeAddresses = append(nodeAddresses, corev1.NodeAddress{ Type: corev1.NodeHostName, Address: machine.Name, }) - networkAddresses = append(networkAddresses, corev1.NodeAddress{ + nodeAddresses = append(nodeAddresses, corev1.NodeAddress{ Type: corev1.NodeInternalDNS, Address: machine.Name, }) machineCopy := machine.DeepCopy() - machineCopy.Status.Addresses = networkAddresses + machineCopy.Status.Addresses = nodeAddresses if !equality.Semantic.DeepEqual(machine.Status.Addresses, machineCopy.Status.Addresses) { if err := oc.client.Status().Update(context.TODO(), machineCopy); err != nil {