From 9027dde0b5134f85ebcf56406ea52daf13495dcf Mon Sep 17 00:00:00 2001 From: Cecile Robert-Michon Date: Thu, 11 Mar 2021 14:42:47 -0800 Subject: [PATCH 1/3] Set AzureMachine and AzureMachinePool BootstrapSucceeded condition using VM extension script --- api/v1alpha4/conditions_consts.go | 40 ++++--- azure/defaults.go | 17 +++ azure/scope/machine.go | 50 +++++++++ azure/scope/machinepool.go | 27 +++++ azure/services/scalesets/scalesets.go | 2 +- azure/services/scalesets/scalesets_test.go | 15 ++- .../virtualmachines_mock.go | 12 +++ .../virtualmachines/virtualmachines.go | 2 + azure/services/vmextensions/client.go | 16 +-- .../mock_vmextensions/client_mock.go | 12 +-- .../mock_vmextensions/vmextensions_mock.go | 13 +++ azure/services/vmextensions/vmextensions.go | 20 ++-- .../vmextensions/vmextensions_test.go | 101 +++++++++++++++++- azure/services/vmssextensions/client.go | 36 ------- .../mock_vmssextensions/client_mock.go | 28 ----- .../vmssextensions_mock.go | 13 +++ .../services/vmssextensions/vmssextensions.go | 37 ++----- .../vmssextensions/vmssextensions_test.go | 25 +++-- azure/types.go | 18 ++-- controllers/azuremachine_controller.go | 43 +------- .../azuremachinepool_controller.go | 12 ++- 21 files changed, 341 insertions(+), 198 deletions(-) diff --git a/api/v1alpha4/conditions_consts.go b/api/v1alpha4/conditions_consts.go index d6fc851b9a9..08c0799791e 100644 --- a/api/v1alpha4/conditions_consts.go +++ b/api/v1alpha4/conditions_consts.go @@ -21,11 +21,7 @@ import clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" // AzureCluster Conditions and Reasons const ( // NetworkInfrastructureReadyCondition reports of current status of cluster infrastructure - NetworkInfrastructureReadyCondition = "NetworkInfrastructureReady" - // LoadBalancerProvisioningReason API Server endpoint for the loadbalancer - LoadBalancerProvisioningReason = "LoadBalancerProvisioning" - // LoadBalancerProvisioningFailedReason used for failure during provisioning of loadbalancer. 
- LoadBalancerProvisioningFailedReason = "LoadBalancerProvisioningFailed" + NetworkInfrastructureReadyCondition clusterv1.ConditionType = "NetworkInfrastructureReady" // NamespaceNotAllowedByIdentity used to indicate cluster in a namespace not allowed by identity NamespaceNotAllowedByIdentity = "NamespaceNotAllowedByIdentity" ) @@ -34,20 +30,36 @@ const ( const ( // VMRunningCondition reports on current status of the Azure VM. VMRunningCondition clusterv1.ConditionType = "VMRunning" - // VMNCreatingReason used when the vm creation is in progress. - VMNCreatingReason = "VMCreating" - // VMNUpdatingReason used when the vm updating is in progress. - VMNUpdatingReason = "VMUpdating" - // VMNotFoundReason used when the vm couldn't be retrieved. - VMNotFoundReason = "VMNotFound" + // VMCreatingReason used when the vm creation is in progress. + VMCreatingReason = "VMCreating" + // VMUpdatingReason used when the vm updating is in progress. + VMUpdatingReason = "VMUpdating" // VMDeletingReason used when the vm is in a deleting state. - VMDDeletingReason = "VMDeleting" - // VMStoppedReason vm is in a stopped state. - VMStoppedReason = "VMStopped" + VMDeletingReason = "VMDeleting" // VMProvisionFailedReason used for failures during vm provisioning. VMProvisionFailedReason = "VMProvisionFailed" // WaitingForClusterInfrastructureReason used when machine is waiting for cluster infrastructure to be ready before proceeding. WaitingForClusterInfrastructureReason = "WaitingForClusterInfrastructure" // WaitingForBootstrapDataReason used when machine is waiting for bootstrap data to be ready before proceeding. WaitingForBootstrapDataReason = "WaitingForBootstrapData" + // BootstrapSucceededCondition reports the result of the execution of the bootstrap data on the machine. + BootstrapSucceededCondition = "BoostrapSucceeded" + // BootstrapInProgressReason is used to indicate the bootstrap data has not finished executing. 
+ BootstrapInProgressReason = "BootstrapInProgress" + // BootstrapFailedReason is used to indicate the bootstrap process ran into an error. + BootstrapFailedReason = "BootstrapFailed" +) + +// AzureMachinePool Conditions and Reasons +const ( + // ScaleSetRunningCondition reports on current status of the Azure Scale Set. + ScaleSetRunningCondition clusterv1.ConditionType = "ScaleSetRunning" + // ScaleSetCreatingReason used when the scale set creation is in progress. + ScaleSetCreatingReason = "ScaleSetCreating" + // ScaleSetUpdatingReason used when the scale set updating is in progress. + ScaleSetUpdatingReason = "ScaleSetUpdating" + // ScaleSetDeletingReason used when the scale set is in a deleting state. + ScaleSetDeletingReason = "ScaleSetDeleting" + // ScaleSetProvisionFailedReason used for failures during scale set provisioning. + ScaleSetProvisionFailedReason = "ScaleSetProvisionFailed" ) diff --git a/azure/defaults.go b/azure/defaults.go index d87eee3dace..016e1e5d36b 100644 --- a/azure/defaults.go +++ b/azure/defaults.go @@ -65,6 +65,17 @@ const ( ControlPlaneNodeGroup = "control-plane" ) +const ( + // bootstrapExtensionRetries is the number of retries in the BootstrapExtensionCommand. + // NOTE: the overall timeout will be number of retries * retry sleep, in this case 240 * 5s = 1200s. + bootstrapExtensionRetries = 240 + // bootstrapExtensionSleep is the duration in seconds to sleep before each retry in the BootstrapExtensionCommand. + bootstrapExtensionSleep = 5 + // bootstrapSentinelFile is the file written by bootstrap provider on machines to indicate successful bootstrapping, + // as defined by the Cluster API Bootstrap Provider contract (https://cluster-api.sigs.k8s.io/developer/providers/bootstrap.html). + bootstrapSentinelFile = "/run/cluster-api/bootstrap-success.complete" +) + // GenerateBackendAddressPoolName generates a load balancer backend address pool name. 
func GenerateBackendAddressPoolName(lbName string) string { return fmt.Sprintf("%s-%s", lbName, "backendPool") @@ -262,6 +273,12 @@ func GetBootstrappingVMExtension(osType string, cloud string) (name, publisher, return "", "", "" } +// BootstrapExtensionCommand is the command that runs on the Bootstrap VM extension to check for bootstrap success. +// The command checks for the existence of the bootstrapSentinelFile on the machine, with retries and sleep between retries. +func BootstrapExtensionCommand() string { + return fmt.Sprintf("for i in $(seq 1 %d); do test -f %s && break; if [ $i -eq %d ]; then return 1; else sleep %d; fi; done", bootstrapExtensionRetries, bootstrapSentinelFile, bootstrapExtensionRetries, bootstrapExtensionSleep) +} + // UserAgent specifies a string to append to the agent identifier. func UserAgent() string { return fmt.Sprintf("cluster-api-provider-azure/%s", version.Get().String()) diff --git a/azure/scope/machine.go b/azure/scope/machine.go index a80a32a243b..b8d881a1b30 100644 --- a/azure/scope/machine.go +++ b/azure/scope/machine.go @@ -21,6 +21,7 @@ import ( "encoding/base64" "encoding/json" "strings" + "time" "github.com/Azure/go-autorest/autorest/to" "github.com/go-logr/logr" @@ -241,6 +242,9 @@ func (m *MachineScope) VMExtensionSpecs() []azure.VMExtensionSpec { VMName: m.Name(), Publisher: publisher, Version: version, + ProtectedSettings: map[string]string{ + "commandToExecute": azure.BootstrapExtensionCommand(), + }, }, } } @@ -382,6 +386,52 @@ func (m *MachineScope) SetFailureReason(v capierrors.MachineStatusError) { m.AzureMachine.Status.FailureReason = &v } +// SetBootstrapConditions sets the AzureMachine BootstrapSucceeded condition based on the extension provisioning states. 
+func (m *MachineScope) SetBootstrapConditions(provisioningState string, extensionName string) error { + switch infrav1.VMState(provisioningState) { + case infrav1.VMStateSucceeded: + m.V(4).Info("extension provisioning state is succeeded", "vm extension", extensionName, "virtual machine", m.Name()) + conditions.MarkTrue(m.AzureMachine, infrav1.BootstrapSucceededCondition) + return nil + case infrav1.VMStateCreating: + m.V(4).Info("extension provisioning state is creating", "vm extension", extensionName, "virtual machine", m.Name()) + conditions.MarkFalse(m.AzureMachine, infrav1.BootstrapSucceededCondition, infrav1.BootstrapInProgressReason, clusterv1.ConditionSeverityInfo, "") + return azure.WithTransientError(errors.New("extension still provisioning"), 30*time.Second) + case infrav1.VMStateFailed: + m.V(4).Info("extension provisioning state is failed", "vm extension", extensionName, "virtual machine", m.Name()) + conditions.MarkFalse(m.AzureMachine, infrav1.BootstrapSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityError, "") + return azure.WithTerminalError(errors.New("extension state failed")) + default: + return nil + } +} + +// UpdateStatus updates the AzureMachine status. 
+func (m *MachineScope) UpdateStatus() { + switch m.VMState() { + case infrav1.VMStateSucceeded: + m.V(2).Info("VM is running", "id", m.GetVMID()) + conditions.MarkTrue(m.AzureMachine, infrav1.VMRunningCondition) + case infrav1.VMStateCreating: + m.V(2).Info("VM is creating", "id", m.GetVMID()) + conditions.MarkFalse(m.AzureMachine, infrav1.VMRunningCondition, infrav1.VMCreatingReason, clusterv1.ConditionSeverityInfo, "") + case infrav1.VMStateUpdating: + m.V(2).Info("VM is updating", "id", m.GetVMID()) + conditions.MarkFalse(m.AzureMachine, infrav1.VMRunningCondition, infrav1.VMUpdatingReason, clusterv1.ConditionSeverityInfo, "") + case infrav1.VMStateDeleting: + m.Info("Unexpected VM deletion", "id", m.GetVMID()) + conditions.MarkFalse(m.AzureMachine, infrav1.VMRunningCondition, infrav1.VMDeletingReason, clusterv1.ConditionSeverityWarning, "") + case infrav1.VMStateFailed: + m.Error(errors.New("Failed to create or update VM"), "VM is in failed state", "id", m.GetVMID()) + m.SetFailureReason(capierrors.UpdateMachineError) + m.SetFailureMessage(errors.Errorf("Azure VM state is %s", m.VMState())) + conditions.MarkFalse(m.AzureMachine, infrav1.VMRunningCondition, infrav1.VMProvisionFailedReason, clusterv1.ConditionSeverityError, "") + default: + m.V(2).Info("VM state is undefined", "id", m.GetVMID()) + conditions.MarkUnknown(m.AzureMachine, infrav1.VMRunningCondition, "", "") + } +} + // SetAnnotation sets a key value annotation on the AzureMachine. 
func (m *MachineScope) SetAnnotation(key, value string) { if m.AzureMachine.Annotations == nil { diff --git a/azure/scope/machinepool.go b/azure/scope/machinepool.go index 17dc53f3e81..f6cb5208c29 100644 --- a/azure/scope/machinepool.go +++ b/azure/scope/machinepool.go @@ -20,6 +20,10 @@ import ( "context" "encoding/base64" "fmt" + "time" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" + "sigs.k8s.io/cluster-api/util/conditions" "github.com/Azure/go-autorest/autorest/to" "github.com/go-logr/logr" @@ -259,6 +263,26 @@ func (m *MachinePoolScope) SetFailureReason(v capierrors.MachineStatusError) { m.AzureMachinePool.Status.FailureReason = &v } +// SetBootstrapConditions sets the AzureMachinePool BootstrapSucceeded condition based on the extension provisioning states. +func (m *MachinePoolScope) SetBootstrapConditions(provisioningState string, extensionName string) error { + switch infrav1.ProvisioningState(provisioningState) { + case infrav1.Succeeded: + m.V(4).Info("extension provisioning state is succeeded", "vm extension", extensionName, "scale set", m.Name()) + conditions.MarkTrue(m.AzureMachinePool, infrav1.BootstrapSucceededCondition) + return nil + case infrav1.Creating: + m.V(4).Info("extension provisioning state is creating", "vm extension", extensionName, "scale set", m.Name()) + conditions.MarkFalse(m.AzureMachinePool, infrav1.BootstrapSucceededCondition, infrav1.BootstrapInProgressReason, clusterv1.ConditionSeverityInfo, "") + return azure.WithTransientError(errors.New("extension still provisioning"), 30*time.Second) + case infrav1.Failed: + m.V(4).Info("extension provisioning state is failed", "vm extension", extensionName, "scale set", m.Name()) + conditions.MarkFalse(m.AzureMachinePool, infrav1.BootstrapSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityError, "") + return azure.WithTerminalError(errors.New("extension state failed")) + default: + return nil + } +} + // AdditionalTags merges AdditionalTags from the scope's 
AzureCluster and AzureMachinePool. If the same key is present in both, // the value from AzureMachinePool takes precedence. func (m *MachinePoolScope) AdditionalTags() infrav1.Tags { @@ -368,6 +392,9 @@ func (m *MachinePoolScope) VMSSExtensionSpecs() []azure.VMSSExtensionSpec { ScaleSetName: m.Name(), Publisher: publisher, Version: version, + ProtectedSettings: map[string]string{ + "commandToExecute": azure.BootstrapExtensionCommand(), + }, }, } } diff --git a/azure/services/scalesets/scalesets.go b/azure/services/scalesets/scalesets.go index 4412dbcb25c..2575a5cd837 100644 --- a/azure/services/scalesets/scalesets.go +++ b/azure/services/scalesets/scalesets.go @@ -535,7 +535,7 @@ func (s *Service) generateExtensions() []compute.VirtualMachineScaleSetExtension Type: to.StringPtr(extensionSpec.Name), TypeHandlerVersion: to.StringPtr(extensionSpec.Version), Settings: nil, - ProtectedSettings: nil, + ProtectedSettings: extensionSpec.ProtectedSettings, }, } } diff --git a/azure/services/scalesets/scalesets_test.go b/azure/services/scalesets/scalesets_test.go index 4ae45149b3d..ab3f0a66de2 100644 --- a/azure/services/scalesets/scalesets_test.go +++ b/azure/services/scalesets/scalesets_test.go @@ -991,8 +991,9 @@ func newDefaultVMSS() compute.VirtualMachineScaleSet { Publisher: to.StringPtr("somePublisher"), Type: to.StringPtr("someExtension"), TypeHandlerVersion: to.StringPtr("someVersion"), - Settings: nil, - ProtectedSettings: nil, + ProtectedSettings: map[string]string{ + "commandToExecute": "echo hello", + }, }, }, }, @@ -1087,9 +1088,13 @@ func setupDefaultVMSSExpectations(s *mock_scalesets.MockScaleSetScopeMockRecorde }, nil) s.VMSSExtensionSpecs().Return([]azure.VMSSExtensionSpec{ { - Name: "someExtension", - Publisher: "somePublisher", - Version: "someVersion", + Name: "someExtension", + ScaleSetName: "my-vmss", + Publisher: "somePublisher", + Version: "someVersion", + ProtectedSettings: map[string]string{ + "commandToExecute": "echo hello", + }, }, }).AnyTimes() 
} diff --git a/azure/services/virtualmachines/mock_virtualmachines/virtualmachines_mock.go b/azure/services/virtualmachines/mock_virtualmachines/virtualmachines_mock.go index 7375832f0ca..ab01014b563 100644 --- a/azure/services/virtualmachines/mock_virtualmachines/virtualmachines_mock.go +++ b/azure/services/virtualmachines/mock_virtualmachines/virtualmachines_mock.go @@ -392,6 +392,18 @@ func (mr *MockVMScopeMockRecorder) TenantID() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "TenantID", reflect.TypeOf((*MockVMScope)(nil).TenantID)) } +// UpdateStatus mocks base method. +func (m *MockVMScope) UpdateStatus() { + m.ctrl.T.Helper() + m.ctrl.Call(m, "UpdateStatus") +} + +// UpdateStatus indicates an expected call of UpdateStatus. +func (mr *MockVMScopeMockRecorder) UpdateStatus() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateStatus", reflect.TypeOf((*MockVMScope)(nil).UpdateStatus)) +} + // V mocks base method. 
func (m *MockVMScope) V(level int) logr.Logger { m.ctrl.T.Helper() diff --git a/azure/services/virtualmachines/virtualmachines.go b/azure/services/virtualmachines/virtualmachines.go index 38f3afb490e..2a458e00132 100644 --- a/azure/services/virtualmachines/virtualmachines.go +++ b/azure/services/virtualmachines/virtualmachines.go @@ -52,6 +52,7 @@ type VMScope interface { SetProviderID(string) SetAddresses([]corev1.NodeAddress) SetVMState(infrav1.VMState) + UpdateStatus() } // Service provides operations on azure resources @@ -97,6 +98,7 @@ func (s *Service) Reconcile(ctx context.Context) error { s.Scope.SetAnnotation("cluster-api-provider-azure", "true") s.Scope.SetAddresses(existingVM.Addresses) s.Scope.SetVMState(existingVM.State) + s.Scope.UpdateStatus() default: s.Scope.V(2).Info("creating VM", "vm", vmSpec.Name) sku, err := s.resourceSKUCache.Get(ctx, vmSpec.Size, resourceskus.VirtualMachines) diff --git a/azure/services/vmextensions/client.go b/azure/services/vmextensions/client.go index 404d0550bec..18f07307641 100644 --- a/azure/services/vmextensions/client.go +++ b/azure/services/vmextensions/client.go @@ -28,7 +28,7 @@ import ( // Client wraps go-sdk type client interface { Get(ctx context.Context, resourceGroupName, vmName, name string) (compute.VirtualMachineExtension, error) - CreateOrUpdate(context.Context, string, string, string, compute.VirtualMachineExtension) error + CreateOrUpdateAsync(context.Context, string, string, string, compute.VirtualMachineExtension) error Delete(context.Context, string, string, string) error } @@ -60,20 +60,12 @@ func (ac *azureClient) Get(ctx context.Context, resourceGroupName, vmName, name return ac.vmextensions.Get(ctx, resourceGroupName, vmName, name, "") } -// CreateOrUpdate creates or updates the virtual machine extension -func (ac *azureClient) CreateOrUpdate(ctx context.Context, resourceGroupName, vmName, name string, parameters compute.VirtualMachineExtension) error { +// CreateOrUpdateAsync creates or updates 
the virtual machine extension. +func (ac *azureClient) CreateOrUpdateAsync(ctx context.Context, resourceGroupName, vmName, name string, parameters compute.VirtualMachineExtension) error { ctx, span := tele.Tracer().Start(ctx, "vmextensions.AzureClient.CreateOrUpdate") defer span.End() - future, err := ac.vmextensions.CreateOrUpdate(ctx, resourceGroupName, vmName, name, parameters) - if err != nil { - return err - } - err = future.WaitForCompletionRef(ctx, ac.vmextensions.Client) - if err != nil { - return err - } - _, err = future.Result(ac.vmextensions) + _, err := ac.vmextensions.CreateOrUpdate(ctx, resourceGroupName, vmName, name, parameters) return err } diff --git a/azure/services/vmextensions/mock_vmextensions/client_mock.go b/azure/services/vmextensions/mock_vmextensions/client_mock.go index 1e78d2a45d6..6c41a246ce4 100644 --- a/azure/services/vmextensions/mock_vmextensions/client_mock.go +++ b/azure/services/vmextensions/mock_vmextensions/client_mock.go @@ -51,18 +51,18 @@ func (m *Mockclient) EXPECT() *MockclientMockRecorder { return m.recorder } -// CreateOrUpdate mocks base method. -func (m *Mockclient) CreateOrUpdate(arg0 context.Context, arg1, arg2, arg3 string, arg4 compute.VirtualMachineExtension) error { +// CreateOrUpdateAsync mocks base method. +func (m *Mockclient) CreateOrUpdateAsync(arg0 context.Context, arg1, arg2, arg3 string, arg4 compute.VirtualMachineExtension) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateOrUpdate", arg0, arg1, arg2, arg3, arg4) + ret := m.ctrl.Call(m, "CreateOrUpdateAsync", arg0, arg1, arg2, arg3, arg4) ret0, _ := ret[0].(error) return ret0 } -// CreateOrUpdate indicates an expected call of CreateOrUpdate. -func (mr *MockclientMockRecorder) CreateOrUpdate(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { +// CreateOrUpdateAsync indicates an expected call of CreateOrUpdateAsync. 
+func (mr *MockclientMockRecorder) CreateOrUpdateAsync(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateOrUpdate", reflect.TypeOf((*Mockclient)(nil).CreateOrUpdate), arg0, arg1, arg2, arg3, arg4) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateOrUpdateAsync", reflect.TypeOf((*Mockclient)(nil).CreateOrUpdateAsync), arg0, arg1, arg2, arg3, arg4) } // Delete mocks base method. diff --git a/azure/services/vmextensions/mock_vmextensions/vmextensions_mock.go b/azure/services/vmextensions/mock_vmextensions/vmextensions_mock.go index 75a371d15bf..63528c613f0 100644 --- a/azure/services/vmextensions/mock_vmextensions/vmextensions_mock.go +++ b/azure/services/vmextensions/mock_vmextensions/vmextensions_mock.go @@ -28,6 +28,7 @@ import ( gomock "github.com/golang/mock/gomock" v1alpha4 "sigs.k8s.io/cluster-api-provider-azure/api/v1alpha4" azure "sigs.k8s.io/cluster-api-provider-azure/azure" + v1alpha40 "sigs.k8s.io/cluster-api/api/v1alpha4" ) // MockVMExtensionScope is a mock of VMExtensionScope interface. @@ -255,6 +256,18 @@ func (mr *MockVMExtensionScopeMockRecorder) ResourceGroup() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ResourceGroup", reflect.TypeOf((*MockVMExtensionScope)(nil).ResourceGroup)) } +// SetCondition mocks base method. +func (m *MockVMExtensionScope) SetCondition(arg0 v1alpha40.ConditionType, arg1 string, arg2 v1alpha40.ConditionSeverity, arg3 bool) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "SetCondition", arg0, arg1, arg2, arg3) +} + +// SetCondition indicates an expected call of SetCondition. +func (mr *MockVMExtensionScopeMockRecorder) SetCondition(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetCondition", reflect.TypeOf((*MockVMExtensionScope)(nil).SetCondition), arg0, arg1, arg2, arg3) +} + // SubscriptionID mocks base method. 
func (m *MockVMExtensionScope) SubscriptionID() string { m.ctrl.T.Helper() diff --git a/azure/services/vmextensions/vmextensions.go b/azure/services/vmextensions/vmextensions.go index 4fc736ccd1f..1897b7cf8ce 100644 --- a/azure/services/vmextensions/vmextensions.go +++ b/azure/services/vmextensions/vmextensions.go @@ -19,11 +19,10 @@ package vmextensions import ( "context" - "github.com/Azure/azure-sdk-for-go/profiles/latest/compute/mgmt/compute" + "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2020-06-30/compute" "github.com/Azure/go-autorest/autorest/to" "github.com/go-logr/logr" "github.com/pkg/errors" - "sigs.k8s.io/cluster-api-provider-azure/azure" "sigs.k8s.io/cluster-api-provider-azure/util/tele" ) @@ -33,6 +32,7 @@ type VMExtensionScope interface { logr.Logger azure.ClusterDescriber VMExtensionSpecs() []azure.VMExtensionSpec + SetBootstrapConditions(string, string) error } // Service provides operations on azure resources @@ -55,13 +55,19 @@ func (s *Service) Reconcile(ctx context.Context) error { defer span.End() for _, extensionSpec := range s.Scope.VMExtensionSpecs() { - if _, err := s.client.Get(ctx, s.Scope.ResourceGroup(), extensionSpec.VMName, extensionSpec.Name); err == nil { - // check for the extension and don't update if already exists - // TODO: set conditions based on extension status + if existing, err := s.client.Get(ctx, s.Scope.ResourceGroup(), extensionSpec.VMName, extensionSpec.Name); err == nil { + // check the extension status and set the associated conditions. + if retErr := s.Scope.SetBootstrapConditions(to.String(existing.ProvisioningState), extensionSpec.Name); retErr != nil { + return retErr + } + // if the extension already exists, do not update it. 
continue + } else if !azure.ResourceNotFound(err) { + return errors.Wrapf(err, "failed to get vm extension %s on vm %s", extensionSpec.Name, extensionSpec.VMName) } + s.Scope.V(2).Info("creating VM extension", "vm extension", extensionSpec.Name) - err := s.client.CreateOrUpdate( + err := s.client.CreateOrUpdateAsync( ctx, s.Scope.ResourceGroup(), extensionSpec.VMName, @@ -72,7 +78,7 @@ func (s *Service) Reconcile(ctx context.Context) error { Type: to.StringPtr(extensionSpec.Name), TypeHandlerVersion: to.StringPtr(extensionSpec.Version), Settings: nil, - ProtectedSettings: nil, + ProtectedSettings: extensionSpec.ProtectedSettings, }, Location: to.StringPtr(s.Scope.Location()), }, diff --git a/azure/services/vmextensions/vmextensions_test.go b/azure/services/vmextensions/vmextensions_test.go index e611ea4a28b..70b224a78ec 100644 --- a/azure/services/vmextensions/vmextensions_test.go +++ b/azure/services/vmextensions/vmextensions_test.go @@ -21,6 +21,8 @@ import ( "net/http" "testing" + "github.com/Azure/go-autorest/autorest/to" + "github.com/Azure/azure-sdk-for-go/profiles/latest/compute/mgmt/compute" "github.com/Azure/go-autorest/autorest" "sigs.k8s.io/cluster-api-provider-azure/azure/services/vmextensions/mock_vmextensions" @@ -39,7 +41,34 @@ func TestReconcileVMExtension(t *testing.T) { expect func(s *mock_vmextensions.MockVMExtensionScopeMockRecorder, m *mock_vmextensions.MockclientMockRecorder) }{ { - name: "extension already exists", + name: "extension is in succeeded state", + expectedError: "", + expect: func(s *mock_vmextensions.MockVMExtensionScopeMockRecorder, m *mock_vmextensions.MockclientMockRecorder) { + s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New()) + s.VMExtensionSpecs().Return([]azure.VMExtensionSpec{ + { + Name: "my-extension-1", + VMName: "my-vm", + Publisher: "some-publisher", + Version: "1.0", + }, + }) + s.ResourceGroup().AnyTimes().Return("my-rg") + s.Location().AnyTimes().Return("test-location") + 
m.Get(gomockinternal.AContext(), "my-rg", "my-vm", "my-extension-1").Return(compute.VirtualMachineExtension{ + VirtualMachineExtensionProperties: &compute.VirtualMachineExtensionProperties{ + Publisher: to.StringPtr("some-publisher"), + Type: to.StringPtr("my-extension-1"), + ProvisioningState: to.StringPtr(string(compute.ProvisioningStateSucceeded)), + }, + ID: to.StringPtr("fake/id"), + Name: to.StringPtr("my-extension-1"), + }, nil) + s.SetBootstrapConditions(string(compute.ProvisioningStateSucceeded), "my-extension-1") + }, + }, + { + name: "extension is in failed state", expectedError: "", expect: func(s *mock_vmextensions.MockVMExtensionScopeMockRecorder, m *mock_vmextensions.MockclientMockRecorder) { s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New()) @@ -53,7 +82,43 @@ func TestReconcileVMExtension(t *testing.T) { }) s.ResourceGroup().AnyTimes().Return("my-rg") s.Location().AnyTimes().Return("test-location") - m.Get(gomockinternal.AContext(), "my-rg", "my-vm", "my-extension-1") + m.Get(gomockinternal.AContext(), "my-rg", "my-vm", "my-extension-1").Return(compute.VirtualMachineExtension{ + VirtualMachineExtensionProperties: &compute.VirtualMachineExtensionProperties{ + Publisher: to.StringPtr("some-publisher"), + Type: to.StringPtr("my-extension-1"), + ProvisioningState: to.StringPtr(string(compute.ProvisioningStateFailed)), + }, + ID: to.StringPtr("fake/id"), + Name: to.StringPtr("my-extension-1"), + }, nil) + s.SetBootstrapConditions(string(compute.ProvisioningStateFailed), "my-extension-1") + }, + }, + { + name: "extension is still creating", + expectedError: "", + expect: func(s *mock_vmextensions.MockVMExtensionScopeMockRecorder, m *mock_vmextensions.MockclientMockRecorder) { + s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New()) + s.VMExtensionSpecs().Return([]azure.VMExtensionSpec{ + { + Name: "my-extension-1", + VMName: "my-vm", + Publisher: "some-publisher", + Version: "1.0", + }, + }) + 
s.ResourceGroup().AnyTimes().Return("my-rg") + s.Location().AnyTimes().Return("test-location") + m.Get(gomockinternal.AContext(), "my-rg", "my-vm", "my-extension-1").Return(compute.VirtualMachineExtension{ + VirtualMachineExtensionProperties: &compute.VirtualMachineExtensionProperties{ + Publisher: to.StringPtr("some-publisher"), + Type: to.StringPtr("my-extension-1"), + ProvisioningState: to.StringPtr(string(compute.ProvisioningStateCreating)), + }, + ID: to.StringPtr("fake/id"), + Name: to.StringPtr("my-extension-1"), + }, nil) + s.SetBootstrapConditions(string(compute.ProvisioningStateCreating), "my-extension-1") }, }, { @@ -79,10 +144,36 @@ func TestReconcileVMExtension(t *testing.T) { s.Location().AnyTimes().Return("test-location") m.Get(gomockinternal.AContext(), "my-rg", "my-vm", "my-extension-1"). Return(compute.VirtualMachineExtension{}, autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 404}, "Not found")) - m.CreateOrUpdate(gomockinternal.AContext(), "my-rg", "my-vm", "my-extension-1", gomock.AssignableToTypeOf(compute.VirtualMachineExtension{})) + m.CreateOrUpdateAsync(gomockinternal.AContext(), "my-rg", "my-vm", "my-extension-1", gomock.AssignableToTypeOf(compute.VirtualMachineExtension{})) m.Get(gomockinternal.AContext(), "my-rg", "my-vm", "other-extension"). 
Return(compute.VirtualMachineExtension{}, autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 404}, "Not found")) - m.CreateOrUpdate(gomockinternal.AContext(), "my-rg", "my-vm", "other-extension", gomock.AssignableToTypeOf(compute.VirtualMachineExtension{})) + m.CreateOrUpdateAsync(gomockinternal.AContext(), "my-rg", "my-vm", "other-extension", gomock.AssignableToTypeOf(compute.VirtualMachineExtension{})) + }, + }, + { + name: "error getting the extension", + expectedError: "failed to get vm extension my-extension-1 on vm my-vm: #: Internal Server Error: StatusCode=500", + expect: func(s *mock_vmextensions.MockVMExtensionScopeMockRecorder, m *mock_vmextensions.MockclientMockRecorder) { + s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New()) + s.VMExtensionSpecs().Return([]azure.VMExtensionSpec{ + { + Name: "my-extension-1", + VMName: "my-vm", + Publisher: "some-publisher", + Version: "1.0", + }, + { + Name: "other-extension", + VMName: "my-vm", + Publisher: "other-publisher", + Version: "2.0", + }, + }) + s.ResourceGroup().AnyTimes().Return("my-rg") + s.Location().AnyTimes().Return("test-location") + m.Get(gomockinternal.AContext(), "my-rg", "my-vm", "my-extension-1"). + Return(compute.VirtualMachineExtension{}, autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 500}, "Internal Server Error")) + }, }, { @@ -108,7 +199,7 @@ func TestReconcileVMExtension(t *testing.T) { s.Location().AnyTimes().Return("test-location") m.Get(gomockinternal.AContext(), "my-rg", "my-vm", "my-extension-1"). 
Return(compute.VirtualMachineExtension{}, autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 404}, "Not found")) - m.CreateOrUpdate(gomockinternal.AContext(), "my-rg", "my-vm", "my-extension-1", gomock.AssignableToTypeOf(compute.VirtualMachineExtension{})).Return(autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 500}, "Internal Server Error")) + m.CreateOrUpdateAsync(gomockinternal.AContext(), "my-rg", "my-vm", "my-extension-1", gomock.AssignableToTypeOf(compute.VirtualMachineExtension{})).Return(autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 500}, "Internal Server Error")) }, }, diff --git a/azure/services/vmssextensions/client.go b/azure/services/vmssextensions/client.go index d577630f495..ecfde3f43e8 100644 --- a/azure/services/vmssextensions/client.go +++ b/azure/services/vmssextensions/client.go @@ -28,8 +28,6 @@ import ( // Client wraps go-sdk type client interface { Get(context.Context, string, string, string) (compute.VirtualMachineScaleSetExtension, error) - CreateOrUpdate(context.Context, string, string, string, compute.VirtualMachineScaleSetExtension) error - Delete(context.Context, string, string, string) error } // AzureClient contains the Azure go-sdk Client @@ -59,37 +57,3 @@ func (ac *azureClient) Get(ctx context.Context, resourceGroupName, vmssName, nam return ac.vmssextensions.Get(ctx, resourceGroupName, vmssName, name, "") } - -// CreateOrUpdate creates or updates the virtual machine scale set extension -func (ac *azureClient) CreateOrUpdate(ctx context.Context, resourceGroupName, vmName, name string, parameters compute.VirtualMachineScaleSetExtension) error { - ctx, span := tele.Tracer().Start(ctx, "vmssextensions.AzureClient.CreateOrUpdate") - defer span.End() - - future, err := ac.vmssextensions.CreateOrUpdate(ctx, resourceGroupName, vmName, name, parameters) - if err != nil { - return err - } - err = future.WaitForCompletionRef(ctx, ac.vmssextensions.Client) - if err != nil { - return err - } - 
_, err = future.Result(ac.vmssextensions) - return err -} - -// Delete removes the virtual machine scale set extension. -func (ac *azureClient) Delete(ctx context.Context, resourceGroupName, vmName, name string) error { - ctx, span := tele.Tracer().Start(ctx, "vmssextensions.AzureClient.Delete") - defer span.End() - - future, err := ac.vmssextensions.Delete(ctx, resourceGroupName, vmName, name) - if err != nil { - return err - } - err = future.WaitForCompletionRef(ctx, ac.vmssextensions.Client) - if err != nil { - return err - } - _, err = future.Result(ac.vmssextensions) - return err -} diff --git a/azure/services/vmssextensions/mock_vmssextensions/client_mock.go b/azure/services/vmssextensions/mock_vmssextensions/client_mock.go index b65850bb568..4ec9171a373 100644 --- a/azure/services/vmssextensions/mock_vmssextensions/client_mock.go +++ b/azure/services/vmssextensions/mock_vmssextensions/client_mock.go @@ -51,34 +51,6 @@ func (m *Mockclient) EXPECT() *MockclientMockRecorder { return m.recorder } -// CreateOrUpdate mocks base method. -func (m *Mockclient) CreateOrUpdate(arg0 context.Context, arg1, arg2, arg3 string, arg4 compute.VirtualMachineScaleSetExtension) error { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateOrUpdate", arg0, arg1, arg2, arg3, arg4) - ret0, _ := ret[0].(error) - return ret0 -} - -// CreateOrUpdate indicates an expected call of CreateOrUpdate. -func (mr *MockclientMockRecorder) CreateOrUpdate(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateOrUpdate", reflect.TypeOf((*Mockclient)(nil).CreateOrUpdate), arg0, arg1, arg2, arg3, arg4) -} - -// Delete mocks base method. -func (m *Mockclient) Delete(arg0 context.Context, arg1, arg2, arg3 string) error { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "Delete", arg0, arg1, arg2, arg3) - ret0, _ := ret[0].(error) - return ret0 -} - -// Delete indicates an expected call of Delete. 
-func (mr *MockclientMockRecorder) Delete(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Delete", reflect.TypeOf((*Mockclient)(nil).Delete), arg0, arg1, arg2, arg3) -} - // Get mocks base method. func (m *Mockclient) Get(arg0 context.Context, arg1, arg2, arg3 string) (compute.VirtualMachineScaleSetExtension, error) { m.ctrl.T.Helper() diff --git a/azure/services/vmssextensions/mock_vmssextensions/vmssextensions_mock.go b/azure/services/vmssextensions/mock_vmssextensions/vmssextensions_mock.go index 204b40abc0e..e7569294764 100644 --- a/azure/services/vmssextensions/mock_vmssextensions/vmssextensions_mock.go +++ b/azure/services/vmssextensions/mock_vmssextensions/vmssextensions_mock.go @@ -28,6 +28,7 @@ import ( gomock "github.com/golang/mock/gomock" v1alpha4 "sigs.k8s.io/cluster-api-provider-azure/api/v1alpha4" azure "sigs.k8s.io/cluster-api-provider-azure/azure" + v1alpha40 "sigs.k8s.io/cluster-api/api/v1alpha4" ) // MockVMSSExtensionScope is a mock of VMSSExtensionScope interface. @@ -255,6 +256,18 @@ func (mr *MockVMSSExtensionScopeMockRecorder) ResourceGroup() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ResourceGroup", reflect.TypeOf((*MockVMSSExtensionScope)(nil).ResourceGroup)) } +// SetCondition mocks base method. +func (m *MockVMSSExtensionScope) SetCondition(arg0 v1alpha40.ConditionType, arg1 string, arg2 v1alpha40.ConditionSeverity, arg3 bool) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "SetCondition", arg0, arg1, arg2, arg3) +} + +// SetCondition indicates an expected call of SetCondition. +func (mr *MockVMSSExtensionScopeMockRecorder) SetCondition(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetCondition", reflect.TypeOf((*MockVMSSExtensionScope)(nil).SetCondition), arg0, arg1, arg2, arg3) +} + // SubscriptionID mocks base method. 
func (m *MockVMSSExtensionScope) SubscriptionID() string { m.ctrl.T.Helper() diff --git a/azure/services/vmssextensions/vmssextensions.go b/azure/services/vmssextensions/vmssextensions.go index 26df4b056e6..ca23fb5ab74 100644 --- a/azure/services/vmssextensions/vmssextensions.go +++ b/azure/services/vmssextensions/vmssextensions.go @@ -19,11 +19,9 @@ package vmssextensions import ( "context" - "github.com/Azure/azure-sdk-for-go/profiles/latest/compute/mgmt/compute" "github.com/Azure/go-autorest/autorest/to" "github.com/go-logr/logr" "github.com/pkg/errors" - "sigs.k8s.io/cluster-api-provider-azure/azure" "sigs.k8s.io/cluster-api-provider-azure/util/tele" ) @@ -33,6 +31,7 @@ type VMSSExtensionScope interface { logr.Logger azure.ClusterDescriber VMSSExtensionSpecs() []azure.VMSSExtensionSpec + SetBootstrapConditions(string, string) error } // Service provides operations on azure resources @@ -55,32 +54,16 @@ func (s *Service) Reconcile(ctx context.Context) error { defer span.End() for _, extensionSpec := range s.Scope.VMSSExtensionSpecs() { - if _, err := s.client.Get(ctx, s.Scope.ResourceGroup(), extensionSpec.ScaleSetName, extensionSpec.Name); err == nil { - // check for the extension and don't update if already exists - // TODO: set conditions based on extension status - continue - } - - s.Scope.V(2).Info("creating VMSS extension", "vssm extension", extensionSpec.Name) - err := s.client.CreateOrUpdate( - ctx, - s.Scope.ResourceGroup(), - extensionSpec.ScaleSetName, - extensionSpec.Name, - compute.VirtualMachineScaleSetExtension{ - VirtualMachineScaleSetExtensionProperties: &compute.VirtualMachineScaleSetExtensionProperties{ - Publisher: to.StringPtr(extensionSpec.Publisher), - Type: to.StringPtr(extensionSpec.Name), - TypeHandlerVersion: to.StringPtr(extensionSpec.Version), - Settings: nil, - ProtectedSettings: nil, - }, - }, - ) - if err != nil { - return errors.Wrapf(err, "failed to create VMSS extension %s on scale set %s in resource group %s", 
extensionSpec.Name, extensionSpec.ScaleSetName, s.Scope.ResourceGroup()) + if existing, err := s.client.Get(ctx, s.Scope.ResourceGroup(), extensionSpec.ScaleSetName, extensionSpec.Name); err == nil { + // check the extension status and set the associated conditions. + if retErr := s.Scope.SetBootstrapConditions(to.String(existing.ProvisioningState), extensionSpec.Name); retErr != nil { + return retErr + } + } else if !azure.ResourceNotFound(err) { + return errors.Wrapf(err, "failed to get vm extension %s on scale set %s", extensionSpec.Name, extensionSpec.ScaleSetName) } - s.Scope.V(2).Info("successfully created VMSS extension", "vm extension", extensionSpec.Name) + // Nothing else to do here, the extensions are applied to the model as part of the scale set Reconcile. + continue } return nil } diff --git a/azure/services/vmssextensions/vmssextensions_test.go b/azure/services/vmssextensions/vmssextensions_test.go index a87b1f994da..8403eb8d3c5 100644 --- a/azure/services/vmssextensions/vmssextensions_test.go +++ b/azure/services/vmssextensions/vmssextensions_test.go @@ -21,6 +21,8 @@ import ( "net/http" "testing" + "github.com/Azure/go-autorest/autorest/to" + "github.com/Azure/azure-sdk-for-go/profiles/latest/compute/mgmt/compute" "github.com/Azure/go-autorest/autorest" "sigs.k8s.io/cluster-api-provider-azure/azure/services/vmssextensions/mock_vmssextensions" @@ -53,11 +55,20 @@ func TestReconcileVMSSExtension(t *testing.T) { }) s.ResourceGroup().AnyTimes().Return("my-rg") s.Location().AnyTimes().Return("test-location") - m.Get(gomockinternal.AContext(), "my-rg", "my-vmss", "my-extension-1") + m.Get(gomockinternal.AContext(), "my-rg", "my-vmss", "my-extension-1").Return(compute.VirtualMachineScaleSetExtension{ + Name: to.StringPtr("my-extension-1"), + VirtualMachineScaleSetExtensionProperties: &compute.VirtualMachineScaleSetExtensionProperties{ + Publisher: to.StringPtr("some-publisher"), + Type: to.StringPtr("my-extension-1"), + ProvisioningState: 
to.StringPtr(string(compute.ProvisioningStateSucceeded)), + }, + ID: to.StringPtr("some/fake/id"), + }, nil) + s.SetBootstrapConditions(string(compute.ProvisioningStateSucceeded), "my-extension-1") }, }, { - name: "reconcile multiple extensions", + name: "extension does not exist", expectedError: "", expect: func(s *mock_vmssextensions.MockVMSSExtensionScopeMockRecorder, m *mock_vmssextensions.MockclientMockRecorder) { s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New()) @@ -79,15 +90,13 @@ func TestReconcileVMSSExtension(t *testing.T) { s.Location().AnyTimes().Return("test-location") m.Get(gomockinternal.AContext(), "my-rg", "my-vmss", "my-extension-1"). Return(compute.VirtualMachineScaleSetExtension{}, autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 404}, "Not found")) - m.CreateOrUpdate(gomockinternal.AContext(), "my-rg", "my-vmss", "my-extension-1", gomock.AssignableToTypeOf(compute.VirtualMachineScaleSetExtension{})) m.Get(gomockinternal.AContext(), "my-rg", "my-vmss", "other-extension"). 
Return(compute.VirtualMachineScaleSetExtension{}, autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 404}, "Not found")) - m.CreateOrUpdate(gomockinternal.AContext(), "my-rg", "my-vmss", "other-extension", gomock.AssignableToTypeOf(compute.VirtualMachineScaleSetExtension{})) }, }, { - name: "error creating the extension", - expectedError: "failed to create VMSS extension my-extension-1 on scale set my-vmss in resource group my-rg: #: Internal Server Error: StatusCode=500", + name: "error getting the extension", + expectedError: "failed to get vm extension my-extension-1 on scale set my-vmss: #: Internal Server Error: StatusCode=500", expect: func(s *mock_vmssextensions.MockVMSSExtensionScopeMockRecorder, m *mock_vmssextensions.MockclientMockRecorder) { s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New()) s.VMSSExtensionSpecs().Return([]azure.VMSSExtensionSpec{ @@ -107,9 +116,7 @@ func TestReconcileVMSSExtension(t *testing.T) { s.ResourceGroup().AnyTimes().Return("my-rg") s.Location().AnyTimes().Return("test-location") m.Get(gomockinternal.AContext(), "my-rg", "my-vmss", "my-extension-1"). - Return(compute.VirtualMachineScaleSetExtension{}, autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 404}, "Not found")) - m.CreateOrUpdate(gomockinternal.AContext(), "my-rg", "my-vmss", "my-extension-1", gomock.AssignableToTypeOf(compute.VirtualMachineScaleSetExtension{})).Return(autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 500}, "Internal Server Error")) - + Return(compute.VirtualMachineScaleSetExtension{}, autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 500}, "Internal Server Error")) }, }, } diff --git a/azure/types.go b/azure/types.go index 73147779618..654425babc4 100644 --- a/azure/types.go +++ b/azure/types.go @@ -191,16 +191,18 @@ type AvailabilitySetSpec struct { // VMExtensionSpec defines the specification for a VM extension. 
type VMExtensionSpec struct { - Name string - VMName string - Publisher string - Version string + Name string + VMName string + Publisher string + Version string + ProtectedSettings map[string]string } // VMSSExtensionSpec defines the specification for a VMSS extension. type VMSSExtensionSpec struct { - Name string - ScaleSetName string - Publisher string - Version string + Name string + ScaleSetName string + Publisher string + Version string + ProtectedSettings map[string]string } diff --git a/controllers/azuremachine_controller.go b/controllers/azuremachine_controller.go index 9c256f7806a..2c7309dee31 100644 --- a/controllers/azuremachine_controller.go +++ b/controllers/azuremachine_controller.go @@ -296,10 +296,8 @@ func (r *AzureMachineReconciler) reconcileNormal(ctx context.Context, machineSco // Handle transient and terminal errors var reconcileError azure.ReconcileError if errors.As(err, &reconcileError) { - r.Recorder.Eventf(machineScope.AzureMachine, corev1.EventTypeWarning, "ReconcileError", errors.Wrap(err, "failed to reconcile AzureMachine").Error()) - conditions.MarkFalse(machineScope.AzureMachine, infrav1.VMRunningCondition, infrav1.VMProvisionFailedReason, clusterv1.ConditionSeverityError, err.Error()) - if reconcileError.IsTerminal() { + r.Recorder.Eventf(machineScope.AzureMachine, corev1.EventTypeWarning, "ReconcileError", errors.Wrapf(err, "failed to reconcile AzureMachine").Error()) machineScope.Error(err, "failed to reconcile AzureMachine", "name", machineScope.Name()) machineScope.SetFailureReason(capierrors.CreateMachineError) machineScope.SetFailureMessage(err) @@ -309,48 +307,17 @@ func (r *AzureMachineReconciler) reconcileNormal(ctx context.Context, machineSco } if reconcileError.IsTransient() { - machineScope.Error(err, "failed to reconcile AzureMachine", "name", machineScope.Name()) + machineScope.Error(err, "transient failure to reconcile AzureMachine, retrying", "name", machineScope.Name()) + machineScope.SetNotReady() return 
reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil } - - return reconcile.Result{}, errors.Wrap(err, "failed to reconcile AzureMachine") } - - r.Recorder.Eventf(machineScope.AzureMachine, corev1.EventTypeWarning, "Error creating new AzureMachine", errors.Wrap(err, "failed to reconcile AzureMachine").Error()) + r.Recorder.Eventf(machineScope.AzureMachine, corev1.EventTypeWarning, "ReconcileError", errors.Wrapf(err, "failed to reconcile AzureMachine").Error()) conditions.MarkFalse(machineScope.AzureMachine, infrav1.VMRunningCondition, infrav1.VMProvisionFailedReason, clusterv1.ConditionSeverityError, err.Error()) return reconcile.Result{}, errors.Wrap(err, "failed to reconcile AzureMachine") } - switch machineScope.VMState() { - case infrav1.VMStateSucceeded: - machineScope.V(2).Info("VM is running", "id", machineScope.GetVMID()) - conditions.MarkTrue(machineScope.AzureMachine, infrav1.VMRunningCondition) - machineScope.SetReady() - case infrav1.VMStateCreating: - machineScope.V(2).Info("VM is creating", "id", machineScope.GetVMID()) - conditions.MarkFalse(machineScope.AzureMachine, infrav1.VMRunningCondition, infrav1.VMNCreatingReason, clusterv1.ConditionSeverityInfo, "") - machineScope.SetNotReady() - case infrav1.VMStateUpdating: - machineScope.V(2).Info("VM is updating", "id", machineScope.GetVMID()) - conditions.MarkFalse(machineScope.AzureMachine, infrav1.VMRunningCondition, infrav1.VMNUpdatingReason, clusterv1.ConditionSeverityInfo, "") - machineScope.SetNotReady() - case infrav1.VMStateDeleting: - machineScope.Info("Unexpected VM deletion", "id", machineScope.GetVMID()) - r.Recorder.Eventf(machineScope.AzureMachine, corev1.EventTypeWarning, "UnexpectedVMDeletion", "Unexpected Azure VM deletion") - conditions.MarkFalse(machineScope.AzureMachine, infrav1.VMRunningCondition, infrav1.VMDDeletingReason, clusterv1.ConditionSeverityWarning, "") - machineScope.SetNotReady() - case infrav1.VMStateFailed: - machineScope.Error(errors.New("Failed to 
create or update VM"), "VM is in failed state", "id", machineScope.GetVMID()) - r.Recorder.Eventf(machineScope.AzureMachine, corev1.EventTypeWarning, "FailedVMState", "Azure VM is in failed state") - machineScope.SetFailureReason(capierrors.UpdateMachineError) - machineScope.SetFailureMessage(errors.Errorf("Azure VM state is %s", machineScope.VMState())) - conditions.MarkFalse(machineScope.AzureMachine, infrav1.VMRunningCondition, infrav1.VMProvisionFailedReason, clusterv1.ConditionSeverityWarning, "") - machineScope.SetNotReady() - default: - machineScope.V(2).Info("VM state is undefined", "id", machineScope.GetVMID()) - conditions.MarkUnknown(machineScope.AzureMachine, infrav1.VMRunningCondition, "", "") - machineScope.SetNotReady() - } + machineScope.SetReady() return reconcile.Result{}, nil } diff --git a/exp/controllers/azuremachinepool_controller.go b/exp/controllers/azuremachinepool_controller.go index 6078d285be6..947b1b3a91e 100644 --- a/exp/controllers/azuremachinepool_controller.go +++ b/exp/controllers/azuremachinepool_controller.go @@ -20,6 +20,8 @@ import ( "context" "time" + "sigs.k8s.io/cluster-api/util/conditions" + "github.com/go-logr/logr" "github.com/pkg/errors" "go.opentelemetry.io/otel/api/trace" @@ -297,12 +299,15 @@ func (r *AzureMachinePoolReconciler) reconcileNormal(ctx context.Context, machin switch machinePoolScope.ProvisioningState() { case infrav1.VMStateSucceeded: machinePoolScope.V(2).Info("Scale Set is running", "id", machinePoolScope.ProviderID()) + conditions.MarkTrue(machinePoolScope.AzureMachinePool, infrav1.ScaleSetRunningCondition) machinePoolScope.SetReady() case infrav1.VMStateCreating: machinePoolScope.V(2).Info("Scale Set is creating", "id", machinePoolScope.ProviderID()) + conditions.MarkFalse(machinePoolScope.AzureMachinePool, infrav1.ScaleSetRunningCondition, infrav1.ScaleSetCreatingReason, clusterv1.ConditionSeverityInfo, "") machinePoolScope.SetNotReady() case infrav1.VMStateUpdating: machinePoolScope.V(2).Info("Scale 
Set is updating", "id", machinePoolScope.ProviderID()) + conditions.MarkFalse(machinePoolScope.AzureMachinePool, infrav1.ScaleSetRunningCondition, infrav1.ScaleSetUpdatingReason, clusterv1.ConditionSeverityInfo, "") machinePoolScope.SetNotReady() // we may still be scaling up, so check back in a bit return reconcile.Result{ @@ -311,6 +316,7 @@ func (r *AzureMachinePoolReconciler) reconcileNormal(ctx context.Context, machin case infrav1.VMStateDeleting: machinePoolScope.Info("Unexpected scale set deletion", "id", machinePoolScope.ProviderID()) r.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "UnexpectedVMDeletion", "Unexpected Azure scale set deletion") + conditions.MarkFalse(machinePoolScope.AzureMachinePool, infrav1.VMRunningCondition, infrav1.ScaleSetDeletingReason, clusterv1.ConditionSeverityWarning, "") machinePoolScope.SetNotReady() case infrav1.VMStateFailed: machinePoolScope.SetNotReady() @@ -318,14 +324,16 @@ func (r *AzureMachinePoolReconciler) reconcileNormal(ctx context.Context, machin r.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "FailedVMState", "Azure scale set is in failed state") machinePoolScope.SetFailureReason(capierrors.UpdateMachineError) machinePoolScope.SetFailureMessage(errors.Errorf("Azure VM state is %s", machinePoolScope.ProvisioningState())) + conditions.MarkFalse(machinePoolScope.AzureMachinePool, infrav1.ScaleSetRunningCondition, infrav1.ScaleSetProvisionFailedReason, clusterv1.ConditionSeverityError, "") // If scale set failed provisioning, delete it so it can be recreated err := ams.Delete(ctx) if err != nil { - return reconcile.Result{}, errors.Wrap(err, "failed to delete VM in a failed state") + return reconcile.Result{}, errors.Wrap(err, "failed to delete scale set in a failed state") } - return reconcile.Result{}, errors.Wrap(err, "VM deleted, retry creating in next reconcile") + return reconcile.Result{}, errors.Wrap(err, "Scale set deleted, retry creating in next 
reconcile") default: machinePoolScope.SetNotReady() + conditions.MarkUnknown(machinePoolScope.AzureMachinePool, infrav1.ScaleSetRunningCondition, "", "") return reconcile.Result{}, nil } From 342ef15a71930eded27eb4bb5fa35f37bb33e40a Mon Sep 17 00:00:00 2001 From: Cecile Robert-Michon Date: Tue, 16 Mar 2021 16:30:01 -0700 Subject: [PATCH 2/3] :warning: Rename VMState to ProvisioningState --- api/v1alpha3/zz_generated.conversion.go | 4 +- api/v1alpha4/azuremachine_types.go | 2 +- api/v1alpha4/types.go | 40 ++++++++++--------- api/v1alpha4/zz_generated.deepcopy.go | 2 +- azure/converters/vm.go | 2 +- azure/converters/vmss.go | 4 +- azure/scope/machine.go | 22 +++++----- azure/scope/machinepool.go | 10 ++--- .../mock_scalesets/scalesets_mock.go | 2 +- azure/services/scalesets/scalesets.go | 6 +-- azure/services/scalesets/scalesets_test.go | 12 +++--- .../virtualmachines_mock.go | 2 +- .../virtualmachines/virtualmachines.go | 4 +- .../virtualmachines/virtualmachines_test.go | 2 +- .../mock_vmextensions/vmextensions_mock.go | 15 +++---- .../vmssextensions_mock.go | 15 +++---- controllers/azuremachine_controller.go | 2 +- exp/api/v1alpha3/zz_generated.conversion.go | 8 ++-- exp/api/v1alpha4/azuremachinepool_types.go | 4 +- exp/api/v1alpha4/types.go | 32 +++++++-------- exp/api/v1alpha4/zz_generated.deepcopy.go | 4 +- .../azuremachinepool_controller.go | 10 ++--- 22 files changed, 105 insertions(+), 99 deletions(-) diff --git a/api/v1alpha3/zz_generated.conversion.go b/api/v1alpha3/zz_generated.conversion.go index c2f419cf1b3..946d264b30d 100644 --- a/api/v1alpha3/zz_generated.conversion.go +++ b/api/v1alpha3/zz_generated.conversion.go @@ -879,7 +879,7 @@ func autoConvert_v1alpha4_AzureMachineSpec_To_v1alpha3_AzureMachineSpec(in *v1al func autoConvert_v1alpha3_AzureMachineStatus_To_v1alpha4_AzureMachineStatus(in *AzureMachineStatus, out *v1alpha4.AzureMachineStatus, s conversion.Scope) error { out.Ready = in.Ready out.Addresses = 
*(*[]v1.NodeAddress)(unsafe.Pointer(&in.Addresses)) - out.VMState = (*v1alpha4.VMState)(unsafe.Pointer(in.VMState)) + out.VMState = (*v1alpha4.ProvisioningState)(unsafe.Pointer(in.VMState)) out.FailureReason = (*errors.MachineStatusError)(unsafe.Pointer(in.FailureReason)) out.FailureMessage = (*string)(unsafe.Pointer(in.FailureMessage)) out.Conditions = *(*apiv1alpha4.Conditions)(unsafe.Pointer(&in.Conditions)) @@ -1583,7 +1583,7 @@ func autoConvert_v1alpha3_VM_To_v1alpha4_VM(in *VM, out *v1alpha4.VM, s conversi return err } out.StartupScript = in.StartupScript - out.State = v1alpha4.VMState(in.State) + out.State = v1alpha4.ProvisioningState(in.State) out.Identity = v1alpha4.VMIdentity(in.Identity) out.Tags = *(*v1alpha4.Tags)(unsafe.Pointer(&in.Tags)) out.Addresses = *(*[]v1.NodeAddress)(unsafe.Pointer(&in.Addresses)) diff --git a/api/v1alpha4/azuremachine_types.go b/api/v1alpha4/azuremachine_types.go index 5f55f1aa00d..aede34d1faf 100644 --- a/api/v1alpha4/azuremachine_types.go +++ b/api/v1alpha4/azuremachine_types.go @@ -134,7 +134,7 @@ type AzureMachineStatus struct { // VMState is the provisioning state of the Azure virtual machine. // +optional - VMState *VMState `json:"vmState,omitempty"` + VMState *ProvisioningState `json:"vmState,omitempty"` // ErrorReason will be set in the event that there is a terminal problem // reconciling the Machine and will contain a succinct value suitable diff --git a/api/v1alpha4/types.go b/api/v1alpha4/types.go index 89cbe3b3cc3..67456fb4c7e 100644 --- a/api/v1alpha4/types.go +++ b/api/v1alpha4/types.go @@ -203,24 +203,28 @@ type PublicIPSpec struct { } // VMState describes the state of an Azure virtual machine. +// DEPRECATED: use ProvisioningState type VMState string +// ProvisioningState describes the provisioning state of an Azure resource. +type ProvisioningState string + const ( - // VMStateCreating ... - VMStateCreating VMState = "Creating" - // VMStateDeleting ... 
- VMStateDeleting VMState = "Deleting" - // VMStateFailed ... - VMStateFailed VMState = "Failed" - // VMStateMigrating ... - VMStateMigrating VMState = "Migrating" - // VMStateSucceeded ... - VMStateSucceeded VMState = "Succeeded" - // VMStateUpdating ... - VMStateUpdating VMState = "Updating" - // VMStateDeleted represents a deleted VM - // NOTE: This state is specific to capz, and does not have corresponding mapping in Azure API (https://docs.microsoft.com/en-us/azure/virtual-machines/states-lifecycle#provisioning-states) - VMStateDeleted VMState = "Deleted" + // Creating ... + Creating ProvisioningState = "Creating" + // Deleting ... + Deleting ProvisioningState = "Deleting" + // Failed ... + Failed ProvisioningState = "Failed" + // Migrating ... + Migrating ProvisioningState = "Migrating" + // Succeeded ... + Succeeded ProvisioningState = "Succeeded" + // Updating ... + Updating ProvisioningState = "Updating" + // Deleted represents a deleted VM + // NOTE: This state is specific to capz, and does not have corresponding mapping in Azure API (https://docs.microsoft.com/en-us/azure/virtual-machines/states-billing#provisioning-states) + Deleted ProvisioningState = "Deleted" ) // VM describes an Azure virtual machine. @@ -235,9 +239,9 @@ type VM struct { OSDisk OSDisk `json:"osDisk,omitempty"` StartupScript string `json:"startupScript,omitempty"` // State - The provisioning state, which only appears in the response. - State VMState `json:"vmState,omitempty"` - Identity VMIdentity `json:"identity,omitempty"` - Tags Tags `json:"tags,omitempty"` + State ProvisioningState `json:"vmState,omitempty"` + Identity VMIdentity `json:"identity,omitempty"` + Tags Tags `json:"tags,omitempty"` // Addresses contains the addresses associated with the Azure VM. 
Addresses []corev1.NodeAddress `json:"addresses,omitempty"` diff --git a/api/v1alpha4/zz_generated.deepcopy.go b/api/v1alpha4/zz_generated.deepcopy.go index fbd70688885..bfe792d9f27 100644 --- a/api/v1alpha4/zz_generated.deepcopy.go +++ b/api/v1alpha4/zz_generated.deepcopy.go @@ -421,7 +421,7 @@ func (in *AzureMachineStatus) DeepCopyInto(out *AzureMachineStatus) { } if in.VMState != nil { in, out := &in.VMState, &out.VMState - *out = new(VMState) + *out = new(ProvisioningState) **out = **in } if in.FailureReason != nil { diff --git a/azure/converters/vm.go b/azure/converters/vm.go index 9c62a20c23f..49383da619e 100644 --- a/azure/converters/vm.go +++ b/azure/converters/vm.go @@ -27,7 +27,7 @@ func SDKToVM(v compute.VirtualMachine) (*infrav1.VM, error) { vm := &infrav1.VM{ ID: to.String(v.ID), Name: to.String(v.Name), - State: infrav1.VMState(to.String(v.ProvisioningState)), + State: infrav1.ProvisioningState(to.String(v.ProvisioningState)), } if v.VirtualMachineProperties != nil && v.VirtualMachineProperties.HardwareProfile != nil { diff --git a/azure/converters/vmss.go b/azure/converters/vmss.go index e7b499717ca..0780b6da957 100644 --- a/azure/converters/vmss.go +++ b/azure/converters/vmss.go @@ -29,7 +29,7 @@ func SDKToVMSS(sdkvmss compute.VirtualMachineScaleSet, sdkinstances []compute.Vi vmss := &infrav1exp.VMSS{ ID: to.String(sdkvmss.ID), Name: to.String(sdkvmss.Name), - State: infrav1.VMState(to.String(sdkvmss.ProvisioningState)), + State: infrav1.ProvisioningState(to.String(sdkvmss.ProvisioningState)), } if sdkvmss.Sku != nil { @@ -52,7 +52,7 @@ func SDKToVMSS(sdkvmss compute.VirtualMachineScaleSet, sdkinstances []compute.Vi ID: to.String(vm.ID), InstanceID: to.String(vm.InstanceID), Name: to.String(vm.OsProfile.ComputerName), - State: infrav1.VMState(to.String(vm.ProvisioningState)), + State: infrav1.ProvisioningState(to.String(vm.ProvisioningState)), } if vm.LatestModelApplied != nil { diff --git a/azure/scope/machine.go b/azure/scope/machine.go index 
b8d881a1b30..6de07e23ccb 100644 --- a/azure/scope/machine.go +++ b/azure/scope/machine.go @@ -354,7 +354,7 @@ func (m *MachineScope) SetProviderID(v string) { } // VMState returns the AzureMachine VM state. -func (m *MachineScope) VMState() infrav1.VMState { +func (m *MachineScope) VMState() infrav1.ProvisioningState { if m.AzureMachine.Status.VMState != nil { return *m.AzureMachine.Status.VMState } @@ -362,7 +362,7 @@ func (m *MachineScope) VMState() infrav1.VMState { } // SetVMState sets the AzureMachine VM state. -func (m *MachineScope) SetVMState(v infrav1.VMState) { +func (m *MachineScope) SetVMState(v infrav1.ProvisioningState) { m.AzureMachine.Status.VMState = &v } @@ -388,16 +388,16 @@ func (m *MachineScope) SetFailureReason(v capierrors.MachineStatusError) { // SetBootstrapConditions sets the AzureMachine BootstrapSucceeded condition based on the extension provisioning states. func (m *MachineScope) SetBootstrapConditions(provisioningState string, extensionName string) error { - switch infrav1.VMState(provisioningState) { - case infrav1.VMStateSucceeded: + switch infrav1.ProvisioningState(provisioningState) { + case infrav1.Succeeded: m.V(4).Info("extension provisioning state is succeeded", "vm extension", extensionName, "virtual machine", m.Name()) conditions.MarkTrue(m.AzureMachine, infrav1.BootstrapSucceededCondition) return nil - case infrav1.VMStateCreating: + case infrav1.Creating: m.V(4).Info("extension provisioning state is creating", "vm extension", extensionName, "virtual machine", m.Name()) conditions.MarkFalse(m.AzureMachine, infrav1.BootstrapSucceededCondition, infrav1.BootstrapInProgressReason, clusterv1.ConditionSeverityInfo, "") return azure.WithTransientError(errors.New("extension still provisioning"), 30*time.Second) - case infrav1.VMStateFailed: + case infrav1.Failed: m.V(4).Info("extension provisioning state is failed", "vm extension", extensionName, "virtual machine", m.Name()) conditions.MarkFalse(m.AzureMachine, 
infrav1.BootstrapSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityError, "") return azure.WithTerminalError(errors.New("extension state failed")) @@ -409,19 +409,19 @@ func (m *MachineScope) SetBootstrapConditions(provisioningState string, extensio // UpdateStatus updates the AzureMachine status. func (m *MachineScope) UpdateStatus() { switch m.VMState() { - case infrav1.VMStateSucceeded: + case infrav1.Succeeded: m.V(2).Info("VM is running", "id", m.GetVMID()) conditions.MarkTrue(m.AzureMachine, infrav1.VMRunningCondition) - case infrav1.VMStateCreating: + case infrav1.Creating: m.V(2).Info("VM is creating", "id", m.GetVMID()) conditions.MarkFalse(m.AzureMachine, infrav1.VMRunningCondition, infrav1.VMCreatingReason, clusterv1.ConditionSeverityInfo, "") - case infrav1.VMStateUpdating: + case infrav1.Updating: m.V(2).Info("VM is updating", "id", m.GetVMID()) conditions.MarkFalse(m.AzureMachine, infrav1.VMRunningCondition, infrav1.VMUpdatingReason, clusterv1.ConditionSeverityInfo, "") - case infrav1.VMStateDeleting: + case infrav1.Deleting: m.Info("Unexpected VM deletion", "id", m.GetVMID()) conditions.MarkFalse(m.AzureMachine, infrav1.VMRunningCondition, infrav1.VMDeletingReason, clusterv1.ConditionSeverityWarning, "") - case infrav1.VMStateFailed: + case infrav1.Failed: m.Error(errors.New("Failed to create or update VM"), "VM is in failed state", "id", m.GetVMID()) m.SetFailureReason(capierrors.UpdateMachineError) m.SetFailureMessage(errors.Errorf("Azure VM state is %s", m.VMState())) diff --git a/azure/scope/machinepool.go b/azure/scope/machinepool.go index f6cb5208c29..0e57b3bef73 100644 --- a/azure/scope/machinepool.go +++ b/azure/scope/machinepool.go @@ -152,7 +152,7 @@ func (m *MachinePoolScope) SetProviderID(v string) { } // ProvisioningState returns the AzureMachinePool provisioning state. 
-func (m *MachinePoolScope) ProvisioningState() infrav1.VMState { +func (m *MachinePoolScope) ProvisioningState() infrav1.ProvisioningState { if m.AzureMachinePool.Status.ProvisioningState != nil { return *m.AzureMachinePool.Status.ProvisioningState } @@ -229,14 +229,14 @@ func (m *MachinePoolScope) GetLongRunningOperationState() *infrav1.Future { } // SetProvisioningState sets the AzureMachinePool provisioning state. -func (m *MachinePoolScope) SetProvisioningState(v infrav1.VMState) { +func (m *MachinePoolScope) SetProvisioningState(v infrav1.ProvisioningState) { switch { - case v == infrav1.VMStateSucceeded && *m.MachinePool.Spec.Replicas == m.AzureMachinePool.Status.Replicas: + case v == infrav1.Succeeded && *m.MachinePool.Spec.Replicas == m.AzureMachinePool.Status.Replicas: // vmss is provisioned with enough ready replicas m.AzureMachinePool.Status.ProvisioningState = &v - case v == infrav1.VMStateSucceeded && *m.MachinePool.Spec.Replicas != m.AzureMachinePool.Status.Replicas: + case v == infrav1.Succeeded && *m.MachinePool.Spec.Replicas != m.AzureMachinePool.Status.Replicas: // not enough ready or too many ready replicas we must still be scaling up or down - updatingState := infrav1.VMStateUpdating + updatingState := infrav1.Updating m.AzureMachinePool.Status.ProvisioningState = &updatingState default: m.AzureMachinePool.Status.ProvisioningState = &v diff --git a/azure/services/scalesets/mock_scalesets/scalesets_mock.go b/azure/services/scalesets/mock_scalesets/scalesets_mock.go index 975035dbcb3..29f53d57ab9 100644 --- a/azure/services/scalesets/mock_scalesets/scalesets_mock.go +++ b/azure/services/scalesets/mock_scalesets/scalesets_mock.go @@ -378,7 +378,7 @@ func (mr *MockScaleSetScopeMockRecorder) SetProviderID(arg0 interface{}) *gomock } // SetProvisioningState mocks base method. 
-func (m *MockScaleSetScope) SetProvisioningState(arg0 v1alpha4.VMState) { +func (m *MockScaleSetScope) SetProvisioningState(arg0 v1alpha4.ProvisioningState) { m.ctrl.T.Helper() m.ctrl.Call(m, "SetProvisioningState", arg0) } diff --git a/azure/services/scalesets/scalesets.go b/azure/services/scalesets/scalesets.go index 2575a5cd837..f4f06676194 100644 --- a/azure/services/scalesets/scalesets.go +++ b/azure/services/scalesets/scalesets.go @@ -52,7 +52,7 @@ type ScaleSetScope interface { UpdateInstanceStatuses(context.Context, []infrav1exp.VMSSVM) error NeedsK8sVersionUpdate() bool SaveK8sVersion() - SetProvisioningState(infrav1.VMState) + SetProvisioningState(infrav1.ProvisioningState) SetLongRunningOperationState(*infrav1.Future) GetLongRunningOperationState() *infrav1.Future } @@ -179,7 +179,7 @@ func (s *Service) createVMSS(ctx context.Context) (*infrav1.Future, error) { vmss := result.VMSSWithoutHash vmss.Tags = converters.TagsToMap(result.Tags.AddSpecVersionHashTag(result.Hash)) - s.Scope.SetProvisioningState(infrav1.VMStateCreating) + s.Scope.SetProvisioningState(infrav1.Creating) future, err := s.Client.CreateOrUpdateAsync(ctx, s.Scope.ResourceGroup(), spec.Name, vmss) if err != nil { return future, errors.Wrap(err, "cannot create VMSS") @@ -234,7 +234,7 @@ func (s *Service) patchVMSSIfNeeded(ctx context.Context, infraVMSS *infrav1exp.V return future, errors.Wrap(err, "failed updating VMSS") } - s.Scope.SetProvisioningState(infrav1.VMStateUpdating) + s.Scope.SetProvisioningState(infrav1.Updating) s.Scope.SetLongRunningOperationState(future) s.Scope.V(2).Info("successfully started to update vmss", "scale set", spec.Name) return future, err diff --git a/azure/services/scalesets/scalesets_test.go b/azure/services/scalesets/scalesets_test.go index ab3f0a66de2..cf933e5e568 100644 --- a/azure/services/scalesets/scalesets_test.go +++ b/azure/services/scalesets/scalesets_test.go @@ -267,7 +267,7 @@ func TestReconcileVMSS(t *testing.T) { createdVMSS = 
setupDefaultVMSSInProgressOperationDoneExpectations(g, s, m, createdVMSS, instances) s.SetProviderID(fmt.Sprintf("azure://%s", *createdVMSS.ID)) s.SetLongRunningOperationState(nil) - s.SetProvisioningState(infrav1.VMStateSucceeded) + s.SetProvisioningState(infrav1.Succeeded) s.NeedsK8sVersionUpdate().Return(false) infraVMSS := converters.SDKToVMSS(createdVMSS, instances) s.UpdateInstanceStatuses(gomockinternal.AContext(), infraVMSS.Instances).Return(nil) @@ -287,7 +287,7 @@ func TestReconcileVMSS(t *testing.T) { instances := newDefaultInstances() vmss = setupDefaultVMSSInProgressOperationDoneExpectations(g, s, m, vmss, instances) s.SetProviderID(fmt.Sprintf("azure://%s", *vmss.ID)) - s.SetProvisioningState(infrav1.VMStateUpdating) + s.SetProvisioningState(infrav1.Updating) // create a VMSS patch with an updated hash to match the spec updatedVMSS := newDefaultVMSS() @@ -315,7 +315,7 @@ func TestReconcileVMSS(t *testing.T) { createdVMSS = setupDefaultVMSSInProgressOperationDoneExpectations(g, s, m, createdVMSS, instances) s.SetProviderID(fmt.Sprintf("azure://%s", *createdVMSS.ID)) s.SetLongRunningOperationState(nil) - s.SetProvisioningState(infrav1.VMStateSucceeded) + s.SetProvisioningState(infrav1.Succeeded) s.NeedsK8sVersionUpdate().Return(false) infraVMSS := converters.SDKToVMSS(createdVMSS, instances) s.UpdateInstanceStatuses(gomockinternal.AContext(), infraVMSS.Instances).Return(nil) @@ -1042,7 +1042,7 @@ func setHashOnVMSSUpdate(g *WithT, vmss compute.VirtualMachineScaleSet, update c func setupDefaultVMSSInProgressOperationDoneExpectations(g *WithT, s *mock_scalesets.MockScaleSetScopeMockRecorder, m *mock_scalesets.MockClientMockRecorder, createdVMSS compute.VirtualMachineScaleSet, instances []compute.VirtualMachineScaleSetVM) compute.VirtualMachineScaleSet { setHashOnVMSS(g, createdVMSS) createdVMSS.ID = to.StringPtr("vmss-id") - createdVMSS.ProvisioningState = to.StringPtr(string(infrav1.VMStateSucceeded)) + createdVMSS.ProvisioningState = 
to.StringPtr(string(infrav1.Succeeded)) setupDefaultVMSSExpectations(s) future := &infrav1.Future{ Type: PutFuture, @@ -1061,7 +1061,7 @@ func setupDefaultVMSSStartCreatingExpectations(s *mock_scalesets.MockScaleSetSco s.GetLongRunningOperationState().Return(nil) m.Get(gomockinternal.AContext(), defaultResourceGroup, defaultVMSSName). Return(compute.VirtualMachineScaleSet{}, autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 404}, "Not found")) - s.SetProvisioningState(infrav1.VMStateCreating) + s.SetProvisioningState(infrav1.Creating) } func setupCreatingSucceededExpectations(s *mock_scalesets.MockScaleSetScopeMockRecorder, m *mock_scalesets.MockClientMockRecorder, future *infrav1.Future) { @@ -1102,6 +1102,6 @@ func setupDefaultVMSSExpectations(s *mock_scalesets.MockScaleSetScopeMockRecorde func setupDefaultVMSSUpdateExpectations(s *mock_scalesets.MockScaleSetScopeMockRecorder) { setupDefaultVMSSExpectations(s) s.SetProviderID("azure://vmss-id") - s.SetProvisioningState(infrav1.VMStateUpdating) + s.SetProvisioningState(infrav1.Updating) s.GetLongRunningOperationState().Return(nil) } diff --git a/azure/services/virtualmachines/mock_virtualmachines/virtualmachines_mock.go b/azure/services/virtualmachines/mock_virtualmachines/virtualmachines_mock.go index ab01014b563..d3ef75cb1e4 100644 --- a/azure/services/virtualmachines/mock_virtualmachines/virtualmachines_mock.go +++ b/azure/services/virtualmachines/mock_virtualmachines/virtualmachines_mock.go @@ -353,7 +353,7 @@ func (mr *MockVMScopeMockRecorder) SetProviderID(arg0 interface{}) *gomock.Call } // SetVMState mocks base method. 
-func (m *MockVMScope) SetVMState(arg0 v1alpha4.VMState) { +func (m *MockVMScope) SetVMState(arg0 v1alpha4.ProvisioningState) { m.ctrl.T.Helper() m.ctrl.Call(m, "SetVMState", arg0) } diff --git a/azure/services/virtualmachines/virtualmachines.go b/azure/services/virtualmachines/virtualmachines.go index 2a458e00132..8352ef8e5b2 100644 --- a/azure/services/virtualmachines/virtualmachines.go +++ b/azure/services/virtualmachines/virtualmachines.go @@ -51,7 +51,7 @@ type VMScope interface { AvailabilitySet() (string, bool) SetProviderID(string) SetAddresses([]corev1.NodeAddress) - SetVMState(infrav1.VMState) + SetVMState(infrav1.ProvisioningState) UpdateStatus() } @@ -88,7 +88,7 @@ func (s *Service) Reconcile(ctx context.Context) error { switch { // VM got deleted outside of capz case err != nil && azure.ResourceNotFound(err) && s.Scope.ProviderID() != "": - s.Scope.SetVMState(infrav1.VMStateDeleted) + s.Scope.SetVMState(infrav1.Deleted) return azure.VMDeletedError{ProviderID: s.Scope.ProviderID()} case err != nil && !azure.ResourceNotFound(err): return errors.Wrapf(err, "failed to get VM %s", vmSpec.Name) diff --git a/azure/services/virtualmachines/virtualmachines_test.go b/azure/services/virtualmachines/virtualmachines_test.go index 2359861a3e9..2801faaf5cd 100644 --- a/azure/services/virtualmachines/virtualmachines_test.go +++ b/azure/services/virtualmachines/virtualmachines_test.go @@ -1760,7 +1760,7 @@ func TestReconcileVM(t *testing.T) { s.ResourceGroup().AnyTimes().Return("my-rg") s.V(gomock.AssignableToTypeOf(2)).AnyTimes().Return(klogr.New()) s.ProviderID().Times(2).Return("ExistingVM-ProviderID") - s.SetVMState(infrav1.VMStateDeleted) + s.SetVMState(infrav1.Deleted) m.Get(gomockinternal.AContext(), "my-rg", "my-vm"). 
Return(compute.VirtualMachine{}, autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 404}, "Not found")) }, diff --git a/azure/services/vmextensions/mock_vmextensions/vmextensions_mock.go b/azure/services/vmextensions/mock_vmextensions/vmextensions_mock.go index 63528c613f0..906e5ca0823 100644 --- a/azure/services/vmextensions/mock_vmextensions/vmextensions_mock.go +++ b/azure/services/vmextensions/mock_vmextensions/vmextensions_mock.go @@ -28,7 +28,6 @@ import ( gomock "github.com/golang/mock/gomock" v1alpha4 "sigs.k8s.io/cluster-api-provider-azure/api/v1alpha4" azure "sigs.k8s.io/cluster-api-provider-azure/azure" - v1alpha40 "sigs.k8s.io/cluster-api/api/v1alpha4" ) // MockVMExtensionScope is a mock of VMExtensionScope interface. @@ -256,16 +255,18 @@ func (mr *MockVMExtensionScopeMockRecorder) ResourceGroup() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ResourceGroup", reflect.TypeOf((*MockVMExtensionScope)(nil).ResourceGroup)) } -// SetCondition mocks base method. -func (m *MockVMExtensionScope) SetCondition(arg0 v1alpha40.ConditionType, arg1 string, arg2 v1alpha40.ConditionSeverity, arg3 bool) { +// SetBootstrapConditions mocks base method. +func (m *MockVMExtensionScope) SetBootstrapConditions(arg0, arg1 string) error { m.ctrl.T.Helper() - m.ctrl.Call(m, "SetCondition", arg0, arg1, arg2, arg3) + ret := m.ctrl.Call(m, "SetBootstrapConditions", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 } -// SetCondition indicates an expected call of SetCondition. -func (mr *MockVMExtensionScopeMockRecorder) SetCondition(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { +// SetBootstrapConditions indicates an expected call of SetBootstrapConditions. 
+func (mr *MockVMExtensionScopeMockRecorder) SetBootstrapConditions(arg0, arg1 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetCondition", reflect.TypeOf((*MockVMExtensionScope)(nil).SetCondition), arg0, arg1, arg2, arg3) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetBootstrapConditions", reflect.TypeOf((*MockVMExtensionScope)(nil).SetBootstrapConditions), arg0, arg1) } // SubscriptionID mocks base method. diff --git a/azure/services/vmssextensions/mock_vmssextensions/vmssextensions_mock.go b/azure/services/vmssextensions/mock_vmssextensions/vmssextensions_mock.go index e7569294764..59be28ef684 100644 --- a/azure/services/vmssextensions/mock_vmssextensions/vmssextensions_mock.go +++ b/azure/services/vmssextensions/mock_vmssextensions/vmssextensions_mock.go @@ -28,7 +28,6 @@ import ( gomock "github.com/golang/mock/gomock" v1alpha4 "sigs.k8s.io/cluster-api-provider-azure/api/v1alpha4" azure "sigs.k8s.io/cluster-api-provider-azure/azure" - v1alpha40 "sigs.k8s.io/cluster-api/api/v1alpha4" ) // MockVMSSExtensionScope is a mock of VMSSExtensionScope interface. @@ -256,16 +255,18 @@ func (mr *MockVMSSExtensionScopeMockRecorder) ResourceGroup() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ResourceGroup", reflect.TypeOf((*MockVMSSExtensionScope)(nil).ResourceGroup)) } -// SetCondition mocks base method. -func (m *MockVMSSExtensionScope) SetCondition(arg0 v1alpha40.ConditionType, arg1 string, arg2 v1alpha40.ConditionSeverity, arg3 bool) { +// SetBootstrapConditions mocks base method. +func (m *MockVMSSExtensionScope) SetBootstrapConditions(arg0, arg1 string) error { m.ctrl.T.Helper() - m.ctrl.Call(m, "SetCondition", arg0, arg1, arg2, arg3) + ret := m.ctrl.Call(m, "SetBootstrapConditions", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 } -// SetCondition indicates an expected call of SetCondition. 
-func (mr *MockVMSSExtensionScopeMockRecorder) SetCondition(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { +// SetBootstrapConditions indicates an expected call of SetBootstrapConditions. +func (mr *MockVMSSExtensionScopeMockRecorder) SetBootstrapConditions(arg0, arg1 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetCondition", reflect.TypeOf((*MockVMSSExtensionScope)(nil).SetCondition), arg0, arg1, arg2, arg3) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetBootstrapConditions", reflect.TypeOf((*MockVMSSExtensionScope)(nil).SetBootstrapConditions), arg0, arg1) } // SubscriptionID mocks base method. diff --git a/controllers/azuremachine_controller.go b/controllers/azuremachine_controller.go index 2c7309dee31..f88978d7d3a 100644 --- a/controllers/azuremachine_controller.go +++ b/controllers/azuremachine_controller.go @@ -302,7 +302,7 @@ func (r *AzureMachineReconciler) reconcileNormal(ctx context.Context, machineSco machineScope.SetFailureReason(capierrors.CreateMachineError) machineScope.SetFailureMessage(err) machineScope.SetNotReady() - machineScope.SetVMState(infrav1.VMStateFailed) + machineScope.SetVMState(infrav1.Failed) return reconcile.Result{}, nil } diff --git a/exp/api/v1alpha3/zz_generated.conversion.go b/exp/api/v1alpha3/zz_generated.conversion.go index 61fef30651c..5795209fa6a 100644 --- a/exp/api/v1alpha3/zz_generated.conversion.go +++ b/exp/api/v1alpha3/zz_generated.conversion.go @@ -327,7 +327,7 @@ func Convert_v1alpha4_AzureMachinePool_To_v1alpha3_AzureMachinePool(in *v1alpha4 func autoConvert_v1alpha3_AzureMachinePoolInstanceStatus_To_v1alpha4_AzureMachinePoolInstanceStatus(in *AzureMachinePoolInstanceStatus, out *v1alpha4.AzureMachinePoolInstanceStatus, s conversion.Scope) error { out.Version = in.Version - out.ProvisioningState = (*clusterapiproviderazureapiv1alpha4.VMState)(unsafe.Pointer(in.ProvisioningState)) + out.ProvisioningState = 
(*clusterapiproviderazureapiv1alpha4.ProvisioningState)(unsafe.Pointer(in.ProvisioningState)) out.ProviderID = in.ProviderID out.InstanceID = in.InstanceID out.InstanceName = in.InstanceName @@ -440,7 +440,7 @@ func autoConvert_v1alpha3_AzureMachinePoolStatus_To_v1alpha4_AzureMachinePoolSta out.Replicas = in.Replicas out.Instances = *(*[]*v1alpha4.AzureMachinePoolInstanceStatus)(unsafe.Pointer(&in.Instances)) out.Version = in.Version - out.ProvisioningState = (*clusterapiproviderazureapiv1alpha4.VMState)(unsafe.Pointer(in.ProvisioningState)) + out.ProvisioningState = (*clusterapiproviderazureapiv1alpha4.ProvisioningState)(unsafe.Pointer(in.ProvisioningState)) out.FailureReason = (*errors.MachineStatusError)(unsafe.Pointer(in.FailureReason)) out.FailureMessage = (*string)(unsafe.Pointer(in.FailureMessage)) out.Conditions = *(*apiv1alpha4.Conditions)(unsafe.Pointer(&in.Conditions)) @@ -958,7 +958,7 @@ func autoConvert_v1alpha3_VMSS_To_v1alpha4_VMSS(in *VMSS, out *v1alpha4.VMSS, s if err := Convert_v1alpha3_Image_To_v1alpha4_Image(&in.Image, &out.Image, s); err != nil { return err } - out.State = clusterapiproviderazureapiv1alpha4.VMState(in.State) + out.State = clusterapiproviderazureapiv1alpha4.ProvisioningState(in.State) out.Identity = clusterapiproviderazureapiv1alpha4.VMIdentity(in.Identity) out.Tags = *(*clusterapiproviderazureapiv1alpha4.Tags)(unsafe.Pointer(&in.Tags)) out.Instances = *(*[]v1alpha4.VMSSVM)(unsafe.Pointer(&in.Instances)) @@ -996,7 +996,7 @@ func autoConvert_v1alpha3_VMSSVM_To_v1alpha4_VMSSVM(in *VMSSVM, out *v1alpha4.VM out.InstanceID = in.InstanceID out.Name = in.Name out.AvailabilityZone = in.AvailabilityZone - out.State = clusterapiproviderazureapiv1alpha4.VMState(in.State) + out.State = clusterapiproviderazureapiv1alpha4.ProvisioningState(in.State) out.LatestModelApplied = in.LatestModelApplied return nil } diff --git a/exp/api/v1alpha4/azuremachinepool_types.go b/exp/api/v1alpha4/azuremachinepool_types.go index e8f8765d55d..6c81a1aabdb 
100644 --- a/exp/api/v1alpha4/azuremachinepool_types.go +++ b/exp/api/v1alpha4/azuremachinepool_types.go @@ -133,7 +133,7 @@ type ( // ProvisioningState is the provisioning state of the Azure virtual machine. // +optional - ProvisioningState *infrav1.VMState `json:"provisioningState,omitempty"` + ProvisioningState *infrav1.ProvisioningState `json:"provisioningState,omitempty"` // FailureReason will be set in the event that there is a terminal problem // reconciling the MachinePool and will contain a succinct value suitable @@ -191,7 +191,7 @@ type ( // ProvisioningState is the provisioning state of the Azure virtual machine instance. // +optional - ProvisioningState *infrav1.VMState `json:"provisioningState"` + ProvisioningState *infrav1.ProvisioningState `json:"provisioningState"` // ProviderID is the provider identification of the VMSS Instance // +optional diff --git a/exp/api/v1alpha4/types.go b/exp/api/v1alpha4/types.go index 75cf9b95dd7..7b8ad770324 100644 --- a/exp/api/v1alpha4/types.go +++ b/exp/api/v1alpha4/types.go @@ -23,25 +23,25 @@ import ( type ( // VMSSVM defines a VM in a virtual machine scale set. VMSSVM struct { - ID string `json:"id,omitempty"` - InstanceID string `json:"instanceID,omitempty"` - Name string `json:"name,omitempty"` - AvailabilityZone string `json:"availabilityZone,omitempty"` - State infrav1.VMState `json:"vmState,omitempty"` - LatestModelApplied bool `json:"latestModelApplied,omitempty"` + ID string `json:"id,omitempty"` + InstanceID string `json:"instanceID,omitempty"` + Name string `json:"name,omitempty"` + AvailabilityZone string `json:"availabilityZone,omitempty"` + State infrav1.ProvisioningState `json:"vmState,omitempty"` + LatestModelApplied bool `json:"latestModelApplied,omitempty"` } // VMSS defines a virtual machine scale set. 
VMSS struct { - ID string `json:"id,omitempty"` - Name string `json:"name,omitempty"` - Sku string `json:"sku,omitempty"` - Capacity int64 `json:"capacity,omitempty"` - Zones []string `json:"zones,omitempty"` - Image infrav1.Image `json:"image,omitempty"` - State infrav1.VMState `json:"vmState,omitempty"` - Identity infrav1.VMIdentity `json:"identity,omitempty"` - Tags infrav1.Tags `json:"tags,omitempty"` - Instances []VMSSVM `json:"instances,omitempty"` + ID string `json:"id,omitempty"` + Name string `json:"name,omitempty"` + Sku string `json:"sku,omitempty"` + Capacity int64 `json:"capacity,omitempty"` + Zones []string `json:"zones,omitempty"` + Image infrav1.Image `json:"image,omitempty"` + State infrav1.ProvisioningState `json:"vmState,omitempty"` + Identity infrav1.VMIdentity `json:"identity,omitempty"` + Tags infrav1.Tags `json:"tags,omitempty"` + Instances []VMSSVM `json:"instances,omitempty"` } ) diff --git a/exp/api/v1alpha4/zz_generated.deepcopy.go b/exp/api/v1alpha4/zz_generated.deepcopy.go index 0de5d6f9737..c54a3c368c4 100644 --- a/exp/api/v1alpha4/zz_generated.deepcopy.go +++ b/exp/api/v1alpha4/zz_generated.deepcopy.go @@ -59,7 +59,7 @@ func (in *AzureMachinePoolInstanceStatus) DeepCopyInto(out *AzureMachinePoolInst *out = *in if in.ProvisioningState != nil { in, out := &in.ProvisioningState, &out.ProvisioningState - *out = new(apiv1alpha4.VMState) + *out = new(apiv1alpha4.ProvisioningState) **out = **in } } @@ -155,7 +155,7 @@ func (in *AzureMachinePoolStatus) DeepCopyInto(out *AzureMachinePoolStatus) { } if in.ProvisioningState != nil { in, out := &in.ProvisioningState, &out.ProvisioningState - *out = new(apiv1alpha4.VMState) + *out = new(apiv1alpha4.ProvisioningState) **out = **in } if in.FailureReason != nil { diff --git a/exp/controllers/azuremachinepool_controller.go b/exp/controllers/azuremachinepool_controller.go index 947b1b3a91e..68deaffa9f9 100644 --- a/exp/controllers/azuremachinepool_controller.go +++ 
b/exp/controllers/azuremachinepool_controller.go @@ -297,15 +297,15 @@ func (r *AzureMachinePoolReconciler) reconcileNormal(ctx context.Context, machin } switch machinePoolScope.ProvisioningState() { - case infrav1.VMStateSucceeded: + case infrav1.Succeeded: machinePoolScope.V(2).Info("Scale Set is running", "id", machinePoolScope.ProviderID()) conditions.MarkTrue(machinePoolScope.AzureMachinePool, infrav1.ScaleSetRunningCondition) machinePoolScope.SetReady() - case infrav1.VMStateCreating: + case infrav1.Creating: machinePoolScope.V(2).Info("Scale Set is creating", "id", machinePoolScope.ProviderID()) conditions.MarkFalse(machinePoolScope.AzureMachinePool, infrav1.ScaleSetRunningCondition, infrav1.ScaleSetCreatingReason, clusterv1.ConditionSeverityInfo, "") machinePoolScope.SetNotReady() - case infrav1.VMStateUpdating: + case infrav1.Updating: machinePoolScope.V(2).Info("Scale Set is updating", "id", machinePoolScope.ProviderID()) conditions.MarkFalse(machinePoolScope.AzureMachinePool, infrav1.ScaleSetRunningCondition, infrav1.ScaleSetUpdatingReason, clusterv1.ConditionSeverityInfo, "") machinePoolScope.SetNotReady() @@ -313,12 +313,12 @@ func (r *AzureMachinePoolReconciler) reconcileNormal(ctx context.Context, machin return reconcile.Result{ RequeueAfter: 30 * time.Second, }, nil - case infrav1.VMStateDeleting: + case infrav1.Deleting: machinePoolScope.Info("Unexpected scale set deletion", "id", machinePoolScope.ProviderID()) r.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "UnexpectedVMDeletion", "Unexpected Azure scale set deletion") conditions.MarkFalse(machinePoolScope.AzureMachinePool, infrav1.VMRunningCondition, infrav1.ScaleSetDeletingReason, clusterv1.ConditionSeverityWarning, "") machinePoolScope.SetNotReady() - case infrav1.VMStateFailed: + case infrav1.Failed: machinePoolScope.SetNotReady() machinePoolScope.Error(errors.New("Failed to create or update scale set"), "Scale Set is in failed state", "id", 
machinePoolScope.ProviderID()) r.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "FailedVMState", "Azure scale set is in failed state") From 638c14f4f19fbbb665529b64c16c3677c237128e Mon Sep 17 00:00:00 2001 From: Cecile Robert-Michon Date: Thu, 18 Mar 2021 16:18:31 -0700 Subject: [PATCH 3/3] Update troubleshooting docs --- docs/troubleshooting.md | 152 +++++++++++++++++++++++++++++++++------- 1 file changed, 127 insertions(+), 25 deletions(-) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 65ab2cedbdc..5207549b982 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -1,57 +1,157 @@ # Troubleshooting Guide -Common issue users might have when using Cluster API Provider for Azure. +Common issues users might run into when using Cluster API Provider for Azure. This list is work-in-progress. Feel free to open a PR to add to it if you find that useful information is missing. -## Debugging cluster creation -You will need to review the logs for the components of the control plane nodes and for the workload clusters. Start your investigation with reviewing the logs of the control plane then move onto the workload cluster that is created. +## Examples of troubleshooting real-world issues -## Review logs of control plane -While cluster buildout is running, you can follow the controller logs in a separate window like this: +### No Azure resources are getting created + +This is likely due to missing or invalid Azure credentials. 
+ +Check the CAPZ controller logs on the management cluster: ```bash -kubectl get po -o wide --all-namespaces -w # Watch pod creation until azure-provider-controller-manager-0 is available +kubectl logs deploy/capz-controller-manager -n capz-system manager +``` -kubectl logs -n capz-system azure-provider-controller-manager-0 manager -f # Follow the controller logs +If you see an error similar to this: + +``` +azure.BearerAuthorizer#WithAuthorization: Failed to refresh the Token for request to https://management.azure.com/subscriptions/123/providers/Microsoft.Compute/skus?%24filter=location+eq+%27eastus2%27&api-version=2019-04-01: StatusCode=401 -- Original Error: adal: Refresh request failed. Status Code = '401'. Response body: {\"error\":\"invalid_client\",\"error_description\":\"AADSTS7000215: Invalid client secret is provided. ``` -An error such as the following in the manager could point to a mismatch between a current CAPI and an old CAPZ version: +Make sure the provided Service Principal client ID and client secret are correct and that the password has not expired. + +### The AzureCluster infrastructure is provisioned but no virtual machines are coming up + +Your Azure subscription might have no quota for the requested VM size in the specified Azure location. 
+ +Check the CAPZ controller logs on the management cluster: + +```bash +kubectl logs deploy/capz-controller-manager -n capz-system manager +``` +If you see an error similar to this: ``` -E0320 23:33:33.288073 1 controller.go:258] controller-runtime/controller "msg"="Reconciler error" "error"="failed to create AzureMachine VM: failed to create nic capz-cluster-control-plane-7z8ng-nic for machine capz-cluster-control-plane-7z8ng: unable to determine NAT rule for control plane network interface: strconv.Atoi: parsing \"capz-cluster-control-plane-7z8ng\": invalid syntax" "controller"="azuremachine" "request"={"Namespace":"default","Name":"capz-cluster-control-plane-7z8ng"} +"error"="failed to reconcile AzureMachine: failed to create virtual machine: failed to create VM capz-md-0-qkg6m in resource group capz-fkl3tp: compute.VirtualMachinesClient#CreateOrUpdate: Failure sending request: StatusCode=0 -- Original Error: autorest/azure: Service returned an error. Status=\u003cnil\u003e Code=\"OperationNotAllowed\" Message=\"Operation could not be completed as it results in exceeding approved standardDSv3Family Cores quota. ``` -### Remoting to workload clusters -After the workload cluster is finished deploying you will have a kubeconfig in `./kubeconfig`. +Follow [these steps](https://docs.microsoft.com/en-us/azure/azure-resource-manager/templates/error-resource-quota). Alternatively, you can specify another Azure location and/or VM size during cluster creation. + +### A virtual machine is running but the k8s node did not join the cluster + +Check the AzureMachine (or AzureMachinePool if using a MachinePool) status: +```bash +kubectl get azuremachines -o wide +``` + +If you see an output like this: + +``` +NAME READY STATE +default-template-md-0-w78jt false Updating +``` + +This indicates that the bootstrap script has not yet succeeded. Check the AzureMachine `status.conditions` field for more information.
+ +[Take a look at the cloud-init logs](#checking-cloud-init-logs-ubuntu) for further debugging. + +### One or more control plane replicas are missing + +Take a look at the KubeadmControlPlane controller logs and look for any potential errors: + +```bash +kubectl logs deploy/capi-kubeadm-control-plane-controller-manager -n capi-kubeadm-control-plane-system manager +``` + +In addition, make sure all pods on the workload cluster are healthy, including pods in the `kube-system` namespace. + +### Nodes are in NotReady state + +Make sure you have installed a CNI on the workload cluster and that all the pods on the workload cluster are in running state. + +### Load Balancer service fails to come up + +Check the cloud-controller-manager logs on the workload cluster. + +If running the Azure cloud provider in-tree: + +``` +kubectl logs kube-controller-manager-<control-plane-node-name> -n kube-system +``` -Using the ssh information provided during cluster creation (environment variable `AZURE_SSH_PUBLIC_KEY_B64`), you can debug most issues by SSHing into the VMs that have been created: +If running the Azure cloud provider out-of-tree: ``` -# connect to first control node - capi is default linux user created by deployment -API_SERVER=$(kubectl get azurecluster capz-cluster -o jsonpath='{.status.network.apiServerIp.dnsName}') +kubectl logs cloud-controller-manager -n kube-system +``` + + +## Watching Kubernetes resources + +To watch progression of all Cluster API resources on the management cluster you can run: + +```bash +kubectl get cluster-api +``` + +## Looking at controller logs + +To check the CAPZ controller logs on the management cluster, run: + +```bash +kubectl logs deploy/capz-controller-manager -n capz-system manager +``` + +### Checking cloud-init logs (Ubuntu) + +Cloud-init logs can provide more information on any issues that happened when running the bootstrap script.
+ +#### Option 1: Using the Azure Portal + +Located in the virtual machine blade, the boot diagnostics option is under the Support and Troubleshooting section in the Azure portal. + +For more information, see [here](https://docs.microsoft.com/en-us/azure/virtual-machines/boot-diagnostics#boot-diagnostics-view). + +#### Option 2: Using the Azure CLI + +```bash +az vm boot-diagnostics get-boot-log --name MyVirtualMachine --resource-group MyResourceGroup +``` + +For more information, see [here](https://docs.microsoft.com/en-us/cli/azure/vm/boot-diagnostics?view=azure-cli-latest). + +#### Option 3: With SSH + +Using the ssh information provided during cluster creation (environment variable `AZURE_SSH_PUBLIC_KEY_B64`): + + +##### connect to first control node - capi is default linux user created by deployment +``` +API_SERVER=$(kubectl get azurecluster capz-cluster -o jsonpath='{.spec.controlPlaneEndpoint.host}') ssh capi@${API_SERVER} +``` -# list nodes +##### list nodes +``` kubectl get azuremachines NAME READY STATE capz-cluster-control-plane-2jprg true Succeeded capz-cluster-control-plane-ck5wv true Succeeded capz-cluster-control-plane-w4tv6 true Succeeded -capz-cluster-md-0-s52wb true Succeeded +capz-cluster-md-0-s52wb false Failed capz-cluster-md-0-w8xxw true Succeeded +``` -# pick node name from output above: +##### pick node name from output above: +``` node=$(kubectl get azuremachine capz-cluster-md-0-s52wb -o jsonpath='{.status.addresses[0].address}') ssh -J capi@${apiserver} capi@${node} ``` -> There are some [provided scripts](/hack/debugging/Readme.md) that can help automate a few common tasks.
- -Reviewing the following logs on the workload cluster can help with troubleshooting: - -- `less /var/lib/waagent/custom-script/download/0/stdout` -- `journalctl -u cloud-final` -- `less /var/log/cloud-init-output.log` -- `journalctl -u kubelet` +##### look at cloud-init logs +`less /var/log/cloud-init-output.log` ## Automated log collection @@ -60,3 +160,5 @@ As part of [CI](../scripts/ci-e2e.sh) there is a [log collection script](hack/.. ```bash ./hack/log/log-dump.sh ``` + +There are also some [provided scripts](/hack/debugging/Readme.md) that can help automate a few common tasks.