From 0bfcee050cb075efba07e5d3951460a56ec2fd2b Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Wed, 6 Sep 2023 18:45:51 -0400 Subject: [PATCH 1/7] Block 4.14 upgrades with admin ack --- pkg/operator/starter.go | 3 +- pkg/operator/testlib/testlib.go | 46 +++++-- .../vspherecontroller/checks/api_interface.go | 11 ++ .../vspherecontroller/checks/check_error.go | 117 ++++++++++++++++-- .../checks/check_error_test.go | 90 ++++++++++++++ .../vspherecontroller/checks/check_nodes.go | 14 +++ .../checks/check_patched_vcenter.go | 58 +++++++++ .../vspherecontroller/vcsim_framework.go | 5 +- .../vsphere_environment_checker.go | 9 +- .../vsphere_environment_checker_test.go | 22 +++- .../vspherecontroller/vspherecontroller.go | 39 ++++++ .../vspherecontroller_test.go | 15 ++- 12 files changed, 394 insertions(+), 35 deletions(-) create mode 100644 pkg/operator/vspherecontroller/checks/check_error_test.go create mode 100644 pkg/operator/vspherecontroller/checks/check_patched_vcenter.go diff --git a/pkg/operator/starter.go b/pkg/operator/starter.go index 6cb7ede7e..72ef0b686 100644 --- a/pkg/operator/starter.go +++ b/pkg/operator/starter.go @@ -35,12 +35,13 @@ const ( operandName = "vmware-vsphere-csi-driver" secretName = "vmware-vsphere-cloud-credentials" envVMWareVsphereDriverSyncerImage = "VMWARE_VSPHERE_SYNCER_IMAGE" + managedConfigNamespace = "openshift-config-managed" ) func RunOperator(ctx context.Context, controllerConfig *controllercmd.ControllerContext) error { // Create core clientset and informers kubeClient := kubeclient.NewForConfigOrDie(rest.AddUserAgent(controllerConfig.KubeConfig, operatorName)) - kubeInformersForNamespaces := v1helpers.NewKubeInformersForNamespaces(kubeClient, utils.DefaultNamespace, cloudConfigNamespace, "") + kubeInformersForNamespaces := v1helpers.NewKubeInformersForNamespaces(kubeClient, utils.DefaultNamespace, cloudConfigNamespace, managedConfigNamespace, "") secretInformer := kubeInformersForNamespaces.InformersFor(utils.DefaultNamespace).Core().V1().Secrets() configMapInformer := kubeInformersForNamespaces.InformersFor(utils.DefaultNamespace).Core().V1().ConfigMaps() nodeInformer := kubeInformersForNamespaces.InformersFor("").Core().V1().Nodes() diff --git a/pkg/operator/testlib/testlib.go b/pkg/operator/testlib/testlib.go index 5bf3e0398..5396c2475 100644 --- a/pkg/operator/testlib/testlib.go +++ b/pkg/operator/testlib/testlib.go @@ -12,7 +12,6 @@ import ( operatorinformers "github.com/openshift/client-go/operator/informers/externalversions" "github.com/openshift/library-go/pkg/operator/v1helpers" "github.com/openshift/vmware-vsphere-csi-driver-operator/pkg/operator/utils" - "github.com/openshift/vmware-vsphere-csi-driver-operator/pkg/operator/vspherecontroller/checks" "gopkg.in/gcfg.v1" v1 "k8s.io/api/core/v1" storagev1 "k8s.io/api/storage/v1" @@ -149,6 +148,12 @@ func AddInitialObjects(objects []runtime.Object, clients *utils.APIClient) error case *opv1.ClusterCSIDriver: clusterCSIDriverInformer := clients.ClusterCSIDriverInformer.Informer() clusterCSIDriverInformer.GetStore().Add(obj) + case *opv1.Storage: + storageInformer := clients.OCPOperatorInformers.Operator().V1().Storages().Informer() + storageInformer.GetStore().Add(obj) + case *v1.PersistentVolume: + pvInformer := clients.KubeInformers.InformersFor("").Core().V1().PersistentVolumes().Informer() + pvInformer.GetStore().Add(obj) default: return fmt.Errorf("Unknown initalObject type: %+v", obj) } @@ -219,6 +224,38 @@ func GetCSIDriver(withOCPAnnotation bool) *storagev1.CSIDriver { return driver } +func 
GetIntreePV(pvName string) *v1.PersistentVolume { + return &v1.PersistentVolume{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvName, + }, + Spec: v1.PersistentVolumeSpec{ + PersistentVolumeSource: v1.PersistentVolumeSource{ + VsphereVolume: &v1.VsphereVirtualDiskVolumeSource{ + VolumePath: "foobar/baz.vmdk", + }, + }, + }, + } +} + +func GetNodeWithInlinePV(nodeName string, hasIntreePV bool) *v1.Node { + node := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{}, + } + + if hasIntreePV { + node.Status.VolumesInUse = []v1.UniqueVolumeName{ + "kubernetes.io/vsphere-volume/foobar", + } + } + return node +} + func GetCSINode() *storagev1.CSINode { return &storagev1.CSINode{ ObjectMeta: metav1.ObjectMeta{ @@ -295,10 +332,3 @@ func GetSecret() *v1.Secret { }, } } - -func GetTestClusterResult(statusType checks.CheckStatusType) checks.ClusterCheckResult { - return checks.ClusterCheckResult{ - CheckError: fmt.Errorf("some error"), - CheckStatus: statusType, - } -} diff --git a/pkg/operator/vspherecontroller/checks/api_interface.go b/pkg/operator/vspherecontroller/checks/api_interface.go index 37d616456..7cec0f10b 100644 --- a/pkg/operator/vspherecontroller/checks/api_interface.go +++ b/pkg/operator/vspherecontroller/checks/api_interface.go @@ -14,6 +14,7 @@ type KubeAPIInterface interface { ListNodes() ([]*v1.Node, error) GetCSIDriver(name string) (*storagev1.CSIDriver, error) ListCSINodes() ([]*storagev1.CSINode, error) + ListPersistentVolumes() ([]*v1.PersistentVolume, error) GetStorageClass(name string) (*storagev1.StorageClass, error) GetInfrastructure() *ocpv1.Infrastructure } @@ -24,6 +25,8 @@ type KubeAPIInterfaceImpl struct { CSINodeLister storagelister.CSINodeLister CSIDriverLister storagelister.CSIDriverLister StorageClassLister storagelister.StorageClassLister + PvLister corelister.PersistentVolumeLister + StorageLister oplister.StorageLister } func (k *KubeAPIInterfaceImpl) ListNodes() ([]*v1.Node, error) { @@ -45,3 +48,11 @@ func (k *KubeAPIInterfaceImpl) GetStorageClass(name string) (*storagev1.StorageC func (k *KubeAPIInterfaceImpl) GetInfrastructure() *ocpv1.Infrastructure { return k.Infrastructure } + +func (k *KubeAPIInterfaceImpl) GetStorage(name string) (*operatorv1.Storage, error) { + return k.StorageLister.Get(name) +} + +func (k *KubeAPIInterfaceImpl) ListPersistentVolumes() ([]*v1.PersistentVolume, error) { + return k.PvLister.List(labels.Everything()) +} diff --git a/pkg/operator/vspherecontroller/checks/check_error.go b/pkg/operator/vspherecontroller/checks/check_error.go index b566ad63f..5ad6cc248 100644 --- a/pkg/operator/vspherecontroller/checks/check_error.go +++ b/pkg/operator/vspherecontroller/checks/check_error.go @@ -2,12 +2,16 @@ package checks import ( "fmt" + "strings" + v1 "github.com/openshift/api/operator/v1" "github.com/openshift/vmware-vsphere-csi-driver-operator/pkg/operator/utils" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/klog/v2" ) +// CheckStatusType stores exact error that was observed during performing various checks type CheckStatusType string const ( @@ -18,27 +22,20 @@ const ( CheckStatusDeprecatedVCenter CheckStatusType = "check_deprecated_vcenter" CheckStatusDeprecatedHWVersion CheckStatusType = "check_deprecated_hw_version" CheckStatusDeprecatedESXIVersion CheckStatusType = "check_deprecated_esxi_version" + CheckStatusBuggyMigrationPlatform CheckStatusType = "buggy_vsphere_migration_version" CheckStatusVcenterAPIError CheckStatusType = 
"vcenter_api_error" CheckStatusGenericError CheckStatusType = "generic_error" ) -type ClusterCheckStatus string - -const ( - ClusterCheckAllGood ClusterCheckStatus = "pass" - ClusterCheckBlockUpgradeDriverInstall ClusterCheckStatus = "installation_blocked" - ClusterCheckBlockUpgrade ClusterCheckStatus = "upgrades_blocked" - ClusterCheckUpgradeStateUnknown ClusterCheckStatus = "upgrades_unknown" - ClusterCheckDegrade ClusterCheckStatus = "degraded" -) - +// CheckAction stores what a single failing check would do. type CheckAction int // Ordered by severity, Pass must be 0 (for struct initialization). const ( CheckActionPass = iota + CheckActionRequiresAdminAck // blocks upgrades via admin-ack CheckActionBlockUpgrade // Only block upgrade - CheckActionBlockUpgradeDriverInstall // Block voth upgrade and driver install + CheckActionBlockUpgradeDriverInstall // Block both upgrade and driver install CheckActionBlockUpgradeOrDegrade // Degrade if the driver is installed, block upgrade otherwise CheckActionDegrade ) @@ -55,11 +52,30 @@ func ActionToString(a CheckAction) string { return "BlockUpgradeOrDegrade" case CheckActionDegrade: return "Degrade" + case CheckActionRequiresAdminAck: + return "UpgradeRequiresAdminAck" default: return "Unknown" } } +// ClusterCheckStatus stores what is the status of overall cluster after checking everything and then applying +// additional logic which may not be included in checks themselves. +type ClusterCheckStatus string + +const ( + ClusterCheckAllGood ClusterCheckStatus = "pass" + ClusterCheckBlockUpgradeDriverInstall ClusterCheckStatus = "installation_blocked" + ClusterCheckBlockUpgrade ClusterCheckStatus = "upgrades_blocked" + ClusterCheckUpgradeStateUnknown ClusterCheckStatus = "upgrades_unknown" + ClusterCheckUpgradesBlockedViaAdminAck ClusterCheckStatus = "upgrades_blocked_via_admin_ack" + ClusterCheckDegrade ClusterCheckStatus = "degraded" +) + +const ( + inTreePluginName = "kubernetes.io/vsphere-volume" +) + type ClusterCheckResult struct { CheckError error CheckStatus CheckStatusType @@ -98,6 +114,16 @@ func makeDeprecatedEnvironmentError(statusType CheckStatusType, reason error) Cl return checkResult } +func makeBuggyEnvironmentError(statusType CheckStatusType, reason error) ClusterCheckResult { + checkResult := ClusterCheckResult{ + CheckStatus: statusType, + CheckError: reason, + Action: CheckActionRequiresAdminAck, + Reason: reason.Error(), + } + return checkResult +} + func MakeGenericVCenterAPIError(reason error) ClusterCheckResult { return ClusterCheckResult{ CheckStatus: CheckStatusVcenterAPIError, @@ -125,6 +151,8 @@ func MakeClusterUnupgradeableError(checkStatus CheckStatusType, reason error) Cl } } +// CheckClusterStatus uses results from all the checks we ran and applies additional logic to determine +// overall CheckClusterStatus func CheckClusterStatus(result ClusterCheckResult, apiDependencies KubeAPIInterface) (ClusterCheckStatus, ClusterCheckResult) { switch result.Action { case CheckActionDegrade: @@ -162,11 +190,74 @@ func CheckClusterStatus(result ClusterCheckResult, apiDependencies KubeAPIInterf case CheckActionBlockUpgrade: return ClusterCheckBlockUpgrade, result - + case CheckActionRequiresAdminAck: + return checkForIntreePluginUse(result, apiDependencies) case CheckActionBlockUpgradeDriverInstall: return ClusterCheckBlockUpgradeDriverInstall, result - default: return ClusterCheckAllGood, result } } + +// returns false is migration is enabled in the cluster. 
+// returns true if migration is not enabled in the cluster and cluster is using +// in-tree vSphere volumes. +func checkForIntreePluginUse(result ClusterCheckResult, apiDependencies KubeAPIInterface) (ClusterCheckStatus, ClusterCheckResult) { + storageCR, err := apiDependencies.GetStorage(storageOperatorName) + if err != nil { + reason := fmt.Errorf("vsphere csi driver installed failed with %s, unable to verify storage status: %v", result.Reason, err) + return ClusterCheckDegrade, MakeClusterDegradedError(CheckStatusOpenshiftAPIError, reason) + } + + klog.Infof("Checking for intree plugin use") + + allGoodCheckResult := MakeClusterCheckResultPass() + + // migration is enabled and hence we should be fine + driverName := storageCR.Spec.VSphereStorageDriver + if driverName == v1.CSIWithMigrationDriver { + return ClusterCheckAllGood, allGoodCheckResult + } + + pvs, err := apiDependencies.ListPersistentVolumes() + if err != nil { + reason := fmt.Errorf("vsphere csi driver installed failed with %s, unable to list pvs: %v", result.Reason, err) + return ClusterCheckDegrade, MakeClusterDegradedError(CheckStatusOpenshiftAPIError, reason) + } + + usingvSphereVolumes := false + + for _, pv := range pvs { + if pv.Spec.VsphereVolume != nil { + usingvSphereVolumes = true + break + } + } + + if usingvSphereVolumes { + return ClusterCheckUpgradesBlockedViaAdminAck, allGoodCheckResult + } + + nodes, err := apiDependencies.ListNodes() + if err != nil { + reason := fmt.Errorf("csi driver installed failed with %s, unable to list pvs: %v", result.Reason, err) + return ClusterCheckDegrade, MakeClusterDegradedError(CheckStatusOpenshiftAPIError, reason) + } + + if checkInlineIntreeVolumeUse(nodes) { + return ClusterCheckUpgradesBlockedViaAdminAck, allGoodCheckResult + } + return ClusterCheckAllGood, allGoodCheckResult +} + +func checkInlineIntreeVolumeUse(nodes []*corev1.Node) bool { + for _, node := range nodes { + volumesInUse := node.Status.VolumesInUse + for _, volume := range volumesInUse { + if strings.HasPrefix(string(volume), inTreePluginName) { + return true + } + } + } + return false +} diff --git a/pkg/operator/vspherecontroller/checks/check_error_test.go b/pkg/operator/vspherecontroller/checks/check_error_test.go new file mode 100644 index 000000000..de3d5e4a6 --- /dev/null +++ b/pkg/operator/vspherecontroller/checks/check_error_test.go @@ -0,0 +1,90 @@ +package checks + +import ( + "fmt" + "testing" + + opv1 "github.com/openshift/api/operator/v1" + "github.com/openshift/vmware-vsphere-csi-driver-operator/pkg/operator/testlib" + "k8s.io/apimachinery/pkg/runtime" +) + +func TestCheckForIntreePluginUse(t *testing.T) { + tests := []struct { + name string + initialObjects []runtime.Object + configObjects runtime.Object + storageCR *opv1.Storage + clusterCSIDriverObject *testlib.FakeDriverInstance + expectedBlockResult ClusterCheckStatus + }{ + { + name: "when csi migration is enabled", + clusterCSIDriverObject: testlib.MakeFakeDriverInstance(), + storageCR: testlib.GetStorageOperator(opv1.CSIWithMigrationDriver), + initialObjects: []runtime.Object{}, + configObjects: runtime.Object(testlib.GetInfraObject()), + expectedBlockResult: ClusterCheckAllGood, + }, + { + name: "when csi migration is not enabled, but intree pvs exist", + clusterCSIDriverObject: testlib.MakeFakeDriverInstance(), + storageCR: testlib.GetStorageOperator(""), + initialObjects: []runtime.Object{testlib.GetIntreePV("intree-pv"), testlib.GetNodeWithInlinePV("foobar", false /*hasinline volume*/)}, + configObjects: 
runtime.Object(testlib.GetInfraObject()), + expectedBlockResult: ClusterCheckUpgradesBlockedViaAdminAck, + }, + { + name: "when csi migration is not enabled and inline volumes exist", + clusterCSIDriverObject: testlib.MakeFakeDriverInstance(), + storageCR: testlib.GetStorageOperator(""), + initialObjects: []runtime.Object{testlib.GetNodeWithInlinePV("foobar", true /*hasinline volume*/)}, + configObjects: runtime.Object(testlib.GetInfraObject()), + expectedBlockResult: ClusterCheckUpgradesBlockedViaAdminAck, + }, + { + name: "when csi migration is not enabled but no intree volumes exist", + clusterCSIDriverObject: testlib.MakeFakeDriverInstance(), + storageCR: testlib.GetStorageOperator(""), + initialObjects: []runtime.Object{}, + configObjects: runtime.Object(testlib.GetInfraObject()), + expectedBlockResult: ClusterCheckAllGood, + }, + } + + for i := range tests { + test := tests[i] + t.Run(test.name, func(t *testing.T) { + commonApiClient := testlib.NewFakeClients(test.initialObjects, test.clusterCSIDriverObject, test.configObjects) + clusterCSIDriver := testlib.GetClusterCSIDriver(false) + testlib.AddClusterCSIDriverClient(commonApiClient, clusterCSIDriver) + + test.initialObjects = append(test.initialObjects, clusterCSIDriver) + test.initialObjects = append(test.initialObjects, test.storageCR) + + stopCh := make(chan struct{}) + defer close(stopCh) + + testlib.StartFakeInformer(commonApiClient, stopCh) + if err := testlib.AddInitialObjects(test.initialObjects, commonApiClient); err != nil { + t.Fatalf("error adding initial objects: %v", err) + } + + testlib.WaitForSync(commonApiClient, stopCh) + checkApiClient := KubeAPIInterfaceImpl{ + Infrastructure: testlib.GetInfraObject(), + CSINodeLister: commonApiClient.KubeInformers.InformersFor("").Storage().V1().CSINodes().Lister(), + CSIDriverLister: commonApiClient.KubeInformers.InformersFor("").Storage().V1().CSIDrivers().Lister(), + NodeLister: commonApiClient.NodeInformer.Lister(), + StorageLister: commonApiClient.OCPOperatorInformers.Operator().V1().Storages().Lister(), + PvLister: commonApiClient.KubeInformers.InformersFor("").Core().V1().PersistentVolumes().Lister(), + } + + overallClusterStatus, _ := checkForIntreePluginUse(makeBuggyEnvironmentError(CheckStatusBuggyMigrationPlatform, fmt.Errorf("version is older")), &checkApiClient) + if overallClusterStatus != test.expectedBlockResult { + t.Errorf("expected %v, got %v", test.expectedBlockResult, overallClusterStatus) + } + }) + } + +} diff --git a/pkg/operator/vspherecontroller/checks/check_nodes.go b/pkg/operator/vspherecontroller/checks/check_nodes.go index 6a2fc65c1..0f3d7d47a 100644 --- a/pkg/operator/vspherecontroller/checks/check_nodes.go +++ b/pkg/operator/vspherecontroller/checks/check_nodes.go @@ -169,6 +169,20 @@ func (n *NodeChecker) checkOnNode(workInfo nodeChannelWorkData) ClusterCheckResu return MakeClusterUnupgradeableError(CheckStatusDeprecatedESXIVersion, reason) } + esxiBuildNumber := hostSystem.Config.Product.Build + + klog.Infof("checking for patched version of ESXi for CSI migration: %s-%s", hostAPIVersion, esxiBuildNumber) + + meetsMigrationPatchVersionRequirement, err := checkForMinimumPatchedVersion(hostAPIVersion, esxiBuildNumber) + if err != nil { + klog.Errorf("error parsing host version for node %s and host %s: %v", node.Name, hostName, err) + } + + if !meetsMigrationPatchVersionRequirement { + reason := fmt.Errorf("host %s is on ESXI version %s-%s, which is below minimum required version %s-%d for csi migration", hostName, hostAPIVersion, esxiBuildNumber, 
minPatchedVersion, minBuildNumber) + return makeBuggyEnvironmentError(CheckStatusBuggyMigrationPlatform, reason) + } + return MakeClusterCheckResultPass() } diff --git a/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go b/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go new file mode 100644 index 000000000..3813a5bd8 --- /dev/null +++ b/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go @@ -0,0 +1,58 @@ +package checks + +import ( + "context" + "fmt" + "strconv" + + "k8s.io/klog/v2" +) + +const ( + minPatchedVersion = "7.0.3" + minBuildNumber = 21424296 +) + +type PatchedVcenterChecker struct{} + +var _ CheckInterface = &PatchedVcenterChecker{} + +func (v *PatchedVcenterChecker) Check(ctx context.Context, checkOpts CheckArgs) []ClusterCheckResult { + vmClient := checkOpts.vmConnection.Client + vcenterAPIVersion := vmClient.ServiceContent.About.ApiVersion + buildVersion := vmClient.ServiceContent.About.Build + + klog.Infof("checking for patched version of vSphere for CSI migration: %s-%s", vcenterAPIVersion, buildVersion) + + hasMin, err := checkForMinimumPatchedVersion(vcenterAPIVersion, buildVersion) + if err != nil { + return []ClusterCheckResult{makeDeprecatedEnvironmentError(CheckStatusVcenterAPIError, err)} + } + + if !hasMin { + reason := fmt.Errorf("found version of vCenter which has bugs related to CSI migration. Minimum required, version: %s, build: %d", minPatchedVersion, minBuildNumber) + return []ClusterCheckResult{makeBuggyEnvironmentError(CheckStatusBuggyMigrationPlatform, reason)} + } + return []ClusterCheckResult{} +} + +func checkForMinimumPatchedVersion(vCenterVersion string, build string) (bool, error) { + hasMinimumApiVersion, err := isMinimumVersion(minPatchedVersion, vCenterVersion) + if err != nil { + return true, err + } + if !hasMinimumApiVersion { + return false, nil + } + + buildNumber, err := strconv.Atoi(build) + if err != nil { + return true, fmt.Errorf("error converting build number %s to integer", build) + } + + if buildNumber >= minBuildNumber { + return true, nil + } + + return false, nil +} diff --git a/pkg/operator/vspherecontroller/vcsim_framework.go b/pkg/operator/vspherecontroller/vcsim_framework.go index 47b1bf8d7..aabce9ae6 100644 --- a/pkg/operator/vspherecontroller/vcsim_framework.go +++ b/pkg/operator/vspherecontroller/vcsim_framework.go @@ -121,9 +121,12 @@ datacenters = "DC0" return &cfg } -func customizeVCenterVersion(version string, apiVersion string, conn *vclib.VSphereConnection) { +func CustomizeVCenterVersion(version, apiVersion, build string, conn *vclib.VSphereConnection) { conn.Client.Client.ServiceContent.About.Version = version conn.Client.Client.ServiceContent.About.ApiVersion = apiVersion + if build != "" { + conn.Client.Client.ServiceContent.About.Build = build + } fmt.Printf("customize vcenter version") } diff --git a/pkg/operator/vspherecontroller/vsphere_environment_checker.go b/pkg/operator/vspherecontroller/vsphere_environment_checker.go index a7adb7fda..2d6878105 100644 --- a/pkg/operator/vspherecontroller/vsphere_environment_checker.go +++ b/pkg/operator/vspherecontroller/vsphere_environment_checker.go @@ -45,6 +45,7 @@ func newVSphereEnvironmentChecker() *vSphereEnvironmentCheckerComposite { &checks.CheckExistingDriver{}, &checks.VCenterChecker{}, &checks.NodeChecker{}, + &checks.PatchedVcenterChecker{}, } return checker } @@ -67,9 +68,7 @@ func (v *vSphereEnvironmentCheckerComposite) Check( allChecks = append(allChecks, result...) 
} - overallResult := checks.ClusterCheckResult{ - Action: checks.CheckActionPass, - } + overallResult := checks.MakeClusterCheckResultPass() // following checks can either block cluster upgrades or degrade the cluster // the severity of degradation is higher than blocking upgrades @@ -80,7 +79,7 @@ func (v *vSphereEnvironmentCheckerComposite) Check( } } - if overallResult.Action > checks.CheckActionPass { + if overallResult.Action > checks.CheckActionRequiresAdminAck { // Everything else than pass needs a quicker re-check klog.Warningf("Overall check result: %s: %s", checks.ActionToString(overallResult.Action), overallResult.Reason) v.nextCheck = v.lastCheck.Add(nextErrorDelay) @@ -91,5 +90,5 @@ func (v *vSphereEnvironmentCheckerComposite) Check( klog.V(2).Infof("Overall check result: %s: %s", checks.ActionToString(overallResult.Action), overallResult.Reason) v.backoff = defaultBackoff v.nextCheck = v.lastCheck.Add(defaultBackoff.Cap) - return defaultBackoff.Cap, checks.MakeClusterCheckResultPass(), true + return defaultBackoff.Cap, overallResult, true } diff --git a/pkg/operator/vspherecontroller/vsphere_environment_checker_test.go b/pkg/operator/vspherecontroller/vsphere_environment_checker_test.go index cb3bd7dc2..c1b7e00ed 100644 --- a/pkg/operator/vspherecontroller/vsphere_environment_checker_test.go +++ b/pkg/operator/vspherecontroller/vsphere_environment_checker_test.go @@ -2,10 +2,11 @@ package vspherecontroller import ( "context" - "github.com/openshift/vmware-vsphere-csi-driver-operator/pkg/operator/testlib" "testing" "time" + "github.com/openshift/vmware-vsphere-csi-driver-operator/pkg/operator/testlib" + "k8s.io/apimachinery/pkg/runtime" "github.com/openshift/vmware-vsphere-csi-driver-operator/pkg/operator/vspherecontroller/checks" @@ -15,6 +16,7 @@ func TestEnvironmentCheck(t *testing.T) { tests := []struct { name string vcenterVersion string + build string checksRan bool result checks.CheckStatusType initialObjects []runtime.Object @@ -29,7 +31,8 @@ func TestEnvironmentCheck(t *testing.T) { initialObjects: []runtime.Object{testlib.GetConfigMap(), testlib.GetSecret()}, configObjects: runtime.Object(testlib.GetInfraObject()), clusterCSIDriverObject: testlib.MakeFakeDriverInstance(), - vcenterVersion: "7.0.2", + vcenterVersion: "7.0.3", + build: "21424296", result: checks.CheckStatusPass, checksRan: true, // should reset the steps back to maximum in defaultBackoff @@ -37,6 +40,19 @@ func TestEnvironmentCheck(t *testing.T) { expectedNextCheck: time.Now().Add(defaultBackoff.Cap), runCount: 1, }, + { + name: "when tests are ran on a unpatched version of vSphere", + initialObjects: []runtime.Object{testlib.GetConfigMap(), testlib.GetSecret()}, + configObjects: runtime.Object(testlib.GetInfraObject()), + clusterCSIDriverObject: testlib.MakeFakeDriverInstance(), + vcenterVersion: "7.0.2", + result: checks.CheckStatusBuggyMigrationPlatform, + checksRan: true, + // should reset the steps back to maximum in defaultBackoff + expectedBackOffSteps: defaultBackoff.Steps, + expectedNextCheck: time.Now().Add(defaultBackoff.Cap), + runCount: 1, + }, { name: "when tests fail, delay should backoff exponentially", initialObjects: []runtime.Object{testlib.GetConfigMap(), testlib.GetSecret()}, @@ -79,7 +95,7 @@ func TestEnvironmentCheck(t *testing.T) { time.Sleep(5 * time.Second) if test.vcenterVersion != "" { - customizeVCenterVersion(test.vcenterVersion, test.vcenterVersion, conn) + CustomizeVCenterVersion(test.vcenterVersion, test.vcenterVersion, test.build, conn) } csiDriverLister := 
commonApiClient.KubeInformers.InformersFor("").Storage().V1().CSIDrivers().Lister() csiNodeLister := commonApiClient.KubeInformers.InformersFor("").Storage().V1().CSINodes().Lister() diff --git a/pkg/operator/vspherecontroller/vspherecontroller.go b/pkg/operator/vspherecontroller/vspherecontroller.go index c52122d2d..6587c781c 100644 --- a/pkg/operator/vspherecontroller/vspherecontroller.go +++ b/pkg/operator/vspherecontroller/vspherecontroller.go @@ -45,11 +45,13 @@ type VSphereController struct { kubeClient kubernetes.Interface operatorClient v1helpers.OperatorClientWithFinalizers configMapLister corelister.ConfigMapLister + managedConfigMapLister corelister.ConfigMapLister secretLister corelister.SecretLister scLister storagelister.StorageClassLister clusterCSIDriverLister clustercsidriverlister.ClusterCSIDriverLister infraLister infralister.InfrastructureLister nodeLister corelister.NodeLister + pvLister corelister.PersistentVolumeLister csiDriverLister storagelister.CSIDriverLister csiNodeLister storagelister.CSINodeLister apiClients utils.APIClient @@ -74,6 +76,10 @@ const ( envVMWareVsphereDriverSyncerImage = "VMWARE_VSPHERE_SYNCER_IMAGE" storageClassControllerName = "VMwareVSphereDriverStorageClassController" storageClassName = "thin-csi" + + managedConfigNamespace = "openshift-config-managed" + adminGateConfigMap = "admin-gates" + migrationAck413 = "ack-4.13-kube-127-vsphere-migration-in-4.14" ) type conditionalControllerInterface interface { @@ -95,12 +101,18 @@ func NewVSphereController( kubeInformers := apiClients.KubeInformers ocpConfigInformer := apiClients.ConfigInformers configMapInformer := kubeInformers.InformersFor(cloudConfigNamespace).Core().V1().ConfigMaps() + infraInformer := ocpConfigInformer.Config().V1().Infrastructures() scInformer := kubeInformers.InformersFor("").Storage().V1().StorageClasses() csiDriverLister := kubeInformers.InformersFor("").Storage().V1().CSIDrivers().Lister() csiNodeLister := kubeInformers.InformersFor("").Storage().V1().CSINodes().Lister() + // we need pvLister for checking if cluster is using intree pvs + pvLister := kubeInformers.InformersFor("").Core().V1().PersistentVolumes().Lister() nodeLister := apiClients.NodeInformer.Lister() + // managedConfigMapInformer for applying admin-gates + managedConfigMapInformer := kubeInformers.InformersFor(managedConfigNamespace).Core().V1().ConfigMaps() + rc := recorder.WithComponentSuffix("vmware-" + strings.ToLower(name)) c := &VSphereController{ @@ -114,6 +126,8 @@ func NewVSphereController( scLister: scInformer.Lister(), csiDriverLister: csiDriverLister, nodeLister: nodeLister, + pvLister: pvLister, + managedConfigMapLister: managedConfigMapInformer.Lister(), apiClients: apiClients, eventRecorder: rc, vSphereChecker: newVSphereEnvironmentChecker(), @@ -131,6 +145,7 @@ func NewVSphereController( apiClients.OperatorClient.Informer(), configMapInformer.Informer(), apiClients.SecretInformer.Informer(), + managedConfigMapInformer.Informer(), infraInformer.Informer(), scInformer.Informer(), ).WithSync(c.sync). 
@@ -324,6 +339,11 @@ func (c *VSphereController) blockUpgradeOrDegradeCluster( // Set Upgradeable: true with an extra message updateError := c.updateConditions(ctx, c.name, result, status, operatorapi.ConditionFalse) return updateError, true, true + case checks.ClusterCheckUpgradesBlockedViaAdminAck: + clusterCondition = "upgrade_blocked_admin_ack" + utils.InstallErrorMetric.WithLabelValues(string(result.CheckStatus), clusterCondition).Set(1) + updateError := c.addRequiresAdminAck(ctx, result, c.name) + return updateError, false, false } return nil, false, false } @@ -359,6 +379,8 @@ func (c *VSphereController) getCheckAPIDependency(infra *ocpv1.Infrastructure) c CSINodeLister: c.csiNodeLister, CSIDriverLister: c.csiDriverLister, NodeLister: c.nodeLister, + StorageLister: c.storageLister, + PvLister: c.pvLister, } return checkerApiClient } @@ -468,6 +490,23 @@ func (c *VSphereController) updateConditions( return nil } +func (c *VSphereController) addRequiresAdminAck(ctx context.Context, lastCheckResult checks.ClusterCheckResult, name string) error { + adminGate, err := c.managedConfigMapLister.ConfigMaps(managedConfigNamespace).Get(adminGateConfigMap) + if err != nil { + return fmt.Errorf("failed to get admin-gate configmap: %v", err) + } + + klog.Infof("Updating admin-gates") + + _, ok := adminGate.Data[migrationAck413] + if !ok { + adminGate.Data[migrationAck413] = "vSphere CSI migration will be enabled in Openshift-4.14. Your cluster appears to be using in-tree vSphere volumes and is on a vSphere version that has CSI migration related bugs. See - https://access.redhat.com/node/7011683 for more information, before upgrading to 4.14." + + } + _, _, err = resourceapply.ApplyConfigMap(ctx, c.kubeClient.CoreV1(), c.eventRecorder, adminGate) + return err +} + func (c *VSphereController) addUpgradeableBlockCondition( lastCheckResult checks.ClusterCheckResult, name string, diff --git a/pkg/operator/vspherecontroller/vspherecontroller_test.go b/pkg/operator/vspherecontroller/vspherecontroller_test.go index e957e41c2..51f5d6ba8 100644 --- a/pkg/operator/vspherecontroller/vspherecontroller_test.go +++ b/pkg/operator/vspherecontroller/vspherecontroller_test.go @@ -373,7 +373,7 @@ vsphere_csi_driver_error{condition="upgrade_blocked",failure_reason="existing_dr var connError error conn, cleanUpFunc, connError = setupSimulator(defaultModel) if test.vcenterVersion != "" { - customizeVCenterVersion(test.vcenterVersion, test.vcenterVersion, conn) + CustomizeVCenterVersion(test.vcenterVersion, test.vcenterVersion, "", conn) } ctrl.vsphereConnectionFunc = makeVsphereConnectionFunc(conn, test.failVCenterConnection, connError) defer func() { @@ -618,7 +618,7 @@ func TestAddUpgradeableBlockCondition(t *testing.T) { { name: "when no existing condition is found, should add condition", clusterCSIDriver: testlib.MakeFakeDriverInstance(), - clusterResult: testlib.GetTestClusterResult(checks.CheckStatusVSphereConnectionFailed), + clusterResult: GetTestClusterResult(checks.CheckStatusVSphereConnectionFailed), expectedCondition: opv1.OperatorCondition{ Type: conditionType, Status: opv1.ConditionFalse, @@ -638,7 +638,7 @@ func TestAddUpgradeableBlockCondition(t *testing.T) { } return instance }), - clusterResult: testlib.GetTestClusterResult(checks.CheckStatusVSphereConnectionFailed), + clusterResult: GetTestClusterResult(checks.CheckStatusVSphereConnectionFailed), expectedCondition: opv1.OperatorCondition{ Type: conditionType, Status: opv1.ConditionFalse, @@ -658,7 +658,7 @@ func TestAddUpgradeableBlockCondition(t 
*testing.T) { } return instance }), - clusterResult: testlib.GetTestClusterResult(checks.CheckStatusVSphereConnectionFailed), + clusterResult: GetTestClusterResult(checks.CheckStatusVSphereConnectionFailed), expectedCondition: opv1.OperatorCondition{ Type: conditionType, Status: opv1.ConditionFalse, @@ -707,3 +707,10 @@ func (*skippingChecker) Check(ctx context.Context, connection checks.CheckArgs) func newSkippingChecker() *skippingChecker { return &skippingChecker{} } + +func GetTestClusterResult(statusType checks.CheckStatusType) checks.ClusterCheckResult { + return checks.ClusterCheckResult{ + CheckError: fmt.Errorf("some error"), + CheckStatus: statusType, + } +} From 6eacf76baa254c84324eb100b808af0797056f7c Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 12 Sep 2023 12:04:38 -0400 Subject: [PATCH 2/7] Add placeholder checks for vSphere 8 --- .../vspherecontroller/checks/check_error.go | 6 +- .../vspherecontroller/checks/check_nodes.go | 4 +- .../checks/check_patched_vcenter.go | 55 +++++++++++----- .../checks/check_patched_vcenter_test.go | 63 +++++++++++++++++++ 4 files changed, 110 insertions(+), 18 deletions(-) create mode 100644 pkg/operator/vspherecontroller/checks/check_patched_vcenter_test.go diff --git a/pkg/operator/vspherecontroller/checks/check_error.go b/pkg/operator/vspherecontroller/checks/check_error.go index 5ad6cc248..48df9deaa 100644 --- a/pkg/operator/vspherecontroller/checks/check_error.go +++ b/pkg/operator/vspherecontroller/checks/check_error.go @@ -209,7 +209,7 @@ func checkForIntreePluginUse(result ClusterCheckResult, apiDependencies KubeAPII return ClusterCheckDegrade, MakeClusterDegradedError(CheckStatusOpenshiftAPIError, reason) } - klog.Infof("Checking for intree plugin use") + klog.V(2).Infof("Checking for intree plugin use") allGoodCheckResult := MakeClusterCheckResultPass() @@ -229,6 +229,7 @@ func checkForIntreePluginUse(result ClusterCheckResult, apiDependencies KubeAPII for _, pv := range pvs { if pv.Spec.VsphereVolume != nil { + klog.V(2).Infof("found vSphere in-tree persistent volumes in the cluster") usingvSphereVolumes = true break } @@ -240,11 +241,12 @@ func checkForIntreePluginUse(result ClusterCheckResult, apiDependencies KubeAPII nodes, err := apiDependencies.ListNodes() if err != nil { - reason := fmt.Errorf("csi driver installed failed with %s, unable to list pvs: %v", result.Reason, err) + reason := fmt.Errorf("csi driver installed failed with %s, unable to list nodes: %v", result.Reason, err) return ClusterCheckDegrade, MakeClusterDegradedError(CheckStatusOpenshiftAPIError, reason) } if checkInlineIntreeVolumeUse(nodes) { + klog.V(2).Infof("found vSphere in-line persistent volumes in the cluster") return ClusterCheckUpgradesBlockedViaAdminAck, allGoodCheckResult } return ClusterCheckAllGood, allGoodCheckResult diff --git a/pkg/operator/vspherecontroller/checks/check_nodes.go b/pkg/operator/vspherecontroller/checks/check_nodes.go index 0f3d7d47a..a76f0dc0a 100644 --- a/pkg/operator/vspherecontroller/checks/check_nodes.go +++ b/pkg/operator/vspherecontroller/checks/check_nodes.go @@ -171,7 +171,7 @@ func (n *NodeChecker) checkOnNode(workInfo nodeChannelWorkData) ClusterCheckResu esxiBuildNumber := hostSystem.Config.Product.Build - klog.Infof("checking for patched version of ESXi for CSI migration: %s-%s", hostAPIVersion, esxiBuildNumber) + klog.V(2).Infof("checking for patched version of ESXi for CSI migration: %s-%s", hostAPIVersion, esxiBuildNumber) meetsMigrationPatchVersionRequirement, err := 
checkForMinimumPatchedVersion(hostAPIVersion, esxiBuildNumber) if err != nil { @@ -179,7 +179,7 @@ func (n *NodeChecker) checkOnNode(workInfo nodeChannelWorkData) ClusterCheckResu } if !meetsMigrationPatchVersionRequirement { - reason := fmt.Errorf("host %s is on ESXI version %s-%s, which is below minimum required version %s-%d for csi migration", hostName, hostAPIVersion, esxiBuildNumber, minPatchedVersion, minBuildNumber) + reason := fmt.Errorf("host %s is on ESXI version %s-%s, which is below minimum required version %s-%d for csi migration", hostName, hostAPIVersion, esxiBuildNumber, minPatchedVersion7Series, minBuildNumber7Series) return makeBuggyEnvironmentError(CheckStatusBuggyMigrationPlatform, reason) } diff --git a/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go b/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go index 3813a5bd8..1f0b22329 100644 --- a/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go +++ b/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go @@ -4,13 +4,18 @@ import ( "context" "fmt" "strconv" + "strings" "k8s.io/klog/v2" ) const ( - minPatchedVersion = "7.0.3" - minBuildNumber = 21424296 + minPatchedVersion7Series = "7.0.3" + minBuildNumber7Series = 21424296 + + // TODO: Fix with real version information + minPatchedVersion8Series = "8.0.2" + minBuildNumber8Series = 999 ) type PatchedVcenterChecker struct{} @@ -22,7 +27,7 @@ func (v *PatchedVcenterChecker) Check(ctx context.Context, checkOpts CheckArgs) vcenterAPIVersion := vmClient.ServiceContent.About.ApiVersion buildVersion := vmClient.ServiceContent.About.Build - klog.Infof("checking for patched version of vSphere for CSI migration: %s-%s", vcenterAPIVersion, buildVersion) + klog.V(2).Infof("checking for patched version of vSphere for CSI migration: %s-%s", vcenterAPIVersion, buildVersion) hasMin, err := checkForMinimumPatchedVersion(vcenterAPIVersion, buildVersion) if err != nil { @@ -30,14 +35,14 @@ func (v *PatchedVcenterChecker) Check(ctx context.Context, checkOpts CheckArgs) } if !hasMin { - reason := fmt.Errorf("found version of vCenter which has bugs related to CSI migration. Minimum required, version: %s, build: %d", minPatchedVersion, minBuildNumber) + reason := fmt.Errorf("found version of vCenter which has bugs related to CSI migration. 
Minimum required, version: %s, build: %d", minPatchedVersion7Series, minBuildNumber7Series) return []ClusterCheckResult{makeBuggyEnvironmentError(CheckStatusBuggyMigrationPlatform, reason)} } return []ClusterCheckResult{} } -func checkForMinimumPatchedVersion(vCenterVersion string, build string) (bool, error) { - hasMinimumApiVersion, err := isMinimumVersion(minPatchedVersion, vCenterVersion) +func checkForMinimumPatchedVersion(vSphereVersion string, build string) (bool, error) { + hasMinimumApiVersion, err := isMinimumVersion(minPatchedVersion7Series, vSphereVersion) if err != nil { return true, err } @@ -45,14 +50,36 @@ func checkForMinimumPatchedVersion(vCenterVersion string, build string) (bool, e return false, nil } - buildNumber, err := strconv.Atoi(build) - if err != nil { - return true, fmt.Errorf("error converting build number %s to integer", build) - } + if strings.HasPrefix(vSphereVersion, "8") { + hasMinimumApiVersion, err := isMinimumVersion(minPatchedVersion8Series, vSphereVersion) + if err != nil { + return true, err + } - if buildNumber >= minBuildNumber { - return true, nil - } + if !hasMinimumApiVersion { + return false, nil + } + + buildNumber, err := strconv.Atoi(build) + if err != nil { + return true, fmt.Errorf("error converting build number %s to integer", build) + } + + if buildNumber >= minBuildNumber8Series { + return true, nil + } - return false, nil + return false, nil + } else { + buildNumber, err := strconv.Atoi(build) + if err != nil { + return true, fmt.Errorf("error converting build number %s to integer", build) + } + + if buildNumber >= minBuildNumber7Series { + return true, nil + } + + return false, nil + } } diff --git a/pkg/operator/vspherecontroller/checks/check_patched_vcenter_test.go b/pkg/operator/vspherecontroller/checks/check_patched_vcenter_test.go new file mode 100644 index 000000000..034be45f2 --- /dev/null +++ b/pkg/operator/vspherecontroller/checks/check_patched_vcenter_test.go @@ -0,0 +1,63 @@ +package checks + +import "testing" + +func TestCheckForMinimumPatchedVersion(t *testing.T) { + tests := []struct { + name string + vSphereVersion string + buildNumber string + meetsRequirement bool + }{ + { + name: "when vSphere version meets minimum 7 series requirement", + vSphereVersion: "7.0.3", + buildNumber: "21958406", + meetsRequirement: true, + }, + { + name: "when vSphere version meets minimum 8 series requirement", + vSphereVersion: "8.0.2", + buildNumber: "1000", + meetsRequirement: true, + }, + { + name: "when vSphere version does not meet 7 series build number requirement", + vSphereVersion: "7.0.3", + buildNumber: "21324296", + meetsRequirement: false, + }, + { + name: "when vSphere version does not meet 8 series build number requirement", + vSphereVersion: "8.0.2", + buildNumber: "998", + meetsRequirement: false, + }, + { + name: "when vSphere version does not meet 8 series version requirement", + vSphereVersion: "8.0.1", + buildNumber: "1000", + meetsRequirement: false, + }, + { + name: "when vSphere version does not meet 7 series version requirement", + vSphereVersion: "7.0.2", + buildNumber: "21958406", + meetsRequirement: false, + }, + } + + for i := range tests { + test := tests[i] + t.Run(test.name, func(t *testing.T) { + checkFlag, err := checkForMinimumPatchedVersion(test.vSphereVersion, test.buildNumber) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + + if checkFlag != test.meetsRequirement { + t.Errorf("for checking version requirement, expected %v got %v", test.meetsRequirement, checkFlag) + } + }) + } +} 
From 3608ea4b7d807913213b06d75c6b5ec0d060103d Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 12 Sep 2023 16:09:33 -0400 Subject: [PATCH 3/7] Add tests for adding and remove admin-ack keys --- pkg/operator/testlib/testlib.go | 37 +++++-- .../vspherecontroller/vspherecontroller.go | 62 +++++++++-- .../vspherecontroller_test.go | 100 +++++++++++++++++- 3 files changed, 178 insertions(+), 21 deletions(-) diff --git a/pkg/operator/testlib/testlib.go b/pkg/operator/testlib/testlib.go index 5396c2475..3065aa967 100644 --- a/pkg/operator/testlib/testlib.go +++ b/pkg/operator/testlib/testlib.go @@ -26,10 +26,12 @@ import ( var f embed.FS const ( - cloudConfigNamespace = "openshift-config" - infraGlobalName = "cluster" - secretName = "vmware-vsphere-cloud-credentials" - defaultNamespace = "openshift-cluster-csi-drivers" + cloudConfigNamespace = "openshift-config" + infraGlobalName = "cluster" + storageOperatorName = "cluster" + secretName = "vmware-vsphere-cloud-credentials" + defaultNamespace = "openshift-cluster-csi-drivers" + managedConfigNamespace = "openshift-config-managed" ) // fakeInstance is a fake CSI driver instance that also fullfils the OperatorClient interface @@ -71,7 +73,7 @@ func StartFakeInformer(clients *utils.APIClient, stopCh <-chan struct{}) { func NewFakeClients(coreObjects []runtime.Object, operatorObject *FakeDriverInstance, configObject runtime.Object) *utils.APIClient { dynamicClient := dynamicfake.NewSimpleDynamicClient(runtime.NewScheme()) kubeClient := fakecore.NewSimpleClientset(coreObjects...) - kubeInformers := v1helpers.NewKubeInformersForNamespaces(kubeClient, defaultNamespace, cloudConfigNamespace, "") + kubeInformers := v1helpers.NewKubeInformersForNamespaces(kubeClient, defaultNamespace, cloudConfigNamespace, managedConfigNamespace, "") nodeInformer := kubeInformers.InformersFor("").Core().V1().Nodes() secretInformer := kubeInformers.InformersFor(defaultNamespace).Core().V1().Secrets() @@ -131,8 +133,9 @@ func AddInitialObjects(objects []runtime.Object, clients *utils.APIClient) error for _, obj := range objects { switch obj.(type) { case *v1.ConfigMap: - configMapInformer := clients.KubeInformers.InformersFor(cloudConfigNamespace).Core().V1().ConfigMaps().Informer() - configMapInformer.GetStore().Add(obj) + configMap := obj.(*v1.ConfigMap) + configMapInformer := clients.KubeInformers.InformersFor(configMap.Namespace).Core().V1().ConfigMaps().Informer() + configMapInformer.GetStore().Add(configMap) case *v1.Secret: secretInformer := clients.SecretInformer.Informer() secretInformer.GetStore().Add(obj) @@ -320,6 +323,26 @@ datacenters = "DC0" } } +func GetAdminGateConfigMap(withAckKey bool) *v1.ConfigMap { + cMap := &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "admin-gates", + Namespace: managedConfigNamespace, + }, + Data: map[string]string{}, + } + + if withAckKey { + cMap.Data["ack-4.12-kube-1.26-api-removals-in-4.13"] = ` + Kubernetes 1.26 and therefore OpenShift + 4.13 remove several APIs which require admin consideration. 
Please see the knowledge + article https://access.redhat.com/articles/6958394 for details and instructions.` + cMap.Data["ack-4.13-kube-127-vsphere-migration-in-4.14"] = "remove this to upgrade" + } + + return cMap +} + func GetSecret() *v1.Secret { return &v1.Secret{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/operator/vspherecontroller/vspherecontroller.go b/pkg/operator/vspherecontroller/vspherecontroller.go index 6587c781c..20b9c39c3 100644 --- a/pkg/operator/vspherecontroller/vspherecontroller.go +++ b/pkg/operator/vspherecontroller/vspherecontroller.go @@ -233,7 +233,9 @@ func (c *VSphereController) installCSIDriver(ctx context.Context, syncContext fa // if there was an OCP error we should degrade the cluster or if we previously created CSIDriver // but we can't connect to vcenter now, we should also degrade the cluster var connectionBlockUpgrade bool - err, blockCSIDriverInstall, connectionBlockUpgrade = c.blockUpgradeOrDegradeCluster(ctx, connectionResult, infra, opStatus) + var blockUpgradeViaAdminAck bool + + err, blockCSIDriverInstall, connectionBlockUpgrade, _ = c.blockUpgradeOrDegradeCluster(ctx, connectionResult, infra, opStatus) if err != nil { return blockCSIDriverInstall, err } @@ -262,7 +264,7 @@ func (c *VSphereController) installCSIDriver(ctx context.Context, syncContext fa }) var clusterCheckBlockUpgrade bool - err, blockCSIDriverInstall, clusterCheckBlockUpgrade = c.blockUpgradeOrDegradeCluster(ctx, result, infra, opStatus) + err, blockCSIDriverInstall, clusterCheckBlockUpgrade, blockUpgradeViaAdminAck = c.blockUpgradeOrDegradeCluster(ctx, result, infra, opStatus) if err != nil { return blockCSIDriverInstall, err } @@ -288,6 +290,20 @@ func (c *VSphereController) installCSIDriver(ctx context.Context, syncContext fa if blockUpgrade { upgradeableStatus = operatorapi.ConditionFalse } + + // if we are not blocking upgrade via admin-ack nor we are blocking upgrades otherwise, we should + // remove admin-gate checks. + // The reason we are also checking for blockUpgrade variable is because when cluster upgrade is blocked because of + // other error conditions, then those checks take precedence over blockUpgradeViaAdminAck and hence blockUpgradeViaAdminAck + // might be false even though cluster overall is not upgradeable. In which we should not remove adminAck change + // which might have been installed previously. 
+ if !blockUpgradeViaAdminAck && !blockUpgrade { + err := c.removeAdminAck(ctx, c.name) + if err != nil { + return blockCSIDriverInstall, err + } + } + return blockCSIDriverInstall, c.updateConditions(ctx, c.name, result, opStatus, upgradeableStatus) } @@ -312,7 +328,7 @@ func (c *VSphereController) blockUpgradeOrDegradeCluster( ctx context.Context, result checks.ClusterCheckResult, infra *ocpv1.Infrastructure, - status *operatorapi.OperatorStatus) (err error, blockInstall, blockUpgrade bool) { + status *operatorapi.OperatorStatus) (err error, blockInstall, blockUpgrade, blockUpgradeViaAdminAck bool) { var clusterCondition string clusterStatus, result := checks.CheckClusterStatus(result, c.getCheckAPIDependency(infra)) @@ -320,17 +336,17 @@ func (c *VSphereController) blockUpgradeOrDegradeCluster( case checks.ClusterCheckDegrade: clusterCondition = "degraded" utils.InstallErrorMetric.WithLabelValues(string(result.CheckStatus), clusterCondition).Set(1) - return result.CheckError, true, false + return result.CheckError, true, false, false case checks.ClusterCheckUpgradeStateUnknown: clusterCondition = "upgrade_unknown" utils.InstallErrorMetric.WithLabelValues(string(result.CheckStatus), clusterCondition).Set(1) updateError := c.updateConditions(ctx, c.name, result, status, operatorapi.ConditionUnknown) - return updateError, true, false + return updateError, true, false, false case checks.ClusterCheckBlockUpgrade: clusterCondition = "upgrade_blocked" utils.InstallErrorMetric.WithLabelValues(string(result.CheckStatus), clusterCondition).Set(1) updateError := c.updateConditions(ctx, c.name, result, status, operatorapi.ConditionFalse) - return updateError, false, true + return updateError, false, true, false case checks.ClusterCheckBlockUpgradeDriverInstall: clusterCondition = "install_blocked" utils.InstallErrorMetric.WithLabelValues(string(result.CheckStatus), clusterCondition).Set(1) @@ -338,14 +354,16 @@ func (c *VSphereController) blockUpgradeOrDegradeCluster( utils.InstallErrorMetric.WithLabelValues(string(result.CheckStatus), clusterCondition).Set(1) // Set Upgradeable: true with an extra message updateError := c.updateConditions(ctx, c.name, result, status, operatorapi.ConditionFalse) - return updateError, true, true + return updateError, true, true, false case checks.ClusterCheckUpgradesBlockedViaAdminAck: clusterCondition = "upgrade_blocked_admin_ack" - utils.InstallErrorMetric.WithLabelValues(string(result.CheckStatus), clusterCondition).Set(1) + rt := string(result.CheckStatus) + klog.Infof("check status is: %s", rt) + utils.InstallErrorMetric.WithLabelValues(rt, clusterCondition).Set(1) updateError := c.addRequiresAdminAck(ctx, result, c.name) - return updateError, false, false + return updateError, false, false, true } - return nil, false, false + return nil, false, false, false } func (c *VSphereController) runConditionalController(ctx context.Context) { @@ -496,7 +514,7 @@ func (c *VSphereController) addRequiresAdminAck(ctx context.Context, lastCheckRe return fmt.Errorf("failed to get admin-gate configmap: %v", err) } - klog.Infof("Updating admin-gates") + klog.V(2).Infof("Updating admin-gates to require admin-ack for vSphere CSI migration") _, ok := adminGate.Data[migrationAck413] if !ok { @@ -507,6 +525,28 @@ func (c *VSphereController) addRequiresAdminAck(ctx context.Context, lastCheckRe return err } +func (c *VSphereController) removeAdminAck(ctx context.Context, name string) error { + adminGate, err := 
c.managedConfigMapLister.ConfigMaps(managedConfigNamespace).Get(adminGateConfigMap) + if err != nil { + if apierrors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to get admin-gate configmap: %v", err) + } + + klog.V(2).Infof("removing admin-gates that is required for CSI migration") + + _, ok := adminGate.Data[migrationAck413] + // nothing needs to be done if key doesn't exist + if !ok { + return nil + } + + delete(adminGate.Data, migrationAck413) + _, _, err = resourceapply.ApplyConfigMap(ctx, c.kubeClient.CoreV1(), c.eventRecorder, adminGate) + return err +} + func (c *VSphereController) addUpgradeableBlockCondition( lastCheckResult checks.ClusterCheckResult, name string, diff --git a/pkg/operator/vspherecontroller/vspherecontroller_test.go b/pkg/operator/vspherecontroller/vspherecontroller_test.go index 51f5d6ba8..685aa8297 100644 --- a/pkg/operator/vspherecontroller/vspherecontroller_test.go +++ b/pkg/operator/vspherecontroller/vspherecontroller_test.go @@ -17,6 +17,7 @@ import ( "github.com/openshift/vmware-vsphere-csi-driver-operator/pkg/operator/vspherecontroller/checks" iniv1 "gopkg.in/ini.v1" v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/component-base/metrics/testutil" ) @@ -29,12 +30,15 @@ func newVsphereController(apiClients *utils.APIClient) *VSphereController { kubeInformers := apiClients.KubeInformers ocpConfigInformer := apiClients.ConfigInformers configMapInformer := kubeInformers.InformersFor(cloudConfigNamespace).Core().V1().ConfigMaps() + managedConfigMapInformer := kubeInformers.InformersFor(managedConfigNamespace).Core().V1().ConfigMaps() infraInformer := ocpConfigInformer.Config().V1().Infrastructures() scInformer := kubeInformers.InformersFor("").Storage().V1().StorageClasses() csiDriverLister := kubeInformers.InformersFor("").Storage().V1().CSIDrivers().Lister() csiNodeLister := kubeInformers.InformersFor("").Storage().V1().CSINodes().Lister() nodeLister := apiClients.NodeInformer.Lister() + storageLister := apiClients.OCPOperatorInformers.Operator().V1().Storages().Lister() + pvLister := kubeInformers.InformersFor("").Core().V1().PersistentVolumes().Lister() rc := events.NewInMemoryRecorder(testControllerName) cloudConfigBytes, _ := assets.ReadFile("vsphere_cloud_config.yaml") @@ -53,9 +57,11 @@ func newVsphereController(apiClients *utils.APIClient) *VSphereController { csiDriverLister: csiDriverLister, nodeLister: nodeLister, manifest: cloudConfigBytes, + pvLister: pvLister, csiConfigManifest: csiConfigBytes, apiClients: *apiClients, clusterCSIDriverLister: apiClients.ClusterCSIDriverInformer.Lister(), + managedConfigMapLister: managedConfigMapInformer.Lister(), eventRecorder: rc, vSphereChecker: newVSphereEnvironmentChecker(), infraLister: infraInformer.Lister(), @@ -90,11 +96,13 @@ func TestSync(t *testing.T) { configObjects runtime.Object vcenterVersion string hostVersion string + build string startingNodeHardwareVersions []string finalNodeHardwareVersions []string expectedConditions []opv1.OperatorCondition expectedMetrics string expectError error + expectedAdminGateConfigKey string failVCenterConnection bool operandStarted bool storageClassCreated bool @@ -228,7 +236,7 @@ func TestSync(t *testing.T) { }, }, expectedMetrics: `vsphere_csi_driver_error{condition="install_blocked",failure_reason="existing_driver_found"} 1 -vsphere_csi_driver_error{condition="upgrade_blocked",failure_reason="existing_driver_found"} 1`, + 
vsphere_csi_driver_error{condition="upgrade_blocked",failure_reason="existing_driver_found"} 1`, operandStarted: false, storageClassCreated: false, }, @@ -251,7 +259,7 @@ vsphere_csi_driver_error{condition="upgrade_blocked",failure_reason="existing_dr }, }, expectedMetrics: `vsphere_csi_driver_error{condition="install_blocked",failure_reason="existing_driver_found"} 1 -vsphere_csi_driver_error{condition="upgrade_blocked",failure_reason="existing_driver_found"} 1`, + vsphere_csi_driver_error{condition="upgrade_blocked",failure_reason="existing_driver_found"} 1`, operandStarted: false, storageClassCreated: false, }, @@ -333,6 +341,75 @@ vsphere_csi_driver_error{condition="upgrade_blocked",failure_reason="existing_dr operandStarted: true, storageClassCreated: true, }, + { + name: "when upgrade is blocked because CSI migration is disabled", + storageCR: testlib.GetStorageOperator(opv1.LegacyDeprecatedInTreeDriver), + clusterCSIDriverObject: testlib.MakeFakeDriverInstance(), + vcenterVersion: "7.0.2", + hostVersion: "7.0.2", + startingNodeHardwareVersions: []string{"vmx-15", "vmx-15"}, + initialObjects: []runtime.Object{testlib.GetConfigMap(), testlib.GetSecret()}, + configObjects: runtime.Object(testlib.GetInfraObject()), + expectedConditions: []opv1.OperatorCondition{ + { + Type: testControllerName + opv1.OperatorStatusTypeAvailable, + Status: opv1.ConditionTrue, + }, + { + Type: testControllerName + opv1.OperatorStatusTypeUpgradeable, + Status: opv1.ConditionFalse, + }, + }, + expectedMetrics: `vsphere_csi_driver_error{condition="upgrade_blocked",failure_reason="csi_migration_disabled"} 1`, + operandStarted: true, + storageClassCreated: true, + }, + { + name: "when upgrade is blocked via admin-ack gate", + storageCR: testlib.GetStorageOperator(""), + clusterCSIDriverObject: testlib.MakeFakeDriverInstance(), + vcenterVersion: "7.0.2", + hostVersion: "7.0.2", + startingNodeHardwareVersions: []string{"vmx-15", "vmx-15"}, + initialObjects: []runtime.Object{testlib.GetConfigMap(), testlib.GetSecret(), testlib.GetIntreePV("test-123"), testlib.GetAdminGateConfigMap(false)}, + configObjects: runtime.Object(testlib.GetInfraObject()), + expectedConditions: []opv1.OperatorCondition{ + { + Type: testControllerName + opv1.OperatorStatusTypeAvailable, + Status: opv1.ConditionTrue, + }, + { + Type: testControllerName + opv1.OperatorStatusTypeUpgradeable, + Status: opv1.ConditionTrue, + }, + }, + expectedAdminGateConfigKey: migrationAck413, + operandStarted: true, + storageClassCreated: true, + }, + { + name: "should remove admin-ack key if cluster meets requirement", + storageCR: testlib.GetStorageOperator(""), + clusterCSIDriverObject: testlib.MakeFakeDriverInstance(), + vcenterVersion: "7.0.3", + hostVersion: "7.0.3", + build: "21424296", + startingNodeHardwareVersions: []string{"vmx-15", "vmx-15"}, + initialObjects: []runtime.Object{testlib.GetConfigMap(), testlib.GetSecret(), testlib.GetAdminGateConfigMap(true)}, + configObjects: runtime.Object(testlib.GetInfraObject()), + expectedConditions: []opv1.OperatorCondition{ + { + Type: testControllerName + opv1.OperatorStatusTypeAvailable, + Status: opv1.ConditionTrue, + }, + { + Type: testControllerName + opv1.OperatorStatusTypeUpgradeable, + Status: opv1.ConditionTrue, + }, + }, + operandStarted: true, + storageClassCreated: true, + }, } for i := range tests { @@ -373,7 +450,7 @@ vsphere_csi_driver_error{condition="upgrade_blocked",failure_reason="existing_dr var connError error conn, cleanUpFunc, connError = setupSimulator(defaultModel) if 
test.vcenterVersion != "" { - CustomizeVCenterVersion(test.vcenterVersion, test.vcenterVersion, "", conn) + CustomizeVCenterVersion(test.vcenterVersion, test.vcenterVersion, test.build, conn) } ctrl.vsphereConnectionFunc = makeVsphereConnectionFunc(conn, test.failVCenterConnection, connError) defer func() { @@ -440,6 +517,23 @@ vsphere_csi_driver_error{condition="upgrade_blocked",failure_reason="existing_dr t.Fatalf("expected operandStarted to be %v, got %v", test.operandStarted, ctrl.operandControllerStarted) } + cmap, err := ctrl.managedConfigMapLister.ConfigMaps(managedConfigNamespace).Get(adminGateConfigMap) + if err != nil { + if !apierrors.IsNotFound(err) { + t.Errorf("error getting admin-gate configmap: %v", err) + } + } + + if test.expectedAdminGateConfigKey != "" { + if _, ok := cmap.Data[test.expectedAdminGateConfigKey]; !ok { + t.Errorf("expected key %s, found none", test.expectedAdminGateConfigKey) + } + } else if cmap != nil { + if _, ok := cmap.Data[migrationAck413]; ok { + t.Errorf("unexpected key %s", migrationAck413) + } + } + if test.expectedMetrics != "" { if err := testutil.CollectAndCompare(utils.InstallErrorMetric, strings.NewReader(metricsHeader+test.expectedMetrics+"\n"), utils.InstallErrorMetric.Name); err != nil { t.Errorf("wrong metrics: %s", err) From 7e3134ac5c493a878c725b88166ff39549b67042 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 19 Sep 2023 12:57:07 -0400 Subject: [PATCH 4/7] Add last known release for vSphere 8 Also address other review comments --- .../vspherecontroller/checks/check_patched_vcenter.go | 6 +++--- .../checks/check_patched_vcenter_test.go | 8 ++++---- pkg/operator/vspherecontroller/vspherecontroller.go | 11 +++++------ 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go b/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go index 1f0b22329..c2614beac 100644 --- a/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go +++ b/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go @@ -14,8 +14,8 @@ const ( minBuildNumber7Series = 21424296 // TODO: Fix with real version information - minPatchedVersion8Series = "8.0.2" - minBuildNumber8Series = 999 + minPatchedVersion8Series = "8.0.1" + minBuildNumber8Series = 22088125 ) type PatchedVcenterChecker struct{} @@ -65,7 +65,7 @@ func checkForMinimumPatchedVersion(vSphereVersion string, build string) (bool, e return true, fmt.Errorf("error converting build number %s to integer", build) } - if buildNumber >= minBuildNumber8Series { + if buildNumber > minBuildNumber8Series { return true, nil } diff --git a/pkg/operator/vspherecontroller/checks/check_patched_vcenter_test.go b/pkg/operator/vspherecontroller/checks/check_patched_vcenter_test.go index 034be45f2..cf99b7d8f 100644 --- a/pkg/operator/vspherecontroller/checks/check_patched_vcenter_test.go +++ b/pkg/operator/vspherecontroller/checks/check_patched_vcenter_test.go @@ -18,7 +18,7 @@ func TestCheckForMinimumPatchedVersion(t *testing.T) { { name: "when vSphere version meets minimum 8 series requirement", vSphereVersion: "8.0.2", - buildNumber: "1000", + buildNumber: "22088126", meetsRequirement: true, }, { @@ -29,14 +29,14 @@ func TestCheckForMinimumPatchedVersion(t *testing.T) { }, { name: "when vSphere version does not meet 8 series build number requirement", - vSphereVersion: "8.0.2", + vSphereVersion: "8.0.1", buildNumber: "998", meetsRequirement: false, }, { name: "when vSphere version does not meet 8 series version requirement", - 
vSphereVersion: "8.0.1", - buildNumber: "1000", + vSphereVersion: "8.0.0", + buildNumber: "22088127", meetsRequirement: false, }, { diff --git a/pkg/operator/vspherecontroller/vspherecontroller.go b/pkg/operator/vspherecontroller/vspherecontroller.go index 20b9c39c3..e1889c70e 100644 --- a/pkg/operator/vspherecontroller/vspherecontroller.go +++ b/pkg/operator/vspherecontroller/vspherecontroller.go @@ -298,7 +298,7 @@ func (c *VSphereController) installCSIDriver(ctx context.Context, syncContext fa // might be false even though cluster overall is not upgradeable. In which we should not remove adminAck change // which might have been installed previously. if !blockUpgradeViaAdminAck && !blockUpgrade { - err := c.removeAdminAck(ctx, c.name) + err := c.removeAdminAck(ctx) if err != nil { return blockCSIDriverInstall, err } @@ -360,7 +360,7 @@ func (c *VSphereController) blockUpgradeOrDegradeCluster( rt := string(result.CheckStatus) klog.Infof("check status is: %s", rt) utils.InstallErrorMetric.WithLabelValues(rt, clusterCondition).Set(1) - updateError := c.addRequiresAdminAck(ctx, result, c.name) + updateError := c.addRequiresAdminAck(ctx, result) return updateError, false, false, true } return nil, false, false, false @@ -508,7 +508,7 @@ func (c *VSphereController) updateConditions( return nil } -func (c *VSphereController) addRequiresAdminAck(ctx context.Context, lastCheckResult checks.ClusterCheckResult, name string) error { +func (c *VSphereController) addRequiresAdminAck(ctx context.Context, lastCheckResult checks.ClusterCheckResult) error { adminGate, err := c.managedConfigMapLister.ConfigMaps(managedConfigNamespace).Get(adminGateConfigMap) if err != nil { return fmt.Errorf("failed to get admin-gate configmap: %v", err) @@ -525,7 +525,7 @@ func (c *VSphereController) addRequiresAdminAck(ctx context.Context, lastCheckRe return err } -func (c *VSphereController) removeAdminAck(ctx context.Context, name string) error { +func (c *VSphereController) removeAdminAck(ctx context.Context) error { adminGate, err := c.managedConfigMapLister.ConfigMaps(managedConfigNamespace).Get(adminGateConfigMap) if err != nil { if apierrors.IsNotFound(err) { @@ -534,13 +534,12 @@ func (c *VSphereController) removeAdminAck(ctx context.Context, name string) err return fmt.Errorf("failed to get admin-gate configmap: %v", err) } - klog.V(2).Infof("removing admin-gates that is required for CSI migration") - _, ok := adminGate.Data[migrationAck413] // nothing needs to be done if key doesn't exist if !ok { return nil } + klog.V(2).Infof("removing admin-gates that is required for CSI migration") delete(adminGate.Data, migrationAck413) _, _, err = resourceapply.ApplyConfigMap(ctx, c.kubeClient.CoreV1(), c.eventRecorder, adminGate) From eb414fa2c25c9acfa190a40888a866fb8a81548c Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Fri, 22 Sep 2023 12:23:47 -0400 Subject: [PATCH 5/7] Make fixes for vSphere 4.12 --- .../vspherecontroller/checks/api_interface.go | 5 ---- .../vspherecontroller/checks/check_error.go | 15 ----------- .../checks/check_error_test.go | 19 ++------------ .../vspherecontroller/vspherecontroller.go | 1 - .../vspherecontroller_test.go | 26 ------------------- 5 files changed, 2 insertions(+), 64 deletions(-) diff --git a/pkg/operator/vspherecontroller/checks/api_interface.go b/pkg/operator/vspherecontroller/checks/api_interface.go index 7cec0f10b..fd2d0b296 100644 --- a/pkg/operator/vspherecontroller/checks/api_interface.go +++ b/pkg/operator/vspherecontroller/checks/api_interface.go @@ -26,7 +26,6 @@ type 
@@ -26,7 +26,6 @@ type KubeAPIInterfaceImpl struct {
 	CSIDriverLister    storagelister.CSIDriverLister
 	StorageClassLister storagelister.StorageClassLister
 	PvLister           corelister.PersistentVolumeLister
-	StorageLister      oplister.StorageLister
 }
 
 func (k *KubeAPIInterfaceImpl) ListNodes() ([]*v1.Node, error) {
@@ -49,10 +48,6 @@ func (k *KubeAPIInterfaceImpl) GetInfrastructure() *ocpv1.Infrastructure {
 	return k.Infrastructure
 }
 
-func (k *KubeAPIInterfaceImpl) GetStorage(name string) (*operatorv1.Storage, error) {
-	return k.StorageLister.Get(name)
-}
-
 func (k *KubeAPIInterfaceImpl) ListPersistentVolumes() ([]*v1.PersistentVolume, error) {
 	return k.PvLister.List(labels.Everything())
 }
diff --git a/pkg/operator/vspherecontroller/checks/check_error.go b/pkg/operator/vspherecontroller/checks/check_error.go
index 48df9deaa..05ff28faa 100644
--- a/pkg/operator/vspherecontroller/checks/check_error.go
+++ b/pkg/operator/vspherecontroller/checks/check_error.go
@@ -4,7 +4,6 @@ import (
 	"fmt"
 	"strings"
 
-	v1 "github.com/openshift/api/operator/v1"
 	"github.com/openshift/vmware-vsphere-csi-driver-operator/pkg/operator/utils"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -203,22 +202,8 @@ func CheckClusterStatus(result ClusterCheckResult, apiDependencies KubeAPIInterf
 // returns true if migration is not enabled in the cluster and cluster is using
 // in-tree vSphere volumes.
 func checkForIntreePluginUse(result ClusterCheckResult, apiDependencies KubeAPIInterface) (ClusterCheckStatus, ClusterCheckResult) {
-	storageCR, err := apiDependencies.GetStorage(storageOperatorName)
-	if err != nil {
-		reason := fmt.Errorf("vsphere csi driver installed failed with %s, unable to verify storage status: %v", result.Reason, err)
-		return ClusterCheckDegrade, MakeClusterDegradedError(CheckStatusOpenshiftAPIError, reason)
-	}
-
 	klog.V(2).Infof("Checking for intree plugin use")
 	allGoodCheckResult := MakeClusterCheckResultPass()
 
-	// migration is enabled and hence we should be fine
-	driverName := storageCR.Spec.VSphereStorageDriver
-	if driverName == v1.CSIWithMigrationDriver {
-		return ClusterCheckAllGood, allGoodCheckResult
-	}
-
 	pvs, err := apiDependencies.ListPersistentVolumes()
 	if err != nil {
 		reason := fmt.Errorf("vsphere csi driver installed failed with %s, unable to list pvs: %v", result.Reason, err)
diff --git a/pkg/operator/vspherecontroller/checks/check_error_test.go b/pkg/operator/vspherecontroller/checks/check_error_test.go
index de3d5e4a6..210854d47 100644
--- a/pkg/operator/vspherecontroller/checks/check_error_test.go
+++ b/pkg/operator/vspherecontroller/checks/check_error_test.go
@@ -4,7 +4,6 @@ import (
 	"fmt"
 	"testing"
 
-	opv1 "github.com/openshift/api/operator/v1"
 	"github.com/openshift/vmware-vsphere-csi-driver-operator/pkg/operator/testlib"
 	"k8s.io/apimachinery/pkg/runtime"
 )
@@ -14,30 +13,19 @@ func TestCheckForIntreePluginUse(t *testing.T) {
 		name                   string
 		initialObjects         []runtime.Object
 		configObjects          runtime.Object
-		storageCR              *opv1.Storage
 		clusterCSIDriverObject *testlib.FakeDriverInstance
 		expectedBlockResult    ClusterCheckStatus
 	}{
 		{
-			name:                   "when csi migration is enabled",
+			name:                   "when intree pvs exist",
 			clusterCSIDriverObject: testlib.MakeFakeDriverInstance(),
-			storageCR:              testlib.GetStorageOperator(opv1.CSIWithMigrationDriver),
-			initialObjects:         []runtime.Object{},
-			configObjects:          runtime.Object(testlib.GetInfraObject()),
-			expectedBlockResult:    ClusterCheckAllGood,
-		},
-		{
-			name:                   "when csi migration is not enabled, but intree pvs exist",
-			clusterCSIDriverObject: testlib.MakeFakeDriverInstance(),
-			storageCR:              testlib.GetStorageOperator(""),
 			initialObjects:         []runtime.Object{testlib.GetIntreePV("intree-pv"), testlib.GetNodeWithInlinePV("foobar", false /*hasinline volume*/)},
 			configObjects:          runtime.Object(testlib.GetInfraObject()),
 			expectedBlockResult:    ClusterCheckUpgradesBlockedViaAdminAck,
 		},
 		{
-			name:                   "when csi migration is not enabled and inline volumes exist",
+			name:                   "when inline volumes exist",
 			clusterCSIDriverObject: testlib.MakeFakeDriverInstance(),
-			storageCR:              testlib.GetStorageOperator(""),
 			initialObjects:         []runtime.Object{testlib.GetNodeWithInlinePV("foobar", true /*hasinline volume*/)},
 			configObjects:          runtime.Object(testlib.GetInfraObject()),
 			expectedBlockResult:    ClusterCheckUpgradesBlockedViaAdminAck,
@@ -45,7 +33,6 @@
 		{
 			name:                   "when csi migration is not enabled but no intree volumes exist",
 			clusterCSIDriverObject: testlib.MakeFakeDriverInstance(),
-			storageCR:              testlib.GetStorageOperator(""),
 			initialObjects:         []runtime.Object{},
 			configObjects:          runtime.Object(testlib.GetInfraObject()),
 			expectedBlockResult:    ClusterCheckAllGood,
@@ -60,7 +47,6 @@
 
 			testlib.AddClusterCSIDriverClient(commonApiClient, clusterCSIDriver)
 			test.initialObjects = append(test.initialObjects, clusterCSIDriver)
-			test.initialObjects = append(test.initialObjects, test.storageCR)
 
 			stopCh := make(chan struct{})
 			defer close(stopCh)
@@ -76,7 +62,6 @@
 				CSINodeLister:   commonApiClient.KubeInformers.InformersFor("").Storage().V1().CSINodes().Lister(),
 				CSIDriverLister: commonApiClient.KubeInformers.InformersFor("").Storage().V1().CSIDrivers().Lister(),
 				NodeLister:      commonApiClient.NodeInformer.Lister(),
-				StorageLister:   commonApiClient.OCPOperatorInformers.Operator().V1().Storages().Lister(),
 				PvLister:        commonApiClient.KubeInformers.InformersFor("").Core().V1().PersistentVolumes().Lister(),
 			}
 
diff --git a/pkg/operator/vspherecontroller/vspherecontroller.go b/pkg/operator/vspherecontroller/vspherecontroller.go
index e1889c70e..d7bd700d6 100644
--- a/pkg/operator/vspherecontroller/vspherecontroller.go
+++ b/pkg/operator/vspherecontroller/vspherecontroller.go
@@ -397,7 +397,6 @@ func (c *VSphereController) getCheckAPIDependency(infra *ocpv1.Infrastructure) c
 		CSINodeLister:   c.csiNodeLister,
 		CSIDriverLister: c.csiDriverLister,
 		NodeLister:      c.nodeLister,
-		StorageLister:   c.storageLister,
 		PvLister:        c.pvLister,
 	}
 	return checkerApiClient
diff --git a/pkg/operator/vspherecontroller/vspherecontroller_test.go b/pkg/operator/vspherecontroller/vspherecontroller_test.go
index 685aa8297..a5741bc80 100644
--- a/pkg/operator/vspherecontroller/vspherecontroller_test.go
+++ b/pkg/operator/vspherecontroller/vspherecontroller_test.go
@@ -37,7 +37,6 @@ func newVsphereController(apiClients *utils.APIClient) *VSphereController {
 	csiDriverLister := kubeInformers.InformersFor("").Storage().V1().CSIDrivers().Lister()
 	csiNodeLister := kubeInformers.InformersFor("").Storage().V1().CSINodes().Lister()
 	nodeLister := apiClients.NodeInformer.Lister()
-	storageLister := apiClients.OCPOperatorInformers.Operator().V1().Storages().Lister()
 	pvLister := kubeInformers.InformersFor("").Core().V1().PersistentVolumes().Lister()
 
 	rc := events.NewInMemoryRecorder(testControllerName)
@@ -341,32 +340,8 @@ func TestSync(t *testing.T) {
 			operandStarted:      true,
 			storageClassCreated: true,
 		},
-		{
-			name:                         "when upgrade is blocked because CSI migration is disabled",
-			storageCR:                    testlib.GetStorageOperator(opv1.LegacyDeprecatedInTreeDriver),
-			clusterCSIDriverObject:       testlib.MakeFakeDriverInstance(),
-			vcenterVersion:               "7.0.2",
-			hostVersion:                  "7.0.2",
-			startingNodeHardwareVersions: []string{"vmx-15", "vmx-15"},
-			initialObjects:               []runtime.Object{testlib.GetConfigMap(), testlib.GetSecret()},
-			configObjects:                runtime.Object(testlib.GetInfraObject()),
-			expectedConditions: []opv1.OperatorCondition{
-				{
-					Type:   testControllerName + opv1.OperatorStatusTypeAvailable,
-					Status: opv1.ConditionTrue,
-				},
-				{
-					Type:   testControllerName + opv1.OperatorStatusTypeUpgradeable,
-					Status: opv1.ConditionFalse,
-				},
-			},
-			expectedMetrics:     `vsphere_csi_driver_error{condition="upgrade_blocked",failure_reason="csi_migration_disabled"} 1`,
-			operandStarted:      true,
-			storageClassCreated: true,
-		},
 		{
 			name:                         "when upgrade is blocked via admin-ack gate",
-			storageCR:                    testlib.GetStorageOperator(""),
 			clusterCSIDriverObject:       testlib.MakeFakeDriverInstance(),
 			vcenterVersion:               "7.0.2",
 			hostVersion:                  "7.0.2",
@@ -386,7 +361,6 @@ func TestSync(t *testing.T) {
 		},
 		{
 			name:                         "should remove admin-ack key if cluster meets requirement",
-			storageCR:                    testlib.GetStorageOperator(""),
 			clusterCSIDriverObject:       testlib.MakeFakeDriverInstance(),
 			vcenterVersion:               "7.0.3",
 			hostVersion:                  "7.0.3",

From 6d282b78dcb4d5af123cf0f94ece47fcca4f7411 Mon Sep 17 00:00:00 2001
From: Hemant Kumar
Date: Mon, 25 Sep 2023 15:44:15 -0400
Subject: [PATCH 6/7] Use message from check for adding to conditions

Also tweak wording for 7 and 8 series
---
 .../vspherecontroller/checks/check_nodes.go   |  4 +--
 .../checks/check_patched_vcenter.go           | 30 ++++++++++---------
 .../checks/check_patched_vcenter_test.go      |  2 +-
 .../vspherecontroller/vspherecontroller.go    |  6 ++++
 .../vspherecontroller_test.go                 | 14 +++++++--
 5 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/pkg/operator/vspherecontroller/checks/check_nodes.go b/pkg/operator/vspherecontroller/checks/check_nodes.go
index a76f0dc0a..e4c469dc8 100644
--- a/pkg/operator/vspherecontroller/checks/check_nodes.go
+++ b/pkg/operator/vspherecontroller/checks/check_nodes.go
@@ -173,13 +173,13 @@ func (n *NodeChecker) checkOnNode(workInfo nodeChannelWorkData) ClusterCheckResu
 
 	klog.V(2).Infof("checking for patched version of ESXi for CSI migration: %s-%s", hostAPIVersion, esxiBuildNumber)
 
-	meetsMigrationPatchVersionRequirement, err := checkForMinimumPatchedVersion(hostAPIVersion, esxiBuildNumber)
+	meetsMigrationPatchVersionRequirement, minVersionString, err := checkForMinimumPatchedVersion(hostAPIVersion, esxiBuildNumber)
 	if err != nil {
 		klog.Errorf("error parsing host version for node %s and host %s: %v", node.Name, hostName, err)
 	}
 
 	if !meetsMigrationPatchVersionRequirement {
-		reason := fmt.Errorf("host %s is on ESXI version %s-%s, which is below minimum required version %s-%d for csi migration", hostName, hostAPIVersion, esxiBuildNumber, minPatchedVersion7Series, minBuildNumber7Series)
+		reason := fmt.Errorf("host %s is on ESXI version %s-%s, minimum required for CSI migration: %s", hostName, hostAPIVersion, esxiBuildNumber, minVersionString)
 		return makeBuggyEnvironmentError(CheckStatusBuggyMigrationPlatform, reason)
 	}
 
diff --git a/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go b/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go
index c2614beac..868dce678 100644
--- a/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go
+++ b/pkg/operator/vspherecontroller/checks/check_patched_vcenter.go
@@ -13,7 +13,6 @@ const (
 	minPatchedVersion7Series = "7.0.3"
 	minBuildNumber7Series = 21424296
 
-	// TODO: Fix with real version information
 	minPatchedVersion8Series = "8.0.1"
 	minBuildNumber8Series    = 22088125
 )
@@ -29,57 +28,60 @@ func (v *PatchedVcenterChecker) Check(ctx context.Context, checkOpts CheckArgs)
 
 	klog.V(2).Infof("checking for patched version of vSphere for CSI migration: %s-%s", vcenterAPIVersion, buildVersion)
 
-	hasMin, err := checkForMinimumPatchedVersion(vcenterAPIVersion, buildVersion)
+	hasMin, minString, err := checkForMinimumPatchedVersion(vcenterAPIVersion, buildVersion)
 	if err != nil {
 		return []ClusterCheckResult{makeDeprecatedEnvironmentError(CheckStatusVcenterAPIError, err)}
 	}
 
 	if !hasMin {
-		reason := fmt.Errorf("found version of vCenter which has bugs related to CSI migration. Minimum required, version: %s, build: %d", minPatchedVersion7Series, minBuildNumber7Series)
+		reason := fmt.Errorf("found version of vCenter which has bugs related to CSI migration. Minimum required, %s", minString)
 		return []ClusterCheckResult{makeBuggyEnvironmentError(CheckStatusBuggyMigrationPlatform, reason)}
 	}
 	return []ClusterCheckResult{}
 }
 
-func checkForMinimumPatchedVersion(vSphereVersion string, build string) (bool, error) {
+func checkForMinimumPatchedVersion(vSphereVersion string, build string) (bool, string, error) {
 	hasMinimumApiVersion, err := isMinimumVersion(minPatchedVersion7Series, vSphereVersion)
+	min7SeriesString := fmt.Sprintf(">= %s-%d", minPatchedVersion7Series, minBuildNumber7Series)
+
 	if err != nil {
-		return true, err
+		return true, min7SeriesString, err
 	}
 
 	if !hasMinimumApiVersion {
-		return false, nil
+		return false, min7SeriesString, nil
 	}
 
 	if strings.HasPrefix(vSphereVersion, "8") {
 		hasMinimumApiVersion, err := isMinimumVersion(minPatchedVersion8Series, vSphereVersion)
+		min8SeriesString := fmt.Sprintf("> %s-%d", minPatchedVersion8Series, minBuildNumber8Series)
 		if err != nil {
-			return true, err
+			return true, min8SeriesString, err
 		}
 
 		if !hasMinimumApiVersion {
-			return false, nil
+			return false, min8SeriesString, nil
 		}
 
 		buildNumber, err := strconv.Atoi(build)
 		if err != nil {
-			return true, fmt.Errorf("error converting build number %s to integer", build)
+			return true, min8SeriesString, fmt.Errorf("error converting build number %s to integer", build)
 		}
 
 		if buildNumber > minBuildNumber8Series {
-			return true, nil
+			return true, min8SeriesString, nil
 		}
-		return false, nil
+		return false, min8SeriesString, nil
 	} else {
 		buildNumber, err := strconv.Atoi(build)
 		if err != nil {
-			return true, fmt.Errorf("error converting build number %s to integer", build)
+			return true, min7SeriesString, fmt.Errorf("error converting build number %s to integer", build)
 		}
 
 		if buildNumber >= minBuildNumber7Series {
-			return true, nil
+			return true, min7SeriesString, nil
 		}
-		return false, nil
+		return false, min7SeriesString, nil
 	}
 }
diff --git a/pkg/operator/vspherecontroller/checks/check_patched_vcenter_test.go b/pkg/operator/vspherecontroller/checks/check_patched_vcenter_test.go
index cf99b7d8f..c10921e75 100644
--- a/pkg/operator/vspherecontroller/checks/check_patched_vcenter_test.go
+++ b/pkg/operator/vspherecontroller/checks/check_patched_vcenter_test.go
@@ -50,7 +50,7 @@ func TestCheckForMinimumPatchedVersion(t *testing.T) {
 	for i := range tests {
 		test := tests[i]
 		t.Run(test.name, func(t *testing.T) {
-			checkFlag, err := checkForMinimumPatchedVersion(test.vSphereVersion, test.buildNumber)
+			checkFlag, _, err := checkForMinimumPatchedVersion(test.vSphereVersion, test.buildNumber)
 			if err != nil {
 				t.Errorf("unexpected error: %v", err)
 			}
diff --git a/pkg/operator/vspherecontroller/vspherecontroller.go b/pkg/operator/vspherecontroller/vspherecontroller.go
index d7bd700d6..5669824e1 100644
--- a/pkg/operator/vspherecontroller/vspherecontroller.go
+++ b/pkg/operator/vspherecontroller/vspherecontroller.go
@@ -559,6 +559,12 @@ func (c *VSphereController) addUpgradeableBlockCondition(
 		Reason: string(lastCheckResult.CheckStatus),
 	}
 
+	// we do not record admin acks in clustercsidriver objects
+	if lastCheckResult.Action == checks.CheckActionRequiresAdminAck {
+		blockUpgradeCondition.Message = "All is well"
+		blockUpgradeCondition.Reason = "AsExpected"
+	}
+
 	oldConditions := status.Conditions
 	for _, condition := range oldConditions {
 		if condition.Type == conditionType {
diff --git a/pkg/operator/vspherecontroller/vspherecontroller_test.go b/pkg/operator/vspherecontroller/vspherecontroller_test.go
index a5741bc80..2cd361f2c 100644
--- a/pkg/operator/vspherecontroller/vspherecontroller_test.go
+++ b/pkg/operator/vspherecontroller/vspherecontroller_test.go
@@ -354,8 +354,10 @@ func TestSync(t *testing.T) {
 					Status: opv1.ConditionTrue,
 				},
 				{
-					Type:   testControllerName + opv1.OperatorStatusTypeUpgradeable,
-					Status: opv1.ConditionTrue,
+					Type:    testControllerName + opv1.OperatorStatusTypeUpgradeable,
+					Status:  opv1.ConditionTrue,
+					Reason:  "AsExpected",
+					Message: "All is well",
 				},
 			},
 			expectedAdminGateConfigKey: migrationAck413,
 			operandStarted:             true,
@@ -485,6 +487,14 @@
 				if matchingCondition.Status != expectedCondition.Status {
 					t.Fatalf("for condition %s: expected status: %v, got: %v", expectedCondition.Type, expectedCondition.Status, matchingCondition.Status)
 				}
+
+				if expectedCondition.Message != "" && expectedCondition.Message != matchingCondition.Message {
+					t.Fatalf("for condition %s: expected message: %v, got: %v", expectedCondition.Type, expectedCondition.Message, matchingCondition.Message)
+				}
+
+				if expectedCondition.Reason != "" && expectedCondition.Reason != matchingCondition.Reason {
+					t.Fatalf("for condition %s: expected reason: %v, got: %v", expectedCondition.Type, expectedCondition.Reason, matchingCondition.Reason)
+				}
 			}
 
 			if test.operandStarted != ctrl.operandControllerStarted {

From 77ab37fc9302525448cd9e4f7140cf5abdb659f2 Mon Sep 17 00:00:00 2001
From: Hemant Kumar
Date: Tue, 26 Sep 2023 12:30:14 -0400
Subject: [PATCH 7/7] Change admin-ack key for upgrading to 4.13

---
 pkg/operator/vspherecontroller/vspherecontroller.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/operator/vspherecontroller/vspherecontroller.go b/pkg/operator/vspherecontroller/vspherecontroller.go
index 5669824e1..6f42eeef8 100644
--- a/pkg/operator/vspherecontroller/vspherecontroller.go
+++ b/pkg/operator/vspherecontroller/vspherecontroller.go
@@ -79,7 +79,7 @@ const (
 	managedConfigNamespace = "openshift-config-managed"
 
 	adminGateConfigMap = "admin-gates"
-	migrationAck413    = "ack-4.13-kube-127-vsphere-migration-in-4.14"
+	migrationAck413    = "ack-4.12-kube-126-vsphere-migration-in-4.14"
 )
 
 type conditionalControllerInterface interface {