From 08bb32f2decc8daab1b5aa886e94ee5dcb6fefbc Mon Sep 17 00:00:00 2001
From: Hanchi Zhang
Date: Wed, 29 Jan 2025 18:56:19 +0000
Subject: [PATCH] Add e2e tests for HdHA and verify that they pass locally.

---
 test/e2e/tests/multi_zone_e2e_test.go  | 194 +++++++++++++++++++++++++
 test/e2e/tests/setup_e2e_test.go       |  38 +++--
 test/e2e/tests/single_zone_e2e_test.go |   1 +
 test/run-e2e-local.sh                  |   2 +-
 4 files changed, 223 insertions(+), 12 deletions(-)

diff --git a/test/e2e/tests/multi_zone_e2e_test.go b/test/e2e/tests/multi_zone_e2e_test.go
index fb1ea32dd..492cb7cac 100644
--- a/test/e2e/tests/multi_zone_e2e_test.go
+++ b/test/e2e/tests/multi_zone_e2e_test.go
@@ -1028,6 +1028,200 @@ var _ = Describe("GCE PD CSI Driver Multi-Zone", func() {
 		Expect(err).To(BeNil(), "failed read in zone 1")
 		Expect(strings.TrimSpace(string(readContents))).To(BeIdenticalTo(testFileContents), "content mismatch in zone 1")
 	})
+
+	It("Should successfully run through entire lifecycle of an HdHA volume on instances in 2 zones", func() {
+		// Create new driver and client
+
+		Expect(hyperdiskTestContexts).NotTo(BeEmpty())
+
+		zoneToContext := map[string]*remote.TestContext{}
+		zones := []string{}
+
+		for _, tc := range hyperdiskTestContexts {
+			_, z, _ := tc.Instance.GetIdentity()
+			// Zone hasn't been seen before
+			if _, ok := zoneToContext[z]; !ok {
+				zoneToContext[z] = tc
+				zones = append(zones, z)
+			}
+			if len(zoneToContext) == 2 {
+				break
+			}
+		}
+
+		Expect(len(zoneToContext)).To(Equal(2), "Must have instances in 2 zones")
+
+		controllerContext := zoneToContext[zones[0]]
+		controllerClient := controllerContext.Client
+		controllerInstance := controllerContext.Instance
+
+		p, _, _ := controllerInstance.GetIdentity()
+
+		region, err := common.GetRegionFromZones(zones)
+		Expect(err).To(BeNil(), "Failed to get region from zones")
+
+		// Create Disk
+		volName := testNamePrefix + string(uuid.NewUUID())
+		volume, err := controllerClient.CreateVolume(volName, map[string]string{
+			common.ParameterKeyType: common.ParameterHdHADiskType,
+		}, defaultRepdSizeGb, &csi.TopologyRequirement{
+			Requisite: []*csi.Topology{
+				{
+					Segments: map[string]string{common.TopologyKeyZone: zones[0]},
+				},
+				{
+					Segments: map[string]string{common.TopologyKeyZone: zones[1]},
+				},
+			},
+		}, nil)
+		Expect(err).To(BeNil(), "CreateVolume failed with error: %v", err)
+
+		// Validate Disk Created
+		cloudDisk, err := computeService.RegionDisks.Get(p, region, volName).Do()
+		Expect(err).To(BeNil(), "Could not get disk from cloud directly")
+		Expect(cloudDisk.Type).To(ContainSubstring(hdhaDiskType))
+		Expect(cloudDisk.Status).To(Equal(readyState))
+		Expect(cloudDisk.SizeGb).To(Equal(defaultRepdSizeGb))
+		Expect(cloudDisk.Name).To(Equal(volName))
+		Expect(len(cloudDisk.ReplicaZones)).To(Equal(2))
+		zonesSet := sets.NewString(zones...)
+		for _, replicaZone := range cloudDisk.ReplicaZones {
+			tokens := strings.Split(replicaZone, "/")
+			actualZone := tokens[len(tokens)-1]
+			Expect(zonesSet.Has(actualZone)).To(BeTrue(), "Expected zone %v to exist in zone set %v", actualZone, zones)
+		}
+
+		defer func() {
+			// Delete Disk
+			err = controllerClient.DeleteVolume(volume.VolumeId)
+			Expect(err).To(BeNil(), "DeleteVolume failed")
+
+			// Validate Disk Deleted
+			_, err = computeService.RegionDisks.Get(p, region, volName).Do()
+			Expect(gce.IsGCEError(err, "notFound")).To(BeTrue(), "Expected disk to not be found")
+		}()
+
+		// For each of the two instances
+		i := 0
+		for _, testContext := range zoneToContext {
+			err = testAttachWriteReadDetach(volume.VolumeId, volName, testContext.Instance, testContext.Client, false)
+			Expect(err).To(BeNil(), "failed volume lifecycle checks")
+			i = i + 1
+		}
+	})
+
+	It("Should create an HdHA volume, write to it, force-attach it to another instance, and read the same data", func() {
+		Expect(hyperdiskTestContexts).NotTo(BeEmpty())
+
+		zoneToContext := map[string]*remote.TestContext{}
+		zones := []string{}
+
+		for _, tc := range hyperdiskTestContexts {
+			_, z, _ := tc.Instance.GetIdentity()
+			// Zone hasn't been seen before
+			if _, ok := zoneToContext[z]; !ok {
+				zoneToContext[z] = tc
+				zones = append(zones, z)
+			}
+			if len(zoneToContext) == 2 {
+				break
+			}
+		}
+
+		Expect(len(zoneToContext)).To(Equal(2), "Must have instances in 2 zones")
+
+		controllerContext := zoneToContext[zones[0]]
+		controllerClient := controllerContext.Client
+		controllerInstance := controllerContext.Instance
+
+		p, _, _ := controllerInstance.GetIdentity()
+
+		region, err := common.GetRegionFromZones(zones)
+		Expect(err).To(BeNil(), "Failed to get region from zones")
+
+		// Create Disk
+		volName := testNamePrefix + string(uuid.NewUUID())
+		volume, err := controllerClient.CreateVolume(volName, map[string]string{
+			common.ParameterKeyType:           common.ParameterHdHADiskType,
+			common.ParameterAvailabilityClass: common.ParameterRegionalHardFailoverClass,
+		}, defaultRepdSizeGb, &csi.TopologyRequirement{
+			Requisite: []*csi.Topology{
+				{
+					Segments: map[string]string{common.TopologyKeyZone: zones[0]},
+				},
+				{
+					Segments: map[string]string{common.TopologyKeyZone: zones[1]},
+				},
+			},
+		}, nil)
+		Expect(err).To(BeNil(), "CreateVolume failed with error: %v", err)
+
+		// Validate Disk Created
+		cloudDisk, err := computeService.RegionDisks.Get(p, region, volName).Do()
+		Expect(err).To(BeNil(), "Could not get disk from cloud directly")
+		Expect(cloudDisk.Type).To(ContainSubstring(hdhaDiskType))
+		Expect(cloudDisk.Status).To(Equal(readyState))
+		Expect(cloudDisk.SizeGb).To(Equal(defaultRepdSizeGb))
+		Expect(cloudDisk.Name).To(Equal(volName))
+		Expect(len(cloudDisk.ReplicaZones)).To(Equal(2))
+		zonesSet := sets.NewString(zones...)
+		for _, replicaZone := range cloudDisk.ReplicaZones {
+			tokens := strings.Split(replicaZone, "/")
+			actualZone := tokens[len(tokens)-1]
+			Expect(zonesSet.Has(actualZone)).To(BeTrue(), "Expected zone %v to exist in zone set %v", actualZone, zones)
+		}
+		Expect(volume.VolumeContext).To(HaveKeyWithValue("force-attach", "true"))
+
+		detachers := []detacherFunc{}
+
+		defer func() {
+			// Perform any detaches
+			for _, fn := range detachers {
+				fn()
+			}
+
+			// Delete Disk
+			err = controllerClient.DeleteVolume(volume.VolumeId)
+			Expect(err).To(BeNil(), "DeleteVolume failed")
+
+			// Validate Disk Deleted
+			_, err = computeService.RegionDisks.Get(p, region, volName).Do()
+			Expect(gce.IsGCEError(err, "notFound")).To(BeTrue(), "Expected disk to not be found")
+		}()
+
+		// Attach disk to instance in the first zone.
+		tc0 := zoneToContext[zones[0]]
+		err, detacher, args := testAttachAndMount(volume.VolumeId, volName, tc0.Instance, tc0.Client, attachAndMountArgs{
+			readOnly:    false,
+			useBlock:    false,
+			forceAttach: false,
+		})
+		detachers = append(detachers, detacher)
+		Expect(err).To(BeNil(), "failed attach in zone 0")
+		testFileName := filepath.Join(args.publishDir, "force-attach-test")
+		testFileContents := "force attach test"
+		err = testutils.WriteFile(tc0.Instance, testFileName, testFileContents)
+		Expect(err).To(BeNil(), "failed write in zone 0")
+		_, err = tc0.Instance.SSH("sync") // Sync so force detach doesn't lose data.
+		Expect(err).To(BeNil(), "failed sync")
+
+		readContents, err := testutils.ReadFile(tc0.Instance, testFileName)
+		Expect(err).To(BeNil(), "failed read in zone 0")
+		Expect(strings.TrimSpace(string(readContents))).To(BeIdenticalTo(testFileContents), "content mismatch in zone 0")
+
+		// Now force attach to the second instance without detaching.
+		tc1 := zoneToContext[zones[1]]
+		err, detacher, _ = testAttachAndMount(volume.VolumeId, volName, tc1.Instance, tc1.Client, attachAndMountArgs{
+			readOnly:    false,
+			useBlock:    false,
+			forceAttach: true,
+		})
+		detachers = append(detachers, detacher)
+		Expect(err).To(BeNil(), "failed force attach in zone 1")
+		readContents, err = testutils.ReadFile(tc1.Instance, testFileName)
+		Expect(err).To(BeNil(), "failed read in zone 1")
+		Expect(strings.TrimSpace(string(readContents))).To(BeIdenticalTo(testFileContents), "content mismatch in zone 1")
+	})
 })
 
 func deleteDisk(controllerClient *remote.CsiClient, p, zone, volID, volName string) {
diff --git a/test/e2e/tests/setup_e2e_test.go b/test/e2e/tests/setup_e2e_test.go
index 2e1c8cf51..ebe1a920a 100644
--- a/test/e2e/tests/setup_e2e_test.go
+++ b/test/e2e/tests/setup_e2e_test.go
@@ -49,12 +49,15 @@ var (
 	cloudtopHost              = flag.Bool("cloudtop-host", false, "The local host is cloudtop, a kind of googler machine with special requirements to access GCP")
 	extraDriverFlags          = flag.String("extra-driver-flags", "", "Extra flags to pass to the driver")
 	enableConfidentialCompute = flag.Bool("enable-confidential-compute", false, "Create VMs with confidential compute mode. This uses NVMe devices")
-
-	testContexts        = []*remote.TestContext{}
-	computeService      *compute.Service
-	computeAlphaService *computealpha.Service
-	computeBetaService  *computebeta.Service
-	kmsClient           *cloudkms.KeyManagementClient
+	hdMachineType             = flag.String("hyperdisk-machine-type", "c3-standard-4", "Type of machine to provision instance on")
+	hdMinCpuPlatform          = flag.String("hyperdisk-min-cpu-platform", "sapphirerapids", "Minimum CPU platform")
+
+	testContexts          = []*remote.TestContext{}
+	hyperdiskTestContexts = []*remote.TestContext{}
+	computeService        *compute.Service
+	computeAlphaService   *computealpha.Service
+	computeBetaService    *computebeta.Service
+	kmsClient             *cloudkms.KeyManagementClient
 )
 
 func init() {
@@ -70,7 +73,9 @@ func TestE2E(t *testing.T) {
 var _ = BeforeSuite(func() {
 	var err error
 	tcc := make(chan *remote.TestContext)
+	hdtcc := make(chan *remote.TestContext)
 	defer close(tcc)
+	defer close(hdtcc)
 
 	zones := strings.Split(*zones, ",")
 
@@ -101,7 +106,11 @@ var _ = BeforeSuite(func() {
 	for _, zone := range zones {
 		go func(curZone string) {
 			defer GinkgoRecover()
-			tcc <- NewTestContext(curZone)
+			tcc <- NewDefaultTestContext(curZone)
+		}(zone)
+		go func(curZone string) {
+			defer GinkgoRecover()
+			hdtcc <- NewTestContext(curZone, *hdMinCpuPlatform, *hdMachineType)
 		}(zone)
 	}
 
@@ -109,6 +118,9 @@
 		tc := <-tcc
 		testContexts = append(testContexts, tc)
 		klog.Infof("Added TestContext for node %s", tc.Instance.GetName())
+		tc = <-hdtcc
+		hyperdiskTestContexts = append(hyperdiskTestContexts, tc)
+		klog.Infof("Added TestContext for node %s", tc.Instance.GetName())
 	}
 })
 
@@ -133,17 +145,21 @@ func getDriverConfig() testutils.DriverConfig {
 	}
 }
 
-func NewTestContext(zone string) *remote.TestContext {
-	nodeID := fmt.Sprintf("%s-%s", *vmNamePrefix, zone)
+func NewDefaultTestContext(zone string) *remote.TestContext {
+	return NewTestContext(zone, *minCpuPlatform, *machineType)
+}
+
+func NewTestContext(zone, minCpuPlatform, machineType string) *remote.TestContext {
+	nodeID := fmt.Sprintf("%s-%s-%s", *vmNamePrefix, zone, machineType)
 	klog.Infof("Setting up node %s", nodeID)
 
 	instanceConfig := remote.InstanceConfig{
 		Project:        *project,
 		Architecture:   *architecture,
-		MinCpuPlatform: *minCpuPlatform,
+		MinCpuPlatform: minCpuPlatform,
 		Zone:           zone,
 		Name:           nodeID,
-		MachineType:    *machineType,
+		MachineType:    machineType,
 		ServiceAccount: *serviceAccount,
 		ImageURL:       *imageURL,
 		CloudtopHost:   *cloudtopHost,
diff --git a/test/e2e/tests/single_zone_e2e_test.go b/test/e2e/tests/single_zone_e2e_test.go
index c77c7b911..da83ad0b5 100644
--- a/test/e2e/tests/single_zone_e2e_test.go
+++ b/test/e2e/tests/single_zone_e2e_test.go
@@ -64,6 +64,7 @@ const (
 	hdxDiskType  = "hyperdisk-extreme"
 	hdtDiskType  = "hyperdisk-throughput"
 	hdmlDiskType = "hyperdisk-ml"
+	hdhaDiskType = "hyperdisk-balanced-high-availability"
 	provisionedIOPSOnCreate           = "12345"
 	provisionedIOPSOnCreateInt        = int64(12345)
 	provisionedIOPSOnCreateDefaultInt = int64(100000)
diff --git a/test/run-e2e-local.sh b/test/run-e2e-local.sh
index aaf9a65d5..c0f25188f 100755
--- a/test/run-e2e-local.sh
+++ b/test/run-e2e-local.sh
@@ -16,4 +16,4 @@ if hostname | grep -q c.googlers.com ; then
   CLOUDTOP_HOST=--cloudtop-host
 fi
 
-ginkgo --v "test/e2e/tests" -- --project "${PROJECT}" --service-account "${IAM_NAME}" "${CLOUDTOP_HOST}" --v=6 --logtostderr $@
+ginkgo --focus "HdHA" --v "test/e2e/tests" -- --project "${PROJECT}" --service-account "${IAM_NAME}" "${CLOUDTOP_HOST}" --v=6 --logtostderr $@
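
How to run the new specs locally: with the --focus "HdHA" change above, test/run-e2e-local.sh runs only the HdHA specs. For reference, a sketch of the equivalent direct ginkgo invocation; the project and service-account values below are placeholders for what the script normally supplies via ${PROJECT} and ${IAM_NAME}:

    ginkgo --focus "HdHA" --v "test/e2e/tests" -- \
      --project "my-gcp-project" \
      --service-account "my-e2e-sa@my-gcp-project.iam.gserviceaccount.com" \
      --v=6 --logtostderr

The --cloudtop-host flag is appended automatically by the script when it detects a c.googlers.com host.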