Add e2e tests for HdHA (Hyperdisk Balanced High Availability) and verify that they pass locally.
tonyzhc committed Jan 29, 2025
1 parent 0f2a2cc commit 08bb32f
Showing 4 changed files with 223 additions and 12 deletions.
194 changes: 194 additions & 0 deletions test/e2e/tests/multi_zone_e2e_test.go
@@ -1028,6 +1028,200 @@ var _ = Describe("GCE PD CSI Driver Multi-Zone", func() {
Expect(err).To(BeNil(), "failed read in zone 1")
Expect(strings.TrimSpace(string(readContents))).To(BeIdenticalTo(testFileContents), "content mismatch in zone 1")
})

It("Should successfully run through entire lifecycle of a HdHA volume on instances in 2 zones", func() {
// Gather test contexts for instances in two distinct zones.

Expect(hyperdiskTestContexts).NotTo(BeEmpty())

zoneToContext := map[string]*remote.TestContext{}
zones := []string{}

for _, tc := range hyperdiskTestContexts {
_, z, _ := tc.Instance.GetIdentity()
// Zone hasn't been seen before
if _, ok := zoneToContext[z]; !ok {
zoneToContext[z] = tc
zones = append(zones, z)
}
if len(zoneToContext) == 2 {
break
}
}

Expect(len(zoneToContext)).To(Equal(2), "Must have instances in 2 zones")

controllerContext := zoneToContext[zones[0]]
controllerClient := controllerContext.Client
controllerInstance := controllerContext.Instance

p, _, _ := controllerInstance.GetIdentity()
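// GetIdentity returns project, zone and instance name; only the project is needed for the RegionDisks lookup below.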

region, err := common.GetRegionFromZones(zones)
Expect(err).To(BeNil(), "Failed to get region from zones")

// Create Disk
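// The requisite topology lists both zones, so the driver provisions the HdHA disk as a regional disk replicated across them.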
volName := testNamePrefix + string(uuid.NewUUID())
volume, err := controllerClient.CreateVolume(volName, map[string]string{
common.ParameterKeyType: common.ParameterHdHADiskType,
}, defaultRepdSizeGb, &csi.TopologyRequirement{
Requisite: []*csi.Topology{
{
Segments: map[string]string{common.TopologyKeyZone: zones[0]},
},
{
Segments: map[string]string{common.TopologyKeyZone: zones[1]},
},
},
}, nil)
Expect(err).To(BeNil(), "CreateVolume failed with error: %v", err)

// Validate Disk Created
cloudDisk, err := computeService.RegionDisks.Get(p, region, volName).Do()
Expect(err).To(BeNil(), "Could not get disk from cloud directly")
Expect(cloudDisk.Type).To(ContainSubstring(hdhaDiskType))
Expect(cloudDisk.Status).To(Equal(readyState))
Expect(cloudDisk.SizeGb).To(Equal(defaultRepdSizeGb))
Expect(cloudDisk.Name).To(Equal(volName))
Expect(len(cloudDisk.ReplicaZones)).To(Equal(2))
zonesSet := sets.NewString(zones...)
for _, replicaZone := range cloudDisk.ReplicaZones {
tokens := strings.Split(replicaZone, "/")
actualZone := tokens[len(tokens)-1]
Expect(zonesSet.Has(actualZone)).To(BeTrue(), "Expected zone %v to exist in zone set %v", actualZone, zones)
}

defer func() {
// Delete Disk
err := controllerClient.DeleteVolume(volume.VolumeId)
Expect(err).To(BeNil(), "DeleteVolume failed")

// Validate Disk Deleted
_, err = computeService.RegionDisks.Get(p, region, volName).Do()
Expect(gce.IsGCEError(err, "notFound")).To(BeTrue(), "Expected disk to not be found")
}()

// Run the full attach/write/read/detach lifecycle from each of the two instances.
for _, testContext := range zoneToContext {
err = testAttachWriteReadDetach(volume.VolumeId, volName, testContext.Instance, testContext.Client, false)
Expect(err).To(BeNil(), "failed volume lifecycle checks")
}
})

It("Should create a HdHA instance, write to it, force-attach it to another instance, and read the same data", func() {
Expect(hyperdiskTestContexts).NotTo(BeEmpty())

zoneToContext := map[string]*remote.TestContext{}
zones := []string{}

for _, tc := range hyperdiskTestContexts {
_, z, _ := tc.Instance.GetIdentity()
// Zone hasn't been seen before
if _, ok := zoneToContext[z]; !ok {
zoneToContext[z] = tc
zones = append(zones, z)
}
if len(zoneToContext) == 2 {
break
}
}

Expect(len(zoneToContext)).To(Equal(2), "Must have instances in 2 zones")

controllerContext := zoneToContext[zones[0]]
controllerClient := controllerContext.Client
controllerInstance := controllerContext.Instance

p, _, _ := controllerInstance.GetIdentity()

region, err := common.GetRegionFromZones(zones)
Expect(err).To(BeNil(), "Failed to get region from zones")

// Create Disk
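// ParameterRegionalHardFailoverClass selects the regional hard-failover availability class; the driver is expected to surface it as force-attach=true in the volume context (checked below).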
volName := testNamePrefix + string(uuid.NewUUID())
volume, err := controllerClient.CreateVolume(volName, map[string]string{
common.ParameterKeyType: common.ParameterHdHADiskType,
common.ParameterAvailabilityClass: common.ParameterRegionalHardFailoverClass,
}, defaultRepdSizeGb, &csi.TopologyRequirement{
Requisite: []*csi.Topology{
{
Segments: map[string]string{common.TopologyKeyZone: zones[0]},
},
{
Segments: map[string]string{common.TopologyKeyZone: zones[1]},
},
},
}, nil)
Expect(err).To(BeNil(), "CreateVolume failed with error: %v", err)

// Validate Disk Created
cloudDisk, err := computeService.RegionDisks.Get(p, region, volName).Do()
Expect(err).To(BeNil(), "Could not get disk from cloud directly")
Expect(cloudDisk.Type).To(ContainSubstring(hdhaDiskType))
Expect(cloudDisk.Status).To(Equal(readyState))
Expect(cloudDisk.SizeGb).To(Equal(defaultRepdSizeGb))
Expect(cloudDisk.Name).To(Equal(volName))
Expect(len(cloudDisk.ReplicaZones)).To(Equal(2))
zonesSet := sets.NewString(zones...)
for _, replicaZone := range cloudDisk.ReplicaZones {
tokens := strings.Split(replicaZone, "/")
actualZone := tokens[len(tokens)-1]
Expect(zonesSet.Has(actualZone)).To(BeTrue(), "Expected zone %v to exist in zone set %v", actualZone, zones)
}
Expect(volume.VolumeContext).To(HaveKeyWithValue("force-attach", "true"))

detachers := []detacherFunc{}

defer func() {
// Perform any detaches
for _, fn := range detachers {
fn()
}

// Delete Disk
err := controllerClient.DeleteVolume(volume.VolumeId)
Expect(err).To(BeNil(), "DeleteVolume failed")

// Validate Disk Deleted
_, err = computeService.RegionDisks.Get(p, region, volName).Do()
Expect(gce.IsGCEError(err, "notFound")).To(BeTrue(), "Expected disk to not be found")
}()

// Attach disk to instance in the first zone.
tc0 := zoneToContext[zones[0]]
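// testAttachAndMount returns a detacher that is registered for the deferred cleanup above, so the volume is detached even if a later assertion fails.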
err, detacher, args := testAttachAndMount(volume.VolumeId, volName, tc0.Instance, tc0.Client, attachAndMountArgs{
readOnly: false,
useBlock: false,
forceAttach: false,
})
detachers = append(detachers, detacher)
Expect(err).To(BeNil(), "failed attach in zone 0")
testFileName := filepath.Join(args.publishDir, "force-attach-test")
testFileContents := "force attach test"
err = testutils.WriteFile(tc0.Instance, testFileName, testFileContents)
Expect(err).To(BeNil(), "failed write in zone 0")
_, err = tc0.Instance.SSH("sync") // Sync so force detach doesn't lose data.
Expect(err).To(BeNil(), "failed sync")

readContents, err := testutils.ReadFile(tc0.Instance, testFileName)
Expect(err).To(BeNil(), "failed read in zone 0")
Expect(strings.TrimSpace(string(readContents))).To(BeIdenticalTo(testFileContents), "content mismatch in zone 0")

// Now force attach to the second instance without detaching.
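// Force attach lets the second instance take the disk without a clean detach from the first; the sync above keeps the written data readable here.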
tc1 := zoneToContext[zones[1]]
err, detacher, _ = testAttachAndMount(volume.VolumeId, volName, tc1.Instance, tc1.Client, attachAndMountArgs{
readOnly: false,
useBlock: false,
forceAttach: true,
})
detachers = append(detachers, detacher)
Expect(err).To(BeNil(), "failed force attach in zone 1")
readContents, err = testutils.ReadFile(tc1.Instance, testFileName)
Expect(err).To(BeNil(), "failed read in zone 1")
Expect(strings.TrimSpace(string(readContents))).To(BeIdenticalTo(testFileContents), "content mismatch in zone 1")
})
})

func deleteDisk(controllerClient *remote.CsiClient, p, zone, volID, volName string) {
38 changes: 27 additions & 11 deletions test/e2e/tests/setup_e2e_test.go
@@ -49,12 +49,15 @@ var (
cloudtopHost = flag.Bool("cloudtop-host", false, "The local host is cloudtop, a kind of googler machine with special requirements to access GCP")
extraDriverFlags = flag.String("extra-driver-flags", "", "Extra flags to pass to the driver")
enableConfidentialCompute = flag.Bool("enable-confidential-compute", false, "Create VMs with confidential compute mode. This uses NVMe devices")

testContexts = []*remote.TestContext{}
computeService *compute.Service
computeAlphaService *computealpha.Service
computeBetaService *computebeta.Service
kmsClient *cloudkms.KeyManagementClient
hdMachineType = flag.String("hyperdisk-machine-type", "c3-standard-4", "Type of machine to provision instance on")
hdMinCpuPlatform = flag.String("hyperdisk-min-cpu-platform", "sapphirerapids", "Minimum CPU architecture")

testContexts = []*remote.TestContext{}
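// hyperdiskTestContexts holds one context per zone for the instances provisioned with the Hyperdisk machine-type flags above.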
hyperdiskTestContexts = []*remote.TestContext{}
computeService *compute.Service
computeAlphaService *computealpha.Service
computeBetaService *computebeta.Service
kmsClient *cloudkms.KeyManagementClient
)

func init() {
@@ -70,7 +73,9 @@ func TestE2E(t *testing.T) {
var _ = BeforeSuite(func() {
var err error
tcc := make(chan *remote.TestContext)
hdtcc := make(chan *remote.TestContext)
defer close(tcc)
defer close(hdtcc)

zones := strings.Split(*zones, ",")

@@ -101,14 +106,21 @@ var _ = BeforeSuite(func() {
for _, zone := range zones {
go func(curZone string) {
defer GinkgoRecover()
tcc <- NewTestContext(curZone)
tcc <- NewDefaultTestContext(curZone)
}(zone)
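// Bring up a second instance in each zone on the Hyperdisk machine type for the HdHA tests.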
go func(curZone string) {
defer GinkgoRecover()
hdtcc <- NewTestContext(curZone, *hdMinCpuPlatform, *hdMachineType)
}(zone)
}

for i := 0; i < len(zones); i++ {
tc := <-tcc
testContexts = append(testContexts, tc)
klog.Infof("Added TestContext for node %s", tc.Instance.GetName())
tc = <-hdtcc
hyperdiskTestContexts = append(hyperdiskTestContexts, tc)
klog.Infof("Added TestContext for node %s", tc.Instance.GetName())
}
})

@@ -133,17 +145,21 @@ func getDriverConfig() testutils.DriverConfig {
}
}

func NewTestContext(zone string) *remote.TestContext {
nodeID := fmt.Sprintf("%s-%s", *vmNamePrefix, zone)
func NewDefaultTestContext(zone string) *remote.TestContext {
return NewTestContext(zone, *minCpuPlatform, *machineType)
}

func NewTestContext(zone, minCpuPlatform, machineType string) *remote.TestContext {
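// The machine type is part of the node name so the default and Hyperdisk instances in the same zone get distinct names.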
nodeID := fmt.Sprintf("%s-%s-%s", *vmNamePrefix, zone, machineType)
klog.Infof("Setting up node %s", nodeID)

instanceConfig := remote.InstanceConfig{
Project: *project,
Architecture: *architecture,
MinCpuPlatform: *minCpuPlatform,
MinCpuPlatform: minCpuPlatform,
Zone: zone,
Name: nodeID,
MachineType: *machineType,
MachineType: machineType,
ServiceAccount: *serviceAccount,
ImageURL: *imageURL,
CloudtopHost: *cloudtopHost,
1 change: 1 addition & 0 deletions test/e2e/tests/single_zone_e2e_test.go
@@ -64,6 +64,7 @@ const (
hdxDiskType = "hyperdisk-extreme"
hdtDiskType = "hyperdisk-throughput"
hdmlDiskType = "hyperdisk-ml"
hdhaDiskType = "hyperdisk-balanced-high-availability"
provisionedIOPSOnCreate = "12345"
provisionedIOPSOnCreateInt = int64(12345)
provisionedIOPSOnCreateDefaultInt = int64(100000)
2 changes: 1 addition & 1 deletion test/run-e2e-local.sh
@@ -16,4 +16,4 @@ if hostname | grep -q c.googlers.com ; then
CLOUDTOP_HOST=--cloudtop-host
fi

ginkgo --v "test/e2e/tests" -- --project "${PROJECT}" --service-account "${IAM_NAME}" "${CLOUDTOP_HOST}" --v=6 --logtostderr $@
ginkgo --focus "HdHA" --v "test/e2e/tests" -- --project "${PROJECT}" --service-account "${IAM_NAME}" "${CLOUDTOP_HOST}" --v=6 --logtostderr $@
