Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/common/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ func isGoogleAPIError(err error) (codes.Code, error) {
}

func loggedErrorForCode(msg string, code codes.Code, err error) error {
klog.Errorf(msg+"%v", err.Error())
klog.Errorf("%v: %s %v", code, msg, err.Error())
return status.Errorf(code, msg+"%v", err.Error())
}

Expand Down
13 changes: 9 additions & 4 deletions pkg/gce-cloud-provider/compute/gce-compute.go
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ func (cloud *CloudProvider) insertConstructedDisk(ctx context.Context, disk *com
}

if filterErr := cloud.processDiskAlreadyExistErr(ctx, err, project, volKey, params, capacityRange, multiWriter, accessMode); filterErr != nil {
return common.NewTemporaryError(codes.Unavailable, fmt.Errorf("unknown error when polling the operation: %w", err))
return common.NewTemporaryError(codes.Unavailable, fmt.Errorf("unknown error when polling the operation: %w; filter error %v", err, filterErr))
}
return nil
}
Expand Down Expand Up @@ -1063,7 +1063,7 @@ func (cloud *CloudProvider) waitForZonalOp(ctx context.Context, project, opName
return wait.ExponentialBackoff(WaitForOpBackoff, func() (bool, error) {
waitOp, err := cloud.service.ZoneOperations.Wait(project, zone, opName).Context(ctx).Do()
// In case of service unavailable do not propogate the error so ExponentialBackoff will retry
if err != nil && waitOp.HttpErrorStatusCode == 503 {
if err != nil && waitOp != nil && waitOp.HttpErrorStatusCode == 503 {
klog.Errorf("WaitForZonalOp(op: %s, zone: %#v, err: %v) failed to poll the operation", opName, zone, err)
return false, nil
}
Expand All @@ -1080,7 +1080,7 @@ func (cloud *CloudProvider) waitForRegionalOp(ctx context.Context, project, opNa
return wait.ExponentialBackoff(WaitForOpBackoff, func() (bool, error) {
waitOp, err := cloud.service.RegionOperations.Wait(project, region, opName).Context(ctx).Do()
// In case of service unavailable do not propogate the error so ExponentialBackoff will retry
if err != nil && waitOp.HttpErrorStatusCode == 503 {
if err != nil && waitOp != nil && waitOp.HttpErrorStatusCode == 503 {
klog.Errorf("WaitForRegionalOp(op: %s, region: %#v, err: %v) failed to poll the operation", opName, region, err)
return false, nil
}
Expand All @@ -1089,6 +1089,10 @@ func (cloud *CloudProvider) waitForRegionalOp(ctx context.Context, project, opNa
return false, err
}
done, err := opIsDone(waitOp)
if err != nil && status.Code(err) == codes.Unavailable {
klog.Errorf("retriable error in op: %+v", err)
return false, nil
}
return done, err
})
}
Expand All @@ -1097,7 +1101,7 @@ func (cloud *CloudProvider) waitForGlobalOp(ctx context.Context, project, opName
return wait.ExponentialBackoff(WaitForOpBackoff, func() (bool, error) {
waitOp, err := cloud.service.GlobalOperations.Wait(project, opName).Context(ctx).Do()
// In case of service unavailable do not propogate the error so ExponentialBackoff will retry
if err != nil && waitOp.HttpErrorStatusCode == 503 {
if err != nil && waitOp != nil && waitOp.HttpErrorStatusCode == 503 {
klog.Errorf("WaitForGlobalOp(op: %s, err: %v) failed to poll the operation", opName, err)
return false, nil
}
Expand Down Expand Up @@ -1197,6 +1201,7 @@ func codeForGCEOpError(err computev1.OperationErrorErrors) codes.Code {
"QUOTA_EXCEEDED": codes.ResourceExhausted,
"ZONE_RESOURCE_POOL_EXHAUSTED": codes.Unavailable,
"ZONE_RESOURCE_POOL_EXHAUSTED_WITH_DETAILS": codes.Unavailable,
"INTERNAL_ERROR": codes.Unavailable,
"REGION_QUOTA_EXCEEDED": codes.ResourceExhausted,
"RATE_LIMIT_EXCEEDED": codes.ResourceExhausted,
"INVALID_USAGE": codes.InvalidArgument,
Expand Down
2 changes: 1 addition & 1 deletion pkg/gce-pd-csi-driver/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,7 @@ func (gceCS *GCEControllerServer) createSingleDeviceDisk(ctx context.Context, re

disk, err := gceCS.createSingleDisk(ctx, req, params, volKey, zones, accessMode)
if err != nil {
return nil, common.LoggedError("CreateVolume failed: %v", err)
return nil, common.LoggedError("CreateVolume failed: ", err)
}

return gceCS.generateCreateVolumeResponseWithVolumeId(disk, zones, params, dataCacheParams, enableDataCache, volumeID), nil
Expand Down
9 changes: 7 additions & 2 deletions test/e2e/tests/multi_zone_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func checkSkipMultiZoneTests() {
// TODO: Remove this once hyperdisk-ml SKU is supported
// If you want to run these tests, set the env variable: RUN_MULTI_ZONE_TESTS=true
if !runMultiZoneTests() {
Skip("Not running multi-zone tests, as RUN_MULTI_ZONE_TESTS is falsy")
Skip("Not running multi-zone tests without RUN_MULTI_ZONE_TESTS=true")
}
}

Expand Down Expand Up @@ -1235,8 +1235,9 @@ var _ = Describe("GCE PD CSI Driver Multi-Zone", func() {
})

It("Should successfully run through entire lifecycle of a HdHA volume on instances in 2 zones", func() {
// Create new driver and client
Skip("Flaking on GCP errors. Google internal bug: 463743704")

// Create new driver and client
Expect(hyperdiskTestContexts).NotTo(BeEmpty())

zoneToContext := map[string]*remote.TestContext{}
Expand All @@ -1255,10 +1256,12 @@ var _ = Describe("GCE PD CSI Driver Multi-Zone", func() {
}

Expect(len(zoneToContext)).To(Equal(2), "Must have instances in 2 zones")
klog.Infof("Using zones %s and %s", zones[0], zones[1])

controllerContext := zoneToContext[zones[0]]
controllerClient := controllerContext.Client
controllerInstance := controllerContext.Instance
klog.Infof("Using controller instance %v", controllerInstance.GetName())

p, _, _ := controllerInstance.GetIdentity()

Expand Down Expand Up @@ -1321,6 +1324,8 @@ var _ = Describe("GCE PD CSI Driver Multi-Zone", func() {
})

It("Should create a HdHA instance, write to it, force-attach it to another instance, and read the same data", func() {
Skip("Flaking on GCP errors. Google internal bug: 463743704")

Expect(hyperdiskTestContexts).NotTo(BeEmpty())

zoneToContext := map[string]*remote.TestContext{}
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/tests/setup_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ var (
// Multi-writer is only supported on M3, C3, and N4
// https://cloud.google.com/compute/docs/disks/sharing-disks-between-vms#hd-multi-writer
hdMachineType = flag.String("hyperdisk-machine-type", "c3-standard-4", "Type of machine to provision instance on, or `none' to skip")
hdMinCpuPlatform = flag.String("hyperdisk-min-cpu-platform", "sapphirerapids", "Minimum CPU architecture")
hdMinCpuPlatform = flag.String("hyperdisk-min-cpu-platform", "Intel Sapphire Rapids", "Minimum CPU architecture")

// Some architectures don't have local ssd. Give way to opt out of tests like datacache.
skipLocalSsdTests = flag.Bool("skip-local-ssd-tests", false, "Skip local ssd tests like datacache")
Expand Down