Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/gce-pd-csi-driver/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ func handle() {
MetricsManager: metricsManager,
DeviceCache: deviceCache,
EnableDynamicVolumes: *dynamicVolumes,
NodeName: *nodeName,
}
nodeServer = driver.NewNodeServer(gceDriver, mounter, deviceUtils, meta, statter, nsArgs)

Expand Down
1 change: 1 addition & 0 deletions pkg/gce-pd-csi-driver/gce-pd-driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ func NewNodeServer(gceDriver *GCEDriver, mounter *mount.SafeFormatAndMount, devi
metricsManager: args.MetricsManager,
DeviceCache: args.DeviceCache,
EnableDynamicVolumes: args.EnableDynamicVolumes,
nodeName: args.NodeName,
}
}

Expand Down
18 changes: 15 additions & 3 deletions pkg/gce-pd-csi-driver/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type GCENodeServer struct {
EnableDataCache bool
DataCacheEnabledNodePool bool
SysfsPath string
nodeName string

// A map storing all volumes with ongoing operations so that additional operations
// for that same volume (as defined by VolumeID) return an Aborted error
Expand Down Expand Up @@ -103,6 +104,8 @@ type NodeServerArgs struct {
// SysfsPath defaults to "/sys", except if it's a unit test.
SysfsPath string

NodeName string

MetricsManager *metrics.MetricsManager
DeviceCache *linkcache.DeviceCache

Expand Down Expand Up @@ -187,6 +190,15 @@ func (ns *GCENodeServer) WithSerializedFormatAndMount(timeout time.Duration, max
return ns
}

// GetNodeName returns the node name, prioritizing the override value (from Downward API)
// over the metadata service if available.
func (ns *GCENodeServer) GetNodeName() string {
if ns.nodeName != "" {
return ns.nodeName
}
return ns.MetadataService.GetName()
}

func (ns *GCENodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
// Validate Arguments
targetPath := req.GetTargetPath()
Expand Down Expand Up @@ -731,9 +743,9 @@ func (ns *GCENodeServer) NodeGetInfo(ctx context.Context, req *csi.NodeGetInfoRe
Segments: map[string]string{constants.TopologyKeyZone: ns.MetadataService.GetZone()},
}

node, err := k8sclient.GetNodeWithRetry(ctx, ns.MetadataService.GetName())
node, err := k8sclient.GetNodeWithRetry(ctx, ns.GetNodeName())
if err != nil {
klog.Errorf("Failed to get node %s: %v. The error is ignored so that the driver can register", ns.MetadataService.GetName(), err.Error())
klog.Errorf("Failed to get node %s: %v. The error is ignored so that the driver can register", ns.GetNodeName(), err.Error())
}

if ns.EnableDynamicVolumes {
Expand All @@ -747,7 +759,7 @@ func (ns *GCENodeServer) NodeGetInfo(ctx context.Context, req *csi.NodeGetInfoRe
}
}

nodeID := common.CreateNodeID(ns.MetadataService.GetProject(), ns.MetadataService.GetZone(), ns.MetadataService.GetName())
nodeID := common.CreateNodeID(ns.MetadataService.GetProject(), ns.MetadataService.GetZone(), ns.GetNodeName())
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This chanage is causing the following error:
Warning FailedAttachVolume 58m (x6 over 80m) attachdetach-controller AttachVolume.Attach failed for volume "pvc-507bb954-97fa-40aa-98c1-c78e849cac0d" : rpc error: code = InvalidArgument desc = ControllerPublish not permitted on node "projects/ocpstrat-1278/zones/us-central1-c/instances/test-gcp10-wf7zf-worker-c-6kgqn.c.ocpstrat-1278.internal" due to backoff condition Warning FailedAttachVolume 4m57s (x39 over 80m) attachdetach-controller AttachVolume.Attach failed for volume "pvc-507bb954-97fa-40aa-98c1-c78e849cac0d" : rpc error: code = InvalidArgument desc = Failed to get instance: googleapi: Error 400: Invalid value for field 'instance': 'test-gcp10-wf7zf-worker-c-6kgqn.c.ocpstrat-1278.internal'. Must be a match of regex '[a-z](?:[-a-z0-9]{0,61}[a-z0-9])?|[1-9][0-9]{0,19}', invalid

i think for nodeID it should keep getting the name from MetadataService

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is already Get from MetadataService if you check the latest change, there is an additional commit merged.

Copy link
Copy Markdown

@noamasu noamasu Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hey @sunnylovestiramisu ,
are you reffering to d44bea1?
because this is a different code path, the error presented above comes specifically from nodeID.

I've tried the following fix from my forked repo:
https://github.com/noamasu/gcp-compute-persistent-disk-csi-driver/commit/2516027168b73db3060a42bebc4ccf04aff32e68
and it worked for me. i just want to make sure we are not missing something here :)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sunnylovestiramisu I think Noam is talking about a different place for the bug.

The commit d44bea1 fixed the issue in main.go, but Noam is referring another wrong usage of GetNodeName() in node.go at line 762. It should be using the MetadataService.


volumeLimits, err := ns.getVolumeLimits(ctx, node)
if err != nil {
Expand Down
66 changes: 62 additions & 4 deletions pkg/gce-pd-csi-driver/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1855,8 +1855,9 @@ func TestNodeGetInfo(t *testing.T) {
name = "test-node"
)
tests := []struct {
desc string
want *csi.NodeGetInfoResponse
desc string
nodeNameOverride string
want *csi.NodeGetInfoResponse
}{
{
desc: "success",
Expand All @@ -1870,11 +1871,34 @@ func TestNodeGetInfo(t *testing.T) {
},
},
},
{
desc: "success with nodeNameOverride",
nodeNameOverride: "override-node-name",
want: &csi.NodeGetInfoResponse{
NodeId: fmt.Sprintf("projects/test-project/zones/%s/instances/%s", zone, "override-node-name"),
MaxVolumesPerNode: volumeLimitBig,
AccessibleTopology: &csi.Topology{
Segments: map[string]string{
constants.TopologyKeyZone: zone,
},
},
},
},
}
for _, tc := range tests {
t.Run(tc.desc, func(t *testing.T) {
gceDriver := getTestGCEDriver(t)
ns := gceDriver.ns
var ns *GCENodeServer
if tc.nodeNameOverride != "" {
args := &NodeServerArgs{
DeviceCache: linkcache.NewTestDeviceCache(1*time.Minute, linkcache.NewTestNodeWithVolumes([]string{defaultVolumeID})),
NodeName: tc.nodeNameOverride,
}
gceDriver := getTestGCEDriverWithCustomMounter(t, mountmanager.NewFakeSafeMounter(), args)
ns = gceDriver.ns
} else {
gceDriver := getTestGCEDriver(t)
ns = gceDriver.ns
}
req := &csi.NodeGetInfoRequest{}
metadataservice.SetMachineType(machineType)
metadataservice.SetZone(zone)
Expand All @@ -1891,3 +1915,37 @@ func TestNodeGetInfo(t *testing.T) {
})
}
}

func TestGetNodeName(t *testing.T) {
tests := []struct {
desc string
nodeNameOverride string
metadataName string
want string
}{
{
desc: "returns metadata service name when override is empty",
metadataName: "metadata-node",
want: "metadata-node",
},
{
desc: "returns override when set",
nodeNameOverride: "override-node",
metadataName: "metadata-node",
want: "override-node",
},
}
for _, tc := range tests {
t.Run(tc.desc, func(t *testing.T) {
args := &NodeServerArgs{NodeName: tc.nodeNameOverride}
gceDriver := getTestGCEDriverWithCustomMounter(t, mountmanager.NewFakeSafeMounter(), args)
ns := gceDriver.ns
metadataservice.SetName(tc.metadataName)

got := ns.GetNodeName()
if got != tc.want {
t.Errorf("GetNodeName() = %q, want %q", got, tc.want)
}
})
}
}