Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pkg/common/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,9 @@ var C4AMachineHyperdiskAttachLimitMap = []MachineHyperdiskLimit{
{max: 48, value: 31},
{max: 72, value: 63},
}

// A4XMachineHyperdiskAttachLimitMap holds the Hyperdisk Balanced attach limits
// for A4X machine types. Unlike the other limit maps, max here is the GPU
// count of the machine type (not the vCPU count); value is the attach limit
// associated with that count.
// NOTE(review): GPU counts above the last max presumably resolve to the last
// entry's value (127) — confirm against the lookup helper that consumes this map.
var A4XMachineHyperdiskAttachLimitMap = []MachineHyperdiskLimit{
{max: 1, value: 63},
{max: 2, value: 127},
}
2 changes: 2 additions & 0 deletions pkg/common/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,8 @@ func GetHyperdiskAttachLimit(machineTypePrefix string, vCPUs int64) int64 {
limitMap = N4MachineHyperdiskAttachLimitMap
case "c4a":
limitMap = C4AMachineHyperdiskAttachLimitMap
case "a4x":
limitMap = A4XMachineHyperdiskAttachLimitMap
default:
// Fallback to the most conservative Gen4 map for unknown types
return MapNumber(vCPUs, C4DMachineHyperdiskAttachLimitMap)
Expand Down
34 changes: 28 additions & 6 deletions pkg/gce-pd-csi-driver/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,13 @@ const (
// doc https://cloud.google.com/compute/docs/memory-optimized-machines#x4_disks
x4HyperdiskLimit int64 = 39
// doc https://cloud.google.com/compute/docs/accelerator-optimized-machines#a4-disks
a4HyperdiskLimit int64 = 127
defaultLinuxFsType = "ext4"
defaultWindowsFsType = "ntfs"
fsTypeExt3 = "ext3"
fsTypeBtrfs = "btrfs"
a4HyperdiskLimit int64 = 127
a4xMetalHyperdiskLimit int64 = 31
c3MetalHyperdiskLimit int64 = 15
defaultLinuxFsType = "ext4"
defaultWindowsFsType = "ntfs"
fsTypeExt3 = "ext3"
fsTypeBtrfs = "btrfs"

readAheadKBMountFlagRegexPattern = "^read_ahead_kb=(.+)$"
btrfsReclaimDataRegexPattern = "^btrfs-allocation-data-bg_reclaim_threshold=(\\d{1,2})$" // 0-99 are valid, incl. 00
Expand Down Expand Up @@ -847,7 +849,7 @@ func (ns *GCENodeServer) GetVolumeLimits(ctx context.Context) (int64, error) {
}
}

// Process gen4 machine attach limits
// Process gen4 machine attach limits which include vCPUs in the machine type
gen4MachineTypesPrefix := []string{"c4a-", "c4-", "n4-", "c4d-"}
for _, gen4Prefix := range gen4MachineTypesPrefix {
if strings.HasPrefix(machineType, gen4Prefix) {
Expand All @@ -866,12 +868,32 @@ func (ns *GCENodeServer) GetVolumeLimits(ctx context.Context) (int64, error) {
}
}
}
// Process gen4 A4X machine attach limits. The third dash-separated field of an
// A4X machine type is either "metal" or a GPU count with a 'g' suffix
// (-1g/-2g/-4g); further fields such as "-nolssd" may follow and are ignored.
if strings.HasPrefix(machineType, "a4x-") {
	machineTypeSlice := strings.Split(machineType, "-")
	if len(machineTypeSlice) < 3 {
		return volumeLimitBig, fmt.Errorf("unconventional machine type: %v", machineType)
	}
	gpuString := machineTypeSlice[2]
	if gpuString == "metal" {
		return a4xMetalHyperdiskLimit, nil
	}
	// Validate the 'g' suffix before stripping it. Blindly slicing off the
	// last byte panics on an empty field (e.g. "a4x-highgpu-") and would
	// silently accept any other trailing character (e.g. "2x") as a GPU count.
	if !strings.HasSuffix(gpuString, "g") {
		return volumeLimitBig, fmt.Errorf("invalid gpuString %s for machine type: %v", gpuString, machineType)
	}
	gpuString = strings.TrimSuffix(gpuString, "g")
	gpus, err := strconv.ParseInt(gpuString, 10, 64)
	if err != nil {
		return volumeLimitBig, fmt.Errorf("invalid gpuString %s for machine type: %v", gpuString, machineType)
	}
	// The A4X limit map is keyed by GPU count rather than vCPU count.
	return common.GetHyperdiskAttachLimit("a4x", gpus), nil
}
if strings.HasPrefix(machineType, "x4-") {
return x4HyperdiskLimit, nil
}
if strings.HasPrefix(machineType, "a4-") {
return a4HyperdiskLimit, nil
}
if strings.HasPrefix(machineType, "c3-") && strings.HasSuffix(machineType, "-metal") {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure we can add this here, because C3 VMs still support PD disks, and their attach limit is 128.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"C3-metal only supports Hyperdisk block storage, specifically GCP Hyperdisk Extreme and GCP Hyperdisk Balanced with the NVMe interface."

return c3MetalHyperdiskLimit, nil
}

return volumeLimitBig, nil
}
Expand Down
75 changes: 75 additions & 0 deletions pkg/gce-pd-csi-driver/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,81 @@ func TestNodeGetVolumeLimits(t *testing.T) {
machineType: "c4d-standard-32",
expVolumeLimit: 31,
},
{
name: "c3-highcpu-192-metal",
machineType: "c3-highcpu-192-metal",
expVolumeLimit: c3MetalHyperdiskLimit,
},
{
name: "c3-standard-192-metal",
machineType: "c3-standard-192-metal",
expVolumeLimit: c3MetalHyperdiskLimit,
},
{
name: "c3-highmem-192-metal",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we planning to backport this? Since this is way lower than the 128 limit (for both PD and Hyperdisk), a backport request will be rejected.

Copy link
Copy Markdown
Contributor

@mattcary mattcary Jul 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But metal is new, so this doesn't affect existing workloads? That is to say we should be able to backport this.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Never mind, it does not support PD at all.

machineType: "c3-highmem-192-metal",
expVolumeLimit: c3MetalHyperdiskLimit,
},
{
name: "a4x-highgpu-1g",
machineType: "a4x-highgpu-1g",
expVolumeLimit: 63,
},
{
name: "a4x-highgpu-2g",
machineType: "a4x-highgpu-2g",
expVolumeLimit: 127,
},
{
name: "a4x-highgpu-2g-nolssd",
machineType: "a4x-highgpu-2g-nolssd",
expVolumeLimit: 127,
},
{
name: "a4x-highgpu-4g",
machineType: "a4x-highgpu-4g",
expVolumeLimit: 127,
},
{
name: "a4x-highgpu-8g",
machineType: "a4x-highgpu-8g",
expVolumeLimit: 127,
},
{
name: "a4x-highgpu-metal",
machineType: "a4x-highgpu-metal",
expVolumeLimit: a4xMetalHyperdiskLimit,
},
{
name: "a4x-max-metal",
machineType: "a4x-max-metal",
expVolumeLimit: a4xMetalHyperdiskLimit,
},
{
name: "a4x-max-1g",
machineType: "a4x-max-1g",
expVolumeLimit: 63,
},
{
name: "a4x-max-highgpu-2g",
machineType: "a4x-max-2g",
expVolumeLimit: 127,
},
{
name: "a4x-max-4g",
machineType: "a4x-max-4g",
expVolumeLimit: 127,
},
Comment thread
mattcary marked this conversation as resolved.
{
name: "a4x-max-8g", // -8g does not exist, testing edge case
machineType: "a4x-max-8g",
expVolumeLimit: 127,
},
{
name: "a4x-medgpu-nolssd", // does not exist, testing edge case
machineType: "a4x-medgpu-nolssd",
expVolumeLimit: volumeLimitBig,
},
}

for _, tc := range testCases {
Expand Down