diff --git a/pkg/common/constants.go b/pkg/common/constants.go
index ec218c133..62d8a6bc9 100644
--- a/pkg/common/constants.go
+++ b/pkg/common/constants.go
@@ -104,3 +104,9 @@ var C4AMachineHyperdiskAttachLimitMap = []MachineHyperdiskLimit{
 	{max: 48, value: 31},
 	{max: 72, value: 63},
 }
+
+// A4X Machine Types - Hyperdisk Balanced Limits. The max here is actually the GPU count (not CPU, like the others).
+var A4XMachineHyperdiskAttachLimitMap = []MachineHyperdiskLimit{
+	{max: 1, value: 63},
+	{max: 2, value: 127},
+}
diff --git a/pkg/common/utils.go b/pkg/common/utils.go
index 23ca1839d..57323dcc3 100644
--- a/pkg/common/utils.go
+++ b/pkg/common/utils.go
@@ -775,6 +775,8 @@ func GetHyperdiskAttachLimit(machineTypePrefix string, vCPUs int64) int64 {
 		limitMap = N4MachineHyperdiskAttachLimitMap
 	case "c4a":
 		limitMap = C4AMachineHyperdiskAttachLimitMap
+	case "a4x":
+		limitMap = A4XMachineHyperdiskAttachLimitMap
 	default:
 		// Fallback to the most conservative Gen4 map for unknown types
 		return MapNumber(vCPUs, C4DMachineHyperdiskAttachLimitMap)
diff --git a/pkg/gce-pd-csi-driver/node.go b/pkg/gce-pd-csi-driver/node.go
index 13ed97715..00f0b0859 100644
--- a/pkg/gce-pd-csi-driver/node.go
+++ b/pkg/gce-pd-csi-driver/node.go
@@ -113,11 +113,13 @@ const (
 	// doc https://cloud.google.com/compute/docs/memory-optimized-machines#x4_disks
 	x4HyperdiskLimit int64 = 39
 	// doc https://cloud.google.com/compute/docs/accelerator-optimized-machines#a4-disks
-	a4HyperdiskLimit     int64 = 127
-	defaultLinuxFsType         = "ext4"
-	defaultWindowsFsType       = "ntfs"
-	fsTypeExt3                 = "ext3"
-	fsTypeBtrfs                = "btrfs"
+	a4HyperdiskLimit       int64 = 127
+	a4xMetalHyperdiskLimit int64 = 31
+	c3MetalHyperdiskLimit  int64 = 15
+	defaultLinuxFsType           = "ext4"
+	defaultWindowsFsType         = "ntfs"
+	fsTypeExt3                   = "ext3"
+	fsTypeBtrfs                  = "btrfs"
 
 	readAheadKBMountFlagRegexPattern = "^read_ahead_kb=(.+)$"
 	btrfsReclaimDataRegexPattern     = "^btrfs-allocation-data-bg_reclaim_threshold=(\\d{1,2})$" // 0-99 are valid, incl. 00
@@ -847,7 +849,7 @@ func (ns *GCENodeServer) GetVolumeLimits(ctx context.Context) (int64, error) {
 		}
 	}
 
-	// Process gen4 machine attach limits
+	// Process gen4 machine attach limits which include vCPUs in the machine type
 	gen4MachineTypesPrefix := []string{"c4a-", "c4-", "n4-", "c4d-"}
 	for _, gen4Prefix := range gen4MachineTypesPrefix {
 		if strings.HasPrefix(machineType, gen4Prefix) {
@@ -866,12 +868,35 @@ func (ns *GCENodeServer) GetVolumeLimits(ctx context.Context) (int64, error) {
 			}
 		}
 	}
+	// Process gen4 A4X machine attach limits, which have a -1g/-2g/-4g/metal suffix
+	if strings.HasPrefix(machineType, "a4x-") {
+		machineTypeSlice := strings.Split(machineType, "-")
+		if len(machineTypeSlice) < 3 {
+			return volumeLimitBig, fmt.Errorf("unconventional machine type: %v", machineType)
+		}
+		gpuString := machineTypeSlice[2]
+		if gpuString == "metal" {
+			return a4xMetalHyperdiskLimit, nil
+		}
+		// The GPU segment must look like "<count>g". TrimSuffix is used instead of slicing off
+		// the last byte because slicing panics on an empty segment (e.g. "a4x-highgpu-"); an
+		// unchanged result means the "g" suffix is missing, which is rejected as well.
+		gpuCount := strings.TrimSuffix(gpuString, "g")
+		gpus, err := strconv.ParseInt(gpuCount, 10, 64)
+		if err != nil || gpuCount == gpuString {
+			return volumeLimitBig, fmt.Errorf("invalid gpuString %s for machine type: %v", gpuString, machineType)
+		}
+		return common.GetHyperdiskAttachLimit("a4x", gpus), nil
+	}
 	if strings.HasPrefix(machineType, "x4-") {
 		return x4HyperdiskLimit, nil
 	}
 	if strings.HasPrefix(machineType, "a4-") {
 		return a4HyperdiskLimit, nil
 	}
+	if strings.HasPrefix(machineType, "c3-") && strings.HasSuffix(machineType, "-metal") {
+		return c3MetalHyperdiskLimit, nil
+	}
 
 	return volumeLimitBig, nil
 }
diff --git a/pkg/gce-pd-csi-driver/node_test.go b/pkg/gce-pd-csi-driver/node_test.go
index 4f64b6102..2ac1b9e2e 100644
--- a/pkg/gce-pd-csi-driver/node_test.go
+++ b/pkg/gce-pd-csi-driver/node_test.go
@@ -350,6 +350,86 @@ func TestNodeGetVolumeLimits(t *testing.T) {
 			machineType:    "c4d-standard-32",
 			expVolumeLimit: 31,
 		},
+		{
+			name:           "c3-highcpu-192-metal",
+			machineType:    "c3-highcpu-192-metal",
+			expVolumeLimit: c3MetalHyperdiskLimit,
+		},
+		{
+			name:           "c3-standard-192-metal",
+			machineType:    "c3-standard-192-metal",
+			expVolumeLimit: c3MetalHyperdiskLimit,
+		},
+		{
+			name:           "c3-highmem-192-metal",
+			machineType:    "c3-highmem-192-metal",
+			expVolumeLimit: c3MetalHyperdiskLimit,
+		},
+		{
+			name:           "a4x-highgpu-1g",
+			machineType:    "a4x-highgpu-1g",
+			expVolumeLimit: 63,
+		},
+		{
+			name:           "a4x-highgpu-2g",
+			machineType:    "a4x-highgpu-2g",
+			expVolumeLimit: 127,
+		},
+		{
+			name:           "a4x-highgpu-2g-nolssd",
+			machineType:    "a4x-highgpu-2g-nolssd",
+			expVolumeLimit: 127,
+		},
+		{
+			name:           "a4x-highgpu-4g",
+			machineType:    "a4x-highgpu-4g",
+			expVolumeLimit: 127,
+		},
+		{
+			name:           "a4x-highgpu-8g",
+			machineType:    "a4x-highgpu-8g",
+			expVolumeLimit: 127,
+		},
+		{
+			name:           "a4x-highgpu-metal",
+			machineType:    "a4x-highgpu-metal",
+			expVolumeLimit: a4xMetalHyperdiskLimit,
+		},
+		{
+			name:           "a4x-max-metal",
+			machineType:    "a4x-max-metal",
+			expVolumeLimit: a4xMetalHyperdiskLimit,
+		},
+		{
+			name:           "a4x-max-1g",
+			machineType:    "a4x-max-1g",
+			expVolumeLimit: 63,
+		},
+		{
+			name:           "a4x-max-2g",
+			machineType:    "a4x-max-2g",
+			expVolumeLimit: 127,
+		},
+		{
+			name:           "a4x-max-4g",
+			machineType:    "a4x-max-4g",
+			expVolumeLimit: 127,
+		},
+		{
+			name:           "a4x-max-8g", // -8g does not exist, testing edge case
+			machineType:    "a4x-max-8g",
+			expVolumeLimit: 127,
+		},
+		{
+			name:           "a4x-medgpu-nolssd", // does not exist, testing edge case
+			machineType:    "a4x-medgpu-nolssd",
+			expVolumeLimit: volumeLimitBig,
+		},
+		{
+			name:           "a4x-highgpu-", // empty GPU segment, must fall back instead of panicking
+			machineType:    "a4x-highgpu-",
+			expVolumeLimit: volumeLimitBig,
+		},
 	}
 
 	for _, tc := range testCases {