Skip to content

Commit 8f59a1a

Browse files
Merge pull request #131 from MenD32/fix/support-h200-gpus
NO-JIRA: JN-2789: added H200 support
2 parents 3e8ba63 + 495f894 commit 8f59a1a

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

pkg/cloud/gcp/actuators/machine/reconciler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ func (r *Reconciler) validateGuestAccelerators() error {
156156
// machine types that have GPUs included would skip this function. The ultimate result for users is that new GPU
157157
// machine types will not have accurate quota reporting. If machines are being pathologically deleted and recreated
158158
// it may be a sign of a quota issue.
159-
if len(r.providerSpec.GPUs) == 0 && !strings.HasPrefix(r.providerSpec.MachineType, "a2-") && !strings.HasPrefix(r.providerSpec.MachineType, "a3-") {
159+
if len(r.providerSpec.GPUs) == 0 && !strings.HasPrefix(r.providerSpec.MachineType, "a2-") && !strings.HasPrefix(r.providerSpec.MachineType, "a3-") || strings.HasPrefix(r.providerSpec.MachineType, "a3-ultragpu-") {
160160
// no accelerators to validate so return nil
161161
return nil
162162
}

pkg/cloud/gcp/actuators/machine/reconciler_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,35 @@ func TestCreate(t *testing.T) {
745745
},
746746
expectedError: errors.New("failed to fetch user-defined tags for : failed to fetch openshift/key2/value2 tag details: googleapi: Error 500: Internal error while fetching 'openshift/key2/value2'"),
747747
},
748+
{
749+
name: "a3-ultragpu (H200 GPUs) instance create succeeds when quota is not available",
750+
providerSpec: &machinev1.GCPMachineProviderSpec{
751+
Region: "test-region",
752+
Zone: "test-zone",
753+
MachineType: "a3-ultragpu-8g",
754+
Disks: []*machinev1.GCPDisk{
755+
{
756+
Boot: true,
757+
Image: "projects/fooproject/global/images/uefi-image",
758+
},
759+
},
760+
},
761+
mockGPUCompatibleMachineTypesList: func(project string, zone string, ctx context.Context) (map[string]computeservice.GpuInfo, []string) {
762+
var compatibleMachineType = []string{}
763+
var gpuInfo = map[string]computeservice.GpuInfo{
764+
"a3-ultragpu-8g": {
765+
Type: "nvidia-h200-141gb",
766+
Count: 1,
767+
},
768+
}
769+
return gpuInfo, compatibleMachineType
770+
},
771+
mockRegionGet: func(project string, region string) (*compute.Region, error) {
772+
var computeQuota = &compute.Quota{}
773+
var computeRegion = &compute.Region{Quotas: []*compute.Quota{computeQuota}}
774+
return computeRegion, nil
775+
},
776+
},
748777
{
749778
name: "Create spot instance successfully",
750779
providerSpec: &machinev1.GCPMachineProviderSpec{

0 commit comments

Comments
 (0)