
Commit 069c25c

feat: add GPU type and enable intel GPU resources
Signed-off-by: Zhan Xue <[email protected]>
Co-authored-by: Eero Tamminen <[email protected]>
Co-authored-by: Tuomas Katila <[email protected]>
1 parent: 4b7a806

File tree: 11 files changed, +174 −17 lines
Lines changed: 34 additions & 0 deletions

@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeployment
+metadata:
+  name: vllm-agg
+spec:
+  services:
+    Frontend:
+      dynamoNamespace: vllm-agg
+      componentType: frontend
+      replicas: 1
+      extraPodSpec:
+        mainContainer:
+          image: https://hub.docker.com/r/intel/ai-dynamo-xpu:v0.4.1-b1
+    VllmDecodeWorker:
+      envFromSecret: hf-token-secret
+      dynamoNamespace: vllm-agg
+      componentType: worker
+      replicas: 1
+      resources:
+        limits:
+          gpu: "1"
+          gpu_type: "xe"
+      extraPodSpec:
+        mainContainer:
+          image: https://hub.docker.com/r/intel/ai-dynamo-xpu:v0.4.1-b1
+          workingDir: /workspace/components/backends/vllm
+          command:
+            - /bin/sh
+            - -c
+          args:
+            - python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B

deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml

Lines changed: 18 additions & 0 deletions

@@ -10186,6 +10186,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object
@@ -10202,6 +10211,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object

deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml

Lines changed: 18 additions & 0 deletions

@@ -10320,6 +10320,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object
@@ -10336,6 +10345,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object

deploy/cloud/operator/api/dynamo/common/common.go

Lines changed: 14 additions & 4 deletions

@@ -23,12 +23,17 @@ import (
 )
 
 type ResourceItem struct {
-    CPU    string            `json:"cpu,omitempty"`
-    Memory string            `json:"memory,omitempty"`
+    CPU    string `json:"cpu,omitempty"`
+    Memory string `json:"memory,omitempty"`
     // Indicates the number of GPUs to request.
     // total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
-    GPU    string            `json:"gpu,omitempty"`
-    Custom map[string]string `json:"custom,omitempty"`
+    GPU string `json:"gpu,omitempty"`
+    // Indicates the type of GPU resource to request.
+    // Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+    // If not specified, defaults to NVIDIA GPUs.
+    // +kubebuilder:validation:Enum=xe;i915
+    GPUType string            `json:"gpu_type,omitempty"`
+    Custom  map[string]string `json:"custom,omitempty"`
 }
 
 type Resources struct {
@@ -39,6 +44,11 @@ type Resources struct {
 type DeploymentTargetHPAConf struct {
     CPU *int32 `json:"cpu,omitempty"`
     GPU *int32 `json:"gpu,omitempty"`
+    // Indicates the type of GPU resource to request.
+    // Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+    // If not specified, defaults to NVIDIA GPUs.
+    // +kubebuilder:validation:Enum=xe;i915
+    GPUType *string `json:"gpu_type,omitempty"`
     Memory *string `json:"memory,omitempty"`
     QPS *int64 `json:"qps,omitempty"`
     MinReplicas *int32 `json:"min_replicas,omitempty"`
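
For reference, the new field serializes under the gpu_type key, which is what the CRD schemas above expose and what the resources.limits block in the example deployment sets. A minimal standalone sketch (the struct is redeclared locally, without the kubebuilder markers, so it compiles outside the operator tree):

package main

import (
    "encoding/json"
    "fmt"
)

// ResourceItem mirrors the fields from the hunk above, minus the kubebuilder
// markers, so this sketch compiles without the operator's packages.
type ResourceItem struct {
    CPU     string            `json:"cpu,omitempty"`
    Memory  string            `json:"memory,omitempty"`
    GPU     string            `json:"gpu,omitempty"`
    GPUType string            `json:"gpu_type,omitempty"`
    Custom  map[string]string `json:"custom,omitempty"`
}

func main() {
    // One Intel Xe GPU, as requested by the VllmDecodeWorker in the example above.
    item := ResourceItem{GPU: "1", GPUType: "xe"}
    out, _ := json.Marshal(item)
    fmt.Println(string(out)) // {"gpu":"1","gpu_type":"xe"}
}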

deploy/cloud/operator/api/dynamo/common/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Generated deepcopy file; diff not rendered by default.

deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml

Lines changed: 18 additions & 0 deletions

@@ -10186,6 +10186,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object
@@ -10202,6 +10211,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object

deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml

Lines changed: 18 additions & 0 deletions

@@ -10320,6 +10320,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object
@@ -10336,6 +10345,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object

deploy/cloud/operator/internal/consts/consts.go

Lines changed: 5 additions & 0 deletions

@@ -44,7 +44,12 @@ const (
 
     KubeLabelDynamoComponentPod = "nvidia.com/dynamo-component-pod"
 
+    // KubeResourceGPUNvidia is the Kubernetes resource name for NVIDIA GPUs.
     KubeResourceGPUNvidia = "nvidia.com/gpu"
+    // KubeResourceGPUXeIntel is the Kubernetes resource name for Intel Xe GPUs.
+    KubeResourceGPUXeIntel = "gpu.intel.com/xe"
+    // KubeResourceGPUi915Intel is the Kubernetes resource name for Intel i915 GPUs.
+    KubeResourceGPUi915Intel = "gpu.intel.com/i915"
 
     DynamoDeploymentConfigEnvVar = "DYN_DEPLOYMENT_CONFIG"

deploy/cloud/operator/internal/controller_common/resource.go

Lines changed: 7 additions & 1 deletion

@@ -423,7 +423,13 @@ func GetResourcesConfig(resources *common.Resources) (*corev1.ResourceRequiremen
         if currentResources.Limits == nil {
             currentResources.Limits = make(corev1.ResourceList)
         }
-        currentResources.Limits[corev1.ResourceName(consts.KubeResourceGPUNvidia)] = q
+        if resources.Limits.GPUType == "xe" {
+            currentResources.Limits[corev1.ResourceName(consts.KubeResourceGPUXeIntel)] = q
+        } else if resources.Limits.GPUType == "i915" {
+            currentResources.Limits[corev1.ResourceName(consts.KubeResourceGPUi915Intel)] = q
+        } else {
+            currentResources.Limits[corev1.ResourceName(consts.KubeResourceGPUNvidia)] = q
+        }
     }
     for k, v := range resources.Limits.Custom {
         q, err := resource.ParseQuantity(v)
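
The added branch only decides which extended-resource key the parsed GPU quantity is written under; the quantity itself still comes from the gpu field. Below is a minimal standalone sketch of that mapping, with the helper name and local constants chosen for illustration (they mirror internal/consts/consts.go but are not part of the operator):

package main

import "fmt"

// Resource names matching the constants added in internal/consts/consts.go.
const (
    kubeResourceGPUNvidia    = "nvidia.com/gpu"
    kubeResourceGPUXeIntel   = "gpu.intel.com/xe"
    kubeResourceGPUi915Intel = "gpu.intel.com/i915"
)

// gpuResourceName reproduces the branch added to GetResourcesConfig above:
// "xe" and "i915" select the Intel device-plugin resources; anything else
// falls back to the NVIDIA resource name.
func gpuResourceName(gpuType string) string {
    switch gpuType {
    case "xe":
        return kubeResourceGPUXeIntel
    case "i915":
        return kubeResourceGPUi915Intel
    default:
        return kubeResourceGPUNvidia
    }
}

func main() {
    for _, t := range []string{"xe", "i915", ""} {
        fmt.Printf("gpu_type=%q -> %s\n", t, gpuResourceName(t))
    }
}

With gpu: "1" and gpu_type: "xe" from the example file above, the worker pod's limits therefore carry gpu.intel.com/xe: "1" rather than nvidia.com/gpu: "1".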

deploy/cloud/operator/internal/dynamo/graph.go

Lines changed: 9 additions & 4 deletions

@@ -80,10 +80,15 @@ type ServiceConfig struct {
 }
 
 type Resources struct {
-    CPU    *string           `yaml:"cpu,omitempty" json:"cpu,omitempty"`
-    Memory *string           `yaml:"memory,omitempty" json:"memory,omitempty"`
-    GPU    *string           `yaml:"gpu,omitempty" json:"gpu,omitempty"`
-    Custom map[string]string `yaml:"custom,omitempty" json:"custom,omitempty"`
+    CPU    *string `yaml:"cpu,omitempty" json:"cpu,omitempty"`
+    Memory *string `yaml:"memory,omitempty" json:"memory,omitempty"`
+    GPU    *string `yaml:"gpu,omitempty" json:"gpu,omitempty"`
+    // Indicates the type of GPU resource to request.
+    // Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+    // If not specified, defaults to NVIDIA GPUs.
+    // +kubebuilder:validation:Enum=xe;i915
+    GPUType *string           `yaml:"gpu_type,omitempty" json:"gpu_type,omitempty"`
+    Custom  map[string]string `yaml:"custom,omitempty" json:"custom,omitempty"`
 }
 
 type DynDeploymentConfig = map[string]*DynDeploymentServiceConfig
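
Since the struct carries both yaml and json tags, the same gpu_type key is picked up when a per-service resources block is parsed from configuration. A minimal sketch, assuming gopkg.in/yaml.v3 for decoding (the YAML library actually used by the operator is not visible in this diff):

package main

import (
    "fmt"

    "gopkg.in/yaml.v3"
)

// Resources mirrors the struct above; pointer fields distinguish "unset" from "empty".
type Resources struct {
    CPU     *string           `yaml:"cpu,omitempty"`
    Memory  *string           `yaml:"memory,omitempty"`
    GPU     *string           `yaml:"gpu,omitempty"`
    GPUType *string           `yaml:"gpu_type,omitempty"`
    Custom  map[string]string `yaml:"custom,omitempty"`
}

func main() {
    // A resources block as it might appear in a service configuration.
    doc := []byte("gpu: \"1\"\ngpu_type: \"xe\"\n")

    var r Resources
    if err := yaml.Unmarshal(doc, &r); err != nil {
        panic(err)
    }
    fmt.Println(*r.GPU, *r.GPUType) // 1 xe
}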
