
Commit 069c25c

feat: add GPU type and enable intel GPU resources
Signed-off-by: Zhan Xue <[email protected]>
Co-authored-by: Eero Tamminen <[email protected]>
Co-authored-by: Tuomas Katila <[email protected]>
1 parent: 4b7a806

File tree: 11 files changed, +174 −17 lines
Lines changed: 34 additions & 0 deletions

@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeployment
+metadata:
+  name: vllm-agg
+spec:
+  services:
+    Frontend:
+      dynamoNamespace: vllm-agg
+      componentType: frontend
+      replicas: 1
+      extraPodSpec:
+        mainContainer:
+          image: https://hub.docker.com/r/intel/ai-dynamo-xpu:v0.4.1-b1
+    VllmDecodeWorker:
+      envFromSecret: hf-token-secret
+      dynamoNamespace: vllm-agg
+      componentType: worker
+      replicas: 1
+      resources:
+        limits:
+          gpu: "1"
+          gpu_type: "xe"
+      extraPodSpec:
+        mainContainer:
+          image: https://hub.docker.com/r/intel/ai-dynamo-xpu:v0.4.1-b1
+          workingDir: /workspace/components/backends/vllm
+          command:
+            - /bin/sh
+            - -c
+          args:
+            - python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B

deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml

Lines changed: 18 additions & 0 deletions

@@ -10186,6 +10186,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object
@@ -10202,6 +10211,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object

deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml

Lines changed: 18 additions & 0 deletions

@@ -10320,6 +10320,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object
@@ -10336,6 +10345,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object

deploy/cloud/operator/api/dynamo/common/common.go

Lines changed: 14 additions & 4 deletions

@@ -23,12 +23,17 @@ import (
 )
 
 type ResourceItem struct {
-    CPU    string            `json:"cpu,omitempty"`
-    Memory string            `json:"memory,omitempty"`
+    CPU    string `json:"cpu,omitempty"`
+    Memory string `json:"memory,omitempty"`
     // Indicates the number of GPUs to request.
     // total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
-    GPU    string            `json:"gpu,omitempty"`
-    Custom map[string]string `json:"custom,omitempty"`
+    GPU string `json:"gpu,omitempty"`
+    // Indicates the type of GPU resource to request.
+    // Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+    // If not specified, defaults to NVIDIA GPUs.
+    // +kubebuilder:validation:Enum=xe;i915
+    GPUType string            `json:"gpu_type,omitempty"`
+    Custom  map[string]string `json:"custom,omitempty"`
 }
 
 type Resources struct {
@@ -39,6 +44,11 @@ type Resources struct {
 type DeploymentTargetHPAConf struct {
     CPU *int32 `json:"cpu,omitempty"`
     GPU *int32 `json:"gpu,omitempty"`
+    // Indicates the type of GPU resource to request.
+    // Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+    // If not specified, defaults to NVIDIA GPUs.
+    // +kubebuilder:validation:Enum=xe;i915
+    GPUType *string `json:"gpu_type,omitempty"`
     Memory *string `json:"memory,omitempty"`
     QPS *int64 `json:"qps,omitempty"`
     MinReplicas *int32 `json:"min_replicas,omitempty"`
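
For reference, the new field serializes under the gpu_type key, which is what the CRD schemas above expose and what the resources.limits block in the example deployment sets. A minimal standalone sketch (the struct is redeclared locally, without the kubebuilder markers, so it compiles outside the operator tree):

package main

import (
    "encoding/json"
    "fmt"
)

// ResourceItem mirrors the fields from the hunk above, minus the kubebuilder
// markers, so this sketch compiles without the operator's packages.
type ResourceItem struct {
    CPU     string            `json:"cpu,omitempty"`
    Memory  string            `json:"memory,omitempty"`
    GPU     string            `json:"gpu,omitempty"`
    GPUType string            `json:"gpu_type,omitempty"`
    Custom  map[string]string `json:"custom,omitempty"`
}

func main() {
    // One Intel Xe GPU, as requested by the VllmDecodeWorker in the example above.
    item := ResourceItem{GPU: "1", GPUType: "xe"}
    out, _ := json.Marshal(item)
    fmt.Println(string(out)) // {"gpu":"1","gpu_type":"xe"}
}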

deploy/cloud/operator/api/dynamo/common/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Generated deepcopy file; diff not rendered by default.

deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml

Lines changed: 18 additions & 0 deletions

@@ -10186,6 +10186,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object
@@ -10202,6 +10211,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object

deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml

Lines changed: 18 additions & 0 deletions

@@ -10320,6 +10320,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object
@@ -10336,6 +10345,15 @@ spec:
               Indicates the number of GPUs to request.
               total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
             type: string
+          gpu_type:
+            description: |-
+              Indicates the type of GPU resource to request.
+              Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+              If not specified, defaults to NVIDIA GPUs.
+            enum:
+            - xe
+            - i915
+            type: string
           memory:
             type: string
         type: object

deploy/cloud/operator/internal/consts/consts.go

Lines changed: 5 additions & 0 deletions

@@ -44,7 +44,12 @@ const (
 
     KubeLabelDynamoComponentPod = "nvidia.com/dynamo-component-pod"
 
+    // KubeResourceGPUNvidia is the Kubernetes resource name for NVIDIA GPUs.
     KubeResourceGPUNvidia = "nvidia.com/gpu"
+    // KubeResourceGPUXeIntel is the Kubernetes resource name for Intel Xe GPUs.
+    KubeResourceGPUXeIntel = "gpu.intel.com/xe"
+    // KubeResourceGPUi915Intel is the Kubernetes resource name for Intel i915 GPUs.
+    KubeResourceGPUi915Intel = "gpu.intel.com/i915"
 
     DynamoDeploymentConfigEnvVar = "DYN_DEPLOYMENT_CONFIG"

deploy/cloud/operator/internal/controller_common/resource.go

Lines changed: 7 additions & 1 deletion

@@ -423,7 +423,13 @@ func GetResourcesConfig(resources *common.Resources) (*corev1.ResourceRequiremen
         if currentResources.Limits == nil {
             currentResources.Limits = make(corev1.ResourceList)
         }
-        currentResources.Limits[corev1.ResourceName(consts.KubeResourceGPUNvidia)] = q
+        if resources.Limits.GPUType == "xe" {
+            currentResources.Limits[corev1.ResourceName(consts.KubeResourceGPUXeIntel)] = q
+        } else if resources.Limits.GPUType == "i915" {
+            currentResources.Limits[corev1.ResourceName(consts.KubeResourceGPUi915Intel)] = q
+        } else {
+            currentResources.Limits[corev1.ResourceName(consts.KubeResourceGPUNvidia)] = q
+        }
     }
     for k, v := range resources.Limits.Custom {
         q, err := resource.ParseQuantity(v)
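
The added branch only decides which extended-resource key the parsed GPU quantity is written under; the quantity itself still comes from the gpu field. Below is a minimal standalone sketch of that mapping, with the helper name and local constants chosen for illustration (they mirror internal/consts/consts.go but are not part of the operator):

package main

import "fmt"

// Resource names matching the constants added in internal/consts/consts.go.
const (
    kubeResourceGPUNvidia    = "nvidia.com/gpu"
    kubeResourceGPUXeIntel   = "gpu.intel.com/xe"
    kubeResourceGPUi915Intel = "gpu.intel.com/i915"
)

// gpuResourceName reproduces the branch added to GetResourcesConfig above:
// "xe" and "i915" select the Intel device-plugin resources; anything else
// falls back to the NVIDIA resource name.
func gpuResourceName(gpuType string) string {
    switch gpuType {
    case "xe":
        return kubeResourceGPUXeIntel
    case "i915":
        return kubeResourceGPUi915Intel
    default:
        return kubeResourceGPUNvidia
    }
}

func main() {
    for _, t := range []string{"xe", "i915", ""} {
        fmt.Printf("gpu_type=%q -> %s\n", t, gpuResourceName(t))
    }
}

With gpu: "1" and gpu_type: "xe" from the example file above, the worker pod's limits therefore carry gpu.intel.com/xe: "1" rather than nvidia.com/gpu: "1".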

deploy/cloud/operator/internal/dynamo/graph.go

Lines changed: 9 additions & 4 deletions

@@ -80,10 +80,15 @@ type ServiceConfig struct {
 }
 
 type Resources struct {
-    CPU    *string           `yaml:"cpu,omitempty" json:"cpu,omitempty"`
-    Memory *string           `yaml:"memory,omitempty" json:"memory,omitempty"`
-    GPU    *string           `yaml:"gpu,omitempty" json:"gpu,omitempty"`
-    Custom map[string]string `yaml:"custom,omitempty" json:"custom,omitempty"`
+    CPU    *string `yaml:"cpu,omitempty" json:"cpu,omitempty"`
+    Memory *string `yaml:"memory,omitempty" json:"memory,omitempty"`
+    GPU    *string `yaml:"gpu,omitempty" json:"gpu,omitempty"`
+    // Indicates the type of GPU resource to request.
+    // Valid values are "xe" for Intel Xe GPUs or "i915" for Intel i915 GPUs.
+    // If not specified, defaults to NVIDIA GPUs.
+    // +kubebuilder:validation:Enum=xe;i915
+    GPUType *string           `yaml:"gpu_type,omitempty" json:"gpu_type,omitempty"`
+    Custom  map[string]string `yaml:"custom,omitempty" json:"custom,omitempty"`
 }
 
 type DynDeploymentConfig = map[string]*DynDeploymentServiceConfig
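
Since the struct carries both yaml and json tags, the same gpu_type key is picked up when a per-service resources block is parsed from configuration. A minimal sketch, assuming gopkg.in/yaml.v3 for decoding (the YAML library actually used by the operator is not visible in this diff):

package main

import (
    "fmt"

    "gopkg.in/yaml.v3"
)

// Resources mirrors the struct above; pointer fields distinguish "unset" from "empty".
type Resources struct {
    CPU     *string           `yaml:"cpu,omitempty"`
    Memory  *string           `yaml:"memory,omitempty"`
    GPU     *string           `yaml:"gpu,omitempty"`
    GPUType *string           `yaml:"gpu_type,omitempty"`
    Custom  map[string]string `yaml:"custom,omitempty"`
}

func main() {
    // A resources block as it might appear in a service configuration.
    doc := []byte("gpu: \"1\"\ngpu_type: \"xe\"\n")

    var r Resources
    if err := yaml.Unmarshal(doc, &r); err != nil {
        panic(err)
    }
    fmt.Println(*r.GPU, *r.GPUType) // 1 xe
}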
