Skip to content

Commit 2fb6ffb

Browse files
feat: add vllm aggregated multinode deployment example
Signed-off-by: Eric Liu <[email protected]>
1 parent a33033b commit 2fb6ffb

File tree

1 file changed

+38
-0
lines changed

1 file changed

+38
-0
lines changed
Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Aggregated multinode vLLM example: one frontend service plus one worker
# service whose `multinode.nodeCount` is 2.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: vllm-mul
spec:
  services:
    # Frontend service: starts `dynamo.frontend` with `--http-port 8000`.
    Frontend:
      # Same dynamoNamespace value as the worker service below.
      dynamoNamespace: vllm-mul
      componentType: frontend
      replicas: 1
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.0
          workingDir: /workspace/components/backends/vllm
          # The single `args` entry is handed to `/bin/sh -c` as one command
          # string, so it must remain a single list item.
          command:
            - /bin/sh
            - -c
          args:
            - "python3 -m dynamo.frontend --http-port 8000"
    # Worker service: starts `dynamo.vllm` serving Qwen/Qwen3-0.6B.
    VllmDecodeWorker:
      multinode:
        # Matches the `--tensor-parallel-size 2` passed to the worker below.
        # NOTE(review): no resources/GPU limits are declared for this
        # service; confirm whether the operator requires them so that each
        # node gets a GPU.
        nodeCount: 2
      # NOTE(review): presumably supplies the Hugging Face token used to pull
      # the model; confirm the secret exists in the target namespace.
      envFromSecret: hf-token-secret
      dynamoNamespace: vllm-mul
      componentType: worker
      replicas: 1
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.0
          workingDir: /workspace/components/backends/vllm
          # As with the frontend, the whole command line is one string for
          # `/bin/sh -c`.
          command:
            - /bin/sh
            - -c
          args:
            - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --tensor-parallel-size 2 --no-kv-transfer-config"

0 commit comments

Comments
 (0)