# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# DynamoGraphDeployment "vllm-mul": one HTTP frontend service plus one vLLM
# decode worker service that is spread over two nodes.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: vllm-mul
spec:
  services:
    # Frontend service: starts the dynamo.frontend module listening on
    # HTTP port 8000.
    Frontend:
      dynamoNamespace: vllm-mul
      componentType: frontend
      replicas: 1
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.0
          workingDir: /workspace/components/backends/vllm
          # The single args entry is handed to `/bin/sh -c` as the command
          # line to run.
          command:
            - /bin/sh
            - -c
          args:
            - "python3 -m dynamo.frontend --http-port 8000"
    # Worker service: starts the dynamo.vllm module serving Qwen/Qwen3-0.6B.
    VllmDecodeWorker:
      # nodeCount (2) equals the --tensor-parallel-size passed below.
      # NOTE(review): no `resources` stanza is declared for this worker;
      # confirm that GPU limits are supplied elsewhere or add them here.
      multinode:
        nodeCount: 2
      # Presumably the secret holding the Hugging Face token - TODO confirm.
      envFromSecret: hf-token-secret
      dynamoNamespace: vllm-mul
      componentType: worker
      replicas: 1
      extraPodSpec:
        mainContainer:
          # Same image and working directory as the Frontend service.
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.0
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
            - -c
          args:
            - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --tensor-parallel-size 2 --no-kv-transfer-config"
You can’t perform that action at this time.
0 commit comments