File tree Expand file tree Collapse file tree 5 files changed +182
-120
lines changed
components/backends/vllm/deploy Expand file tree Collapse file tree 5 files changed +182
-120
lines changed Original file line number Diff line number Diff line change @@ -48,24 +48,19 @@ spec:
4848 VllmDecodeWorker :
4949 envFromSecret : hf-token-secret
5050 livenessProbe :
51- exec :
52- command :
53- - /bin/sh
54- - -c
55- - " exit 0"
56- periodSeconds : 60
51+ httpGet :
52+ path : /live
53+ port : 9090
54+ periodSeconds : 5
5755 timeoutSeconds : 30
58- failureThreshold : 10
56+ failureThreshold : 1
5957 readinessProbe :
60- exec :
61- command :
62- - /bin/sh
63- - -c
64- - ' grep "VllmWorker.*has been initialized" /tmp/vllm.log'
65- initialDelaySeconds : 60
66- periodSeconds : 60
58+ httpGet :
59+ path : /health
60+ port : 9090
61+ periodSeconds : 10
6762 timeoutSeconds : 30
68- failureThreshold : 10
63+ failureThreshold : 60
6964 dynamoNamespace : vllm-agg
7065 componentType : worker
7166 replicas : 1
7873 cpu : " 10"
7974 memory : " 20Gi"
8075 gpu : " 1"
76+ envs :
77+ - name : DYN_SYSTEM_ENABLED
78+ value : " true"
79+ - name : DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
80+ value : " [\" generate\" ]"
81+ - name : DYN_SYSTEM_PORT
82+ value : " 9090"
8183 extraPodSpec :
8284 mainContainer :
85+ startupProbe :
86+ httpGet :
87+ path : /health
88+ port : 9090
89+ periodSeconds : 10
90+ failureThreshold : 60
8391 image : nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
8492 workingDir : /workspace/components/backends/vllm
8593 command :
Original file line number Diff line number Diff line change @@ -48,24 +48,19 @@ spec:
4848 VllmDecodeWorker :
4949 envFromSecret : hf-token-secret
5050 livenessProbe :
51- exec :
52- command :
53- - /bin/sh
54- - -c
55- - " exit 0"
56- periodSeconds : 60
51+ httpGet :
52+ path : /live
53+ port : 9090
54+ periodSeconds : 5
5755 timeoutSeconds : 30
58- failureThreshold : 10
56+ failureThreshold : 1
5957 readinessProbe :
60- exec :
61- command :
62- - /bin/sh
63- - -c
64- - ' grep "VllmWorker.*has been initialized" /tmp/vllm.log'
65- initialDelaySeconds : 60
66- periodSeconds : 60
58+ httpGet :
59+ path : /health
60+ port : 9090
61+ periodSeconds : 10
6762 timeoutSeconds : 30
68- failureThreshold : 10
63+ failureThreshold : 60
6964 dynamoNamespace : vllm-agg-router
7065 componentType : worker
7166 replicas : 2
7873 cpu : " 10"
7974 memory : " 20Gi"
8075 gpu : " 1"
76+ envs :
77+ - name : DYN_SYSTEM_ENABLED
78+ value : " true"
79+ - name : DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
80+ value : " [\" generate\" ]"
81+ - name : DYN_SYSTEM_PORT
82+ value : " 9090"
8183 extraPodSpec :
8284 mainContainer :
85+ startupProbe :
86+ httpGet :
87+ path : /health
88+ port : 9090
89+ periodSeconds : 10
90+ failureThreshold : 60
8391 image : nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
8492 workingDir : /workspace/components/backends/vllm
8593 command :
Original file line number Diff line number Diff line change @@ -51,24 +51,19 @@ spec:
5151 componentType : worker
5252 replicas : 1
5353 livenessProbe :
54- exec :
55- command :
56- - /bin/sh
57- - -c
58- - " exit 0"
59- periodSeconds : 60
54+ httpGet :
55+ path : /live
56+ port : 9090
57+ periodSeconds : 5
6058 timeoutSeconds : 30
61- failureThreshold : 10
59+ failureThreshold : 1
6260 readinessProbe :
63- exec :
64- command :
65- - /bin/sh
66- - -c
67- - ' grep "VllmWorker.*has been initialized" /tmp/vllm.log'
68- initialDelaySeconds : 60
69- periodSeconds : 60
61+ httpGet :
62+ path : /health
63+ port : 9090
64+ periodSeconds : 10
7065 timeoutSeconds : 30
71- failureThreshold : 10
66+ failureThreshold : 60
7267 resources :
7368 requests :
7469 cpu : " 32"
7873 cpu : " 32"
7974 memory : " 40Gi"
8075 gpu : " 1"
76+ envs :
77+ - name : DYN_SYSTEM_ENABLED
78+ value : " true"
79+ - name : DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
80+ value : " [\" generate\" ]"
81+ - name : DYN_SYSTEM_PORT
82+ value : " 9090"
8183 extraPodSpec :
8284 mainContainer :
85+ startupProbe :
86+ httpGet :
87+ path : /health
88+ port : 9090
89+ periodSeconds : 10
90+ failureThreshold : 60
8391 image : nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
8492 workingDir : /workspace/components/backends/vllm
8593 command :
@@ -93,24 +101,19 @@ spec:
93101 componentType : worker
94102 replicas : 1
95103 livenessProbe :
96- exec :
97- command :
98- - /bin/sh
99- - -c
100- - " exit 0"
101- periodSeconds : 60
104+ httpGet :
105+ path : /live
106+ port : 9090
107+ periodSeconds : 5
102108 timeoutSeconds : 30
103- failureThreshold : 10
109+ failureThreshold : 1
104110 readinessProbe :
105- exec :
106- command :
107- - /bin/sh
108- - -c
109- - ' grep "VllmWorker.*has been initialized" /tmp/vllm.log'
110- initialDelaySeconds : 60
111- periodSeconds : 60
111+ httpGet :
112+ path : /health
113+ port : 9090
114+ periodSeconds : 10
112115 timeoutSeconds : 30
113- failureThreshold : 10
116+ failureThreshold : 60
114117 resources :
115118 requests :
116119 cpu : " 32"
@@ -120,8 +123,21 @@ spec:
120123 cpu : " 32"
121124 memory : " 40Gi"
122125 gpu : " 1"
126+ envs :
127+ - name : DYN_SYSTEM_ENABLED
128+ value : " true"
129+ - name : DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
130+ value : " [\" generate\" ]"
131+ - name : DYN_SYSTEM_PORT
132+ value : " 9090"
123133 extraPodSpec :
124134 mainContainer :
135+ startupProbe :
136+ httpGet :
137+ path : /health
138+ port : 9090
139+ periodSeconds : 10
140+ failureThreshold : 60
125141 image : nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
126142 workingDir : /workspace/components/backends/vllm
127143 command :
Original file line number Diff line number Diff line change @@ -51,24 +51,19 @@ spec:
5151 componentType : worker
5252 replicas : 1
5353 livenessProbe :
54- exec :
55- command :
56- - /bin/sh
57- - -c
58- - " exit 0"
59- periodSeconds : 60
54+ httpGet :
55+ path : /live
56+ port : 9090
57+ periodSeconds : 5
6058 timeoutSeconds : 30
61- failureThreshold : 10
59+ failureThreshold : 1
6260 readinessProbe :
63- exec :
64- command :
65- - /bin/sh
66- - -c
67- - ' grep "VllmWorker.*has been initialized" /tmp/vllm.log'
68- initialDelaySeconds : 60
69- periodSeconds : 60
61+ httpGet :
62+ path : /health
63+ port : 9090
64+ periodSeconds : 10
7065 timeoutSeconds : 30
71- failureThreshold : 10
66+ failureThreshold : 60
7267 resources :
7368 requests :
7469 cpu : " 10"
7873 cpu : " 10"
7974 memory : " 20Gi"
8075 gpu : " 1"
76+ envs :
77+ - name : DYN_SYSTEM_ENABLED
78+ value : " true"
79+ - name : DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
80+ value : " [\" generate\" ]"
81+ - name : DYN_SYSTEM_PORT
82+ value : " 9090"
8183 extraPodSpec :
8284 mainContainer :
85+ startupProbe :
86+ httpGet :
87+ path : /health
88+ port : 9090
89+ periodSeconds : 10
90+ failureThreshold : 60
8391 image : nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
8492 workingDir : /workspace/components/backends/vllm
8593 command :
@@ -93,24 +101,19 @@ spec:
93101 componentType : worker
94102 replicas : 1
95103 livenessProbe :
96- exec :
97- command :
98- - /bin/sh
99- - -c
100- - " exit 0"
101- periodSeconds : 60
104+ httpGet :
105+ path : /live
106+ port : 9090
107+ periodSeconds : 5
102108 timeoutSeconds : 30
103- failureThreshold : 10
109+ failureThreshold : 1
104110 readinessProbe :
105- exec :
106- command :
107- - /bin/sh
108- - -c
109- - ' grep "VllmWorker.*has been initialized" /tmp/vllm.log'
110- initialDelaySeconds : 60
111- periodSeconds : 60
111+ httpGet :
112+ path : /health
113+ port : 9090
114+ periodSeconds : 10
112115 timeoutSeconds : 30
113- failureThreshold : 10
116+ failureThreshold : 60
114117 resources :
115118 requests :
116119 cpu : " 10"
@@ -120,8 +123,21 @@ spec:
120123 cpu : " 10"
121124 memory : " 20Gi"
122125 gpu : " 1"
126+ envs :
127+ - name : DYN_SYSTEM_ENABLED
128+ value : " true"
129+ - name : DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
130+ value : " [\" generate\" ]"
131+ - name : DYN_SYSTEM_PORT
132+ value : " 9090"
123133 extraPodSpec :
124134 mainContainer :
135+ startupProbe :
136+ httpGet :
137+ path : /health
138+ port : 9090
139+ periodSeconds : 10
140+ failureThreshold : 60
125141 image : nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
126142 workingDir : /workspace/components/backends/vllm
127143 command :
You can’t perform that action at this time.
0 commit comments