File tree Expand file tree Collapse file tree 1 file changed +8
-5
lines changed Expand file tree Collapse file tree 1 file changed +8
-5
lines changed Original file line number Diff line number Diff line change @@ -140,11 +140,16 @@ def run_multi_api_server(args: argparse.Namespace):
140140 num_api_servers = args .api_server_count
141141 assert num_api_servers > 0
142142
143+ orig_disable_mm_preprocessor_cache = args .disable_mm_preprocessor_cache
144+
143145 # set_process_title("ProcManager")
144146
145147 if num_api_servers > 1 :
146148 setup_multiprocess_prometheus ()
147149
150+ # The mm preprocessor cache is not compatible with API server scale-out
151+ args .disable_mm_preprocessor_cache = True
152+
148153 listen_address , sock = setup_server (args )
149154
150155 engine_args = vllm .AsyncEngineArgs .from_cli_args (args )
@@ -161,11 +166,9 @@ def run_multi_api_server(args: argparse.Namespace):
161166 "with api_server_count > 1" )
162167
163168 if model_config .is_multimodal_model and not (
164- model_config .disable_mm_preprocessor_cache ):
165- logger .warning (
166- "Multi-model preprocessor cache will be disabled for"
167- " api_server_count > 1" )
168- model_config .disable_mm_preprocessor_cache = True
169+ orig_disable_mm_preprocessor_cache ):
170+ logger .warning ("Multi-model preprocessor cache will be disabled "
171+ "for api_server_count > 1" )
169172
170173 executor_class = Executor .get_class (vllm_config )
171174 log_stats = not engine_args .disable_log_stats
You can’t perform that action at this time.
0 commit comments