2 files changed: +24 -5 lines changed
Original file line number | Diff line number | Diff line change
@@ -932,11 +932,9 @@ def launch_server(
932932 warmup_thread .join ()
933933
934934
935- def _wait_and_warmup (
935+ def _execute_server_warmup (
936936 server_args : ServerArgs ,
937937 pipe_finish_writer : Optional [multiprocessing .connection .Connection ],
938- image_token_text : str ,
939- launch_callback : Optional [Callable [[], None ]] = None ,
940938):
941939 headers = {}
942940 url = server_args .url ()
@@ -961,7 +959,7 @@ def _wait_and_warmup(
961959 pipe_finish_writer .send (last_traceback )
962960 logger .error (f"Initialization failed. warmup error: { last_traceback } " )
963961 kill_process_tree (os .getpid ())
964- return
962+ return success
965963
966964 model_info = res .json ()
967965
@@ -1035,10 +1033,25 @@ def _wait_and_warmup(
10351033 pipe_finish_writer .send (last_traceback )
10361034 logger .error (f"Initialization failed. warmup error: { last_traceback } " )
10371035 kill_process_tree (os .getpid ())
1038- return
1036+ return False
10391037
10401038 # Debug print
10411039 # logger.info(f"warmup request returns: {res.json()=}")
1040+ return success
1041+
1042+
1043+ def _wait_and_warmup (
1044+ server_args : ServerArgs ,
1045+ pipe_finish_writer : Optional [multiprocessing .connection .Connection ],
1046+ image_token_text : str ,
1047+ launch_callback : Optional [Callable [[], None ]] = None ,
1048+ ):
1049+ if not server_args .skip_server_warmup :
1050+ if not _execute_server_warmup (
1051+ server_args ,
1052+ pipe_finish_writer ,
1053+ ):
1054+ return
10421055
10431056 logger .info ("The server is fired up and ready to roll!" )
10441057
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ class ServerArgs:
4646 tokenizer_path : Optional [str ] = None
4747 tokenizer_mode : str = "auto"
4848 skip_tokenizer_init : bool = False
49+ skip_server_warmup : bool = False
4950 load_format : str = "auto"
5051 model_loader_extra_config : str = "{}"
5152 trust_remote_code : bool = False
@@ -597,6 +598,11 @@ def add_cli_args(parser: argparse.ArgumentParser):
597598 action = "store_true" ,
598599 help = "If set, skip init tokenizer and pass input_ids in generate request." ,
599600 )
601+ parser .add_argument (
602+ "--skip-server-warmup" ,
603+ action = "store_true" ,
604+ help = "If set, skip warmup." ,
605+ )
600606 parser .add_argument (
601607 "--load-format" ,
602608 type = str ,
You can’t perform that action at this time.
0 commit comments