Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions python/sglang/srt/entrypoints/http_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,11 +932,9 @@ def launch_server(
warmup_thread.join()


def _wait_and_warmup(
def _execute_server_warmup(
server_args: ServerArgs,
pipe_finish_writer: Optional[multiprocessing.connection.Connection],
image_token_text: str,
launch_callback: Optional[Callable[[], None]] = None,
):
headers = {}
url = server_args.url()
Expand All @@ -961,7 +959,7 @@ def _wait_and_warmup(
pipe_finish_writer.send(last_traceback)
logger.error(f"Initialization failed. warmup error: {last_traceback}")
kill_process_tree(os.getpid())
return
return success

model_info = res.json()

Expand Down Expand Up @@ -1035,10 +1033,25 @@ def _wait_and_warmup(
pipe_finish_writer.send(last_traceback)
logger.error(f"Initialization failed. warmup error: {last_traceback}")
kill_process_tree(os.getpid())
return
return False

# Debug print
# logger.info(f"warmup request returns: {res.json()=}")
return success


def _wait_and_warmup(
server_args: ServerArgs,
pipe_finish_writer: Optional[multiprocessing.connection.Connection],
image_token_text: str,
launch_callback: Optional[Callable[[], None]] = None,
):
if not server_args.skip_server_warmup:
if not _execute_server_warmup(
server_args,
pipe_finish_writer,
):
return

logger.info("The server is fired up and ready to roll!")

Expand Down
6 changes: 6 additions & 0 deletions python/sglang/srt/server_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class ServerArgs:
tokenizer_path: Optional[str] = None
tokenizer_mode: str = "auto"
skip_tokenizer_init: bool = False
skip_server_warmup: bool = False
load_format: str = "auto"
model_loader_extra_config: str = "{}"
trust_remote_code: bool = False
Expand Down Expand Up @@ -597,6 +598,11 @@ def add_cli_args(parser: argparse.ArgumentParser):
action="store_true",
help="If set, skip init tokenizer and pass input_ids in generate request.",
)
parser.add_argument(
"--skip-server-warmup",
action="store_true",
help="If set, skip warmup.",
)
parser.add_argument(
"--load-format",
type=str,
Expand Down
Loading