Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions python/sglang/bench_offline_throughput.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class BenchArgs:
extra_request_body: Optional[str] = None
apply_chat_template: bool = False
profile: bool = False
skip_warmup: bool = False
skip_bench_warmup: bool = False
do_not_exit: bool = False
prompt_suffix: str = ""

Expand Down Expand Up @@ -172,7 +172,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
"SGLANG_TORCH_PROFILER_DIR to enable profiler.",
)
parser.add_argument(
"--skip-warmup",
"--skip-bench-warmup",
action="store_true",
help="Skip the warmup batches.",
)
Expand Down Expand Up @@ -346,7 +346,7 @@ def throughput_test(
)

# Warm up
if not bench_args.skip_warmup:
if not bench_args.skip_bench_warmup:
logging.info("\nWarmup...")
throughput_test_once(
backend_name=bench_args.backend,
Expand Down
6 changes: 3 additions & 3 deletions python/sglang/bench_one_batch_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class BenchArgs:
input_len_step_percentage: float = 0.0
result_filename: str = "result.jsonl"
base_url: str = ""
skip_warmup: bool = False
skip_bench_warmup: bool = False
show_report: bool = False
profile: bool = False
profile_by_stage: bool = False
Expand Down Expand Up @@ -75,7 +75,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
"--result-filename", type=str, default=BenchArgs.result_filename
)
parser.add_argument("--base-url", type=str, default=BenchArgs.base_url)
parser.add_argument("--skip-warmup", action="store_true")
parser.add_argument("--skip-bench-warmup", action="store_true")
parser.add_argument("--show-report", action="store_true")
parser.add_argument("--profile", action="store_true")
parser.add_argument("--profile-by-stage", action="store_true")
Expand Down Expand Up @@ -258,7 +258,7 @@ def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):
tokenizer = get_tokenizer(tokenizer_path)

# warmup
if not bench_args.skip_warmup:
if not bench_args.skip_bench_warmup:
print("=" * 8 + " Warmup Begin " + "=" * 8)
run_one_case(
base_url,
Expand Down
23 changes: 18 additions & 5 deletions python/sglang/srt/entrypoints/http_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,11 +932,9 @@ def launch_server(
warmup_thread.join()


def _wait_and_warmup(
def _execute_server_warmup(
server_args: ServerArgs,
pipe_finish_writer: Optional[multiprocessing.connection.Connection],
image_token_text: str,
launch_callback: Optional[Callable[[], None]] = None,
):
headers = {}
url = server_args.url()
Expand All @@ -961,7 +959,7 @@ def _wait_and_warmup(
pipe_finish_writer.send(last_traceback)
logger.error(f"Initialization failed. warmup error: {last_traceback}")
kill_process_tree(os.getpid())
return
return success

model_info = res.json()

Expand Down Expand Up @@ -1035,10 +1033,25 @@ def _wait_and_warmup(
pipe_finish_writer.send(last_traceback)
logger.error(f"Initialization failed. warmup error: {last_traceback}")
kill_process_tree(os.getpid())
return
return False

# Debug print
# logger.info(f"warmup request returns: {res.json()=}")
return success


def _wait_and_warmup(
server_args: ServerArgs,
pipe_finish_writer: Optional[multiprocessing.connection.Connection],
image_token_text: str,
launch_callback: Optional[Callable[[], None]] = None,
):
if not server_args.skip_warmup:
if not _execute_server_warmup(
server_args,
pipe_finish_writer,
):
return

logger.info("The server is fired up and ready to roll!")

Expand Down
6 changes: 6 additions & 0 deletions python/sglang/srt/server_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class ServerArgs:
tokenizer_path: Optional[str] = None
tokenizer_mode: str = "auto"
skip_tokenizer_init: bool = False
skip_warmup: bool = False
load_format: str = "auto"
model_loader_extra_config: str = "{}"
trust_remote_code: bool = False
Expand Down Expand Up @@ -597,6 +598,11 @@ def add_cli_args(parser: argparse.ArgumentParser):
action="store_true",
help="If set, skip init tokenizer and pass input_ids in generate request.",
)
parser.add_argument(
"--skip-warmup",
action="store_true",
help="If set, skip warmup.",
)
parser.add_argument(
"--load-format",
type=str,
Expand Down
Loading