Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions python/sglang/srt/entrypoints/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,7 @@ def _launch_subprocesses(
configure_logger(server_args)
_set_envs_and_config(server_args)
server_args.check_server_args()
_set_gc(server_args)

# Allocate ports for inter-process communications
if port_args is None:
Expand Down Expand Up @@ -1179,6 +1180,13 @@ def launch_phase_sigquit_handler(signum, frame):
mp.set_start_method("spawn", force=True)


def _set_gc(server_args: ServerArgs):
if gc_threshold := server_args.gc_threshold:
import gc
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

According to PEP 8, imports should be at the top of the file. Please move `import gc` to the top of `python/sglang/srt/entrypoints/engine.py` for better code style and consistency.


gc.set_threshold(*gc_threshold)


def _wait_for_scheduler_ready(
scheduler_pipe_readers: List,
scheduler_procs: List,
Expand Down
13 changes: 13 additions & 0 deletions python/sglang/srt/server_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,7 @@ class ServerArgs:
enable_deterministic_inference: bool = False
rl_on_policy_target: Optional[str] = None
enable_attn_tp_input_scattered: bool = False
gc_threshold: Optional[List[int]] = None
# Context parallelism used in the long sequence prefill phase of DeepSeek v3.2
enable_nsa_prefill_context_parallel: bool = False
nsa_prefill_cp_mode: str = "round-robin-split"
Expand Down Expand Up @@ -5600,6 +5601,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
action="store_true",
help="Enable fused moe triton and sum all reduce.",
)
parser.add_argument(
"--gc-threshold",
type=int,
nargs="+",
help="Set the garbage collection thresholds (the collection frequency). Accepts 1 to 3 integers.",
)

# Dynamic batch tokenizer
parser.add_argument(
Expand Down Expand Up @@ -6116,6 +6123,12 @@ def check_server_args(self):
"When enabling two batch overlap, moe_a2a_backend cannot be 'none'."
)

if self.gc_threshold:
if not (1 <= len(self.gc_threshold) <= 3):
raise ValueError(
"When setting gc_threshold, it must contain 1 to 3 integers."
)

def check_lora_server_args(self):
assert self.max_loras_per_batch > 0, "max_loras_per_batch must be positive"

Expand Down
Loading