From ed06de57a39f9e33fda14d9eb2afc933bd438ab3 Mon Sep 17 00:00:00 2001 From: Muqi Li Date: Thu, 26 Mar 2026 21:03:18 +0800 Subject: [PATCH 1/2] Add gc_threshold arg Co-authored-by: zyc2 yvnenvy@gmail.com --- python/sglang/srt/entrypoints/engine.py | 8 ++++++++ python/sglang/srt/server_args.py | 13 +++++++++++++ 2 files changed, 21 insertions(+) diff --git a/python/sglang/srt/entrypoints/engine.py b/python/sglang/srt/entrypoints/engine.py index 35ebbf1bcc71..bf4b24fa05a2 100644 --- a/python/sglang/srt/entrypoints/engine.py +++ b/python/sglang/srt/entrypoints/engine.py @@ -616,6 +616,7 @@ def _launch_subprocesses( configure_logger(server_args) _set_envs_and_config(server_args) server_args.check_server_args() + _set_gc(server_args) # Allocate ports for inter-process communications if port_args is None: @@ -1179,6 +1180,13 @@ def launch_phase_sigquit_handler(signum, frame): mp.set_start_method("spawn", force=True) +def _set_gc(server_args: ServerArgs): + if gc_threshold := server_args.gc_threshold: + import gc + + gc.set_threshold(*gc_threshold) + + def _wait_for_scheduler_ready( scheduler_pipe_readers: List, scheduler_procs: List, diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index c770f3d161f4..738294c3be26 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -661,6 +661,7 @@ class ServerArgs: enable_deterministic_inference: bool = False rl_on_policy_target: Optional[str] = None enable_attn_tp_input_scattered: bool = False + gc_threshold: Optional[List[int]] = None # Context parallelism used in the long sequence prefill phase of DeepSeek v3.2 enable_nsa_prefill_context_parallel: bool = False nsa_prefill_cp_mode: str = "round-robin-split" @@ -5600,6 +5601,12 @@ def add_cli_args(parser: argparse.ArgumentParser): action="store_true", help="Enable fused moe triton and sum all reduce.", ) + parser.add_argument( + "--gc-threshold", + type=int, + nargs="+", + help="Set the garbage collection thresholds(the collection frequency)", + ) # Dynamic batch tokenizer parser.add_argument( @@ -6116,6 +6123,12 @@ def check_server_args(self): "When enabling two batch overlap, moe_a2a_backend cannot be 'none'." ) + if self.gc_threshold: + if not (1 <= len(self.gc_threshold) <= 3): + raise ValueError( + "When setting gc_threshold, it must contain 1 to 3 integers." + ) + def check_lora_server_args(self): assert self.max_loras_per_batch > 0, "max_loras_per_batch must be positive" From 72f0c3c7c949a9445dbb8429ee45a5feb762cc67 Mon Sep 17 00:00:00 2001 From: Muqi Li Date: Thu, 26 Mar 2026 22:12:11 +0800 Subject: [PATCH 2/2] Update python/sglang/srt/server_args.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- python/sglang/srt/server_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 738294c3be26..d9210c65647a 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -5605,7 +5605,7 @@ def add_cli_args(parser: argparse.ArgumentParser): "--gc-threshold", type=int, nargs="+", - help="Set the garbage collection thresholds(the collection frequency)", + help="Set the garbage collection thresholds (the collection frequency). Accepts 1 to 3 integers.", ) # Dynamic batch tokenizer