diff --git a/python/sglang/srt/managers/eplb_manager.py b/python/sglang/srt/managers/eplb_manager.py index d9e5032647e..fd136df20f6 100644 --- a/python/sglang/srt/managers/eplb_manager.py +++ b/python/sglang/srt/managers/eplb_manager.py @@ -27,7 +27,8 @@ def __init__(self, model_runner: "ModelRunner"): <= self._server_args.expert_distribution_recorder_buffer_size ), "eplb_rebalance_num_iterations must be less than expert_distribution_recorder_buffer_size" - get_global_expert_distribution_recorder().start_record() + if not get_global_expert_distribution_recorder().recording: + get_global_expert_distribution_recorder().start_record() logger.info( f"[EPLBManager] system started, will rebalance per {self._server_args.eplb_rebalance_num_iterations} iterations." diff --git a/python/sglang/srt/managers/expert_distribution.py b/python/sglang/srt/managers/expert_distribution.py index a36ca58506d..59206117fae 100644 --- a/python/sglang/srt/managers/expert_distribution.py +++ b/python/sglang/srt/managers/expert_distribution.py @@ -91,6 +91,10 @@ def stop_record(self): def dump_record(self, output_mode: _OutputMode = "file"): self._on_not_implemented() + @property + def recording(self): + return False + def _on_not_implemented(self): raise Exception( "Please set ServerArgs.expert_distribution_recorder_mode to use ExpertDistributionRecorder." @@ -123,6 +127,12 @@ def __init__( for k in self._accumulator.get_single_pass_gatherer_keys() } + if server_args.enable_expert_distribution_metrics: + logger.info( + "ExpertDistributionRecorder auto start record since enable_expert_distribution_metrics" + ) + self.start_record() + def with_current_layer(self, layer_idx): return self._current_layer_idx.with_value(layer_idx) @@ -221,6 +231,10 @@ def dump_record(self, output_mode: _OutputMode = "file"): self._reset() return output + @property + def recording(self): + return self._recording + _global_expert_distribution_recorder: Optional[ExpertDistributionRecorder] = ( _ExpertDistributionRecorderNoop() diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 6b15d702a59..c286139eb67 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -1348,7 +1348,7 @@ def add_cli_args(parser: argparse.ArgumentParser): "--deepep-config", type=str, default=ServerArgs.deepep_config, - help="Tuned DeepEP config suitable for your own cluster.", + help="Tuned DeepEP config suitable for your own cluster. It can be either a string with JSON content or a file path.", ) parser.add_argument(