Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion python/sglang/srt/managers/eplb_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def __init__(self, model_runner: "ModelRunner"):
<= self._server_args.expert_distribution_recorder_buffer_size
), "eplb_rebalance_num_iterations must be less than expert_distribution_recorder_buffer_size"

get_global_expert_distribution_recorder().start_record()
if not get_global_expert_distribution_recorder().recording:
get_global_expert_distribution_recorder().start_record()

logger.info(
f"[EPLBManager] system started, will rebalance per {self._server_args.eplb_rebalance_num_iterations} iterations."
Expand Down
14 changes: 14 additions & 0 deletions python/sglang/srt/managers/expert_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ def stop_record(self):
def dump_record(self, output_mode: _OutputMode = "file"):
self._on_not_implemented()

# Read-only flag reporting whether this recorder is currently recording.
# This variant unconditionally returns False.
# NOTE(review): presumably this belongs to the no-op recorder class, whose
# header is not visible in this hunk — confirm before relying on it.
@property
def recording(self):
return False
Copy link
Collaborator

@ch-wan ch-wan May 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it raise an error here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Theoretically speaking, the dummy class really is in the `recording = False` state, so I am OK with either raising an error or returning `False`.


def _on_not_implemented(self):
raise Exception(
"Please set ServerArgs.expert_distribution_recorder_mode to use ExpertDistributionRecorder."
Expand Down Expand Up @@ -123,6 +127,12 @@ def __init__(
for k in self._accumulator.get_single_pass_gatherer_keys()
}

if server_args.enable_expert_distribution_metrics:
logger.info(
"ExpertDistributionRecorder auto start record since enable_expert_distribution_metrics"
)
self.start_record()

def with_current_layer(self, layer_idx):
return self._current_layer_idx.with_value(layer_idx)

Expand Down Expand Up @@ -221,6 +231,10 @@ def dump_record(self, output_mode: _OutputMode = "file"):
self._reset()
return output

# Read-only accessor exposing the recorder's private `_recording` attribute.
# NOTE(review): presumably `_recording` is toggled by start_record() /
# stop_record(), which are not shown in this hunk — confirm.
@property
def recording(self):
return self._recording


_global_expert_distribution_recorder: Optional[ExpertDistributionRecorder] = (
_ExpertDistributionRecorderNoop()
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/server_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,7 +1348,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
"--deepep-config",
type=str,
default=ServerArgs.deepep_config,
help="Tuned DeepEP config suitable for your own cluster.",
help="Tuned DeepEP config suitable for your own cluster. It can be either a string with JSON content or a file path.",
)

parser.add_argument(
Expand Down
Loading