Skip to content
6 changes: 5 additions & 1 deletion python/sglang/srt/model_executor/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,9 +1348,13 @@ def load_model(self):
f"mem usage={self.weight_load_mem_usage:.2f} GB."
)
if self.server_args.debug_tensor_dump_output_folder is not None:
dump_folder = self.server_args.debug_tensor_dump_output_folder
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: it may be worth documenting this behavior in the help string for self.server_args.debug_tensor_dump_output_folder

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good suggestion, thanks!
I've added a clarification to the help docstring regarding the behavior in Eagle mode.
In addition, I submitted a PR to update the documentation in sgl-project.github.io to make this behavior more explicit:
sgl-project/sgl-project.github.io#26

if self.spec_algorithm.is_eagle():
role = "draft" if self.is_draft_worker else "target"
dump_folder = os.path.join(dump_folder, role)
register_forward_hook_for_model(
self.model,
self.server_args.debug_tensor_dump_output_folder,
dump_folder,
self.server_args.debug_tensor_dump_layers,
self.tp_size,
self.tp_rank,
Expand Down
6 changes: 5 additions & 1 deletion python/sglang/srt/server_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -6096,7 +6096,11 @@ def add_cli_args(parser: argparse.ArgumentParser):
"--debug-tensor-dump-output-folder",
type=str,
default=ServerArgs.debug_tensor_dump_output_folder,
help="The output folder for dumping tensors.",
help=(
"The output folder for dumping tensors. "
"In Eagle mode, tensor outputs from draft and target models "
"are stored in separate subdirectories ('draft' and 'target')."
),
)
parser.add_argument(
"--debug-tensor-dump-layers",
Expand Down
Loading