Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
265 commits
Select commit Hold shift + click to select a range
95d1b38
more
fzyzcjy Apr 1, 2025
d801823
more
fzyzcjy Apr 1, 2025
dfa311d
more
fzyzcjy Apr 1, 2025
21dcb36
more
fzyzcjy Apr 1, 2025
f3484c8
more
fzyzcjy Apr 1, 2025
cfe4ec2
more
fzyzcjy Apr 1, 2025
acdadc7
more
fzyzcjy Apr 1, 2025
1357690
more
fzyzcjy Apr 1, 2025
10a9b2a
more
fzyzcjy Apr 1, 2025
eb4ad45
more
fzyzcjy Apr 1, 2025
12b75ff
more
fzyzcjy Apr 1, 2025
61fb364
more
fzyzcjy Apr 1, 2025
0c2263c
more
fzyzcjy Apr 1, 2025
6a0f338
more
fzyzcjy Apr 1, 2025
86861d6
more
fzyzcjy Apr 1, 2025
c3d806a
more
fzyzcjy Apr 1, 2025
99a76c9
more
fzyzcjy Apr 1, 2025
8d6f2cb
more
fzyzcjy Apr 1, 2025
47c010c
more
fzyzcjy Apr 1, 2025
4a064a2
more
fzyzcjy Apr 1, 2025
634f28f
more
fzyzcjy Apr 1, 2025
a495780
more
fzyzcjy Apr 1, 2025
03ae2ed
more
fzyzcjy Apr 1, 2025
7ece402
more
fzyzcjy Apr 1, 2025
057d740
more
fzyzcjy Apr 1, 2025
e97bd89
more
fzyzcjy Apr 1, 2025
c0d97d6
more
fzyzcjy Apr 1, 2025
bd03337
more
fzyzcjy Apr 1, 2025
de8f68e
more
fzyzcjy Apr 1, 2025
a81db0d
more
fzyzcjy Apr 1, 2025
2b7cc46
more
fzyzcjy Apr 1, 2025
cbeae3a
more
fzyzcjy Apr 1, 2025
ab7eeef
more
fzyzcjy Apr 1, 2025
3b7b887
more
fzyzcjy Apr 1, 2025
56f5e09
more
fzyzcjy Apr 1, 2025
d1716ae
more
fzyzcjy Apr 1, 2025
942b8e2
more
fzyzcjy Apr 1, 2025
1658696
more
fzyzcjy Apr 1, 2025
6b595d3
more
fzyzcjy Apr 1, 2025
b84e2ca
more
fzyzcjy Apr 1, 2025
29b8f4a
more
fzyzcjy Apr 1, 2025
7a5f544
more
fzyzcjy Apr 1, 2025
de3d02a
more
fzyzcjy Apr 1, 2025
f806ac8
more
fzyzcjy Apr 1, 2025
b18639c
more
fzyzcjy Apr 1, 2025
c712bbd
more
fzyzcjy Apr 1, 2025
05305b2
more
fzyzcjy Apr 1, 2025
14fdd55
more
fzyzcjy Apr 1, 2025
b392cb9
more
fzyzcjy Apr 1, 2025
594b751
more
fzyzcjy Apr 1, 2025
f09eb0b
more
fzyzcjy Apr 1, 2025
2c73330
more
fzyzcjy Apr 1, 2025
a6cc800
more
fzyzcjy Apr 1, 2025
5dfc75b
more
fzyzcjy Apr 1, 2025
e13b536
more
fzyzcjy Apr 1, 2025
a47caa5
more
fzyzcjy Apr 1, 2025
4c662b6
more
fzyzcjy Apr 1, 2025
4387c3f
more
fzyzcjy Apr 1, 2025
e86f54f
more
fzyzcjy Apr 1, 2025
4d74806
more
fzyzcjy Apr 1, 2025
925c0c4
more
fzyzcjy Apr 1, 2025
ef7b83e
more
fzyzcjy Apr 1, 2025
d6b7aa9
more
fzyzcjy Apr 1, 2025
db7222e
more
fzyzcjy Apr 1, 2025
94e2ff2
more
fzyzcjy Apr 1, 2025
a92833f
more
fzyzcjy Apr 1, 2025
e3f4ac4
more
fzyzcjy Apr 1, 2025
34fc042
more
fzyzcjy Apr 1, 2025
5f8ad32
more
fzyzcjy Apr 1, 2025
1e48c81
more
fzyzcjy Apr 1, 2025
097c654
more
fzyzcjy Apr 1, 2025
fae53ff
more
fzyzcjy Apr 1, 2025
b84adaf
more
fzyzcjy Apr 1, 2025
bafc37b
more
fzyzcjy Apr 1, 2025
0ddaadb
more
fzyzcjy Apr 1, 2025
9e845f8
more
fzyzcjy Apr 1, 2025
9f7d540
more
fzyzcjy Apr 1, 2025
32b9dc2
more
fzyzcjy Apr 1, 2025
b6ea1d7
more
fzyzcjy Apr 1, 2025
32ac6ef
more
fzyzcjy Apr 1, 2025
c86c997
more
fzyzcjy Apr 1, 2025
a95be45
more
fzyzcjy Apr 1, 2025
d7f7ba2
more
fzyzcjy Apr 1, 2025
a449b80
more
fzyzcjy Apr 1, 2025
137e8cd
more
fzyzcjy Apr 1, 2025
3b092be
more
fzyzcjy Apr 1, 2025
a62ab02
more
fzyzcjy Apr 1, 2025
6660cd5
more
fzyzcjy Apr 1, 2025
f8ae307
more
fzyzcjy Apr 1, 2025
2304780
more
fzyzcjy Apr 1, 2025
7e4bd01
more
fzyzcjy Apr 1, 2025
df8524f
more
fzyzcjy Apr 1, 2025
189de3f
more
fzyzcjy Apr 1, 2025
e867ace
more
fzyzcjy Apr 1, 2025
9cf2f0b
more
fzyzcjy Apr 1, 2025
0c03f39
more
fzyzcjy Apr 1, 2025
b7864ad
more
fzyzcjy Apr 1, 2025
46e673b
more
fzyzcjy Apr 1, 2025
7af777e
more
fzyzcjy Apr 1, 2025
0363d39
more
fzyzcjy Apr 1, 2025
dde1118
more
fzyzcjy Apr 1, 2025
bb5da82
more
fzyzcjy Apr 1, 2025
b3c14d7
more
fzyzcjy Apr 1, 2025
90f723d
more
fzyzcjy Apr 1, 2025
00a93e3
more
fzyzcjy Apr 1, 2025
68f0b65
more
fzyzcjy Apr 1, 2025
c9a8ac2
more
fzyzcjy Apr 1, 2025
0a22597
more
fzyzcjy Apr 1, 2025
4408c78
more
fzyzcjy Apr 1, 2025
ffda545
more
fzyzcjy Apr 1, 2025
61d5198
more
fzyzcjy Apr 1, 2025
7525f70
more
fzyzcjy Apr 1, 2025
739f0b3
more
fzyzcjy Apr 1, 2025
05ed6b9
more
fzyzcjy Apr 1, 2025
3a87c40
more
fzyzcjy Apr 1, 2025
bbd55ce
more
fzyzcjy Apr 1, 2025
9f67203
more
fzyzcjy Apr 1, 2025
329927f
more
fzyzcjy Apr 1, 2025
ab5e799
more
fzyzcjy Apr 1, 2025
1d24fc2
more
fzyzcjy Apr 1, 2025
d3f6e7b
more
fzyzcjy Apr 1, 2025
b14482b
more
fzyzcjy Apr 1, 2025
7d8ecfa
more
fzyzcjy Apr 1, 2025
e3ca65f
more
fzyzcjy Apr 1, 2025
3d5610a
more
fzyzcjy Apr 1, 2025
9f2d044
more
fzyzcjy Apr 1, 2025
6f16852
more
fzyzcjy Apr 1, 2025
1306e2d
more
fzyzcjy Apr 1, 2025
fe482a1
more
fzyzcjy Apr 1, 2025
4f869a2
more
fzyzcjy Apr 1, 2025
5a17b17
more
fzyzcjy Apr 1, 2025
bb79fed
more
fzyzcjy Apr 1, 2025
486ec0f
more
fzyzcjy Apr 1, 2025
5c3a476
fmt
fzyzcjy Apr 1, 2025
cabeddb
bump ci
fzyzcjy Apr 1, 2025
7821819
fix ci
fzyzcjy Apr 1, 2025
ec98f75
fix ci
fzyzcjy Apr 1, 2025
c8bfea5
Merge branch 'main' into feat/expert_distribution_recorder
fzyzcjy Apr 1, 2025
1e5dfbc
fmt
fzyzcjy Apr 1, 2025
9fc8f9f
Merge branch 'main-upstream' into feat/expert_distribution_recorder
fzyzcjy Apr 10, 2025
72e47a1
more
fzyzcjy Apr 10, 2025
1dbde71
more
fzyzcjy Apr 10, 2025
e07a323
more
fzyzcjy Apr 10, 2025
96d9183
more
fzyzcjy Apr 10, 2025
3c6d511
more
fzyzcjy Apr 10, 2025
5036994
more
fzyzcjy Apr 10, 2025
15cdeea
more
fzyzcjy Apr 10, 2025
8fe42ea
more
fzyzcjy Apr 10, 2025
0d17f33
more
fzyzcjy Apr 10, 2025
2e0ca4f
more
fzyzcjy Apr 10, 2025
fe90008
more
fzyzcjy Apr 10, 2025
2d533bb
more
fzyzcjy Apr 10, 2025
b9f368f
more
fzyzcjy Apr 10, 2025
12a0270
fmt
fzyzcjy Apr 10, 2025
dc09c51
more
fzyzcjy Apr 10, 2025
46d4e3e
more
fzyzcjy Apr 10, 2025
0e579a3
more
fzyzcjy Apr 10, 2025
2fd3c19
more
fzyzcjy Apr 10, 2025
d053592
fmt
fzyzcjy Apr 10, 2025
3f8befd
Merge branch 'feat/model_config_from_server_args' into feat/expert_di…
fzyzcjy Apr 10, 2025
8fc36d8
more
fzyzcjy Apr 10, 2025
c4ebfae
more
fzyzcjy Apr 10, 2025
a787fbd
more
fzyzcjy Apr 10, 2025
5e82540
more
fzyzcjy Apr 10, 2025
e4373ee
more
fzyzcjy Apr 10, 2025
0adb6df
more
fzyzcjy Apr 10, 2025
5cb974a
more
fzyzcjy Apr 10, 2025
4571aa1
more
fzyzcjy Apr 10, 2025
f61cfc8
more
fzyzcjy Apr 10, 2025
9ec9519
more
fzyzcjy Apr 10, 2025
114c138
more
fzyzcjy Apr 10, 2025
b14f151
more
fzyzcjy Apr 10, 2025
e267065
more
fzyzcjy Apr 10, 2025
4033e92
more
fzyzcjy Apr 10, 2025
384cfd4
more
fzyzcjy Apr 10, 2025
8114618
more
fzyzcjy Apr 10, 2025
fc11b7a
fmt
fzyzcjy Apr 10, 2025
df0611f
more
fzyzcjy Apr 10, 2025
204351d
more
fzyzcjy Apr 10, 2025
78af10c
more
fzyzcjy Apr 10, 2025
5df2e41
more
fzyzcjy Apr 10, 2025
c302cf3
more
fzyzcjy Apr 10, 2025
bc4ace9
more
fzyzcjy Apr 10, 2025
a9f5f54
fmt
fzyzcjy Apr 10, 2025
13d8419
Merge branch 'feat/expert_distribution_recorder' into feat/phy_vs_log…
fzyzcjy Apr 10, 2025
d542934
more
fzyzcjy Apr 10, 2025
ad5a7d5
more
fzyzcjy Apr 10, 2025
fb9b8b1
Revert "more"
fzyzcjy Apr 10, 2025
f0ec4fd
fmt
fzyzcjy Apr 10, 2025
e33bbf2
more
fzyzcjy Apr 10, 2025
8ee30d3
more
fzyzcjy Apr 10, 2025
af9291f
more
fzyzcjy Apr 10, 2025
4e6803c
more
fzyzcjy Apr 10, 2025
c8db6f8
more
fzyzcjy Apr 10, 2025
b44d974
more
fzyzcjy Apr 10, 2025
8dc939f
more
fzyzcjy Apr 10, 2025
a12276c
more
fzyzcjy Apr 10, 2025
3922251
more
fzyzcjy Apr 10, 2025
4a82f7a
more
fzyzcjy Apr 10, 2025
fba78f3
Revert "fmt"
fzyzcjy Apr 10, 2025
b46ebce
Revert "Revert "more""
fzyzcjy Apr 10, 2025
be9c6b3
Revert "more"
fzyzcjy Apr 10, 2025
79a6b78
Revert "more"
fzyzcjy Apr 10, 2025
609b9f2
fmt
fzyzcjy Apr 10, 2025
6020a10
Merge branch 'feat/expert_distribution_recorder' into feat/phy_vs_log…
fzyzcjy Apr 10, 2025
1cbfdf7
more
fzyzcjy Apr 10, 2025
56b2b80
more
fzyzcjy Apr 10, 2025
77fc845
more
fzyzcjy Apr 10, 2025
f7c96de
more
fzyzcjy Apr 10, 2025
66f641a
more
fzyzcjy Apr 10, 2025
4c4ee66
more
fzyzcjy Apr 10, 2025
59d74fc
more
fzyzcjy Apr 10, 2025
b642d61
more
fzyzcjy Apr 10, 2025
8952dae
more
fzyzcjy Apr 10, 2025
8e8dd6c
fmt
fzyzcjy Apr 10, 2025
3dfd359
more
fzyzcjy Apr 10, 2025
740b8e7
more
fzyzcjy Apr 10, 2025
5a6305e
more
fzyzcjy Apr 10, 2025
2b2da31
more
fzyzcjy Apr 10, 2025
1ea467b
more
fzyzcjy Apr 10, 2025
a90120a
more
fzyzcjy Apr 10, 2025
a4d47fa
more
fzyzcjy Apr 10, 2025
6948dab
fmt
fzyzcjy Apr 10, 2025
351bf69
more
fzyzcjy Apr 10, 2025
dfca693
more
fzyzcjy Apr 10, 2025
e5e955e
more
fzyzcjy Apr 10, 2025
cae8461
more
fzyzcjy Apr 10, 2025
d38fda2
more
fzyzcjy Apr 10, 2025
1f04f71
more
fzyzcjy Apr 10, 2025
ec13124
more
fzyzcjy Apr 10, 2025
a9cb3fa
more
fzyzcjy Apr 10, 2025
66a6702
more
fzyzcjy Apr 10, 2025
fa0ee76
rm
fzyzcjy Apr 10, 2025
52e820c
rm
fzyzcjy Apr 10, 2025
892630f
Merge branch 'feat/expert_distribution_recorder' into feat/phy_vs_log…
fzyzcjy Apr 10, 2025
5767e79
cherry pick
fzyzcjy Apr 10, 2025
f7be532
rm
fzyzcjy Apr 10, 2025
cf76067
more
fzyzcjy Apr 10, 2025
ee32245
more
fzyzcjy Apr 10, 2025
7ad2a48
more
fzyzcjy Apr 10, 2025
d845b4e
more
fzyzcjy Apr 10, 2025
b7a52d5
more
fzyzcjy Apr 10, 2025
da995b1
more
fzyzcjy Apr 10, 2025
abbb2af
more
fzyzcjy Apr 10, 2025
eedfcb1
more
fzyzcjy Apr 10, 2025
3852d05
more
fzyzcjy Apr 10, 2025
cd4caee
more
fzyzcjy Apr 10, 2025
031ce90
more
fzyzcjy Apr 10, 2025
4623be4
more
fzyzcjy Apr 10, 2025
c19f4d8
fmt
fzyzcjy Apr 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 1 addition & 13 deletions docs/backend/native_api.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -408,19 +408,7 @@
"print_highlight(response)\n",
"\n",
"response = requests.post(f\"http://localhost:{port}/dump_expert_distribution_record\")\n",
"print_highlight(response)\n",
"\n",
"import glob\n",
"\n",
"output_file = glob.glob(\"expert_distribution_*.csv\")[0]\n",
"with open(output_file, \"r\") as f:\n",
" print_highlight(\"\\n| Layer ID | Expert ID | Count |\")\n",
" print_highlight(\"|----------|-----------|--------|\")\n",
" next(f)\n",
" for i, line in enumerate(f):\n",
" if i < 9:\n",
" layer_id, expert_id, count = line.strip().split(\",\")\n",
" print_highlight(f\"| {layer_id:8} | {expert_id:9} | {count:6} |\")"
"print_highlight(response)"
]
},
{
Expand Down
11 changes: 1 addition & 10 deletions python/sglang/bench_one_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,16 +129,7 @@ def load_model(server_args, port_args, tp_rank):
suppress_other_loggers()
rank_print = print if tp_rank == 0 else lambda *args, **kwargs: None

model_config = ModelConfig(
server_args.model_path,
trust_remote_code=server_args.trust_remote_code,
revision=server_args.revision,
context_length=server_args.context_length,
model_override_args=server_args.json_model_override_args,
is_embedding=server_args.is_embedding,
dtype=server_args.dtype,
quantization=server_args.quantization,
)
model_config = ModelConfig.from_server_args(server_args)
model_runner = ModelRunner(
model_config=model_config,
mem_fraction_static=server_args.mem_fraction_static,
Expand Down
14 changes: 14 additions & 0 deletions python/sglang/srt/configs/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from sglang.srt.hf_transformers_utils import get_config, get_context_length
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import get_bool_env_var, is_hip

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -171,6 +172,19 @@ def __init__(
self.hf_eos_token_id = self.get_hf_eos_token_id()
self.image_token_id = getattr(self.hf_config, "image_token_id", None)

@staticmethod
def from_server_args(server_args: ServerArgs, model_path: str = None):
return ModelConfig(
model_path=model_path or server_args.model_path,
trust_remote_code=server_args.trust_remote_code,
revision=server_args.revision,
context_length=server_args.context_length,
model_override_args=server_args.json_model_override_args,
is_embedding=server_args.is_embedding,
dtype=server_args.dtype,
quantization=server_args.quantization,
)

# adapted from https://github.com/vllm-project/vllm/blob/main/vllm/config.py#L289
def get_total_num_kv_heads(self) -> int:
"""Returns the total number of KV heads."""
Expand Down
22 changes: 19 additions & 3 deletions python/sglang/srt/entrypoints/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
import zmq.asyncio
from PIL.Image import Image

from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.managers.expert_location import ExpertLocationMetadata

# Fix a bug of Python threading
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)

Expand Down Expand Up @@ -495,6 +498,9 @@ def _launch_subprocesses(
server_args.model_path, server_args.tokenizer_path
)

model_config = ModelConfig.from_server_args(server_args)
expert_location_metadata = ExpertLocationMetadata.from_model_config(model_config)

scheduler_procs = []
if server_args.dp_size == 1:
# Launch tensor parallel scheduler processes
Expand All @@ -516,7 +522,15 @@ def _launch_subprocesses(
)
proc = mp.Process(
target=run_scheduler_process,
args=(server_args, port_args, gpu_id, tp_rank, None, writer),
args=(
server_args,
port_args,
expert_location_metadata,
gpu_id,
tp_rank,
None,
writer,
),
)
with memory_saver_adapter.configure_subprocess():
proc.start()
Expand All @@ -528,7 +542,7 @@ def _launch_subprocesses(
scheduler_pipe_readers = [reader]
proc = mp.Process(
target=run_data_parallel_controller_process,
args=(server_args, port_args, writer),
args=(server_args, port_args, expert_location_metadata, writer),
)
proc.start()
scheduler_procs.append(proc)
Expand Down Expand Up @@ -565,7 +579,9 @@ def _launch_subprocesses(
detoken_proc.start()

# Launch tokenizer process
tokenizer_manager = TokenizerManager(server_args, port_args)
tokenizer_manager = TokenizerManager(
server_args, port_args, expert_location_metadata
)
if server_args.chat_template:
load_chat_template_for_openai_api(
tokenizer_manager, server_args.chat_template, server_args.model_path
Expand Down
7 changes: 2 additions & 5 deletions python/sglang/srt/entrypoints/http_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,11 +366,8 @@ async def stop_expert_distribution_record_async():
@app.api_route("/dump_expert_distribution_record", methods=["GET", "POST"])
async def dump_expert_distribution_record_async():
"""Dump expert distribution record."""
await _global_state.tokenizer_manager.dump_expert_distribution_record()
return Response(
content="Dump expert distribution record.\n",
status_code=200,
)
content = await _global_state.tokenizer_manager.dump_expert_distribution_record()
return ORJSONResponse(content, status_code=200)


@app.post("/update_weights_from_disk")
Expand Down
31 changes: 31 additions & 0 deletions python/sglang/srt/layers/moe/ep_moe/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import torch

from sglang.srt.managers.schedule_batch import get_global_expert_location_metadata

try:
from deep_gemm import (
get_col_major_tma_aligned_tensor,
Expand Down Expand Up @@ -131,6 +133,7 @@ def __init__(
top_k: int,
hidden_size: int,
intermediate_size: int,
layer_id: int,
params_dtype: Optional[torch.dtype] = None,
renormalize: bool = True,
use_grouped_topk: bool = False,
Expand All @@ -153,6 +156,7 @@ def __init__(
)
self.tp_rank = get_tensor_model_parallel_rank()

self.layer_id = layer_id
self.num_experts = num_experts
assert self.num_experts % self.tp_size == 0
self.num_experts_per_partition = self.num_experts // self.tp_size
Expand Down Expand Up @@ -221,6 +225,9 @@ def forward(self, hidden_states: torch.Tensor, router_logits: torch.Tensor):
num_expert_group=self.num_expert_group,
correction_bias=self.correction_bias,
custom_routing_function=self.custom_routing_function,
expert_logical_to_rank_dispatch_physical_map=get_global_expert_location_metadata().logical_to_rank_dispatch_physical_map[
self.tp_rank, self.layer_id, :
],
)

reorder_topk_ids, src2dst, seg_indptr = run_moe_ep_preproess(
Expand Down Expand Up @@ -409,6 +416,28 @@ def weight_loader(
weight_name: str,
shard_id: str,
expert_id: int,
) -> None:
physical_expert_ids = (
get_global_expert_location_metadata().logical_to_all_physical(
self.layer_id, expert_id
)
)
for physical_expert_id in physical_expert_ids:
self._weight_loader_physical(
param=param,
loaded_weight=loaded_weight,
weight_name=weight_name,
shard_id=shard_id,
expert_id=physical_expert_id,
)

def _weight_loader_physical(
self,
param: torch.nn.Parameter,
loaded_weight: torch.Tensor,
weight_name: str,
shard_id: str,
expert_id: int,
) -> None:
if expert_id < self.start_expert_id or expert_id > self.end_expert_id:
return
Expand Down Expand Up @@ -802,6 +831,7 @@ def __init__(
top_k: int,
hidden_size: int,
intermediate_size: int,
layer_id: int,
params_dtype: Optional[torch.dtype] = None,
renormalize: bool = True,
use_grouped_topk: bool = False,
Expand All @@ -820,6 +850,7 @@ def __init__(
top_k,
hidden_size,
intermediate_size,
layer_id,
params_dtype,
renormalize,
use_grouped_topk,
Expand Down
7 changes: 6 additions & 1 deletion python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from sglang.srt.managers.expert_distribution import expert_distribution_recorder
from sglang.srt.utils import DeepEPMode

try:
Expand Down Expand Up @@ -248,7 +249,7 @@ def _dispatch_core(
recv_x,
recv_topk_idx,
recv_topk_weights,
_, # num_recv_tokens_per_expert_list
num_recv_tokens_per_expert_list,
self.handle,
event,
) = buffer.dispatch(
Expand All @@ -264,6 +265,10 @@ def _dispatch_core(
allocate_on_comm_stream=(previous_event is not None) and self.async_finish,
)

expert_distribution_recorder.on_deepep_dispatch_normal(
num_recv_tokens_per_expert_list
)

return (
recv_x,
recv_topk_idx,
Expand Down
1 change: 1 addition & 0 deletions python/sglang/srt/layers/moe/fused_moe_triton/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ def __init__(
top_k: int,
hidden_size: int,
intermediate_size: int,
layer_id: Optional[int] = None,
params_dtype: Optional[torch.dtype] = None,
reduce_results: bool = False,
renormalize: bool = True,
Expand Down
19 changes: 12 additions & 7 deletions python/sglang/srt/layers/moe/topk.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,23 @@
# limitations under the License.
# ==============================================================================

import os
from typing import Callable, Optional

import torch
import torch.nn.functional as F

from sglang.srt.managers.expert_distribution import ExpertDistributionRecorder
from sglang.srt.managers.schedule_batch import global_server_args_dict
from sglang.srt.managers.expert_distribution import expert_distribution_recorder
from sglang.srt.managers.schedule_batch import (
get_global_expert_location_metadata,
global_expert_location_metadata,
global_server_args_dict,
)
from sglang.srt.utils import get_compiler_backend, is_cuda, is_hip

_is_cuda = is_cuda()
_is_hip = is_hip()


expert_distribution_recorder = ExpertDistributionRecorder()


def fused_topk_native(
hidden_states: torch.Tensor,
gating_output: torch.Tensor,
Expand Down Expand Up @@ -250,6 +250,7 @@ def select_experts(
custom_routing_function: Optional[Callable] = None,
correction_bias: Optional[torch.Tensor] = None,
torch_native: bool = False,
expert_logical_to_rank_dispatch_physical_map: Optional[torch.Tensor] = None,
):
n_share_experts_fusion = 0
if global_server_args_dict["n_share_experts_fusion"] is not None:
Expand Down Expand Up @@ -301,6 +302,10 @@ def select_experts(
renormalize=renormalize,
)

expert_distribution_recorder.record_new_token(topk_ids)
if expert_logical_to_rank_dispatch_physical_map is not None:
# TODO this is inefficient, and I will fuse into existing kernels
topk_ids = expert_logical_to_rank_dispatch_physical_map[topk_ids]

expert_distribution_recorder.on_select_experts(topk_ids=topk_ids)

return topk_weights, topk_ids
Loading
Loading