Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion tests/kernels/moe/test_grouped_topk.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
import pytest
import torch

from vllm.config import (
CompilationConfig,
VllmConfig,
get_cached_compilation_config,
set_current_vllm_config,
)
from vllm.model_executor.layers.fused_moe.fused_moe import (
GroupedTopk,
fused_grouped_topk,
Expand Down Expand Up @@ -41,14 +47,19 @@ def test_grouped_topk(
routed_scaling_factor: float,
dtype: torch.dtype,
):
vllm_config = VllmConfig(
compilation_config=CompilationConfig(custom_ops=["all", "+grouped_topk"])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we don't need to add "all", since we are only testing "grouped_topk" — "all" will enable every custom op. We should also test with only "+grouped_topk".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch :)

)
get_cached_compilation_config.cache_clear()

current_platform.seed_everything(0)
hidden_states = torch.randn((n_token, n_hidden), dtype=dtype, device="cuda")
gating_output = torch.randn((n_token, n_expert), dtype=dtype, device="cuda")
e_score_correction_bias = torch.randn(
(n_expert,), dtype=torch.float32, device="cuda"
)

with monkeypatch.context() as m:
with set_current_vllm_config(vllm_config), monkeypatch.context() as m:
m.setenv("VLLM_USE_FUSED_MOE_GROUPED_TOPK", "0")
grouped_topk = GroupedTopk(
topk=topk,
Expand All @@ -58,6 +69,7 @@ def test_grouped_topk(
scoring_func=scoring_func,
routed_scaling_factor=routed_scaling_factor,
)
assert grouped_topk._forward_method.__name__ == "forward_cuda"
baseline_topk_weights, baseline_topk_ids = grouped_topk(
hidden_states=hidden_states,
gating_output=gating_output,
Expand Down