Merged
Changes from all commits
40 commits
5f67b7d  [Kernels][MoE] Add FusedMoERouter object (bnellnm, Dec 11, 2025)
39e3b82  fix (bnellnm, Jan 6, 2026)
ca28a10  fix merge (bnellnm, Jan 8, 2026)
a60ef40  fix quark (bnellnm, Jan 8, 2026)
a509b0e  [Misc][Refactor] Separate router from FusedMoE class (bnellnm, Dec 12, 2025)
cc6f57b  claude generated router subclasses (bnellnm, Dec 12, 2025)
564d4dc  claude refactoring (bnellnm, Dec 12, 2025)
8f39b7a  more refactoring (bnellnm, Dec 13, 2025)
b989dd0  fix typo (bnellnm, Dec 13, 2025)
9b37b1a  remove unused file (bnellnm, Dec 13, 2025)
49cab40  fix (bnellnm, Jan 6, 2026)
9ad6f86  cleanups (bnellnm, Jan 8, 2026)
cc432b8  cleanups (bnellnm, Jan 8, 2026)
8d575b5  cleanups (bnellnm, Jan 8, 2026)
999cfaf  tests wip (bnellnm, Jan 9, 2026)
b2c8ee7  move test_routing_simulator.py (bnellnm, Jan 9, 2026)
bc0755a  add baseline + comparison code (bnellnm, Jan 9, 2026)
a9a3110  add eplb routing tests (bnellnm, Jan 10, 2026)
1857e04  renormalize still needed (bnellnm, Jan 10, 2026)
11b24ee  fix test (bnellnm, Jan 10, 2026)
d909415  try to fix doc (bnellnm, Jan 10, 2026)
c2eaaf7  try to fix doc (bnellnm, Jan 11, 2026)
d0897d8  add softmax check, reduce number of tests (bnellnm, Jan 11, 2026)
13b36dc  fix doc (bnellnm, Jan 12, 2026)
73cc749  move routing implementations to respective router object files + fix … (bnellnm, Jan 12, 2026)
5018500  hack fix for router capture functionality (bnellnm, Jan 12, 2026)
836dce5  fix null check (bnellnm, Jan 12, 2026)
e2aa393  fix lint (bnellnm, Jan 13, 2026)
b653d38  fix lint (bnellnm, Jan 13, 2026)
6e6ccdb  fix lint (bnellnm, Jan 13, 2026)
fc14bdf  fix eplb init ordering issue (bnellnm, Jan 14, 2026)
ac2bf7f  review comments (bnellnm, Jan 15, 2026)
eac533a  fix imports + fix ernie model (bnellnm, Jan 15, 2026)
7079d6f  fix other ernie model (bnellnm, Jan 15, 2026)
f46db23  add scoring_func to FusedTopkRouter (bnellnm, Jan 16, 2026)
18a2b27  fix test imports (bnellnm, Jan 17, 2026)
20918ab  fix merge issues (bnellnm, Jan 17, 2026)
76a76fe  add some comments. fix for phi-3.5-moe custom router (bnellnm, Jan 17, 2026)
2c5dbae  fix flashinfer fp4 failure (bnellnm, Jan 17, 2026)
525ffc6  fix test errors (bnellnm, Jan 17, 2026)
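
The file diffs that follow are call-site updates for this refactor: fused_topk and fused_experts are now imported from the vllm.model_executor.layers.fused_moe package instead of the fused_moe.fused_moe module, and the grouped top-k routing helpers move to a dedicated router module. A minimal sketch of the new import pattern, assuming a vLLM build that includes this PR:

# New import locations used by the updated tests below (assumes this PR is applied).
# These symbols previously came from vllm.model_executor.layers.fused_moe.fused_moe.
from vllm.model_executor.layers.fused_moe import fused_experts, fused_topk

# Grouped top-k routing now lives in its own router module.
from vllm.model_executor.layers.fused_moe.router.grouped_topk_router import (
    GroupedTopk,
    fused_grouped_topk,
)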
tests/kernels/moe/modular_kernel_tools/common.py (2 changes: 1 addition & 1 deletion)
@@ -21,12 +21,12 @@
     get_tensor_model_parallel_world_size,
 )
 from vllm.forward_context import set_forward_context
+from vllm.model_executor.layers.fused_moe import fused_topk
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEConfig,
     FusedMoEParallelConfig,
     FusedMoEQuantConfig,
 )
-from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
 from vllm.utils.import_utils import has_deep_ep, has_deep_gemm, has_pplx
 
 from .mk_objects import (
tests/kernels/moe/test_batched_moe.py (2 changes: 1 addition & 1 deletion)
@@ -15,10 +15,10 @@
 from tests.kernels.quant_utils import native_batched_masked_quant_matmul
 from tests.kernels.utils import torch_experts
 from vllm.config import VllmConfig, set_current_vllm_config
+from vllm.model_executor.layers.fused_moe import fused_topk
 from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
     invoke_moe_batched_triton_kernel,
 )
-from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
 from vllm.platforms import current_platform
 from vllm.triton_utils import tl
 from vllm.utils.torch_utils import set_random_seed
tests/kernels/moe/test_block_fp8.py (6 changes: 4 additions & 2 deletions)
@@ -11,13 +11,15 @@
 )
 from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.model_executor.layers.activation import SiluAndMul
-from vllm.model_executor.layers.fused_moe import fused_experts
+from vllm.model_executor.layers.fused_moe import (
+    fused_experts,
+    fused_topk,
+)
 from vllm.model_executor.layers.fused_moe.deep_gemm_moe import (
     _valid_deep_gemm_shape,
     deep_gemm_moe_fp8,
 )
 from vllm.model_executor.layers.fused_moe.fused_moe import (
-    fused_topk,
     modular_triton_fused_moe,
 )
 from vllm.platforms import current_platform
tests/kernels/moe/test_cutlass_moe.py (2 changes: 1 addition & 1 deletion)
@@ -10,6 +10,7 @@
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm import _custom_ops as ops
 from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config
+from vllm.model_executor.layers.fused_moe import fused_experts, fused_topk
 from vllm.model_executor.layers.fused_moe.config import (
     FUSED_MOE_UNQUANTIZED_CONFIG,
     FusedMoEQuantConfig,
@@ -19,7 +20,6 @@
     CutlassExpertsFp8,
     run_cutlass_moe_fp8,
 )
-from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts, fused_topk
 from vllm.model_executor.layers.fused_moe.prepare_finalize import (
     MoEPrepareAndFinalizeNoEP,
 )
tests/kernels/moe/test_flashinfer_moe.py (2 changes: 1 addition & 1 deletion)
@@ -12,14 +12,14 @@
 from tests.kernels.utils import torch_moe
 from vllm import _custom_ops as ops
 from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config
+from vllm.model_executor.layers.fused_moe import fused_topk
 from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
     FlashInferExperts,
     is_valid_flashinfer_cutlass_fused_moe,
 )
 from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import (
     create_flashinfer_prepare_finalize,
 )
-from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
 from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
 from vllm.platforms import current_platform
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
tests/kernels/moe/test_grouped_topk.py (2 changes: 1 addition & 1 deletion)
@@ -14,7 +14,7 @@
     get_cached_compilation_config,
     set_current_vllm_config,
 )
-from vllm.model_executor.layers.fused_moe.fused_moe import (
+from vllm.model_executor.layers.fused_moe.router.grouped_topk_router import (
     GroupedTopk,
     fused_grouped_topk,
 )
tests/kernels/moe/test_moe.py (4 changes: 3 additions & 1 deletion)
@@ -24,6 +24,9 @@
 from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.distributed.parallel_state import init_distributed_environment
 from vllm.forward_context import set_forward_context
+from vllm.model_executor.layers.fused_moe import (
+    fused_topk,
+)
 from vllm.model_executor.layers.fused_moe.config import (
     FUSED_MOE_UNQUANTIZED_CONFIG,
     int4_w4a16_moe_quant_config,
@@ -34,7 +37,6 @@
     fused_marlin_moe,
 )
 from vllm.model_executor.layers.fused_moe.fused_moe import (
-    fused_topk,
     modular_triton_fused_moe,
 )
 from vllm.model_executor.layers.fused_moe.moe_torch_iterative import (
tests/kernels/moe/test_moe_permute_unpermute.py (2 changes: 1 addition & 1 deletion)
@@ -9,7 +9,7 @@
 import pytest
 import torch
 
-from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
+from vllm.model_executor.layers.fused_moe import fused_topk
 from vllm.model_executor.layers.fused_moe.layer import determine_expert_map
 from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import (
     moe_permute,
tests/kernels/moe/test_nvfp4_moe.py (2 changes: 1 addition & 1 deletion)
@@ -13,11 +13,11 @@
 from tests.kernels.utils import torch_moe
 from vllm import _custom_ops as ops
 from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config
+from vllm.model_executor.layers.fused_moe import fused_topk
 from vllm.model_executor.layers.fused_moe.config import nvfp4_moe_quant_config
 from vllm.model_executor.layers.fused_moe.cutlass_moe import (
     CutlassExpertsFp4,
 )
-from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
 from vllm.model_executor.layers.fused_moe.prepare_finalize import (
     MoEPrepareAndFinalizeNoEP,
 )
tests/kernels/moe/test_pplx_cutlass_moe.py (2 changes: 1 addition & 1 deletion)
@@ -8,9 +8,9 @@
 from tests.kernels.utils import torch_experts
 from vllm import _custom_ops as ops
 from vllm.config import VllmConfig, set_current_vllm_config
+from vllm.model_executor.layers.fused_moe import fused_topk
 from vllm.model_executor.layers.fused_moe.config import fp8_w8a8_moe_quant_config
 from vllm.model_executor.layers.fused_moe.cutlass_moe import CutlassBatchedExpertsFp8
-from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
 from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
 from vllm.platforms import current_platform
 from vllm.utils.math_utils import cdiv