26 changes: 16 additions & 10 deletions vllm/model_executor/models/deepseek_v4.py
```diff
@@ -715,12 +715,15 @@ def __init__(
         config = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
         self.prefix = prefix
-        if vllm_config.parallel_config.enable_expert_parallel:
-            self.use_mega_moe = (
-                vllm_config.kernel_config.moe_backend == "deep_gemm_mega_moe"
-            )
-        else:
-            self.use_mega_moe = False
+        self.use_mega_moe = (
+            vllm_config.kernel_config.moe_backend == "deep_gemm_mega_moe"
+        )
+        if self.use_mega_moe and not vllm_config.parallel_config.enable_expert_parallel:
+            raise NotImplementedError(
+                "DeepSeek V4 MegaMoE currently requires expert parallel. "
+                "Enable it with --enable-expert-parallel, or pick a different "
+                "moe backend."
+            )

         self.routed_scaling_factor = getattr(config, "routed_scaling_factor", 1.0)
         self.hidden_size = config.hidden_size
```
```diff
@@ -1223,12 +1226,15 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         config = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
         self.config = config
-        if vllm_config.parallel_config.enable_expert_parallel:
-            self.use_mega_moe = (
-                vllm_config.kernel_config.moe_backend == "deep_gemm_mega_moe"
-            )
-        else:
-            self.use_mega_moe = False
+        self.use_mega_moe = (
+            vllm_config.kernel_config.moe_backend == "deep_gemm_mega_moe"
+        )
+        if self.use_mega_moe and not vllm_config.parallel_config.enable_expert_parallel:
+            raise NotImplementedError(
+                "DeepSeek V4 MegaMoE currently requires expert parallel. "
+                "Enable it with --enable-expert-parallel, or pick a different "
+                "moe backend."
+            )
         self.vocab_size = config.vocab_size
         self.hc_eps = config.hc_eps
         self.hc_mult = config.hc_mult
```

Contributor comment on lines +1229 to +1231 (flagged high):

> The guard against using MegaMoE without expert parallel is missing here in DeepseekV4Model, although it was added to DeepseekV4MoE. For consistency, and so the model fails early during initialization (before creating layers), the same guard should be applied here. This also ensures that `self.use_mega_moe` is only `True` when the configuration is valid, which matters because the flag is used in the forward pass and in the expert-mapping logic.

Suggested change (replacing lines +1229 to +1231):

```python
self.use_mega_moe = (
    vllm_config.kernel_config.moe_backend == "deep_gemm_mega_moe"
)
if self.use_mega_moe and not vllm_config.parallel_config.enable_expert_parallel:
    raise NotImplementedError(
        "DeepSeek V4 MegaMoE currently requires expert parallel. "
        "Enable it with --enable-expert-parallel, or pick a different "
        "--moe-backend."
    )
```

The merged diff above already contains this guard, so the suggestion appears to have been applied in a later commit.
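For reference, here is a minimal standalone sketch of the fail-fast pattern this guard implements. The dataclasses and the `resolve_use_mega_moe` helper are illustrative stand-ins, not vLLM's real `KernelConfig`/`ParallelConfig` classes:

```python
from dataclasses import dataclass


# Hypothetical, simplified stand-ins for the relevant vLLM config fields.
@dataclass
class KernelConfig:
    moe_backend: str = "deep_gemm"


@dataclass
class ParallelConfig:
    enable_expert_parallel: bool = False


def resolve_use_mega_moe(kernel: KernelConfig, parallel: ParallelConfig) -> bool:
    """Derive the MegaMoE flag from the backend choice, then validate it
    before any model layers would be built (mirroring the PR's guard)."""
    use_mega_moe = kernel.moe_backend == "deep_gemm_mega_moe"
    if use_mega_moe and not parallel.enable_expert_parallel:
        raise NotImplementedError(
            "DeepSeek V4 MegaMoE currently requires expert parallel. "
            "Enable it with --enable-expert-parallel, or pick a different "
            "moe backend."
        )
    return use_mega_moe


# MegaMoE with expert parallel enabled: allowed, flag is True.
assert resolve_use_mega_moe(KernelConfig("deep_gemm_mega_moe"), ParallelConfig(True))

# A different backend without expert parallel: allowed, flag stays False.
assert not resolve_use_mega_moe(KernelConfig("deep_gemm"), ParallelConfig(False))

# MegaMoE without expert parallel: rejected at init time, before layer creation.
try:
    resolve_use_mega_moe(KernelConfig("deep_gemm_mega_moe"), ParallelConfig(False))
except NotImplementedError as exc:
    print(f"rejected early: {exc}")
```

Raising during `__init__` means a misconfigured launch fails within seconds rather than after layers are created and weights are loaded, which is the "fails early" behavior the review comment asks for.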