Merged
35 commits
44f7715  Added qwen3 vision language moe support for speculative decoding (shanjiaz, Jan 9, 2026)
9612a1a  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 9, 2026)
bbef7e7  min diff (shanjiaz, Jan 9, 2026)
86e804f  min diff (shanjiaz, Jan 9, 2026)
35a1024  white space (shanjiaz, Jan 9, 2026)
4ab9986  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 9, 2026)
4f8160c  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 12, 2026)
5ee93e0  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 12, 2026)
0ba1e92  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 13, 2026)
a65da8e  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 13, 2026)
4bef2f9  Added test and refined conditions. (shanjiaz, Jan 13, 2026)
3b035ba  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 13, 2026)
3a71574  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 13, 2026)
de8b289  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 14, 2026)
5256ed9  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 14, 2026)
75bd33c  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 15, 2026)
b63cadc  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 15, 2026)
3fb773f  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 19, 2026)
b27e6c4  move logic to set_positions (shanjiaz, Jan 19, 2026)
95b3617  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 20, 2026)
f8cbcaf  format (shanjiaz, Jan 20, 2026)
23798e3  min diff (shanjiaz, Jan 20, 2026)
edbaec8  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 20, 2026)
654ddb7  remove test for now (shanjiaz, Jan 20, 2026)
d559e41  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 20, 2026)
1666aa0  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 20, 2026)
e121abe  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 20, 2026)
ea62713  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 20, 2026)
ab352e4  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 20, 2026)
8f2b1e1  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 20, 2026)
0a59c88  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 20, 2026)
46521b5  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 20, 2026)
aacde22  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 20, 2026)
42db6eb  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 20, 2026)
44e3fbe  Merge branch 'main' into qwen3-vl-moe-spec-update (shanjiaz, Jan 21, 2026)
vllm/model_executor/models/qwen3_vl_moe.py: 8 additions, 0 deletions
@@ -110,9 +110,14 @@ def forward(
         assert intermediate_tensors is not None
         hidden_states = intermediate_tensors["hidden_states"]
         residual = intermediate_tensors["residual"]
+
+        aux_hidden_states = []
         for layer_idx, layer in islice(
             enumerate(self.layers), self.start_layer, self.end_layer
         ):
+            if layer_idx in self.aux_hidden_state_layers:
+                aux_hidden_states.append(hidden_states + residual)
+
             hidden_states, residual = layer(
                 positions,
                 hidden_states,
@@ -132,6 +137,9 @@ def forward(
                 {"hidden_states": hidden_states, "residual": residual}
             )
         hidden_states, _ = self.norm(hidden_states, residual)
+
+        if len(aux_hidden_states) > 0:
+            return hidden_states, aux_hidden_states
         return hidden_states

     def load_fused_expert_weights(
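
Auxiliary hidden states like these feed the draft head in EAGLE-3-style speculative decoding. Below is a minimal, self-contained sketch of the collection pattern added above; DummyLayer, the hidden size, and the chosen layer indices are hypothetical stand-ins, not vLLM code.

import torch
import torch.nn as nn
from itertools import islice

class DummyLayer(nn.Module):
    # Hypothetical layer that mimics the (hidden_states, residual) contract.
    def __init__(self, hidden_size: int):
        super().__init__()
        self.proj = nn.Linear(hidden_size, hidden_size)

    def forward(self, hidden_states, residual):
        return self.proj(hidden_states), hidden_states + residual

layers = nn.ModuleList(DummyLayer(16) for _ in range(4))
aux_hidden_state_layers = {1, 3}  # layers whose inputs the draft head consumes

hidden_states = torch.randn(2, 16)
residual = torch.zeros_like(hidden_states)
aux_hidden_states = []
for layer_idx, layer in islice(enumerate(layers), 0, len(layers)):
    if layer_idx in aux_hidden_state_layers:
        # Snapshot the full activation (hidden + residual) entering this layer.
        aux_hidden_states.append(hidden_states + residual)
    hidden_states, residual = layer(hidden_states, residual)

print(len(aux_hidden_states))  # 2 snapshots collected for the draft model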
vllm/v1/spec_decode/eagle.py: 9 additions, 1 deletion
@@ -112,7 +112,9 @@ def __init__(
         self.input_ids = torch.zeros(
             self.max_num_tokens, dtype=torch.int32, device=device
         )
-        self.uses_mrope = self.vllm_config.model_config.uses_mrope
+        # Use the draft model's M-RoPE setting, not the target model's:
+        # draft models may be text-only even if the target is multimodal.
+        self.uses_mrope = self.draft_model_config.uses_mrope
         if self.uses_mrope:
             # NOTE: `mrope_positions` is implemented with one additional dummy
             # position on purpose to make it non-contiguous so that it can work

Review discussion on the `self.uses_mrope` change:

Contributor: This should be fine to use, as it should support both multi-modal and text-only draft models, correct?

Contributor Author: Yes!

@@ -221,6 +223,11 @@ def _set_positions(self, num_tokens: int, positions: torch.Tensor):
         if self.uses_mrope:
             self.mrope_positions[:, :num_tokens] = positions
         else:
+            # Convert M-RoPE positions if the target model uses M-RoPE but
+            # the draft doesn't. For text inputs, all M-RoPE dimensions are
+            # identical.
+            if self.vllm_config.model_config.uses_mrope:
+                positions = positions[0]
             self.positions[:num_tokens] = positions

     def initialize_cudagraph_keys(self, cudagraph_mode: CUDAGraphMode) -> None:
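
A small sketch of the position conversion above, under the assumption (which holds for text tokens) that all three M-RoPE rows (temporal/height/width) carry identical values; the shapes, flags, and values here are illustrative only:

import torch

target_uses_mrope = True   # target model (e.g. Qwen3-VL-MoE) uses M-RoPE
draft_uses_mrope = False   # text-only draft model expects 1-D positions

num_tokens = 5
# For pure-text tokens, the temporal/height/width rows are identical.
positions = torch.arange(num_tokens).repeat(3, 1)  # shape (3, num_tokens)

if not draft_uses_mrope and target_uses_mrope:
    # Collapse to 1-D: any row works, since all rows agree for text tokens.
    positions = positions[0]

print(positions)  # tensor([0, 1, 2, 3, 4])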
@@ -1080,6 +1087,7 @@ def load_model(self, target_model: nn.Module) -> None:
         if self.get_model_name(target_model) in [
             "Qwen2_5_VLForConditionalGeneration",
             "Qwen3VLForConditionalGeneration",
+            "Qwen3VLMoeForConditionalGeneration",
         ]:
             self.model.config.image_token_index = target_model.config.image_token_id
         elif self.get_model_name(target_model) == "PixtralForConditionalGeneration":
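
Finally, a toy sketch of the config remapping this hunk extends; SimpleNamespace stands in for the real config objects, and the token id value is a hypothetical example:

from types import SimpleNamespace

target_config = SimpleNamespace(image_token_id=151655)  # illustrative value
draft_config = SimpleNamespace(image_token_index=None)

model_name = "Qwen3VLMoeForConditionalGeneration"
if model_name in (
    "Qwen2_5_VLForConditionalGeneration",
    "Qwen3VLForConditionalGeneration",
    "Qwen3VLMoeForConditionalGeneration",
):
    # Qwen VL targets expose the id as `image_token_id`; the draft config
    # expects it under `image_token_index`, so copy it across at load time.
    draft_config.image_token_index = target_config.image_token_id

print(draft_config.image_token_index)  # 151655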