Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
aa35f90
Update eagle.py
StanislavII Mar 10, 2026
b9a016b
Create qwen3_dflash.py
StanislavII Mar 10, 2026
45a7f01
Update speculative.py
StanislavII Mar 10, 2026
023b5fd
Update speculative.py
StanislavII Mar 10, 2026
3c8229d
Update qwen3.py
StanislavII Mar 10, 2026
74d87e9
Update registry.py
StanislavII Mar 10, 2026
299d282
Update eagle.py
StanislavII Mar 10, 2026
926c59e
Update eagle.py
StanislavII Mar 10, 2026
e14c9ee
Update eagle.py
StanislavII Mar 11, 2026
68c5f9b
Create test_gpu.py
StanislavII Mar 11, 2026
badc5ac
Delete vllm/v1/worker/test_gpu.py
StanislavII Mar 11, 2026
226c253
Update gpu_model_runner.py
StanislavII Mar 11, 2026
2171d16
Update gpu_model_runner.py
StanislavII Mar 11, 2026
e23c99f
Update eagle.py
StanislavII Mar 11, 2026
f6c3212
Update speculative.py
StanislavII Mar 11, 2026
c8c7ed0
Update qwen3_dflash.py
StanislavII Mar 11, 2026
487f1ca
Update eagle.py
StanislavII Mar 11, 2026
d9a6c4a
Update eagle.py
StanislavII Mar 11, 2026
1a79f0d
Update qwen3_dflash.py
StanislavII Mar 11, 2026
1b0e430
Update speculative.py
StanislavII Mar 11, 2026
16e3d3b
Update eagle.py
StanislavII Mar 11, 2026
85b37fd
Update speculative.py
StanislavII Mar 11, 2026
9b408e4
Update eagle.py
StanislavII Mar 11, 2026
f14567e
Update eagle.py
StanislavII Mar 11, 2026
d1d43c1
Update eagle.py
StanislavII Mar 11, 2026
379699d
Update eagle.py
StanislavII Mar 11, 2026
bdfcd40
Update eagle.py
StanislavII Mar 11, 2026
b1fab46
Update eagle.py
StanislavII Mar 11, 2026
70d3b7e
Update gpu_model_runner.py
StanislavII Mar 11, 2026
bf6949a
Update gpu_model_runner.py
StanislavII Mar 11, 2026
37bb4de
Update gpu_model_runner.py
StanislavII Mar 11, 2026
fb9d058
Update eagle.py
StanislavII Mar 11, 2026
81e8578
Update eagle.py
StanislavII Mar 11, 2026
1cbeb07
Update gpu_model_runner.py
StanislavII Mar 11, 2026
f7c8b9a
Update qwen3.py
StanislavII Mar 11, 2026
c8f5e49
Merge branch 'main' into dflash-changer
StanislavII Mar 11, 2026
6319706
Update eagle.py
StanislavII Mar 11, 2026
1d1225b
Update eagle.py
StanislavII Mar 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions vllm/config/speculative.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@
"pangu_ultra_moe_mtp",
"step3p5_mtp",
]
# Speculative method name that selects DFlash.
DFlashModelTypes = Literal["dflash"]
# "extract_hidden_states" is kept in this alias: compute_hash() and
# _verify_args() in this file still branch on it as a method value.
EagleModelTypes = Literal[
    "eagle",
    "eagle3",
    "extract_hidden_states",
    MTPModelTypes,
    DFlashModelTypes,
]

NgramGPUTypes = Literal["ngram_gpu"]
SpeculativeMethod = Literal[
"ngram",
Expand Down Expand Up @@ -196,7 +198,11 @@ def compute_hash(self) -> str:
factors: list[Any] = []
# Eagle3, extract_hidden_states and dflash affect the computation graph
# because they return intermediate hidden states in addition to the final
# hidden state.
uses_aux_hidden_states = self.method in ("eagle3", "extract_hidden_states")
uses_aux_hidden_states = self.method in (
"eagle3",
"extract_hidden_states",
"dflash",
)
factors.append(uses_aux_hidden_states)

# The specific layers used also affect the computation graph
Expand Down Expand Up @@ -480,7 +486,7 @@ def __post_init__(self):
)

# Automatically detect the method
if self.method in ("eagle", "eagle3"):
if self.method in ("eagle", "eagle3", "dflash"):
pass
# examples:
# yuhuili/EAGLE-LLaMA3-Instruct-8B
Expand All @@ -490,6 +496,8 @@ def __post_init__(self):
self.method = "eagle"
elif "eagle3" in self.draft_model_config.model.lower():
self.method = "eagle3"
elif "dflash" in self.draft_model_config.model.lower():
self.method = "dflash"
elif self.draft_model_config.hf_config.model_type == "medusa":
self.method = "medusa"
elif self.draft_model_config.hf_config.model_type == "mlp_speculator":
Expand Down Expand Up @@ -795,7 +803,7 @@ def _verify_args(self) -> Self:
"kimi_k25",
]
if (
self.method in ("eagle3", "extract_hidden_states")
self.method in ("eagle3", "extract_hidden_states", "dflash")
and self.target_model_config
and not any(
supported_model in self.target_model_config.hf_text_config.model_type
Expand Down Expand Up @@ -843,7 +851,7 @@ def max_num_new_slots_for_drafting(self) -> int:
return slots_per_req

def use_eagle(self) -> bool:
    """Return True when ``self.method`` is "eagle", "eagle3", "mtp" or "dflash"."""
    return self.method in ("eagle", "eagle3", "mtp", "dflash")

def uses_draft_model(self) -> bool:
# True only when self.method is exactly the string "draft_model".
return self.method == "draft_model"
Expand Down
20 changes: 18 additions & 2 deletions vllm/model_executor/models/qwen3.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,9 +310,25 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
def set_aux_hidden_state_layers(self, layers: tuple[int, ...]) -> None:
# Stores `layers` on the wrapped model as its aux_hidden_state_layers attribute.
self.model.aux_hidden_state_layers = layers

def get_eagle3_aux_hidden_state_layers(self) -> tuple[int, ...]:
def build_target_layer_ids(
    self, num_target_layers: int, num_draft_layers: int
) -> list[int]:
    """Pick ``num_draft_layers`` layer indices spread across the target layers.

    With a single draft layer the middle index (``num_target_layers // 2``)
    is returned. Otherwise the indices are spaced evenly from 1 up to
    ``num_target_layers - 3`` inclusive, each rounded with Python's
    ``round`` (ties go to the even integer).

    Args:
        num_target_layers: Total layer count to spread the indices over.
        num_draft_layers: How many indices to produce.

    Returns:
        A list of ``num_draft_layers`` indices; empty when
        ``num_draft_layers`` is less than 1.
    """
    if num_draft_layers == 1:
        return [num_target_layers // 2]
    first = 1
    last = num_target_layers - 3
    span = last - first
    # Single-argument round() already yields an int, so no int() wrapper.
    return [
        round(first + (i * span) / (num_draft_layers - 1))
        for i in range(num_draft_layers)
    ]

def get_eagle3_aux_hidden_state_layers(
    self, method: str = "eagle3"
) -> tuple[int, ...]:
    """Return the default auxiliary hidden-state layer indices for ``method``.

    Args:
        method: Speculative method name. ``"dflash"`` selects five indices
            produced by ``build_target_layer_ids``; any other value selects
            ``(2, num_layers // 2, num_layers - 3)``. The argument is
            optional so that calls passing no argument keep working.

    Returns:
        The selected layer indices as a tuple.
    """
    num_layers = len(self.model.layers)
    if method == "dflash":
        # The number of DFlash layers requested is fixed at five here.
        return tuple(self.build_target_layer_ids(num_layers, 5))
    return (2, num_layers // 2, num_layers - 3)

def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
return self.model.embed_input_ids(input_ids)
Expand Down
Loading
Loading