diff --git a/vllm/transformers_utils/configs/speculators/algos.py b/vllm/transformers_utils/configs/speculators/algos.py
index 405d5f5de1d1..31b60796d128 100644
--- a/vllm/transformers_utils/configs/speculators/algos.py
+++ b/vllm/transformers_utils/configs/speculators/algos.py
@@ -65,6 +65,7 @@ def update_dflash(config_dict: dict, pre_trained_config: dict) -> None:
     if config_dict.get("target_hidden_size") is not None:
         pre_trained_config["target_hidden_size"] = config_dict["target_hidden_size"]
 
+    # TODO: does this need to be shifted by 1 like in gpu_model_runner?
     aux_layer_ids = config_dict["aux_hidden_state_layer_ids"]
     pre_trained_config["eagle_aux_hidden_state_layer_ids"] = aux_layer_ids
 
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index b6bc942fc857..d7df9e4eeebe 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -4938,7 +4938,11 @@ def _get_eagle3_aux_layers_from_config(self) -> tuple[int, ...] | None:
         if not layer_ids:
             dflash_config = getattr(hf_config, "dflash_config", None)
             if dflash_config and isinstance(dflash_config, dict):
-                layer_ids = dflash_config.get("target_layer_ids")
+                target_ids = dflash_config.get("target_layer_ids")
+                # A missing, null, or non-list value must not raise here.
+                if isinstance(target_ids, (list, tuple)):
+                    # Add 1 to convert DFlash's aux layer id semantics
+                    layer_ids = [i + 1 for i in target_ids]
 
         if layer_ids and isinstance(layer_ids, (list, tuple)):
             return tuple(layer_ids)