Skip to content

Commit 10e78bd

Browse files
committed
rebase and change helper function name
Signed-off-by: qizixi <[email protected]>
1 parent f9f39ac commit 10e78bd

File tree

2 files changed: +8 −2 lines changed

vllm/v1/spec_decode/eagle.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -151,6 +151,11 @@ def propose(
151151
else:
152152
raise ValueError(f"Unsupported method: {self.method}")
153153

154+
# At this moment, we assume all eagle layers belong to the same KV
155+
# cache group, thus using the same attention metadata.
156+
per_layer_attn_metadata = {}
157+
for layer_name in self.attn_layer_names:
158+
per_layer_attn_metadata[layer_name] = attn_metadata
154159
if self.use_cuda_graph and \
155160
num_tokens <= self.cudagraph_batch_sizes[-1]:
156161
num_input_tokens = self.vllm_config.pad_for_cudagraph(num_tokens)
@@ -356,7 +361,8 @@ def dummy_run(
356361
self.hidden_states[:num_tokens],
357362
)
358363

359-
def validate_kv_cache_group(self, kv_cache_config: KVCacheConfig) -> None:
364+
def validate_same_kv_cache_group(self,
365+
kv_cache_config: KVCacheConfig) -> None:
360366
"""
361367
Validate that all eagle layers belong to the same KVCacheGroup.
362368
Need this assumption to ensure all eagle layers can use the

vllm/v1/worker/gpu_model_runner.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2024,7 +2024,7 @@ def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
20242024
assert isinstance(self.drafter, EagleProposer)
20252025
# validate all draft model layers belong to the same kv cache
20262026
# group
2027-
self.drafter.validate_kv_cache_group(kv_cache_config)
2027+
self.drafter.validate_same_kv_cache_group(kv_cache_config)
20282028

20292029
bind_kv_cache(
20302030
kv_caches,

0 commit comments

Comments
 (0)