From 95f3df13648607129eacb4446c7744b18f70bec8 Mon Sep 17 00:00:00 2001
From: Kyuyeun Kim
Date: Wed, 26 Nov 2025 18:55:07 +0000
Subject: [PATCH] [Bugfix] Fix attention not found error

Signed-off-by: Kyuyeun Kim
---
 tests/runner/test_kv_cache_manager.py    | 2 +-
 tpu_inference/runner/kv_cache_manager.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/runner/test_kv_cache_manager.py b/tests/runner/test_kv_cache_manager.py
index b48dcf8923..6c17fd8010 100644
--- a/tests/runner/test_kv_cache_manager.py
+++ b/tests/runner/test_kv_cache_manager.py
@@ -5,8 +5,8 @@
 import numpy as np
 import pytest
 import torch
-from vllm.attention import Attention
 from vllm.attention.backends.abstract import AttentionType
+from vllm.attention.layer import Attention
 from vllm.config import (CacheConfig, ModelConfig, ParallelConfig,
                          SchedulerConfig, VllmConfig)
 from vllm.sampling_params import SamplingType
diff --git a/tpu_inference/runner/kv_cache_manager.py b/tpu_inference/runner/kv_cache_manager.py
index b3366ed80b..8a7025bdc0 100644
--- a/tpu_inference/runner/kv_cache_manager.py
+++ b/tpu_inference/runner/kv_cache_manager.py
@@ -7,8 +7,8 @@
 import vllm.envs as envs
 from jax.sharding import NamedSharding, PartitionSpec
 from torchax.ops.mappings import t2j_dtype
-from vllm.attention import Attention
 from vllm.attention.backends.abstract import AttentionType
+from vllm.attention.layer import Attention
 from vllm.config import get_layers_from_vllm_config
 from vllm.utils.math_utils import cdiv
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
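
For context, the patch only changes where the Attention class is imported from; the class itself is untouched. A minimal sketch of the corrected imports, assuming a vllm version in which Attention is defined in vllm.attention.layer and is no longer re-exported from the vllm.attention package (the usage below is hypothetical and only illustrates the import path):

    # Old import path that triggered the "attention not found" error:
    #   from vllm.attention import Attention
    # Corrected path used by this patch:
    from vllm.attention.backends.abstract import AttentionType
    from vllm.attention.layer import Attention

    # Hypothetical helper mirroring how a KV cache manager might inspect layers.
    def is_decoder_attention(layer: Attention) -> bool:
        # AttentionType.DECODER marks standard causal self-attention layers.
        return layer.attn_type == AttentionType.DECODER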