Your current environment
The output of `python collect_env.py`
Your output of `python collect_env.py` here
🐛 Describe the bug
The following backtrace, showing that flash_attn_interface cannot be found, was observed after #17228 was merged:
```
Traceback (most recent call last):
  File "/mnt/vllm/benchmarks/./ds.py", line 3, in <module>
    llm = LLM(model="/mnt/model/DeepSeek-R1/DeepSeek-R1-UD-Q2_K_XL.gguf",
  File "/mnt/vllm/vllm/utils.py", line 1161, in inner
    return fn(*args, **kwargs)
  File "/mnt/vllm/vllm/entrypoints/llm.py", line 247, in __init__
    self.llm_engine = LLMEngine.from_engine_args(
  File "/mnt/vllm/vllm/engine/llm_engine.py", line 516, in from_engine_args
    return engine_cls.from_vllm_config(
  File "/mnt/vllm/vllm/engine/llm_engine.py", line 492, in from_vllm_config
    return cls(
  File "/mnt/vllm/vllm/engine/llm_engine.py", line 281, in __init__
    self.model_executor = executor_class(vllm_config=vllm_config, )
  File "/mnt/vllm/vllm/executor/executor_base.py", line 286, in __init__
    super().__init__(*args, **kwargs)
  File "/mnt/vllm/vllm/executor/executor_base.py", line 52, in __init__
    self._init_executor()
  File "/mnt/vllm/vllm/executor/mp_distributed_executor.py", line 123, in _init_executor
    self._run_workers("init_worker", all_kwargs)
  File "/mnt/vllm/vllm/executor/mp_distributed_executor.py", line 185, in _run_workers
    driver_worker_output = run_method(self.driver_worker, sent_method,
  File "/mnt/vllm/vllm/utils.py", line 2456, in run_method
    return func(*args, **kwargs)
  File "/mnt/vllm/vllm/worker/worker_base.py", line 594, in init_worker
    self.worker = worker_class(**kwargs)
  File "/mnt/vllm/vllm/worker/worker.py", line 82, in __init__
    self.model_runner: GPUModelRunnerBase = ModelRunnerClass(
  File "/mnt/vllm/vllm/worker/model_runner.py", line 1071, in __init__
    self.attn_backend = get_attn_backend(
  File "/mnt/vllm/vllm/attention/selector.py", line 95, in get_attn_backend
    return _cached_get_attn_backend(
  File "/mnt/vllm/vllm/attention/selector.py", line 148, in _cached_get_attn_backend
    attention_cls = current_platform.get_attn_backend_cls(
  File "/mnt/vllm/vllm/platforms/rocm.py", line 145, in get_attn_backend_cls
    from vllm.attention.backends.rocm_aiter_mla import (
  File "/mnt/vllm/vllm/attention/backends/rocm_aiter_mla.py", line 11, in <module>
    from vllm.attention.backends.mla.common import (MLACommonBackend,
  File "/mnt/vllm/vllm/attention/backends/mla/common.py", line 217, in <module>
    from vllm.vllm_flash_attn.fa_utils import get_flash_attn_version
  File "/mnt/vllm/vllm/vllm_flash_attn/__init__.py", line 11, in <module>
    from .flash_attn_interface import (fa_version_unsupported_reason,
ModuleNotFoundError: No module named 'vllm.vllm_flash_attn.flash_attn_interface'
```
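For context, here is a minimal sketch of the call that hits this error, reconstructed from the traceback; the original ds.py likely passes additional LLM arguments beyond the model path, so treat the argument list as an assumption:

```python
# Minimal sketch reconstructed from the traceback above; the original ds.py
# probably passes more LLM arguments than just the model path (assumption).
from vllm import LLM

# Constructing LLM selects the attention backend; on this ROCm setup the
# selector imports rocm_aiter_mla -> mla.common -> vllm.vllm_flash_attn,
# which fails because flash_attn_interface is missing.
llm = LLM(model="/mnt/model/DeepSeek-R1/DeepSeek-R1-UD-Q2_K_XL.gguf")
```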
The error cannot be reproduced after rewinding to the previous commit dc2ceca.
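As a quick check (a minimal sketch, assuming it runs in the same environment and interpreter used for vLLM), one can list what the installed vllm.vllm_flash_attn package actually ships, without triggering the failing import in its __init__:

```python
# Minimal check sketch: locate vllm.vllm_flash_attn without importing its
# __init__ (that import is exactly what fails), then list the files it ships
# to see whether a compiled flash_attn_interface extension is present.
import importlib.util
from pathlib import Path

spec = importlib.util.find_spec("vllm.vllm_flash_attn")
pkg_dir = Path(spec.origin).parent
print(sorted(p.name for p in pkg_dir.iterdir()))
```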
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.