-
-
Notifications
You must be signed in to change notification settings - Fork 11.6k
Open
Labels
bug: Something isn't working
Description
Your current environment
The output of `python collect_env.py`
Your output of `python collect_env.py` here
🐛 Describe the bug
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] EngineCore failed to start.
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] Traceback (most recent call last):
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 770, in run_engine_core
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] engine_core = EngineCoreProc(*args, **kwargs)
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 538, in __init__
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] super().__init__(
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 109, in __init__
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] num_gpu_blocks, num_cpu_blocks, kv_cache_config = self._initialize_kv_caches(
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 218, in _initialize_kv_caches
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] available_gpu_memory = self.model_executor.determine_available_memory()
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/abstract.py", line 123, in determine_available_memory
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] return self.collective_rpc("determine_available_memory")
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/uniproc_executor.py", line 73, in collective_rpc
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] return [run_method(self.driver_worker, method, args, kwargs)]
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/utils/__init__.py", line 900, in run_method
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] return func(*args, **kwargs)
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 120, in decorate_context
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] return func(*args, **kwargs)
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_worker.py", line 305, in determine_available_memory
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] self.model_runner.profile_run()
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_model_runner.py", line 3706, in profile_run
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] dummy_encoder_outputs = self.model.get_multimodal_embeddings(
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_vl.py", line 1623, in get_multimodal_embeddings
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] video_embeddings = self._process_video_input(multimodal_input)
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_vl.py", line 1462, in _process_video_input
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] video_embeds = self.visual(pixel_values_videos, grid_thw=grid_thw_list)
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1786, in _call_impl
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_vl.py", line 555, in forward
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] hidden_states = blk(
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1786, in _call_impl
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_vl.py", line 237, in forward
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] x = x + self.attn(
(EngineCore_DP0 pid=175) Process EngineCore_DP0:
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1786, in _call_impl
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen2_5_vl.py", line 415, in forward
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] output = self.flash_attn_varlen_func(
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/vllm/vllm_flash_attn/flash_attn_interface.py", line 236, in flash_attn_varlen_func
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] out, softmax_lse = torch.ops._vllm_fa2_C.varlen_fwd(
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] File "/usr/local/lib/python3.12/dist-packages/torch/_ops.py", line 1255, in __call__
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] return self._op(*args, **kwargs)
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] ^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) ERROR 10-27 08:33:41 [core.py:779] RuntimeError: This flash attention build does not support headdim not being a multiple of 32.
(EngineCore_DP0 pid=175) Traceback (most recent call last):
(EngineCore_DP0 pid=175) File "/usr/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
(EngineCore_DP0 pid=175) self.run()
(EngineCore_DP0 pid=175) File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run
(EngineCore_DP0 pid=175) self._target(*self._args, **self._kwargs)
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 783, in run_engine_core
(EngineCore_DP0 pid=175) raise e
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 770, in run_engine_core
(EngineCore_DP0 pid=175) engine_core = EngineCoreProc(*args, **kwargs)
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 538, in __init__
(EngineCore_DP0 pid=175) super().__init__(
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 109, in __init__
(EngineCore_DP0 pid=175) num_gpu_blocks, num_cpu_blocks, kv_cache_config = self._initialize_kv_caches(
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 218, in _initialize_kv_caches
(EngineCore_DP0 pid=175) available_gpu_memory = self.model_executor.determine_available_memory()
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/abstract.py", line 123, in determine_available_memory
(EngineCore_DP0 pid=175) return self.collective_rpc("determine_available_memory")
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/uniproc_executor.py", line 73, in collective_rpc
(EngineCore_DP0 pid=175) return [run_method(self.driver_worker, method, args, kwargs)]
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/utils/__init__.py", line 900, in run_method
(EngineCore_DP0 pid=175) return func(*args, **kwargs)
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 120, in decorate_context
(EngineCore_DP0 pid=175) return func(*args, **kwargs)
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_worker.py", line 305, in determine_available_memory
(EngineCore_DP0 pid=175) self.model_runner.profile_run()
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_model_runner.py", line 3706, in profile_run
(EngineCore_DP0 pid=175) dummy_encoder_outputs = self.model.get_multimodal_embeddings(
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_vl.py", line 1623, in get_multimodal_embeddings
(EngineCore_DP0 pid=175) video_embeddings = self._process_video_input(multimodal_input)
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_vl.py", line 1462, in _process_video_input
(EngineCore_DP0 pid=175) video_embeds = self.visual(pixel_values_videos, grid_thw=grid_thw_list)
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
(EngineCore_DP0 pid=175) return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1786, in _call_impl
(EngineCore_DP0 pid=175) return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_vl.py", line 555, in forward
(EngineCore_DP0 pid=175) hidden_states = blk(
(EngineCore_DP0 pid=175) ^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
(EngineCore_DP0 pid=175) return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1786, in _call_impl
(EngineCore_DP0 pid=175) return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_vl.py", line 237, in forward
(EngineCore_DP0 pid=175) x = x + self.attn(
(EngineCore_DP0 pid=175) ^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
(EngineCore_DP0 pid=175) return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1786, in _call_impl
(EngineCore_DP0 pid=175) return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen2_5_vl.py", line 415, in forward
(EngineCore_DP0 pid=175) output = self.flash_attn_varlen_func(
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/vllm/vllm_flash_attn/flash_attn_interface.py", line 236, in flash_attn_varlen_func
(EngineCore_DP0 pid=175) out, softmax_lse = torch.ops._vllm_fa2_C.varlen_fwd(
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) File "/usr/local/lib/python3.12/dist-packages/torch/_ops.py", line 1255, in __call__
(EngineCore_DP0 pid=175) return self._op(*args, **kwargs)
(EngineCore_DP0 pid=175) ^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=175) RuntimeError: This flash attention build does not support headdim not being a multiple of 32.
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.
ivanbaldolikaixin2000, cassandeer, zmarty and ivanbaldo
Metadata
Metadata
Assignees
Labels
bug: Something isn't working