tests/utils.py (7 changes: 4 additions & 3 deletions)

@@ -818,14 +818,15 @@ def create_new_process_for_each_test(
 
     Args:
         method: The process creation method. Can be either "spawn" or "fork".
-            If not specified,
-            it defaults to "spawn" on ROCm platforms and "fork" otherwise.
+            If not specified, it defaults to "spawn" on ROCm and XPU
+            platforms and "fork" otherwise.
 
     Returns:
         A decorator to run test functions in separate processes.
     """
     if method is None:
-        method = "spawn" if current_platform.is_rocm() else "fork"
+        use_spawn = current_platform.is_rocm() or current_platform.is_xpu()
+        method = "spawn" if use_spawn else "fork"
 
     assert method in ["spawn",
                       "fork"], "Method must be either 'spawn' or 'fork'"
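Forking is cheap but unsafe once a device runtime is live in the parent process, which is why the factory now defaults to "spawn" on XPU as well as ROCm. As a minimal sketch of how the updated decorator factory is applied in a test module (the test bodies below are illustrative, not from this PR; the optional method argument follows the signature shown in this diff):

from tests.utils import create_new_process_for_each_test


@create_new_process_for_each_test()  # "spawn" on ROCm/XPU, "fork" elsewhere
def test_runs_in_a_fresh_process():
    # Each run gets a clean interpreter, so accelerator state initialized
    # by an earlier test cannot leak into this one.
    assert 1 + 1 == 2


@create_new_process_for_each_test(method="spawn")  # force spawn everywhere
def test_always_spawned():
    assert True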
tests/v1/e2e/test_cascade_attention.py (4 changes: 2 additions & 2 deletions)

@@ -5,10 +5,10 @@
 
 from vllm import LLM, SamplingParams
 
-from ...utils import fork_new_process_for_each_test
+from ...utils import create_new_process_for_each_test
 
 
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 @pytest.mark.parametrize("attn_backend",
                          ["FLASH_ATTN_VLLM_V1", "FLASHINFER_VLLM_V1"])
 def test_cascade_attention(example_system_message, monkeypatch, attn_backend):
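The trailing parentheses at the call site are the important detail: the renamed helper is a decorator factory rather than a plain decorator, so @create_new_process_for_each_test() must be called, optionally with a method, to produce the decorator it applies. A simplified sketch of that pattern, not the actual vLLM implementation:

from functools import wraps
from typing import Callable, Optional


def create_new_process_for_each_test(method: Optional[str] = None):
    """Decorator factory: `method` is chosen here, and the returned
    decorator is what actually wraps the test function."""

    def decorator(test_fn: Callable) -> Callable:

        @wraps(test_fn)
        def wrapper(*args, **kwargs):
            # The real helper runs test_fn in a subprocess created with
            # the chosen start method; this stub just calls it inline.
            return test_fn(*args, **kwargs)

        return wrapper

    return decorator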
vllm/utils/__init__.py (9 changes: 9 additions & 0 deletions)

@@ -1535,6 +1535,13 @@ def cuda_is_initialized() -> bool:
     return torch.cuda.is_initialized()
 
 
+def xpu_is_initialized() -> bool:
+    """Check if XPU is initialized."""
+    if not torch.xpu._is_compiled():
+        return False
+    return torch.xpu.is_initialized()
+
+
 def cuda_get_device_properties(device,
                                names: Sequence[str],
                                init_cuda=False) -> tuple[Any, ...]:
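Checking torch.xpu._is_compiled() first keeps the helper safe on builds without XPU support: only when the backend is compiled in does it ask whether the runtime has actually been initialized. A usage sketch, assuming a PyTorch build that ships the XPU backend (roughly torch 2.4 and later):

import torch

from vllm.utils import xpu_is_initialized

# On a CPU-only or CUDA-only build, _is_compiled() is False and the helper
# returns False without touching the XPU runtime at all.
print(xpu_is_initialized())  # False until something initializes XPU

if torch.xpu._is_compiled() and torch.xpu.is_available():
    torch.xpu.init()             # explicitly initialize the runtime
    print(xpu_is_initialized())  # now True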
@@ -2848,6 +2855,8 @@ def _maybe_force_spawn():
     reason = None
     if cuda_is_initialized():
         reason = "CUDA is initialized"
+    elif xpu_is_initialized():
+        reason = "XPU is initialized"
     elif is_in_ray_actor():
         # even if we choose to spawn, we need to pass the ray address
         # to the subprocess so that it knows how to connect to the ray cluster.
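With the new branch, an already-initialized XPU runtime triggers the same forced switch to "spawn" as CUDA does: forking duplicates live driver state into the child, which tends to deadlock or crash, while spawn starts the child from a clean interpreter. A condensed sketch of the control flow; the VLLM_WORKER_MULTIPROC_METHOD environment variable is how vLLM selects the worker start method, but treat the mechanics here as an approximation of the function rather than its full body (which also covers the Ray-actor case shown above):

import logging
import os

from vllm.utils import cuda_is_initialized, xpu_is_initialized

logger = logging.getLogger(__name__)


def _maybe_force_spawn_sketch() -> None:
    """Approximation of _maybe_force_spawn, for illustration only."""
    reason = None
    if cuda_is_initialized():
        reason = "CUDA is initialized"
    elif xpu_is_initialized():
        reason = "XPU is initialized"

    if reason is not None:
        logger.warning(
            "Forcing multiprocessing start method to 'spawn' because %s.",
            reason)
        os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"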