vllm-project · ZJY0516 · Jun 3, 2026 · May 28, 2026 · May 29, 2026 · May 30, 2026
@@ -80,16 +80,6 @@
 
 logger = init_logger(__name__)
 
-
-def is_cumem_allocator_available() -> bool:
-    try:
-        from vllm.device_allocator.cumem import cumem_available
-    except ImportError:
-        return False
-
-    return cumem_available
-
-
 RunnerOption = Literal["auto", RunnerType]
 ConvertType = Literal["none", "embed", "classify"]
 ConvertOption = Literal["auto", ConvertType]
@@ -542,7 +532,10 @@ def __post_init__(
                     "Enabling cumem allocator because sleep mode requires it."
                 )
                 self.enable_cumem_allocator = True
-        if self.enable_cumem_allocator and not is_cumem_allocator_available():
+        if (
+            self.enable_cumem_allocator
+            and not current_platform.is_cumem_allocator_available()
+        ):
             raise ValueError("cumem allocator is not supported on current platform.")
 
         hf_config = get_config(

diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
@@ -199,6 +199,14 @@ def is_sleep_mode_available(self) -> bool:
         # all ROCm platforms for now.
         return self._enum in (PlatformEnum.CUDA, PlatformEnum.ROCM)
 
+    def is_cumem_allocator_available(self) -> bool:
+        """Return cumem's `cumem_available` flag, or False if the import fails."""
+        try:
+            from vllm.device_allocator.cumem import cumem_available as available
+        except ImportError:
+            available = False
+        return available
+
     @classmethod
     def get_pass_manager_cls(cls) -> str:
         """