vllm-project · benchislett · Apr 21, 2026 · Apr 20, 2026 · Apr 21, 2026 · Apr 21, 2026
diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py
@@ -325,15 +325,26 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
 
         if cache_config.enable_prefix_caching:
             if cache_config.mamba_cache_mode == "none":
-                cache_config.mamba_cache_mode = (
-                    "all" if model_config.supports_mamba_prefix_caching else "align"
-                )
-                logger.warning(
-                    "Mamba cache mode is set to '%s' for %s by default "
-                    "when prefix caching is enabled",
-                    cache_config.mamba_cache_mode,
-                    model_config.architecture,
-                )
+                if (
+                    model_config.supports_mamba_prefix_caching
+                    and vllm_config.speculative_config is not None
+                ):
+                    cache_config.mamba_cache_mode = "align"
+                    logger.warning(
+                        "Mamba cache mode is set to 'align' for %s by default "
+                        "when prefix caching and speculative decoding are enabled",
+                        model_config.architecture,
+                    )
+                else:
+                    cache_config.mamba_cache_mode = (
+                        "all" if model_config.supports_mamba_prefix_caching else "align"
+                    )
+                    logger.warning(
+                        "Mamba cache mode is set to '%s' for %s by default "
+                        "when prefix caching is enabled",
+                        cache_config.mamba_cache_mode,
+                        model_config.architecture,
+                    )
             if (
                 cache_config.mamba_cache_mode == "all"
                 and not model_config.supports_mamba_prefix_caching