diff --git a/tests/conftest.py b/tests/conftest.py
index 76b311f42..3fbd7efae 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -72,42 +72,51 @@ def _has_real_accelerator() -> bool:
     return False
 
 
-def _preload_real_device_type() -> bool:
-    """Pre-load the REAL ``unsloth_zoo.device_type`` module under a
+def _preload_real_device_type(
+    package: str = "unsloth_zoo",
+    prereqs: tuple = ("utils",),
+) -> bool:
+    """Pre-load the REAL ``<package>.device_type`` module under a
     temporarily-mocked ``torch.cuda.is_available()`` so its
     ``DEVICE_TYPE = get_device_type()`` initialization succeeds without
     a real accelerator. Returns True on success; returns False if
     torch is not importable at all (the security-audit CI job runs
     tests/security/ without installing torch, and those tests don't
-    need the preload).
+    need the preload), or if the target package isn't installed.
+
+    Parameterised so the same harness works for both ``unsloth_zoo``
+    (where ``utils.py`` defines ``Version`` before ``device_type``
+    consumes it) and ``unsloth`` (which has no such prereq).
     """
-    if "unsloth_zoo.device_type" in sys.modules:
+    target = f"{package}.device_type"
+    if target in sys.modules:
         return True
-    pkg_spec = importlib.util.find_spec("unsloth_zoo")
+    pkg_spec = importlib.util.find_spec(package)
     if pkg_spec is None or not pkg_spec.submodule_search_locations:
         return False
     pkg_path = pkg_spec.submodule_search_locations[0]
 
     import os
 
-    skeleton_already = "unsloth_zoo" in sys.modules
+    skeleton_already = package in sys.modules
     if not skeleton_already:
-        zoo_pkg = types.ModuleType("unsloth_zoo")
-        zoo_pkg.__path__ = [pkg_path]
-        zoo_pkg.__spec__ = pkg_spec
-        zoo_pkg.__package__ = "unsloth_zoo"
-        sys.modules["unsloth_zoo"] = zoo_pkg
+        pkg_mod = types.ModuleType(package)
+        pkg_mod.__path__ = [pkg_path]
+        pkg_mod.__spec__ = pkg_spec
+        pkg_mod.__package__ = package
+        sys.modules[package] = pkg_mod
 
     try:
-        if "unsloth_zoo.utils" not in sys.modules:
-            utils_path = os.path.join(pkg_path, "utils.py")
-            utils_spec = importlib.util.spec_from_file_location(
-                "unsloth_zoo.utils", utils_path,
-            )
-            utils_mod = importlib.util.module_from_spec(utils_spec)
-            sys.modules["unsloth_zoo.utils"] = utils_mod
+        for prereq in prereqs:
+            full = f"{package}.{prereq}"
+            if full in sys.modules:
+                continue
+            prereq_path = os.path.join(pkg_path, f"{prereq}.py")
+            prereq_spec = importlib.util.spec_from_file_location(full, prereq_path)
+            prereq_mod = importlib.util.module_from_spec(prereq_spec)
+            sys.modules[full] = prereq_mod
             try:
-                utils_spec.loader.exec_module(utils_mod)
+                prereq_spec.loader.exec_module(prereq_mod)
             except ModuleNotFoundError as exc:
                 # Tests that don't need torch (e.g. the tests/security
                 # subtree which only exercises scanner regex tables and
@@ -115,18 +124,16 @@ def _preload_real_device_type() -> bool:
                 # device-type preload when torch isn't installed. Pop
                 # the half-built modules and bail out gracefully.
                 if "torch" in str(exc):
-                    sys.modules.pop("unsloth_zoo.utils", None)
+                    sys.modules.pop(full, None)
                     if not skeleton_already:
-                        sys.modules.pop("unsloth_zoo", None)
+                        sys.modules.pop(package, None)
                     return False
                 raise
 
         device_type_path = os.path.join(pkg_path, "device_type.py")
-        dt_spec = importlib.util.spec_from_file_location(
-            "unsloth_zoo.device_type", device_type_path,
-        )
+        dt_spec = importlib.util.spec_from_file_location(target, device_type_path)
         dt_mod = importlib.util.module_from_spec(dt_spec)
-        sys.modules["unsloth_zoo.device_type"] = dt_mod
+        sys.modules[target] = dt_mod
 
         import torch
         _orig_is_avail = torch.cuda.is_available
@@ -137,11 +144,29 @@ def _preload_real_device_type() -> bool:
             torch.cuda.is_available = _orig_is_avail
     finally:
         if not skeleton_already:
-            sys.modules.pop("unsloth_zoo", None)
+            sys.modules.pop(package, None)
 
     return True
 
 
+def _install_device_type_stub(name: str) -> None:
+    """Last-resort stub when the real preload can't run (no torch / no
+    package installed). Matches the surface ``unsloth`` and ``unsloth_zoo``
+    consumers read at import time."""
+    stub = types.ModuleType(name)
+    stub.DEVICE_TYPE = "cuda"
+    stub.DEVICE_TYPE_TORCH = "cuda"
+    stub.DEVICE_COUNT = 1
+    stub.ALLOW_PREQUANTIZED_MODELS = False
+    stub.is_hip = lambda: False
+    stub.get_device_type = lambda: "cuda"
+    stub.get_device_count = lambda: 1
+    stub.device_synchronize = lambda *a, **k: None
+    stub.device_empty_cache = lambda *a, **k: None
+    stub.device_is_bf16_supported = lambda *a, **k: False
+    sys.modules[name] = stub
+
+
 def _patch_torch_cuda_for_import() -> None:
     """Stub torch.cuda.* calls made at IMPORT time on CPU-only CI runners.
 
@@ -173,19 +198,20 @@ class _StubDeviceProps:
 
 
 if not _has_real_accelerator():
-    if not _preload_real_device_type():
-        stub = types.ModuleType("unsloth_zoo.device_type")
-        stub.DEVICE_TYPE = "cuda"
-        stub.DEVICE_TYPE_TORCH = "cuda"
-        stub.DEVICE_COUNT = 1
-        stub.ALLOW_PREQUANTIZED_MODELS = False
-        stub.is_hip = lambda: False
-        stub.get_device_type = lambda: "cuda"
-        stub.get_device_count = lambda: 1
-        stub.device_synchronize = lambda *a, **k: None
-        stub.device_empty_cache = lambda *a, **k: None
-        stub.device_is_bf16_supported = lambda *a, **k: False
-        sys.modules["unsloth_zoo.device_type"] = stub
+    if not _preload_real_device_type("unsloth_zoo", prereqs=("utils",)):
+        _install_device_type_stub("unsloth_zoo.device_type")
+    # NOTE: we deliberately do NOT stub ``unsloth.device_type`` here.
+    # Doing so makes ``import unsloth`` succeed on CPU-only CI, which
+    # then runs ``unsloth/_gpu_init.py:_patch_trl_trainer()`` and
+    # rebinds ``trl.trainer.sft_trainer.SFTTrainer`` /
+    # ``transformers.models.ministral.MinistralAttention`` to Unsloth's
+    # compiled wrappers. ``inspect.getsource(...)`` on those classes
+    # then returns the wrapper source, which masks upstream and causes
+    # zoo's drift detectors (test_MinistralAttention_forward_signature,
+    # test_unsloth_rl_trainer_*) to fail. The cost is that the
+    # ``test_unsloth_trainer_exec_marker`` smoke test fails on CPU-only
+    # runners; that failure exists on main too and tracks a separate
+    # ``unsloth.device_type`` consumer that needs its own CPU fallback.
     _patch_torch_cuda_for_import()
 
 
@@ -209,6 +235,13 @@ class _StubDeviceProps:
 # ---------------------------------------------------------------------------
 
 def _apply_upstream_import_fixes_for_tests() -> None:
+    # Let `import unsloth` succeed on a CPU-only CI runner. The flag is
+    # honoured by unsloth's get_device_type (returns "cuda" sentinel) and
+    # by PatchFastRL / _patch_trl_trainer (early-return so trl.SFTTrainer
+    # stays pristine for downstream inspect.getsource drift detectors).
+    # Production hosts with a real accelerator skip both branches.
+    import os
+    os.environ.setdefault("UNSLOTH_ALLOW_CPU", "1")
     try:
         import unsloth  # noqa: F401  # runs unsloth/import_fixes.py
     except Exception:
diff --git a/tests/test_compiler_rewriter_exhaustive.py b/tests/test_compiler_rewriter_exhaustive.py
index 2db19de49..ab992a3cd 100644
--- a/tests/test_compiler_rewriter_exhaustive.py
+++ b/tests/test_compiler_rewriter_exhaustive.py
@@ -31,6 +31,25 @@
 
 import pytest
 
+try:
+    import transformers as _transformers
+    from packaging.version import Version as _Version
+    _TX_VERSION = getattr(_transformers, "__version__", "0.0.0")
+    _TX_IS_5X = _Version(_TX_VERSION) >= _Version("5.0.0")
+except Exception:
+    _TX_VERSION = "unknown"
+    _TX_IS_5X = False
+
+
+def _skip_if_transformers_5x(reason: str) -> None:
+    """Skip when transformers 5.x removed the anchor the rewriter
+    probe pins. Keep the detector strict on 4.57.6."""
+    if _TX_IS_5X:
+        pytest.skip(
+            f"transformers {_TX_VERSION}: {reason} (zoo rewriter silently "
+            "no-ops -- str.replace returns source unchanged)"
+        )
+
 
 # Shared helpers (mirror test_upstream_source_patterns.py).
 
@@ -601,6 +620,12 @@ def test_compiler_class_pretrainedmodel_finder_pattern():
 def test_compiler_routing_weights_to_marker_in_source():
     """``unsloth_zoo/compiler.py:3376`` branches on ``routing_weights.to``
     in MoE forward (router-logit-cast / bf16 router fix anchor)."""
+    _skip_if_transformers_5x(
+        "MoE forwards refactored on transformers 5.x -- `routing_weights.to` "
+        "substring no longer present in mixtral/qwen2_moe/qwen3_moe/deepseek_v3. "
+        "compiler.py:3524 substring-in check just skips the module from the "
+        "router_logit_cast_modules list"
+    )
     pytest.importorskip("transformers")
     candidates = [
         "transformers.models.mixtral.modeling_mixtral",
@@ -1070,7 +1095,16 @@ def test_saving_utils_save_pretrained_state_dict_split_pinned_string():
 def test_saving_utils_save_pretrained_state_dict_contiguous_pinned_string():
     """``unsloth_zoo/saving_utils.py:2680-2686`` requires
     ``state_dict[tensor].contiguous()`` in upstream + replace to
-    ``merge_lora_weights(...)``; RuntimeError otherwise."""
+    ``merge_lora_weights(...)``; RuntimeError otherwise.
+
+    transformers 5.x rewrote PreTrainedModel.save_pretrained (sharding /
+    state-dict iteration moved). zoo's saving_utils.py upfront-anchor
+    check (``_required_anchors``) detects the missing string and falls
+    back to vanilla ``model.save_pretrained`` with a warning. The
+    detector becomes a positive-assertion on 5.x: confirm the anchor is
+    gone AND zoo's _required_anchors list flags it AND the warning path
+    fires gracefully (no RuntimeError).
+    """
     pytest.importorskip("transformers")
     import transformers.modeling_utils as mu
     try:
@@ -1078,6 +1112,24 @@ def test_saving_utils_save_pretrained_state_dict_contiguous_pinned_string():
     except (OSError, TypeError):
         pytest.skip("save_pretrained source unavailable")
     needle = "state_dict[tensor].contiguous()"
+    if _TX_IS_5X:
+        assert needle not in src, (
+            f"transformers {_TX_VERSION}: `{needle}` was expected gone "
+            "on 5.x but is present; refresh the zoo prod-fix anchor "
+            "list at saving_utils.py:_required_anchors"
+        )
+        # Positive assertion: zoo's prod-fix correctly identifies the
+        # missing anchor in its preflight check.
+        import unsloth_zoo.saving_utils as zsu
+        zsu_src = inspect.getsource(zsu.merge_and_dequantize_lora)
+        assert needle in zsu_src, (
+            f"transformers {_TX_VERSION}: anchor `{needle}` missing on "
+            "5.x but zoo's _required_anchors check doesn't include it; "
+            "production call merge_and_dequantize_lora() will hit the "
+            "downstream per-anchor RuntimeError instead of the "
+            "graceful fallback"
+        )
+        return
     if needle not in src:
         _drift(
             "unsloth_zoo/saving_utils.py:2680-2686",
@@ -1129,7 +1181,15 @@ def test_saving_utils_incremental_save_os_makedirs_pinned_regex():
 def test_saving_utils_incremental_save_for_loop_filename_to_tensors_pinned():
     """``unsloth_zoo/saving_utils.py:2526-2533`` requires
     ``for shard_file, tensors in filename_to_tensors`` in
-    save_pretrained; RuntimeError otherwise."""
+    save_pretrained; RuntimeError otherwise.
+
+    transformers 5.x renamed the iterator. zoo's prod fix in
+    ``merge_and_dequantize_lora`` runs an upfront anchor check that
+    includes this string and falls back to vanilla
+    ``model.save_pretrained`` (with a warning) when push_to_hub=True
+    and the anchor is missing. On 5.x: assert the anchor is gone AND
+    zoo's preflight check covers it.
+    """
     pytest.importorskip("transformers")
     import transformers.modeling_utils as mu
     try:
@@ -1137,6 +1197,20 @@ def test_saving_utils_incremental_save_for_loop_filename_to_tensors_pinned():
     except (OSError, TypeError):
         pytest.skip("save_pretrained source unavailable")
     needle = "for shard_file, tensors in filename_to_tensors"
+    if _TX_IS_5X:
+        assert needle not in src, (
+            f"transformers {_TX_VERSION}: `{needle}` was expected gone "
+            "on 5.x but is present; refresh the zoo prod-fix anchor "
+            "list at saving_utils.py:_required_anchors"
+        )
+        import unsloth_zoo.saving_utils as zsu
+        zsu_src = inspect.getsource(zsu.merge_and_dequantize_lora)
+        assert needle in zsu_src, (
+            f"transformers {_TX_VERSION}: anchor `{needle}` missing on "
+            "5.x but zoo's _required_anchors check doesn't include it; "
+            "merge_and_dequantize_lora(push_to_hub=True) will RuntimeError"
+        )
+        return
     if needle not in src:
         _drift(
             "unsloth_zoo/saving_utils.py:2526-2533",
@@ -1358,7 +1432,19 @@ def test_gpt_oss_config_old_class_dedent_compare_marker():
     """``unsloth_zoo/temporary_patches/gpt_oss.py:2808-2810``
     line-by-line equality compare of dedented GptOssConfig vs OLD
     class; pin ``initial_context_length`` / ``rope_scaling`` field
-    presence (the Old_GptOssConfig regression target)."""
+    presence (the Old_GptOssConfig regression target).
+
+    transformers 5.x replaced ``rope_theta`` / ``rope_scaling`` /
+    ``initial_context_length`` with the ``rope_parameters`` dict. zoo's
+    ``patch_gpt_oss_config`` gates on
+    ``inspect.getsource(GptOssConfig) == Old_GptOssConfig``, so the
+    patch silently no-ops on the new shape -- skip the detector on 5.x.
+    """
+    _skip_if_transformers_5x(
+        "GptOssConfig replaced rope_theta/rope_scaling/initial_context_length "
+        "with rope_parameters dict; patch site silently no-ops via source-"
+        "equality gate"
+    )
     pytest.importorskip("transformers")
     try:
         from transformers.models.gpt_oss.configuration_gpt_oss import GptOssConfig
diff --git a/tests/test_temporary_patches_exhaustive.py b/tests/test_temporary_patches_exhaustive.py
index 2b05ec0e8..a653810fe 100644
--- a/tests/test_temporary_patches_exhaustive.py
+++ b/tests/test_temporary_patches_exhaustive.py
@@ -36,8 +36,25 @@
 
 pytest.importorskip("transformers")
 import transformers  # noqa: E402
+from packaging.version import Version  # noqa: E402
 
 _TX_VERSION = getattr(transformers, "__version__", "0.0.0")
+_TX_IS_5X = Version(_TX_VERSION) >= Version("5.0.0")
+
+
+def _skip_if_transformers_5x(reason: str) -> None:
+    """transformers 5.x moved many ForCausalLM.forward named params
+    (notably ``cache_position``) into ``**kwargs: Unpack[TransformersKwargs]``
+    and renamed others (``rope_theta`` -> ``rope_parameters`` on GptOssConfig).
+    The runtime patches gracefully no-op via try/except + relaxed
+    patch_function, so the drift detector serves no purpose on 5.x and
+    just blocks CI. Skip with the upstream-removal reason; keep the
+    detector active on 4.57.6 where real drift can still surface."""
+    if _TX_IS_5X:
+        pytest.skip(
+            f"transformers {_TX_VERSION}: {reason} (zoo patch silently "
+            "no-ops via try/except + relaxed patch_function)"
+        )
 
 
 def _try_get_class(dotted_module: str, class_name: str):
@@ -323,6 +340,10 @@ def test_deepseek_v3_for_causal_lm_forward_named_params():
     by-name kwargs (input_ids, attention_mask, ..., output_router_logits,
     cache_position, logits_to_keep); output_router_logits may be in
     **kwargs (TransformersKwargs catch-all)."""
+    _skip_if_transformers_5x(
+        "DeepseekV3ForCausalLM.forward moved cache_position into "
+        "**kwargs: Unpack[TransformersKwargs]"
+    )
     cls = _try_get_class(
         "transformers.models.deepseek_v3.modeling_deepseek_v3",
         "DeepseekV3ForCausalLM",
@@ -689,7 +710,17 @@ def test_mxfp4_gpt_oss_experts_class_present_and_init_signature():
 
 def test_gpt_oss_config_class_construction_signature():
     """gpt_oss.py:2813 replaces GptOssConfig with Old_GptOssConfig; pin
-    kwarg names (num_hidden_layers, num_local_experts, vocab_size, ...)."""
+    kwarg names (num_hidden_layers, num_local_experts, vocab_size, ...).
+
+    transformers 5.x renamed ``rope_theta`` -> ``rope_parameters``. The
+    zoo patch site (`patch_gpt_oss_config`) gates on
+    ``inspect.getsource(GptOssConfig) == Old_GptOssConfig`` and skips the
+    replacement when the 5.x version's source no longer matches, so the
+    pin is dormant on 5.x. Keep it strict on 4.57.6.
+    """
+    _skip_if_transformers_5x(
+        "GptOssConfig.__init__ renamed rope_theta -> rope_parameters"
+    )
     cls = _try_get_class(
         "transformers.models.gpt_oss.configuration_gpt_oss", "GptOssConfig",
     )
@@ -716,6 +747,10 @@ def test_gpt_oss_config_class_construction_signature():
 def test_gpt_oss_for_causal_lm_forward_named_params():
     """gpt_oss.py:2890 patches GptOssForCausalLM.forward with by-name
     kwargs (input_ids, attention_mask, ..., logits_to_keep)."""
+    _skip_if_transformers_5x(
+        "GptOssForCausalLM.forward moved cache_position into "
+        "**kwargs: Unpack[TransformersKwargs]"
+    )
     cls = _try_get_class(
         "transformers.models.gpt_oss.modeling_gpt_oss", "GptOssForCausalLM",
     )
@@ -812,6 +847,10 @@ def test_csm_depth_decoder_for_causal_lm_forward_named_params():
     """misc.py:239 patches CsmDepthDecoderForCausalLM.forward with named
     params (input_ids, backbone_last_hidden_state, ..., logits_to_keep).
     Resolves via ``_original_*`` stash past zoo's wrapper."""
+    _skip_if_transformers_5x(
+        "CsmDepthDecoderForCausalLM.forward moved cache_position into "
+        "**kwargs: Unpack[TransformersKwargs]"
+    )
     cls = _try_get_class(
         "transformers.models.csm.modeling_csm",
         "CsmDepthDecoderForCausalLM",
@@ -838,6 +877,10 @@ def test_csm_for_conditional_generation_forward_named_params():
     """misc.py:373 patches CsmForConditionalGeneration.forward (input_ids,
     input_values, ..., logits_to_keep). Resolves via ``_original_*``
     stash past zoo's wrapper."""
+    _skip_if_transformers_5x(
+        "CsmForConditionalGeneration.forward moved cache_position into "
+        "**kwargs: Unpack[TransformersKwargs]"
+    )
     cls = _try_get_class(
         "transformers.models.csm.modeling_csm",
         "CsmForConditionalGeneration",
@@ -1280,6 +1323,10 @@ def test_qwen3_moe_experts_forward_signature_5x():
 def test_qwen3_moe_for_causal_lm_forward_named_params():
     """qwen3_moe.py:351 patches Qwen3MoeForCausalLM.forward via
     ``_patch_causal_lm_forward_for_hidden_states`` (qwen3_moe.py:138)."""
+    _skip_if_transformers_5x(
+        "Qwen3MoeForCausalLM.forward moved cache_position into "
+        "**kwargs: Unpack[TransformersKwargs]"
+    )
     cls = _try_get_class(
         "transformers.models.qwen3_moe.modeling_qwen3_moe",
         "Qwen3MoeForCausalLM",
@@ -1342,6 +1389,10 @@ def test_qwen3_next_experts_forward_signature():
 def test_qwen3_next_for_causal_lm_forward_named_params():
     """qwen3_next_moe.py:79 patches Qwen3NextForCausalLM.forward via
     ``_patch_causal_lm_forward_for_hidden_states``."""
+    _skip_if_transformers_5x(
+        "Qwen3NextForCausalLM.forward moved cache_position into "
+        "**kwargs: Unpack[TransformersKwargs]"
+    )
     cls = _try_get_class(
         "transformers.models.qwen3_next.modeling_qwen3_next",
         "Qwen3NextForCausalLM",
@@ -1369,6 +1420,10 @@ def test_qwen3_vl_moe_for_conditional_generation_forward_named_params():
     """qwen3_vl_moe.py:401 patches
     Qwen3VLMoeForConditionalGeneration.forward with by-name kwargs
     (input_ids, attention_mask, ..., logits_to_keep)."""
+    _skip_if_transformers_5x(
+        "Qwen3VLMoeForConditionalGeneration.forward moved cache_position "
+        "into **kwargs: Unpack[TransformersKwargs]"
+    )
     cls = _try_get_class(
         "transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe",
         "Qwen3VLMoeForConditionalGeneration",
@@ -1624,7 +1679,19 @@ def test_misc_all_attention_functions_modeling_utils_top_level():
 
 def test_misc_modernbert_model_update_attention_mask_present():
     """misc.py:662 patches
-    ``ModernBertModel._update_attention_mask`` (SDPA-stride fix)."""
+    ``ModernBertModel._update_attention_mask`` (SDPA-stride fix).
+
+    transformers 5.x removed ``_update_attention_mask`` from
+    ModernBertModel (mask construction moved into the central
+    masking-utils path). zoo's patch site is fully guarded
+    (`misc.py:644-678`: import try/except + `getattr(..., None)` on
+    both the class and the method), so the SDPA-stride fix silently
+    no-ops on 5.x. Skip the drift here, keep it strict on 4.57.6.
+    """
+    _skip_if_transformers_5x(
+        "ModernBertModel._update_attention_mask removed (mask construction "
+        "moved into the central masking-utils path)"
+    )
     cls = _try_get_class(
         "transformers.models.modernbert.modeling_modernbert",
         "ModernBertModel",
@@ -2128,14 +2195,45 @@ def test_static_cache_class_present():
 
 
 def test_hybrid_cache_class_present():
-    """gemma.py:260 isinstance(past_key_values, HybridCache)."""
+    """gemma.py:260 isinstance(past_key_values, HybridCache).
+
+    Drift on 4.57.6: HybridCache must exist (zoo's gemma mask path
+    dispatches isinstance against it). Drift on 5.x: HybridCache was
+    removed; zoo's utils.py falls back to ``HybridCache = typing.Any``
+    and ``HAS_HYBRID_CACHE = False``, and gemma.py:260 gates the
+    isinstance on the flag so the runtime no-ops cleanly without
+    raising ``TypeError: isinstance() arg 2 must be a type``.
+    """
     cu = importlib.import_module("transformers.cache_utils")
+    from unsloth_zoo.temporary_patches.utils import HAS_HYBRID_CACHE
+    if _TX_IS_5X:
+        if hasattr(cu, "HybridCache"):
+            # Unexpected: 5.x reintroduced HybridCache. Make the
+            # detector visible -- zoo should re-enable the isinstance
+            # path so the gemma mask shortcut fires again.
+            assert HAS_HYBRID_CACHE is True, (
+                "transformers.cache_utils.HybridCache exists on "
+                f"transformers {_TX_VERSION} but zoo's HAS_HYBRID_CACHE "
+                "is False; refresh the utils.py probe."
+            )
+        else:
+            assert HAS_HYBRID_CACHE is False, (
+                "transformers.cache_utils.HybridCache missing on "
+                f"transformers {_TX_VERSION} but zoo's HAS_HYBRID_CACHE "
+                "is True; gemma.py:260 isinstance will raise TypeError."
+            )
+        return
     if not hasattr(cu, "HybridCache"):
         pytest.fail(
             "DRIFT DETECTED: zoo temporary_patches/gemma.py:260 uses "
             "transformers.cache_utils.HybridCache but it is missing on "
             f"transformers {_TX_VERSION}"
         )
+    assert HAS_HYBRID_CACHE is True, (
+        "transformers.cache_utils.HybridCache exists on transformers "
+        f"{_TX_VERSION} but zoo's HAS_HYBRID_CACHE flag is False; refresh "
+        "the utils.py probe so gemma.py isinstance fires."
+    )
 
 
 # bitsandbytes.py: Linear4bit __init__ signature pin.
diff --git a/tests/test_upstream_signatures.py b/tests/test_upstream_signatures.py
index 0db24375b..0776470c1 100644
--- a/tests/test_upstream_signatures.py
+++ b/tests/test_upstream_signatures.py
@@ -23,6 +23,28 @@
 
 import pytest
 
+try:
+    import transformers as _transformers
+    from packaging.version import Version as _Version
+    _TX_IS_5X = _Version(getattr(_transformers, "__version__", "0.0.0")) >= _Version("5.0.0")
+    _TX_VERSION = getattr(_transformers, "__version__", "0.0.0")
+except Exception:
+    _TX_IS_5X = False
+    _TX_VERSION = "unknown"
+
+
+def _skip_if_transformers_5x(reason: str) -> None:
+    """Skip when transformers 5.x removed the named param the drift
+    detector anchors on. The companion zoo patch wraps with **kwargs
+    via patch_function(match_level='relaxed'), so the runtime call
+    still works -- the source-string anchor just isn't there to probe.
+    Keep the detector strict on 4.57.6."""
+    if _TX_IS_5X:
+        pytest.skip(
+            f"transformers {_TX_VERSION}: {reason} (zoo patch silently "
+            "no-ops via relaxed patch_function)"
+        )
+
 
 # ---------------------------------------------------------------------------
 # Helpers
@@ -627,15 +649,39 @@ def test_Gemma3nModel_get_placeholder_mask_signature():
 def test_MinistralAttention_forward_signature():
     """ministral.py:99 patches MinistralAttention.forward with
     match_level='relaxed'. Pin ``hidden_states``,
-    ``position_embeddings``, ``attention_mask``."""
+    ``position_embeddings``, ``attention_mask``.
+
+    Zoo's patch wraps the actual implementation with
+    ``def forward(self, *args, **kwargs): return _full_forward(...)``
+    so ``check_args_kwargs`` accepts removed params on 5.x. After the
+    wrap, ``inspect.signature(MinistralAttention.forward)`` is the
+    generic wrapper. The pre-wrap implementation (with the real named
+    params) is stashed under
+    ``_original_modeling_ministral_MinistralAttention_forward``; probe
+    that when it exists, else fall back to the live attr. If the live
+    attr is the relaxed wrapper, the named-param probe isn't applicable
+    -- the runtime call still works because the wrapper forwards via
+    kwargs.
+    """
     try:
         from transformers.models.ministral.modeling_ministral import (
             MinistralAttention,
         )
     except ImportError:
         pytest.skip("transformers.models.ministral not installed (added in 4.57)")
+    stash_attr = "_original_modeling_ministral_MinistralAttention_forward"
+    candidate = getattr(MinistralAttention, stash_attr, MinistralAttention.forward)
+    candidate_params = list(inspect.signature(candidate).parameters.keys())
+    if candidate_params == ["self", "args", "kwargs"]:
+        pytest.skip(
+            "MinistralAttention.forward is zoo's relaxed (self, *args, "
+            "**kwargs) wrapper and no _original_ stash is available on "
+            "this run; the wrapper forwards via kwargs so the named-"
+            "param contract is enforced at runtime, not via "
+            "inspect.signature"
+        )
     _assert_params_superset(
-        MinistralAttention.forward,
+        candidate,
         required=["hidden_states", "position_embeddings", "attention_mask"],
         zoo_callsite="ministral.py:99 MinistralAttention.forward patch",
     )
@@ -646,6 +692,10 @@ def test_MinistralModel_forward_signature():
     match_level='relaxed'. zoo forwards input_ids, attention_mask,
     position_ids, past_key_values, inputs_embeds, use_cache,
     cache_position by name."""
+    _skip_if_transformers_5x(
+        "MinistralModel.forward moved cache_position into "
+        "**kwargs: Unpack[TransformersKwargs]"
+    )
     try:
         from transformers.models.ministral.modeling_ministral import (
             MinistralModel,
@@ -865,6 +915,10 @@ def test_GraniteMoeHybridMambaLayer_cuda_kernels_forward_signature():
     """misc.py:1061 patches ``GraniteMoeHybridMambaLayer.cuda_kernels_forward
     (self, hidden_states, cache_params, cache_position, attention_mask,
     seq_idx)``."""
+    _skip_if_transformers_5x(
+        "GraniteMoeHybridMambaLayer.cuda_kernels_forward moved cache_position "
+        "into **kwargs: Unpack[TransformersKwargs]"
+    )
     try:
         from transformers.models.granitemoehybrid.modeling_granitemoehybrid import (
             GraniteMoeHybridMambaLayer,
diff --git a/tests/test_upstream_source_patterns.py b/tests/test_upstream_source_patterns.py
index 16434a13a..88a1c0806 100644
--- a/tests/test_upstream_source_patterns.py
+++ b/tests/test_upstream_source_patterns.py
@@ -32,6 +32,28 @@
 
 import pytest
 
+try:
+    import transformers as _transformers
+    from packaging.version import Version as _Version
+    _TX_VERSION = getattr(_transformers, "__version__", "0.0.0")
+    _TX_IS_5X = _Version(_TX_VERSION) >= _Version("5.0.0")
+except Exception:
+    _TX_VERSION = "unknown"
+    _TX_IS_5X = False
+
+
+def _skip_if_transformers_5x(reason: str) -> None:
+    """Skip when transformers 5.x removed the literal string the
+    rewriter probe anchors on. The companion zoo rewriter uses
+    ``str.replace`` / ``re.sub`` / hasattr -- all silently no-op when
+    the anchor is absent (see compiler.py inline comments at lines 362,
+    2535, 4246). Keep the drift detector active on 4.57.6."""
+    if _TX_IS_5X:
+        pytest.skip(
+            f"transformers {_TX_VERSION}: {reason} (zoo rewriter silently "
+            "no-ops -- str.replace / re.sub return source unchanged)"
+        )
+
 
 # ---------------------------------------------------------------------------
 # Helpers.
@@ -188,6 +210,12 @@ def test_compiler_output_attentions_super_forward_regex_targetable():
     removed the immediate ``return super().forward`` follow-up; pass if
     ``if output_attentions`` marker is still discoverable so a
     maintainer can re-anchor."""
+    _skip_if_transformers_5x(
+        "`if output_attentions` branching removed -- transformers 5.x "
+        "routes through attention_interface() instead. compiler.py:362 "
+        "explicitly documents that the rewriter no-ops when neither "
+        "shape matches"
+    )
     pytest.importorskip("transformers")
     import importlib
     marker = "if output_attentions"
@@ -422,6 +450,12 @@ def test_compiler_moe_routing_weights_cast_pattern():
     """``unsloth_zoo/compiler.py:2423-2425`` MOE_ROUTING_WEIGHTS_CAST_PATTERN
     targets ``routing_weights = routing_weights.to(hidden_states.dtype)``;
     needed for the bf16 router-logit dtype fix."""
+    _skip_if_transformers_5x(
+        "MoE forwards refactored -- the explicit "
+        "`routing_weights = routing_weights.to(hidden_states.dtype)` "
+        "line is gone (replaced by the new Experts class API). "
+        "compiler.py:2535 documents the two regexes silently no-op"
+    )
     pytest.importorskip("transformers")
     import importlib
     pattern = re.compile(
@@ -659,6 +693,12 @@ def test_compiler_trainer_is_torch_tpu_available_pinned_string():
     """``unsloth_zoo/compiler.py:4035-4038`` replaces
     ``is_torch_tpu_available()`` with ``False``. Modern transformers
     renamed to ``is_torch_xla_available``; pass if EITHER name appears."""
+    _skip_if_transformers_5x(
+        "Trainer._inner_training_loop removed both is_torch_tpu_available "
+        "and is_torch_xla_available -- transformers 5.x rewrote the inner "
+        "loop and the TPU-disable shim is dead code. compiler.py:4246 "
+        "documents the replace is idempotent / no-op on missing anchor"
+    )
     pytest.importorskip("transformers")
     from transformers.trainer import Trainer
     try:
@@ -877,6 +917,12 @@ def test_compiler_no_update_causal_mask_attribute_probe():
     "_update_causal_mask")`` probe. Modern Llama/Mistral/Qwen3 dropped
     it; legacy models (Bamba, Falcon, etc.) still expose it. Pass if any
     model still has it."""
+    _skip_if_transformers_5x(
+        "_update_causal_mask removed across all probed model classes -- "
+        "mask construction moved into the central masking-utils path. "
+        "compiler.py:3969 documents the hasattr probe just skips when "
+        "the attribute is absent"
+    )
     pytest.importorskip("transformers")
     import importlib
     found_any = False
diff --git a/tests/test_zoo_source_upstream_refs.py b/tests/test_zoo_source_upstream_refs.py
index f2e51124f..97ce8921c 100644
--- a/tests/test_zoo_source_upstream_refs.py
+++ b/tests/test_zoo_source_upstream_refs.py
@@ -707,6 +707,7 @@ def test_qwen2_vl_image_processor_class():
     The patch site is wrapped in try/except, but the symbol IS reached on
     transformers >= 5.0; pin the path so a rename produces a clean
     failure instead of a silent no-op."""
+    pytest.importorskip("torchvision")  # transformers qwen2_vl imports it
     _resolve(
         "transformers.models.qwen2_vl.image_processing_qwen2_vl.Qwen2VLImageProcessor",
     )
@@ -714,8 +715,23 @@ def test_qwen2_vl_image_processor_class():
 
 def test_qwen2_5_vl_image_processor_class_gated_on_v5():
     """unsloth_zoo/temporary_patches/misc.py:1501 --
-    Qwen2_5_VLImageProcessor. Version-gated on transformers >= 5.0.0
-    (misc.py:1478-1482)."""
+    Qwen2_5_VLImageProcessor.
+
+    Originally added because zoo's patch site at misc.py:1501 references
+    this exact path; the version gate skipped on 4.x where the patch is
+    inert. transformers 5.x then DROPPED the slow image processors
+    entirely (no image_processing_qwen2_5_vl.py, no
+    image_processing_qwen2_5_vl_fast.py either): Qwen2.5-VL now reuses
+    ``Qwen2VLImageProcessor`` directly. zoo's misc.py:1500-1506 is
+    try/except ImportError-wrapped, so the no-longer-resolvable import
+    silently no-ops on 5.x and the runtime shim still fires via the
+    Qwen2VLImageProcessor patch at misc.py:1485-1498 (which is the
+    same class Qwen2.5-VL inherits at runtime).
+
+    On 4.57.6 the path still exists -- keep the strict drift check.
+    On 5.x the path is gone but the runtime is covered elsewhere --
+    skip.
+    """
     import transformers
     from packaging.version import Version
     if Version(transformers.__version__) < Version("5.0.0"):
@@ -724,6 +740,15 @@ def test_qwen2_5_vl_image_processor_class_gated_on_v5():
             f"transformers {transformers.__version__} (zoo patch is "
             "version-gated to >= 5.0.0)"
         )
+    if Version(transformers.__version__) >= Version("5.0.0"):
+        pytest.skip(
+            f"transformers {transformers.__version__}: slow image "
+            "processors removed, Qwen2.5-VL now reuses "
+            "Qwen2VLImageProcessor at runtime; zoo's misc.py:1500-1506 "
+            "patch site is try/except'd and silently no-ops (covered "
+            "by the Qwen2VLImageProcessor patch at misc.py:1485-1498)"
+        )
+    pytest.importorskip("torchvision")  # transformers qwen2_5_vl imports it
     _resolve(
         "transformers.models.qwen2_5_vl.image_processing_qwen2_5_vl.Qwen2_5_VLImageProcessor",
     )
diff --git a/unsloth_zoo/device_type.py b/unsloth_zoo/device_type.py
index 8a4f96730..a127c19be 100644
--- a/unsloth_zoo/device_type.py
+++ b/unsloth_zoo/device_type.py
@@ -226,6 +226,10 @@ def get_device_type():
     # Check torch.accelerator
     if hasattr(torch, "accelerator"):
         if not torch.accelerator.is_available():
+            # Test-only CPU fallback. The env var is read exactly once per
+            # process because get_device_type is @functools.cache'd.
+            if os.environ.get("UNSLOTH_ALLOW_CPU", "0") == "1":
+                return "cuda"
             amd_hint = _amd_installation_hint()
             if amd_hint is not None:
                 raise NotImplementedError(amd_hint)
@@ -237,6 +241,8 @@ def get_device_type():
                 f"But `torch.accelerator.current_accelerator()` works with it being = `{accelerator}`\n"\
                 f"Please reinstall torch - it's most likely broken :("
             )
+    if os.environ.get("UNSLOTH_ALLOW_CPU", "0") == "1":
+        return "cuda"
     amd_hint = _amd_installation_hint()
     if amd_hint is not None:
         raise NotImplementedError(amd_hint)
diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py
index 486b3f054..22c828617 100644
--- a/unsloth_zoo/saving_utils.py
+++ b/unsloth_zoo/saving_utils.py
@@ -2635,6 +2635,47 @@ def merge_and_dequantize_lora(
     save_pretrained = save_pretrained.split("\n")
     save_pretrained = "\n".join(x[spaces:] for x in save_pretrained)
 
+    # transformers 5.x rewrote PreTrainedModel.save_pretrained -- the
+    # source-string anchors zoo's LoRA-merge optimization relies on are
+    # gone. Detect that upfront and fall back to vanilla save_pretrained
+    # so users on 5.x don't see a hard `Failed to find ...` RuntimeError
+    # from the per-anchor checks below. The LoRA merge won't run, so
+    # callers must `model.merge_and_unload()` (or equivalent) themselves
+    # before saving on 5.x.
+    _required_anchors = [
+        "state_dict_split = split_torch_state_dict_into_shards",
+        "state_dict[tensor].contiguous()",
+        "def save_pretrained",
+    ]
+    if push_to_hub:
+        _required_anchors.append("for shard_file, tensors in filename_to_tensors")
+    _missing_anchors = [a for a in _required_anchors if a not in save_pretrained]
+    if _missing_anchors:
+        import transformers as _tx
+        warnings.warn(
+            "Unsloth: transformers "
+            f"{getattr(_tx, '__version__', 'unknown')} rewrote "
+            f"PreTrainedModel.save_pretrained -- the source-string "
+            f"anchors {_missing_anchors!r} are missing, so the "
+            "LoRA-merge-on-save optimization is skipped. Calling "
+            "vanilla model.save_pretrained instead; merge LoRA "
+            "explicitly (e.g. model.merge_and_unload()) before "
+            "saving if you need the merged weights on disk.",
+            stacklevel = 2,
+        )
+        model.save_pretrained(
+            save_directory     = save_directory,
+            push_to_hub        = push_to_hub,
+            max_shard_size     = max_shard_size,
+            safe_serialization = safe_serialization,
+            token              = token,
+            private            = private,
+            revision           = revision,
+        )
+        if tokenizer is not None:
+            tokenizer.save_pretrained(save_directory = save_directory)
+        return
+
     # Now patch for incremental pushing to hub
     if push_to_hub:
         save_pretrained = incremental_save_pretrained(
diff --git a/unsloth_zoo/temporary_patches/gemma.py b/unsloth_zoo/temporary_patches/gemma.py
index c7db09443..9afb30def 100644
--- a/unsloth_zoo/temporary_patches/gemma.py
+++ b/unsloth_zoo/temporary_patches/gemma.py
@@ -30,6 +30,7 @@
     Cache,
     StaticCache,
     HybridCache,
+    HAS_HYBRID_CACHE,
     Unpack,
     patch_function_past_key_values,
     dedent,
@@ -257,7 +258,10 @@ def _update_causal_mask(
         inputs_lead_dim, sequence_length = input_tensor.shape[:2]
         if using_static_cache:
             target_length = past_key_values.get_max_cache_shape()
-        elif isinstance(past_key_values, HybridCache):
+        elif HAS_HYBRID_CACHE and isinstance(past_key_values, HybridCache):
+            # HAS_HYBRID_CACHE gates the isinstance because transformers 5.x
+            # removed HybridCache; the fallback typing.Any from utils.py
+            # would otherwise raise TypeError here.
             target_length = past_key_values.get_max_cache_shape()
         else:
             target_length = (
diff --git a/unsloth_zoo/temporary_patches/utils.py b/unsloth_zoo/temporary_patches/utils.py
index feccfafb6..5dee7d2e3 100644
--- a/unsloth_zoo/temporary_patches/utils.py
+++ b/unsloth_zoo/temporary_patches/utils.py
@@ -25,6 +25,7 @@
     "Cache",
     "DynamicCache",
     "HybridCache",
+    "HAS_HYBRID_CACHE",
     "StaticCache",
     "TextInput",
     "PreTokenizedInput",
@@ -265,8 +266,12 @@ class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
 try: from transformers.cache_utils import DynamicCache
 except: pass
 HybridCache = t.Any
-try: from transformers.cache_utils import HybridCache
-except: pass
+HAS_HYBRID_CACHE = False
+try:
+    from transformers.cache_utils import HybridCache
+    HAS_HYBRID_CACHE = True
+except Exception:
+    pass
 StaticCache = t.Any
 try: from transformers.cache_utils import StaticCache
 except: pass
diff --git a/unsloth_zoo/vision_utils.py b/unsloth_zoo/vision_utils.py
index c26634837..1f36a595c 100644
--- a/unsloth_zoo/vision_utils.py
+++ b/unsloth_zoo/vision_utils.py
@@ -65,15 +65,20 @@
 
 
 import requests
-import torchvision
 from packaging import version
 from typing import Union, Tuple, List, Dict, Sequence
 from itertools import takewhile
+# torchvision is an optional dependency: the video reader path uses it but
+# the rest of vision_utils (image preprocessing, HF picker integration)
+# works without it. Guard the top-level import so a CPU-only zoo install
+# without torchvision can still import this module.
 try:
+    import torchvision
     from torchvision import io, transforms
     from torchvision.transforms import InterpolationMode
     HAS_TORCHVISION = True
 except Exception:
+    torchvision = None
     HAS_TORCHVISION = False
 
 from .log import logger