diff --git a/tests/conftest.py b/tests/conftest.py index 76b311f42..3fbd7efae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -72,42 +72,51 @@ def _has_real_accelerator() -> bool: return False -def _preload_real_device_type() -> bool: - """Pre-load the REAL ``unsloth_zoo.device_type`` module under a +def _preload_real_device_type( + package: str = "unsloth_zoo", + prereqs: tuple = ("utils",), +) -> bool: + """Pre-load the REAL ``.device_type`` module under a temporarily-mocked ``torch.cuda.is_available()`` so its ``DEVICE_TYPE = get_device_type()`` initialization succeeds without a real accelerator. Returns True on success; returns False if torch is not importable at all (the security-audit CI job runs tests/security/ without installing torch, and those tests don't - need the preload). + need the preload), or if the target package isn't installed. + + Parameterised so the same harness works for both ``unsloth_zoo`` + (where ``utils.py`` defines ``Version`` before ``device_type`` + consumes it) and ``unsloth`` (which has no such prereq). """ - if "unsloth_zoo.device_type" in sys.modules: + target = f"{package}.device_type" + if target in sys.modules: return True - pkg_spec = importlib.util.find_spec("unsloth_zoo") + pkg_spec = importlib.util.find_spec(package) if pkg_spec is None or not pkg_spec.submodule_search_locations: return False pkg_path = pkg_spec.submodule_search_locations[0] import os - skeleton_already = "unsloth_zoo" in sys.modules + skeleton_already = package in sys.modules if not skeleton_already: - zoo_pkg = types.ModuleType("unsloth_zoo") - zoo_pkg.__path__ = [pkg_path] - zoo_pkg.__spec__ = pkg_spec - zoo_pkg.__package__ = "unsloth_zoo" - sys.modules["unsloth_zoo"] = zoo_pkg + pkg_mod = types.ModuleType(package) + pkg_mod.__path__ = [pkg_path] + pkg_mod.__spec__ = pkg_spec + pkg_mod.__package__ = package + sys.modules[package] = pkg_mod try: - if "unsloth_zoo.utils" not in sys.modules: - utils_path = os.path.join(pkg_path, "utils.py") - utils_spec = importlib.util.spec_from_file_location( - "unsloth_zoo.utils", utils_path, - ) - utils_mod = importlib.util.module_from_spec(utils_spec) - sys.modules["unsloth_zoo.utils"] = utils_mod + for prereq in prereqs: + full = f"{package}.{prereq}" + if full in sys.modules: + continue + prereq_path = os.path.join(pkg_path, f"{prereq}.py") + prereq_spec = importlib.util.spec_from_file_location(full, prereq_path) + prereq_mod = importlib.util.module_from_spec(prereq_spec) + sys.modules[full] = prereq_mod try: - utils_spec.loader.exec_module(utils_mod) + prereq_spec.loader.exec_module(prereq_mod) except ModuleNotFoundError as exc: # Tests that don't need torch (e.g. the tests/security # subtree which only exercises scanner regex tables and @@ -115,18 +124,16 @@ def _preload_real_device_type() -> bool: # device-type preload when torch isn't installed. Pop # the half-built modules and bail out gracefully. if "torch" in str(exc): - sys.modules.pop("unsloth_zoo.utils", None) + sys.modules.pop(full, None) if not skeleton_already: - sys.modules.pop("unsloth_zoo", None) + sys.modules.pop(package, None) return False raise device_type_path = os.path.join(pkg_path, "device_type.py") - dt_spec = importlib.util.spec_from_file_location( - "unsloth_zoo.device_type", device_type_path, - ) + dt_spec = importlib.util.spec_from_file_location(target, device_type_path) dt_mod = importlib.util.module_from_spec(dt_spec) - sys.modules["unsloth_zoo.device_type"] = dt_mod + sys.modules[target] = dt_mod import torch _orig_is_avail = torch.cuda.is_available @@ -137,11 +144,29 @@ def _preload_real_device_type() -> bool: torch.cuda.is_available = _orig_is_avail finally: if not skeleton_already: - sys.modules.pop("unsloth_zoo", None) + sys.modules.pop(package, None) return True +def _install_device_type_stub(name: str) -> None: + """Last-resort stub when the real preload can't run (no torch / no + package installed). Matches the surface ``unsloth`` and ``unsloth_zoo`` + consumers read at import time.""" + stub = types.ModuleType(name) + stub.DEVICE_TYPE = "cuda" + stub.DEVICE_TYPE_TORCH = "cuda" + stub.DEVICE_COUNT = 1 + stub.ALLOW_PREQUANTIZED_MODELS = False + stub.is_hip = lambda: False + stub.get_device_type = lambda: "cuda" + stub.get_device_count = lambda: 1 + stub.device_synchronize = lambda *a, **k: None + stub.device_empty_cache = lambda *a, **k: None + stub.device_is_bf16_supported = lambda *a, **k: False + sys.modules[name] = stub + + def _patch_torch_cuda_for_import() -> None: """Stub torch.cuda.* calls made at IMPORT time on CPU-only CI runners. @@ -173,19 +198,20 @@ class _StubDeviceProps: if not _has_real_accelerator(): - if not _preload_real_device_type(): - stub = types.ModuleType("unsloth_zoo.device_type") - stub.DEVICE_TYPE = "cuda" - stub.DEVICE_TYPE_TORCH = "cuda" - stub.DEVICE_COUNT = 1 - stub.ALLOW_PREQUANTIZED_MODELS = False - stub.is_hip = lambda: False - stub.get_device_type = lambda: "cuda" - stub.get_device_count = lambda: 1 - stub.device_synchronize = lambda *a, **k: None - stub.device_empty_cache = lambda *a, **k: None - stub.device_is_bf16_supported = lambda *a, **k: False - sys.modules["unsloth_zoo.device_type"] = stub + if not _preload_real_device_type("unsloth_zoo", prereqs=("utils",)): + _install_device_type_stub("unsloth_zoo.device_type") + # NOTE: we deliberately do NOT stub ``unsloth.device_type`` here. + # Doing so makes ``import unsloth`` succeed on CPU-only CI, which + # then runs ``unsloth/_gpu_init.py:_patch_trl_trainer()`` and + # rebinds ``trl.trainer.sft_trainer.SFTTrainer`` / + # ``transformers.models.ministral.MinistralAttention`` to Unsloth's + # compiled wrappers. ``inspect.getsource(...)`` on those classes + # then returns the wrapper source, which masks upstream and causes + # zoo's drift detectors (test_MinistralAttention_forward_signature, + # test_unsloth_rl_trainer_*) to fail. The cost is that the + # ``test_unsloth_trainer_exec_marker`` smoke test fails on CPU-only + # runners; that failure exists on main too and tracks a separate + # ``unsloth.device_type`` consumer that needs its own CPU fallback. _patch_torch_cuda_for_import() @@ -209,6 +235,13 @@ class _StubDeviceProps: # --------------------------------------------------------------------------- def _apply_upstream_import_fixes_for_tests() -> None: + # Let `import unsloth` succeed on a CPU-only CI runner. The flag is + # honoured by unsloth's get_device_type (returns "cuda" sentinel) and + # by PatchFastRL / _patch_trl_trainer (early-return so trl.SFTTrainer + # stays pristine for downstream inspect.getsource drift detectors). + # Production hosts with a real accelerator skip both branches. + import os + os.environ.setdefault("UNSLOTH_ALLOW_CPU", "1") try: import unsloth # noqa: F401 # runs unsloth/import_fixes.py except Exception: diff --git a/tests/test_compiler_rewriter_exhaustive.py b/tests/test_compiler_rewriter_exhaustive.py index 2db19de49..ab992a3cd 100644 --- a/tests/test_compiler_rewriter_exhaustive.py +++ b/tests/test_compiler_rewriter_exhaustive.py @@ -31,6 +31,25 @@ import pytest +try: + import transformers as _transformers + from packaging.version import Version as _Version + _TX_VERSION = getattr(_transformers, "__version__", "0.0.0") + _TX_IS_5X = _Version(_TX_VERSION) >= _Version("5.0.0") +except Exception: + _TX_VERSION = "unknown" + _TX_IS_5X = False + + +def _skip_if_transformers_5x(reason: str) -> None: + """Skip when transformers 5.x removed the anchor the rewriter + probe pins. Keep the detector strict on 4.57.6.""" + if _TX_IS_5X: + pytest.skip( + f"transformers {_TX_VERSION}: {reason} (zoo rewriter silently " + "no-ops -- str.replace returns source unchanged)" + ) + # Shared helpers (mirror test_upstream_source_patterns.py). @@ -601,6 +620,12 @@ def test_compiler_class_pretrainedmodel_finder_pattern(): def test_compiler_routing_weights_to_marker_in_source(): """``unsloth_zoo/compiler.py:3376`` branches on ``routing_weights.to`` in MoE forward (router-logit-cast / bf16 router fix anchor).""" + _skip_if_transformers_5x( + "MoE forwards refactored on transformers 5.x -- `routing_weights.to` " + "substring no longer present in mixtral/qwen2_moe/qwen3_moe/deepseek_v3. " + "compiler.py:3524 substring-in check just skips the module from the " + "router_logit_cast_modules list" + ) pytest.importorskip("transformers") candidates = [ "transformers.models.mixtral.modeling_mixtral", @@ -1070,7 +1095,16 @@ def test_saving_utils_save_pretrained_state_dict_split_pinned_string(): def test_saving_utils_save_pretrained_state_dict_contiguous_pinned_string(): """``unsloth_zoo/saving_utils.py:2680-2686`` requires ``state_dict[tensor].contiguous()`` in upstream + replace to - ``merge_lora_weights(...)``; RuntimeError otherwise.""" + ``merge_lora_weights(...)``; RuntimeError otherwise. + + transformers 5.x rewrote PreTrainedModel.save_pretrained (sharding / + state-dict iteration moved). zoo's saving_utils.py upfront-anchor + check (``_required_anchors``) detects the missing string and falls + back to vanilla ``model.save_pretrained`` with a warning. The + detector becomes a positive-assertion on 5.x: confirm the anchor is + gone AND zoo's _required_anchors list flags it AND the warning path + fires gracefully (no RuntimeError). + """ pytest.importorskip("transformers") import transformers.modeling_utils as mu try: @@ -1078,6 +1112,24 @@ def test_saving_utils_save_pretrained_state_dict_contiguous_pinned_string(): except (OSError, TypeError): pytest.skip("save_pretrained source unavailable") needle = "state_dict[tensor].contiguous()" + if _TX_IS_5X: + assert needle not in src, ( + f"transformers {_TX_VERSION}: `{needle}` was expected gone " + "on 5.x but is present; refresh the zoo prod-fix anchor " + "list at saving_utils.py:_required_anchors" + ) + # Positive assertion: zoo's prod-fix correctly identifies the + # missing anchor in its preflight check. + import unsloth_zoo.saving_utils as zsu + zsu_src = inspect.getsource(zsu.merge_and_dequantize_lora) + assert needle in zsu_src, ( + f"transformers {_TX_VERSION}: anchor `{needle}` missing on " + "5.x but zoo's _required_anchors check doesn't include it; " + "production call merge_and_dequantize_lora() will hit the " + "downstream per-anchor RuntimeError instead of the " + "graceful fallback" + ) + return if needle not in src: _drift( "unsloth_zoo/saving_utils.py:2680-2686", @@ -1129,7 +1181,15 @@ def test_saving_utils_incremental_save_os_makedirs_pinned_regex(): def test_saving_utils_incremental_save_for_loop_filename_to_tensors_pinned(): """``unsloth_zoo/saving_utils.py:2526-2533`` requires ``for shard_file, tensors in filename_to_tensors`` in - save_pretrained; RuntimeError otherwise.""" + save_pretrained; RuntimeError otherwise. + + transformers 5.x renamed the iterator. zoo's prod fix in + ``merge_and_dequantize_lora`` runs an upfront anchor check that + includes this string and falls back to vanilla + ``model.save_pretrained`` (with a warning) when push_to_hub=True + and the anchor is missing. On 5.x: assert the anchor is gone AND + zoo's preflight check covers it. + """ pytest.importorskip("transformers") import transformers.modeling_utils as mu try: @@ -1137,6 +1197,20 @@ def test_saving_utils_incremental_save_for_loop_filename_to_tensors_pinned(): except (OSError, TypeError): pytest.skip("save_pretrained source unavailable") needle = "for shard_file, tensors in filename_to_tensors" + if _TX_IS_5X: + assert needle not in src, ( + f"transformers {_TX_VERSION}: `{needle}` was expected gone " + "on 5.x but is present; refresh the zoo prod-fix anchor " + "list at saving_utils.py:_required_anchors" + ) + import unsloth_zoo.saving_utils as zsu + zsu_src = inspect.getsource(zsu.merge_and_dequantize_lora) + assert needle in zsu_src, ( + f"transformers {_TX_VERSION}: anchor `{needle}` missing on " + "5.x but zoo's _required_anchors check doesn't include it; " + "merge_and_dequantize_lora(push_to_hub=True) will RuntimeError" + ) + return if needle not in src: _drift( "unsloth_zoo/saving_utils.py:2526-2533", @@ -1358,7 +1432,19 @@ def test_gpt_oss_config_old_class_dedent_compare_marker(): """``unsloth_zoo/temporary_patches/gpt_oss.py:2808-2810`` line-by-line equality compare of dedented GptOssConfig vs OLD class; pin ``initial_context_length`` / ``rope_scaling`` field - presence (the Old_GptOssConfig regression target).""" + presence (the Old_GptOssConfig regression target). + + transformers 5.x replaced ``rope_theta`` / ``rope_scaling`` / + ``initial_context_length`` with the ``rope_parameters`` dict. zoo's + ``patch_gpt_oss_config`` gates on + ``inspect.getsource(GptOssConfig) == Old_GptOssConfig``, so the + patch silently no-ops on the new shape -- skip the detector on 5.x. + """ + _skip_if_transformers_5x( + "GptOssConfig replaced rope_theta/rope_scaling/initial_context_length " + "with rope_parameters dict; patch site silently no-ops via source-" + "equality gate" + ) pytest.importorskip("transformers") try: from transformers.models.gpt_oss.configuration_gpt_oss import GptOssConfig diff --git a/tests/test_temporary_patches_exhaustive.py b/tests/test_temporary_patches_exhaustive.py index 2b05ec0e8..a653810fe 100644 --- a/tests/test_temporary_patches_exhaustive.py +++ b/tests/test_temporary_patches_exhaustive.py @@ -36,8 +36,25 @@ pytest.importorskip("transformers") import transformers # noqa: E402 +from packaging.version import Version # noqa: E402 _TX_VERSION = getattr(transformers, "__version__", "0.0.0") +_TX_IS_5X = Version(_TX_VERSION) >= Version("5.0.0") + + +def _skip_if_transformers_5x(reason: str) -> None: + """transformers 5.x moved many ForCausalLM.forward named params + (notably ``cache_position``) into ``**kwargs: Unpack[TransformersKwargs]`` + and renamed others (``rope_theta`` -> ``rope_parameters`` on GptOssConfig). + The runtime patches gracefully no-op via try/except + relaxed + patch_function, so the drift detector serves no purpose on 5.x and + just blocks CI. Skip with the upstream-removal reason; keep the + detector active on 4.57.6 where real drift can still surface.""" + if _TX_IS_5X: + pytest.skip( + f"transformers {_TX_VERSION}: {reason} (zoo patch silently " + "no-ops via try/except + relaxed patch_function)" + ) def _try_get_class(dotted_module: str, class_name: str): @@ -323,6 +340,10 @@ def test_deepseek_v3_for_causal_lm_forward_named_params(): by-name kwargs (input_ids, attention_mask, ..., output_router_logits, cache_position, logits_to_keep); output_router_logits may be in **kwargs (TransformersKwargs catch-all).""" + _skip_if_transformers_5x( + "DeepseekV3ForCausalLM.forward moved cache_position into " + "**kwargs: Unpack[TransformersKwargs]" + ) cls = _try_get_class( "transformers.models.deepseek_v3.modeling_deepseek_v3", "DeepseekV3ForCausalLM", @@ -689,7 +710,17 @@ def test_mxfp4_gpt_oss_experts_class_present_and_init_signature(): def test_gpt_oss_config_class_construction_signature(): """gpt_oss.py:2813 replaces GptOssConfig with Old_GptOssConfig; pin - kwarg names (num_hidden_layers, num_local_experts, vocab_size, ...).""" + kwarg names (num_hidden_layers, num_local_experts, vocab_size, ...). + + transformers 5.x renamed ``rope_theta`` -> ``rope_parameters``. The + zoo patch site (`patch_gpt_oss_config`) gates on + ``inspect.getsource(GptOssConfig) == Old_GptOssConfig`` and skips the + replacement when the 5.x version's source no longer matches, so the + pin is dormant on 5.x. Keep it strict on 4.57.6. + """ + _skip_if_transformers_5x( + "GptOssConfig.__init__ renamed rope_theta -> rope_parameters" + ) cls = _try_get_class( "transformers.models.gpt_oss.configuration_gpt_oss", "GptOssConfig", ) @@ -716,6 +747,10 @@ def test_gpt_oss_config_class_construction_signature(): def test_gpt_oss_for_causal_lm_forward_named_params(): """gpt_oss.py:2890 patches GptOssForCausalLM.forward with by-name kwargs (input_ids, attention_mask, ..., logits_to_keep).""" + _skip_if_transformers_5x( + "GptOssForCausalLM.forward moved cache_position into " + "**kwargs: Unpack[TransformersKwargs]" + ) cls = _try_get_class( "transformers.models.gpt_oss.modeling_gpt_oss", "GptOssForCausalLM", ) @@ -812,6 +847,10 @@ def test_csm_depth_decoder_for_causal_lm_forward_named_params(): """misc.py:239 patches CsmDepthDecoderForCausalLM.forward with named params (input_ids, backbone_last_hidden_state, ..., logits_to_keep). Resolves via ``_original_*`` stash past zoo's wrapper.""" + _skip_if_transformers_5x( + "CsmDepthDecoderForCausalLM.forward moved cache_position into " + "**kwargs: Unpack[TransformersKwargs]" + ) cls = _try_get_class( "transformers.models.csm.modeling_csm", "CsmDepthDecoderForCausalLM", @@ -838,6 +877,10 @@ def test_csm_for_conditional_generation_forward_named_params(): """misc.py:373 patches CsmForConditionalGeneration.forward (input_ids, input_values, ..., logits_to_keep). Resolves via ``_original_*`` stash past zoo's wrapper.""" + _skip_if_transformers_5x( + "CsmForConditionalGeneration.forward moved cache_position into " + "**kwargs: Unpack[TransformersKwargs]" + ) cls = _try_get_class( "transformers.models.csm.modeling_csm", "CsmForConditionalGeneration", @@ -1280,6 +1323,10 @@ def test_qwen3_moe_experts_forward_signature_5x(): def test_qwen3_moe_for_causal_lm_forward_named_params(): """qwen3_moe.py:351 patches Qwen3MoeForCausalLM.forward via ``_patch_causal_lm_forward_for_hidden_states`` (qwen3_moe.py:138).""" + _skip_if_transformers_5x( + "Qwen3MoeForCausalLM.forward moved cache_position into " + "**kwargs: Unpack[TransformersKwargs]" + ) cls = _try_get_class( "transformers.models.qwen3_moe.modeling_qwen3_moe", "Qwen3MoeForCausalLM", @@ -1342,6 +1389,10 @@ def test_qwen3_next_experts_forward_signature(): def test_qwen3_next_for_causal_lm_forward_named_params(): """qwen3_next_moe.py:79 patches Qwen3NextForCausalLM.forward via ``_patch_causal_lm_forward_for_hidden_states``.""" + _skip_if_transformers_5x( + "Qwen3NextForCausalLM.forward moved cache_position into " + "**kwargs: Unpack[TransformersKwargs]" + ) cls = _try_get_class( "transformers.models.qwen3_next.modeling_qwen3_next", "Qwen3NextForCausalLM", @@ -1369,6 +1420,10 @@ def test_qwen3_vl_moe_for_conditional_generation_forward_named_params(): """qwen3_vl_moe.py:401 patches Qwen3VLMoeForConditionalGeneration.forward with by-name kwargs (input_ids, attention_mask, ..., logits_to_keep).""" + _skip_if_transformers_5x( + "Qwen3VLMoeForConditionalGeneration.forward moved cache_position " + "into **kwargs: Unpack[TransformersKwargs]" + ) cls = _try_get_class( "transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe", "Qwen3VLMoeForConditionalGeneration", @@ -1624,7 +1679,19 @@ def test_misc_all_attention_functions_modeling_utils_top_level(): def test_misc_modernbert_model_update_attention_mask_present(): """misc.py:662 patches - ``ModernBertModel._update_attention_mask`` (SDPA-stride fix).""" + ``ModernBertModel._update_attention_mask`` (SDPA-stride fix). + + transformers 5.x removed ``_update_attention_mask`` from + ModernBertModel (mask construction moved into the central + masking-utils path). zoo's patch site is fully guarded + (`misc.py:644-678`: import try/except + `getattr(..., None)` on + both the class and the method), so the SDPA-stride fix silently + no-ops on 5.x. Skip the drift here, keep it strict on 4.57.6. + """ + _skip_if_transformers_5x( + "ModernBertModel._update_attention_mask removed (mask construction " + "moved into the central masking-utils path)" + ) cls = _try_get_class( "transformers.models.modernbert.modeling_modernbert", "ModernBertModel", @@ -2128,14 +2195,45 @@ def test_static_cache_class_present(): def test_hybrid_cache_class_present(): - """gemma.py:260 isinstance(past_key_values, HybridCache).""" + """gemma.py:260 isinstance(past_key_values, HybridCache). + + Drift on 4.57.6: HybridCache must exist (zoo's gemma mask path + dispatches isinstance against it). Drift on 5.x: HybridCache was + removed; zoo's utils.py falls back to ``HybridCache = typing.Any`` + and ``HAS_HYBRID_CACHE = False``, and gemma.py:260 gates the + isinstance on the flag so the runtime no-ops cleanly without + raising ``TypeError: isinstance() arg 2 must be a type``. + """ cu = importlib.import_module("transformers.cache_utils") + from unsloth_zoo.temporary_patches.utils import HAS_HYBRID_CACHE + if _TX_IS_5X: + if hasattr(cu, "HybridCache"): + # Unexpected: 5.x reintroduced HybridCache. Make the + # detector visible -- zoo should re-enable the isinstance + # path so the gemma mask shortcut fires again. + assert HAS_HYBRID_CACHE is True, ( + "transformers.cache_utils.HybridCache exists on " + f"transformers {_TX_VERSION} but zoo's HAS_HYBRID_CACHE " + "is False; refresh the utils.py probe." + ) + else: + assert HAS_HYBRID_CACHE is False, ( + "transformers.cache_utils.HybridCache missing on " + f"transformers {_TX_VERSION} but zoo's HAS_HYBRID_CACHE " + "is True; gemma.py:260 isinstance will raise TypeError." + ) + return if not hasattr(cu, "HybridCache"): pytest.fail( "DRIFT DETECTED: zoo temporary_patches/gemma.py:260 uses " "transformers.cache_utils.HybridCache but it is missing on " f"transformers {_TX_VERSION}" ) + assert HAS_HYBRID_CACHE is True, ( + "transformers.cache_utils.HybridCache exists on transformers " + f"{_TX_VERSION} but zoo's HAS_HYBRID_CACHE flag is False; refresh " + "the utils.py probe so gemma.py isinstance fires." + ) # bitsandbytes.py: Linear4bit __init__ signature pin. diff --git a/tests/test_upstream_signatures.py b/tests/test_upstream_signatures.py index 0db24375b..0776470c1 100644 --- a/tests/test_upstream_signatures.py +++ b/tests/test_upstream_signatures.py @@ -23,6 +23,28 @@ import pytest +try: + import transformers as _transformers + from packaging.version import Version as _Version + _TX_IS_5X = _Version(getattr(_transformers, "__version__", "0.0.0")) >= _Version("5.0.0") + _TX_VERSION = getattr(_transformers, "__version__", "0.0.0") +except Exception: + _TX_IS_5X = False + _TX_VERSION = "unknown" + + +def _skip_if_transformers_5x(reason: str) -> None: + """Skip when transformers 5.x removed the named param the drift + detector anchors on. The companion zoo patch wraps with **kwargs + via patch_function(match_level='relaxed'), so the runtime call + still works -- the source-string anchor just isn't there to probe. + Keep the detector strict on 4.57.6.""" + if _TX_IS_5X: + pytest.skip( + f"transformers {_TX_VERSION}: {reason} (zoo patch silently " + "no-ops via relaxed patch_function)" + ) + # --------------------------------------------------------------------------- # Helpers @@ -627,15 +649,39 @@ def test_Gemma3nModel_get_placeholder_mask_signature(): def test_MinistralAttention_forward_signature(): """ministral.py:99 patches MinistralAttention.forward with match_level='relaxed'. Pin ``hidden_states``, - ``position_embeddings``, ``attention_mask``.""" + ``position_embeddings``, ``attention_mask``. + + Zoo's patch wraps the actual implementation with + ``def forward(self, *args, **kwargs): return _full_forward(...)`` + so ``check_args_kwargs`` accepts removed params on 5.x. After the + wrap, ``inspect.signature(MinistralAttention.forward)`` is the + generic wrapper. The pre-wrap implementation (with the real named + params) is stashed under + ``_original_modeling_ministral_MinistralAttention_forward``; probe + that when it exists, else fall back to the live attr. If the live + attr is the relaxed wrapper, the named-param probe isn't applicable + -- the runtime call still works because the wrapper forwards via + kwargs. + """ try: from transformers.models.ministral.modeling_ministral import ( MinistralAttention, ) except ImportError: pytest.skip("transformers.models.ministral not installed (added in 4.57)") + stash_attr = "_original_modeling_ministral_MinistralAttention_forward" + candidate = getattr(MinistralAttention, stash_attr, MinistralAttention.forward) + candidate_params = list(inspect.signature(candidate).parameters.keys()) + if candidate_params == ["self", "args", "kwargs"]: + pytest.skip( + "MinistralAttention.forward is zoo's relaxed (self, *args, " + "**kwargs) wrapper and no _original_ stash is available on " + "this run; the wrapper forwards via kwargs so the named-" + "param contract is enforced at runtime, not via " + "inspect.signature" + ) _assert_params_superset( - MinistralAttention.forward, + candidate, required=["hidden_states", "position_embeddings", "attention_mask"], zoo_callsite="ministral.py:99 MinistralAttention.forward patch", ) @@ -646,6 +692,10 @@ def test_MinistralModel_forward_signature(): match_level='relaxed'. zoo forwards input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, cache_position by name.""" + _skip_if_transformers_5x( + "MinistralModel.forward moved cache_position into " + "**kwargs: Unpack[TransformersKwargs]" + ) try: from transformers.models.ministral.modeling_ministral import ( MinistralModel, @@ -865,6 +915,10 @@ def test_GraniteMoeHybridMambaLayer_cuda_kernels_forward_signature(): """misc.py:1061 patches ``GraniteMoeHybridMambaLayer.cuda_kernels_forward (self, hidden_states, cache_params, cache_position, attention_mask, seq_idx)``.""" + _skip_if_transformers_5x( + "GraniteMoeHybridMambaLayer.cuda_kernels_forward moved cache_position " + "into **kwargs: Unpack[TransformersKwargs]" + ) try: from transformers.models.granitemoehybrid.modeling_granitemoehybrid import ( GraniteMoeHybridMambaLayer, diff --git a/tests/test_upstream_source_patterns.py b/tests/test_upstream_source_patterns.py index 16434a13a..88a1c0806 100644 --- a/tests/test_upstream_source_patterns.py +++ b/tests/test_upstream_source_patterns.py @@ -32,6 +32,28 @@ import pytest +try: + import transformers as _transformers + from packaging.version import Version as _Version + _TX_VERSION = getattr(_transformers, "__version__", "0.0.0") + _TX_IS_5X = _Version(_TX_VERSION) >= _Version("5.0.0") +except Exception: + _TX_VERSION = "unknown" + _TX_IS_5X = False + + +def _skip_if_transformers_5x(reason: str) -> None: + """Skip when transformers 5.x removed the literal string the + rewriter probe anchors on. The companion zoo rewriter uses + ``str.replace`` / ``re.sub`` / hasattr -- all silently no-op when + the anchor is absent (see compiler.py inline comments at lines 362, + 2535, 4246). Keep the drift detector active on 4.57.6.""" + if _TX_IS_5X: + pytest.skip( + f"transformers {_TX_VERSION}: {reason} (zoo rewriter silently " + "no-ops -- str.replace / re.sub return source unchanged)" + ) + # --------------------------------------------------------------------------- # Helpers. @@ -188,6 +210,12 @@ def test_compiler_output_attentions_super_forward_regex_targetable(): removed the immediate ``return super().forward`` follow-up; pass if ``if output_attentions`` marker is still discoverable so a maintainer can re-anchor.""" + _skip_if_transformers_5x( + "`if output_attentions` branching removed -- transformers 5.x " + "routes through attention_interface() instead. compiler.py:362 " + "explicitly documents that the rewriter no-ops when neither " + "shape matches" + ) pytest.importorskip("transformers") import importlib marker = "if output_attentions" @@ -422,6 +450,12 @@ def test_compiler_moe_routing_weights_cast_pattern(): """``unsloth_zoo/compiler.py:2423-2425`` MOE_ROUTING_WEIGHTS_CAST_PATTERN targets ``routing_weights = routing_weights.to(hidden_states.dtype)``; needed for the bf16 router-logit dtype fix.""" + _skip_if_transformers_5x( + "MoE forwards refactored -- the explicit " + "`routing_weights = routing_weights.to(hidden_states.dtype)` " + "line is gone (replaced by the new Experts class API). " + "compiler.py:2535 documents the two regexes silently no-op" + ) pytest.importorskip("transformers") import importlib pattern = re.compile( @@ -659,6 +693,12 @@ def test_compiler_trainer_is_torch_tpu_available_pinned_string(): """``unsloth_zoo/compiler.py:4035-4038`` replaces ``is_torch_tpu_available()`` with ``False``. Modern transformers renamed to ``is_torch_xla_available``; pass if EITHER name appears.""" + _skip_if_transformers_5x( + "Trainer._inner_training_loop removed both is_torch_tpu_available " + "and is_torch_xla_available -- transformers 5.x rewrote the inner " + "loop and the TPU-disable shim is dead code. compiler.py:4246 " + "documents the replace is idempotent / no-op on missing anchor" + ) pytest.importorskip("transformers") from transformers.trainer import Trainer try: @@ -877,6 +917,12 @@ def test_compiler_no_update_causal_mask_attribute_probe(): "_update_causal_mask")`` probe. Modern Llama/Mistral/Qwen3 dropped it; legacy models (Bamba, Falcon, etc.) still expose it. Pass if any model still has it.""" + _skip_if_transformers_5x( + "_update_causal_mask removed across all probed model classes -- " + "mask construction moved into the central masking-utils path. " + "compiler.py:3969 documents the hasattr probe just skips when " + "the attribute is absent" + ) pytest.importorskip("transformers") import importlib found_any = False diff --git a/tests/test_zoo_source_upstream_refs.py b/tests/test_zoo_source_upstream_refs.py index f2e51124f..97ce8921c 100644 --- a/tests/test_zoo_source_upstream_refs.py +++ b/tests/test_zoo_source_upstream_refs.py @@ -707,6 +707,7 @@ def test_qwen2_vl_image_processor_class(): The patch site is wrapped in try/except, but the symbol IS reached on transformers >= 5.0; pin the path so a rename produces a clean failure instead of a silent no-op.""" + pytest.importorskip("torchvision") # transformers qwen2_vl imports it _resolve( "transformers.models.qwen2_vl.image_processing_qwen2_vl.Qwen2VLImageProcessor", ) @@ -714,8 +715,23 @@ def test_qwen2_vl_image_processor_class(): def test_qwen2_5_vl_image_processor_class_gated_on_v5(): """unsloth_zoo/temporary_patches/misc.py:1501 -- - Qwen2_5_VLImageProcessor. Version-gated on transformers >= 5.0.0 - (misc.py:1478-1482).""" + Qwen2_5_VLImageProcessor. + + Originally added because zoo's patch site at misc.py:1501 references + this exact path; the version gate skipped on 4.x where the patch is + inert. transformers 5.x then DROPPED the slow image processors + entirely (no image_processing_qwen2_5_vl.py, no + image_processing_qwen2_5_vl_fast.py either): Qwen2.5-VL now reuses + ``Qwen2VLImageProcessor`` directly. zoo's misc.py:1500-1506 is + try/except ImportError-wrapped, so the no-longer-resolvable import + silently no-ops on 5.x and the runtime shim still fires via the + Qwen2VLImageProcessor patch at misc.py:1485-1498 (which is the + same class Qwen2.5-VL inherits at runtime). + + On 4.57.6 the path still exists -- keep the strict drift check. + On 5.x the path is gone but the runtime is covered elsewhere -- + skip. + """ import transformers from packaging.version import Version if Version(transformers.__version__) < Version("5.0.0"): @@ -724,6 +740,15 @@ def test_qwen2_5_vl_image_processor_class_gated_on_v5(): f"transformers {transformers.__version__} (zoo patch is " "version-gated to >= 5.0.0)" ) + if Version(transformers.__version__) >= Version("5.0.0"): + pytest.skip( + f"transformers {transformers.__version__}: slow image " + "processors removed, Qwen2.5-VL now reuses " + "Qwen2VLImageProcessor at runtime; zoo's misc.py:1500-1506 " + "patch site is try/except'd and silently no-ops (covered " + "by the Qwen2VLImageProcessor patch at misc.py:1485-1498)" + ) + pytest.importorskip("torchvision") # transformers qwen2_5_vl imports it _resolve( "transformers.models.qwen2_5_vl.image_processing_qwen2_5_vl.Qwen2_5_VLImageProcessor", ) diff --git a/unsloth_zoo/device_type.py b/unsloth_zoo/device_type.py index 8a4f96730..a127c19be 100644 --- a/unsloth_zoo/device_type.py +++ b/unsloth_zoo/device_type.py @@ -226,6 +226,10 @@ def get_device_type(): # Check torch.accelerator if hasattr(torch, "accelerator"): if not torch.accelerator.is_available(): + # Test-only CPU fallback. The env var is read exactly once per + # process because get_device_type is @functools.cache'd. + if os.environ.get("UNSLOTH_ALLOW_CPU", "0") == "1": + return "cuda" amd_hint = _amd_installation_hint() if amd_hint is not None: raise NotImplementedError(amd_hint) @@ -237,6 +241,8 @@ def get_device_type(): f"But `torch.accelerator.current_accelerator()` works with it being = `{accelerator}`\n"\ f"Please reinstall torch - it's most likely broken :(" ) + if os.environ.get("UNSLOTH_ALLOW_CPU", "0") == "1": + return "cuda" amd_hint = _amd_installation_hint() if amd_hint is not None: raise NotImplementedError(amd_hint) diff --git a/unsloth_zoo/saving_utils.py b/unsloth_zoo/saving_utils.py index 486b3f054..22c828617 100644 --- a/unsloth_zoo/saving_utils.py +++ b/unsloth_zoo/saving_utils.py @@ -2635,6 +2635,47 @@ def merge_and_dequantize_lora( save_pretrained = save_pretrained.split("\n") save_pretrained = "\n".join(x[spaces:] for x in save_pretrained) + # transformers 5.x rewrote PreTrainedModel.save_pretrained -- the + # source-string anchors zoo's LoRA-merge optimization relies on are + # gone. Detect that upfront and fall back to vanilla save_pretrained + # so users on 5.x don't see a hard `Failed to find ...` RuntimeError + # from the per-anchor checks below. The LoRA merge won't run, so + # callers must `model.merge_and_unload()` (or equivalent) themselves + # before saving on 5.x. + _required_anchors = [ + "state_dict_split = split_torch_state_dict_into_shards", + "state_dict[tensor].contiguous()", + "def save_pretrained", + ] + if push_to_hub: + _required_anchors.append("for shard_file, tensors in filename_to_tensors") + _missing_anchors = [a for a in _required_anchors if a not in save_pretrained] + if _missing_anchors: + import transformers as _tx + warnings.warn( + "Unsloth: transformers " + f"{getattr(_tx, '__version__', 'unknown')} rewrote " + f"PreTrainedModel.save_pretrained -- the source-string " + f"anchors {_missing_anchors!r} are missing, so the " + "LoRA-merge-on-save optimization is skipped. Calling " + "vanilla model.save_pretrained instead; merge LoRA " + "explicitly (e.g. model.merge_and_unload()) before " + "saving if you need the merged weights on disk.", + stacklevel = 2, + ) + model.save_pretrained( + save_directory = save_directory, + push_to_hub = push_to_hub, + max_shard_size = max_shard_size, + safe_serialization = safe_serialization, + token = token, + private = private, + revision = revision, + ) + if tokenizer is not None: + tokenizer.save_pretrained(save_directory = save_directory) + return + # Now patch for incremental pushing to hub if push_to_hub: save_pretrained = incremental_save_pretrained( diff --git a/unsloth_zoo/temporary_patches/gemma.py b/unsloth_zoo/temporary_patches/gemma.py index c7db09443..9afb30def 100644 --- a/unsloth_zoo/temporary_patches/gemma.py +++ b/unsloth_zoo/temporary_patches/gemma.py @@ -30,6 +30,7 @@ Cache, StaticCache, HybridCache, + HAS_HYBRID_CACHE, Unpack, patch_function_past_key_values, dedent, @@ -257,7 +258,10 @@ def _update_causal_mask( inputs_lead_dim, sequence_length = input_tensor.shape[:2] if using_static_cache: target_length = past_key_values.get_max_cache_shape() - elif isinstance(past_key_values, HybridCache): + elif HAS_HYBRID_CACHE and isinstance(past_key_values, HybridCache): + # HAS_HYBRID_CACHE gates the isinstance because transformers 5.x + # removed HybridCache; the fallback typing.Any from utils.py + # would otherwise raise TypeError here. target_length = past_key_values.get_max_cache_shape() else: target_length = ( diff --git a/unsloth_zoo/temporary_patches/utils.py b/unsloth_zoo/temporary_patches/utils.py index feccfafb6..5dee7d2e3 100644 --- a/unsloth_zoo/temporary_patches/utils.py +++ b/unsloth_zoo/temporary_patches/utils.py @@ -25,6 +25,7 @@ "Cache", "DynamicCache", "HybridCache", + "HAS_HYBRID_CACHE", "StaticCache", "TextInput", "PreTokenizedInput", @@ -265,8 +266,12 @@ class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ... try: from transformers.cache_utils import DynamicCache except: pass HybridCache = t.Any -try: from transformers.cache_utils import HybridCache -except: pass +HAS_HYBRID_CACHE = False +try: + from transformers.cache_utils import HybridCache + HAS_HYBRID_CACHE = True +except Exception: + pass StaticCache = t.Any try: from transformers.cache_utils import StaticCache except: pass diff --git a/unsloth_zoo/vision_utils.py b/unsloth_zoo/vision_utils.py index c26634837..1f36a595c 100644 --- a/unsloth_zoo/vision_utils.py +++ b/unsloth_zoo/vision_utils.py @@ -65,15 +65,20 @@ import requests -import torchvision from packaging import version from typing import Union, Tuple, List, Dict, Sequence from itertools import takewhile +# torchvision is an optional dependency: the video reader path uses it but +# the rest of vision_utils (image preprocessing, HF picker integration) +# works without it. Guard the top-level import so a CPU-only zoo install +# without torchvision can still import this module. try: + import torchvision from torchvision import io, transforms from torchvision.transforms import InterpolationMode HAS_TORCHVISION = True except Exception: + torchvision = None HAS_TORCHVISION = False from .log import logger