From 9e25ba57e6482c56a423efef79b556d1b777fcdd Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sun, 19 Apr 2026 07:52:55 +0000 Subject: [PATCH 01/13] Add .gemini/config.yaml for gemini-code-assist bot configuration --- .gemini/config.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .gemini/config.yaml diff --git a/.gemini/config.yaml b/.gemini/config.yaml new file mode 100644 index 000000000..9cd83708f --- /dev/null +++ b/.gemini/config.yaml @@ -0,0 +1,13 @@ +have_fun: false +memory_config: + disabled: false +code_review: + disable: false + comment_severity_threshold: LOW + max_review_comments: -1 + pull_request_opened: + help: false + summary: false + code_review: false + include_drafts: false +ignore_patterns: [] From 3614701ab8334168a8d4ca6db9cd1ec24880274c Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sun, 19 Apr 2026 07:52:56 +0000 Subject: [PATCH 02/13] Add .gitattributes with merge=ours for staging-only files --- .gitattributes | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..e805e6c08 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +# Normalize Python files to LF line endings +*.py text eol=lf + +# Preserve staging-only files during upstream merges +.gemini/** merge=ours +.gitattributes merge=ours From 9f155fc5ccb1bf3ffc77304e86395e334f7b5a21 Mon Sep 17 00:00:00 2001 From: Lyxot Date: Wed, 20 May 2026 01:00:56 +0800 Subject: [PATCH 03/13] fix: persist MLX LoRA adapter metadata --- unsloth_zoo/mlx/utils.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/unsloth_zoo/mlx/utils.py b/unsloth_zoo/mlx/utils.py index e8b7e1881..e8c56c4ce 100644 --- a/unsloth_zoo/mlx/utils.py +++ b/unsloth_zoo/mlx/utils.py @@ -2723,15 +2723,34 @@ def _enrich_mlx_adapter_config(model, adapter_config): requires_runtime = True adapter_config["requires_unsloth_mlx_runtime_quantization"] = 
bool(requires_runtime) - # why: record LoRA module paths so reload recreates vision/projector LoRA - # layers (mlx-lm.load_adapters only knows the language tower). + # why: record LoRA module paths and parameters so reload recreates the same + # adapter topology. Without scale metadata, reload falls back to scale=1.0 + # even when training used alpha/r > 1, changing post-reload logits. try: lora_paths = [] + lora_rank = None + lora_scale = None + lora_dropout = None for name, module in model.named_modules(): if hasattr(module, "lora_a") and hasattr(module, "lora_b"): lora_paths.append(name) + if lora_rank is None: + lora_rank = int(module.lora_a.shape[-1]) + lora_scale = float(getattr(module, "scale", 1.0)) + drop = getattr(module, "dropout", None) + lora_dropout = float(getattr(drop, "p", 0.0) if drop else 0.0) if lora_paths: adapter_config["unsloth_mlx_lora_module_paths"] = lora_paths + if lora_rank is not None: + lora_parameters = dict(adapter_config.get("lora_parameters") or {}) + lora_parameters.setdefault("rank", lora_rank) + lora_parameters.setdefault("scale", lora_scale) + lora_parameters.setdefault("dropout", lora_dropout) + adapter_config["lora_parameters"] = lora_parameters + adapter_config.setdefault("rank", lora_parameters["rank"]) + adapter_config.setdefault("scale", lora_parameters["scale"]) + adapter_config.setdefault("dropout", lora_parameters["dropout"]) + adapter_config.setdefault("peft_type", "LORA") except Exception: pass return adapter_config From a0bce35e43ffed6ff146d8223ba2e26f17afcdb9 Mon Sep 17 00:00:00 2001 From: Lyxot Date: Wed, 20 May 2026 01:17:56 +0800 Subject: [PATCH 04/13] fix: preserve MLX LoRA dropout metadata --- unsloth_zoo/mlx/utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/unsloth_zoo/mlx/utils.py b/unsloth_zoo/mlx/utils.py index e8c56c4ce..c9fa8553d 100644 --- a/unsloth_zoo/mlx/utils.py +++ b/unsloth_zoo/mlx/utils.py @@ -2738,7 +2738,13 @@ def _enrich_mlx_adapter_config(model, 
adapter_config): lora_rank = int(module.lora_a.shape[-1]) lora_scale = float(getattr(module, "scale", 1.0)) drop = getattr(module, "dropout", None) - lora_dropout = float(getattr(drop, "p", 0.0) if drop else 0.0) + if drop is None: + lora_dropout = 0.0 + elif hasattr(drop, "p"): + lora_dropout = float(drop.p) + else: + keep_probability = getattr(drop, "_p_1", 1.0) + lora_dropout = float(1.0 - keep_probability) if lora_paths: adapter_config["unsloth_mlx_lora_module_paths"] = lora_paths if lora_rank is not None: From f0dd9009bdc1689dd1b276b9ab473897cc3cbe39 Mon Sep 17 00:00:00 2001 From: Lyxot Date: Wed, 20 May 2026 01:49:02 +0800 Subject: [PATCH 05/13] fix: sync MLX LoRA adapter config fields --- unsloth_zoo/mlx/utils.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/unsloth_zoo/mlx/utils.py b/unsloth_zoo/mlx/utils.py index c9fa8553d..729177c88 100644 --- a/unsloth_zoo/mlx/utils.py +++ b/unsloth_zoo/mlx/utils.py @@ -2745,17 +2745,23 @@ def _enrich_mlx_adapter_config(model, adapter_config): else: keep_probability = getattr(drop, "_p_1", 1.0) lora_dropout = float(1.0 - keep_probability) - if lora_paths: + if lora_paths and "unsloth_mlx_lora_module_paths" not in adapter_config: adapter_config["unsloth_mlx_lora_module_paths"] = lora_paths if lora_rank is not None: lora_parameters = dict(adapter_config.get("lora_parameters") or {}) - lora_parameters.setdefault("rank", lora_rank) - lora_parameters.setdefault("scale", lora_scale) - lora_parameters.setdefault("dropout", lora_dropout) + inferred_lora_parameters = { + "rank": lora_rank, + "scale": lora_scale, + "dropout": lora_dropout, + } + for key, value in inferred_lora_parameters.items(): + if key not in lora_parameters: + explicit_value = adapter_config.get(key) + lora_parameters[key] = value if explicit_value is None else explicit_value adapter_config["lora_parameters"] = lora_parameters - adapter_config.setdefault("rank", lora_parameters["rank"]) - 
adapter_config.setdefault("scale", lora_parameters["scale"]) - adapter_config.setdefault("dropout", lora_parameters["dropout"]) + adapter_config["rank"] = lora_parameters["rank"] + adapter_config["scale"] = lora_parameters["scale"] + adapter_config["dropout"] = lora_parameters["dropout"] adapter_config.setdefault("peft_type", "LORA") except Exception: pass From fc1297939d636643cb1643aaa88e856750a70409 Mon Sep 17 00:00:00 2001 From: Lyxot Date: Wed, 20 May 2026 02:04:24 +0800 Subject: [PATCH 06/13] fix: prefer live MLX LoRA metadata --- unsloth_zoo/mlx/trainer.py | 9 ++++++++- unsloth_zoo/mlx/utils.py | 4 +--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/unsloth_zoo/mlx/trainer.py b/unsloth_zoo/mlx/trainer.py index fd2390932..71c685938 100644 --- a/unsloth_zoo/mlx/trainer.py +++ b/unsloth_zoo/mlx/trainer.py @@ -1397,7 +1397,14 @@ def save_model(self, output_dir=None): _lora_scale = getattr(m, "scale", 1.0) _drop = getattr(m, "dropout", None) - _lora_dropout = getattr(_drop, "p", 0.0) if _drop else 0.0 + if _drop is None: + _lora_dropout = 0.0 + elif hasattr(_drop, "p"): + _lora_dropout = float(_drop.p) + else: + _lora_dropout = float( + 1.0 - getattr(_drop, "_p_1", 1.0) + ) break diff --git a/unsloth_zoo/mlx/utils.py b/unsloth_zoo/mlx/utils.py index 729177c88..d70f9d52e 100644 --- a/unsloth_zoo/mlx/utils.py +++ b/unsloth_zoo/mlx/utils.py @@ -2755,9 +2755,7 @@ def _enrich_mlx_adapter_config(model, adapter_config): "dropout": lora_dropout, } for key, value in inferred_lora_parameters.items(): - if key not in lora_parameters: - explicit_value = adapter_config.get(key) - lora_parameters[key] = value if explicit_value is None else explicit_value + lora_parameters[key] = value adapter_config["lora_parameters"] = lora_parameters adapter_config["rank"] = lora_parameters["rank"] adapter_config["scale"] = lora_parameters["scale"] From 9f3d11c80fd4d11cc393e6cfb301cc1ab699e9e9 Mon Sep 17 00:00:00 2001 From: Lyxot Date: Wed, 20 May 2026 02:17:03 +0800 Subject: 
[PATCH 07/13] fix: handle MLX adapter reload edge cases --- unsloth_zoo/mlx/loader.py | 9 +++++++++ unsloth_zoo/mlx/trainer.py | 21 +++++++++------------ unsloth_zoo/mlx/utils.py | 38 +++++++++++++++++++++++++++++--------- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/unsloth_zoo/mlx/loader.py b/unsloth_zoo/mlx/loader.py index 1cfc0f742..a23b13769 100644 --- a/unsloth_zoo/mlx/loader.py +++ b/unsloth_zoo/mlx/loader.py @@ -1103,6 +1103,14 @@ def _apply_lora_at_paths(model, module_paths, adapter_cfg): setattr(parent, leaf, wrapped) +def _eval_mlx_model_after_adapter_reload(model): + try: + model.eval() + except Exception: + pass + return model + + def _adapter_actual_quant_config(adapter_cfg, resolved_map): expected = _global_quant_params(adapter_cfg.get("base_quantization_config")) if expected is not None: @@ -2435,6 +2443,7 @@ def from_pretrained( else: from mlx_lm.tuner.utils import load_adapters model = load_adapters(model, local_path) + model = _eval_mlx_model_after_adapter_reload(model) loaded_model_config = getattr(model, "_config", None) is_vlm_model = bool(getattr(model, "_is_vlm_model", False)) processor = getattr(model, "_processor", None) diff --git a/unsloth_zoo/mlx/trainer.py b/unsloth_zoo/mlx/trainer.py index 71c685938..d2a12dec5 100644 --- a/unsloth_zoo/mlx/trainer.py +++ b/unsloth_zoo/mlx/trainer.py @@ -1380,7 +1380,11 @@ def _prepare_data(self, is_vlm): def save_model(self, output_dir=None): """Save LoRA adapters or full merged model (if no LoRA).""" - from .utils import save_merged_model + from .utils import ( + _get_mlx_dropout_probability, + _infer_mlx_lora_rank, + save_merged_model, + ) output_dir = output_dir or self.args.output_dir trainable = dict(tree_flatten(self.model.trainable_parameters())) @@ -1393,18 +1397,11 @@ def save_model(self, output_dir=None): _lora_rank, _lora_scale, _lora_dropout = 8, 1.0, 0.0 for _, m in self.model.named_modules(): if hasattr(m, "lora_a"): - _lora_rank = m.lora_a.shape[-1] + _lora_rank = 
_infer_mlx_lora_rank(m) or _lora_rank _lora_scale = getattr(m, "scale", 1.0) - - _drop = getattr(m, "dropout", None) - if _drop is None: - _lora_dropout = 0.0 - elif hasattr(_drop, "p"): - _lora_dropout = float(_drop.p) - else: - _lora_dropout = float( - 1.0 - getattr(_drop, "_p_1", 1.0) - ) + _lora_dropout = _get_mlx_dropout_probability( + getattr(m, "dropout", None) + ) break diff --git a/unsloth_zoo/mlx/utils.py b/unsloth_zoo/mlx/utils.py index d70f9d52e..23a1a55c7 100644 --- a/unsloth_zoo/mlx/utils.py +++ b/unsloth_zoo/mlx/utils.py @@ -2664,6 +2664,31 @@ def _get_mlx_config_quantization(model): return config.get("quantization") or config.get("quantization_config") +def _get_mlx_dropout_probability(drop): + if drop is None: + return 0.0 + if hasattr(drop, "p"): + return float(drop.p) + keep_probability = getattr(drop, "_p_1", 1.0) + return float(1.0 - keep_probability) + + +def _infer_mlx_lora_rank(module): + lora_a = getattr(module, "lora_a", None) + lora_b = getattr(module, "lora_b", None) + lora_a_shape = tuple(getattr(lora_a, "shape", ()) or ()) + lora_b_shape = tuple(getattr(lora_b, "shape", ()) or ()) + if len(lora_a_shape) >= 3: + rank = lora_a_shape[-2] + if not lora_b_shape or lora_b_shape[-1] == rank: + return int(rank) + if lora_a_shape and lora_b_shape and lora_a_shape[-1] == lora_b_shape[0]: + return int(lora_a_shape[-1]) + if lora_a_shape: + return int(lora_a_shape[-1]) + return None + + def _enrich_mlx_adapter_config(model, adapter_config): adapter_config = dict(adapter_config or {}) hf_repo = getattr(model, "_hf_repo", None) or adapter_config.get("base_model_name_or_path") @@ -2735,16 +2760,11 @@ def _enrich_mlx_adapter_config(model, adapter_config): if hasattr(module, "lora_a") and hasattr(module, "lora_b"): lora_paths.append(name) if lora_rank is None: - lora_rank = int(module.lora_a.shape[-1]) + lora_rank = _infer_mlx_lora_rank(module) lora_scale = float(getattr(module, "scale", 1.0)) - drop = getattr(module, "dropout", None) - if drop is 
None: - lora_dropout = 0.0 - elif hasattr(drop, "p"): - lora_dropout = float(drop.p) - else: - keep_probability = getattr(drop, "_p_1", 1.0) - lora_dropout = float(1.0 - keep_probability) + lora_dropout = _get_mlx_dropout_probability( + getattr(module, "dropout", None) + ) if lora_paths and "unsloth_mlx_lora_module_paths" not in adapter_config: adapter_config["unsloth_mlx_lora_module_paths"] = lora_paths if lora_rank is not None: From 91535bc6eef043c9581a7a3f1fb4c15a273d73a1 Mon Sep 17 00:00:00 2001 From: Lyxot Date: Thu, 21 May 2026 00:14:43 +0800 Subject: [PATCH 08/13] fix: handle zero-token MLX CCE inputs --- tests/test_mlx_runtime_cce_compile.py | 73 +++++++++++++++++++++++++++ unsloth_zoo/mlx/cce/runtime_cce.py | 13 +++++ 2 files changed, 86 insertions(+) diff --git a/tests/test_mlx_runtime_cce_compile.py b/tests/test_mlx_runtime_cce_compile.py index 9168cfe0f..e646412d9 100644 --- a/tests/test_mlx_runtime_cce_compile.py +++ b/tests/test_mlx_runtime_cce_compile.py @@ -38,6 +38,79 @@ def _skip_torch_shim(): pytest.skip("requires real MLX runtime") +def test_runtime_cce_zero_tokens_returns_empty_losses_and_zero_gradients(): + _skip_torch_shim() + from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss + + runtime_cce, _ = make_chunked_cross_entropy_loss( + ignore_index=-100, + chunk_size=16, + ) + hidden = mx.zeros((0, 16), dtype=mx.float32) + weight = mx.zeros((32, 16), dtype=mx.float32) + targets = mx.zeros((0,), dtype=mx.int32) + + losses = runtime_cce(hidden, weight, targets) + mx.eval(losses) + assert losses.shape == (0,) + + def loss_fn(h, w): + return runtime_cce(h, w, targets).astype(mx.float32).sum() + + loss, grads = mx.value_and_grad(loss_fn, argnums=(0, 1))(hidden, weight) + mx.eval(loss, *grads) + + assert loss.item() == pytest.approx(0.0) + assert grads[0].shape == hidden.shape + assert grads[1].shape == weight.shape + assert mx.sum(mx.abs(grads[0]).astype(mx.float32)).item() == pytest.approx(0.0) + assert 
mx.sum(mx.abs(grads[1]).astype(mx.float32)).item() == pytest.approx(0.0) + + +def test_quantized_runtime_cce_zero_tokens_returns_empty_losses_and_zero_gradients(): + _skip_torch_shim() + import mlx.nn as nn + + from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss + + linear = nn.Linear(32, 32, bias=False) + qlinear = nn.QuantizedLinear.from_linear(linear, group_size=32, bits=4) + runtime_cce, _ = make_chunked_cross_entropy_loss( + ignore_index=-100, + chunk_size=16, + quantized=True, + group_size=qlinear.group_size, + bits=qlinear.bits, + ) + hidden = mx.zeros((0, 32), dtype=mx.float32) + targets = mx.zeros((0,), dtype=mx.int32) + + losses = runtime_cce( + hidden, + qlinear.weight, + qlinear.scales, + qlinear.biases, + targets, + ) + mx.eval(losses) + assert losses.shape == (0,) + + def loss_fn(h): + return runtime_cce( + h, + qlinear.weight, + qlinear.scales, + qlinear.biases, + targets, + ).astype(mx.float32).sum() + + loss, grad = mx.value_and_grad(loss_fn)(hidden) + mx.eval(loss, grad) + + assert loss.item() == pytest.approx(0.0) + assert grad.shape == hidden.shape + assert mx.sum(mx.abs(grad).astype(mx.float32)).item() == pytest.approx(0.0) + def test_compiled_runtime_cce_preserves_aux_lse_for_gradients(): _skip_torch_shim() from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py index 464051c97..f65668f21 100644 --- a/unsloth_zoo/mlx/cce/runtime_cce.py +++ b/unsloth_zoo/mlx/cce/runtime_cce.py @@ -464,6 +464,9 @@ def _forward_chunked_fused_finalize( n, _ = hidden_compute.shape vocab_size = weight_compute.shape[0] + if n == 0: + empty = mx.zeros((0,), dtype=mx.float32) + return empty, empty compute_bytes = 2 if hidden_compute.dtype in (mx.float16, mx.bfloat16) else 4 chunk_size = _resolve_chunk_size( chunk_size, @@ -680,6 +683,14 @@ def runtime_cce_loss_vjp(primals, cotangents, outputs): hidden_compute = hidden weight_compute = weight targets32 = 
targets.astype(mx.int32) + if hidden_compute.shape[0] == 0: + return ( + mx.zeros_like(hidden), + mx.zeros_like(weight), + mx.zeros_like(scales), + mx.zeros_like(biases), + mx.zeros_like(targets), + ) if grad_output is None: grad_output = mx.zeros_like(outputs[0]) grad_output32 = grad_output.astype(mx.float32) @@ -805,6 +816,8 @@ def runtime_cce_loss_vjp(primals, cotangents, outputs): hidden_compute = hidden weight_compute = weight targets32 = targets.astype(mx.int32) + if hidden_compute.shape[0] == 0: + return mx.zeros_like(hidden), mx.zeros_like(weight), mx.zeros_like(targets) if grad_output is None: grad_output = mx.zeros_like(outputs[0]) grad_output32 = grad_output.astype(mx.float32) From 41d5d411b73c7b78475923d4232c7bdc88407f43 Mon Sep 17 00:00:00 2001 From: Lyxot Date: Thu, 21 May 2026 00:17:06 +0800 Subject: [PATCH 09/13] fix: poison invalid MLX CCE labels --- tests/test_mlx_runtime_cce_compile.py | 85 +++++++++++++++++++++++++++ unsloth_zoo/mlx/cce/runtime_cce.py | 25 +++++++- 2 files changed, 109 insertions(+), 1 deletion(-) diff --git a/tests/test_mlx_runtime_cce_compile.py b/tests/test_mlx_runtime_cce_compile.py index e646412d9..cbe8e9d64 100644 --- a/tests/test_mlx_runtime_cce_compile.py +++ b/tests/test_mlx_runtime_cce_compile.py @@ -9,6 +9,7 @@ from __future__ import annotations +import math import sys import pytest @@ -111,6 +112,90 @@ def loss_fn(h): assert grad.shape == hidden.shape assert mx.sum(mx.abs(grad).astype(mx.float32)).item() == pytest.approx(0.0) +@pytest.mark.parametrize("bad_target", [-1, 32]) +def test_runtime_cce_invalid_labels_poison_loss_and_gradients(bad_target): + _skip_torch_shim() + from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss + + runtime_cce, _ = make_chunked_cross_entropy_loss( + ignore_index=-100, + chunk_size=16, + ) + hidden = mx.ones((3, 16), dtype=mx.float32) + weight = mx.ones((32, 16), dtype=mx.float32) + targets = mx.array([0, bad_target, -100], dtype=mx.int32) + + losses = runtime_cce(hidden, 
weight, targets) + mx.eval(losses) + + assert losses[0].item() == pytest.approx(math.log(32.0), rel=1e-5) + assert math.isnan(losses[1].item()) + assert losses[2].item() == pytest.approx(0.0) + + def loss_fn(h, w): + return runtime_cce(h, w, targets).astype(mx.float32).sum() + + loss, grads = mx.value_and_grad(loss_fn, argnums=(0, 1))(hidden, weight) + grad_norm = _stable_norm(grads) + mx.eval(loss, grad_norm) + + assert math.isnan(loss.item()) + assert math.isnan(grad_norm.item()) + + +def test_compiled_runtime_cce_invalid_labels_poison_loss(): + _skip_torch_shim() + from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss + + runtime_cce, _ = make_chunked_cross_entropy_loss( + ignore_index=-100, + chunk_size=16, + ) + hidden = mx.ones((2, 16), dtype=mx.float32) + weight = mx.ones((32, 16), dtype=mx.float32) + targets = mx.array([0, 32], dtype=mx.int32) + + def losses_fn(h, w, t): + return runtime_cce(h, w, t) + + losses = mx.compile(losses_fn)(hidden, weight, targets) + mx.eval(losses) + + assert losses[0].item() == pytest.approx(math.log(32.0), rel=1e-5) + assert math.isnan(losses[1].item()) + + +def test_quantized_runtime_cce_invalid_labels_poison_loss(): + _skip_torch_shim() + import mlx.nn as nn + + from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss + + linear = nn.Linear(32, 32, bias=False) + linear.weight = mx.ones((32, 32), dtype=mx.float32) + qlinear = nn.QuantizedLinear.from_linear(linear, group_size=32, bits=4) + runtime_cce, _ = make_chunked_cross_entropy_loss( + ignore_index=-100, + chunk_size=16, + quantized=True, + group_size=qlinear.group_size, + bits=qlinear.bits, + ) + hidden = mx.ones((2, 32), dtype=mx.float32) + targets = mx.array([0, 32], dtype=mx.int32) + + losses = runtime_cce( + hidden, + qlinear.weight, + qlinear.scales, + qlinear.biases, + targets, + ) + mx.eval(losses) + + assert losses[0].item() == pytest.approx(math.log(32.0), rel=1e-5) + assert math.isnan(losses[1].item()) + def 
test_compiled_runtime_cce_preserves_aux_lse_for_gradients(): _skip_torch_shim() from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py index f65668f21..99081e5fe 100644 --- a/unsloth_zoo/mlx/cce/runtime_cce.py +++ b/unsloth_zoo/mlx/cce/runtime_cce.py @@ -115,6 +115,24 @@ def _apply_softcap(logits: mx.array, logit_softcap: float) -> mx.array: return softcap * mx.tanh(logits / softcap) +def _target_validity_masks( + targets: mx.array, + vocab_size: int, + ignore_index: int, +) -> tuple[mx.array, mx.array]: + in_vocab = (targets >= 0) & (targets < vocab_size) + not_ignored = targets != ignore_index + return not_ignored & in_vocab, not_ignored & ~in_vocab + + +def _poison_invalid_targets(values: mx.array, invalid: mx.array) -> mx.array: + return mx.where( + invalid, + mx.full(values.shape, float("nan"), dtype=values.dtype), + values, + ) + + def _chunk_matmul( x: mx.array, weight: mx.array, @@ -509,8 +527,10 @@ def _forward_chunked_fused_finalize( target_logit = mx.where(in_chunk, chunk_target, target_logit) lse = running_max + mx.log(running_sum_exp + 1e-9) - valid = targets != ignore_index + valid, invalid = _target_validity_masks(targets, vocab_size, ignore_index) loss = mx.where(valid, lse - target_logit, mx.zeros_like(lse)) + loss = _poison_invalid_targets(loss, invalid) + lse = _poison_invalid_targets(lse, invalid) return loss, lse ignore_arr = mx.array([ignore_index], dtype=mx.int32) @@ -551,6 +571,9 @@ def _forward_chunked_fused_finalize( grid=(n * 256, 1, 1), threadgroup=(256, 1, 1), ) + _, invalid = _target_validity_masks(targets, vocab_size, ignore_index) + loss = _poison_invalid_targets(loss, invalid) + lse = _poison_invalid_targets(lse, invalid) return loss, lse running_max, running_sum_exp, target_logit = forward_update_kernel( From 76cc2dad6f94c36cc87b6773558d3ff5e5f7ea6b Mon Sep 17 00:00:00 2001 From: Lyxot Date: Sat, 23 May 2026 02:27:23 +0800 Subject: [PATCH 
10/13] fix(mlx): save only adapter tensors --- unsloth_zoo/mlx/trainer.py | 4 ++-- unsloth_zoo/mlx/utils.py | 43 ++++++++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/unsloth_zoo/mlx/trainer.py b/unsloth_zoo/mlx/trainer.py index fd2390932..79d8b80ab 100644 --- a/unsloth_zoo/mlx/trainer.py +++ b/unsloth_zoo/mlx/trainer.py @@ -64,7 +64,7 @@ normalize_mlx_chat_template, normalize_vlm_processor_chat_template, collect_mlx_texts, - save_lora_adapters, + save_trainable_adapters, apply_gradient_checkpointing, remove_gradient_checkpointing, _is_vlm_model, @@ -1235,7 +1235,7 @@ def step_fn(batch_data, prev_state, do_update): # Checkpointing if args.save_steps > 0 and current_step % args.save_steps == 0: ckpt_dir = f"{args.output_dir}/checkpoint-{current_step}" - save_lora_adapters(model, ckpt_dir) + save_trainable_adapters(model, ckpt_dir) print(f" Saved checkpoint to {ckpt_dir}") total_time = time.perf_counter() - start_time diff --git a/unsloth_zoo/mlx/utils.py b/unsloth_zoo/mlx/utils.py index e8b7e1881..4b8444177 100644 --- a/unsloth_zoo/mlx/utils.py +++ b/unsloth_zoo/mlx/utils.py @@ -2541,6 +2541,25 @@ def iterate_training_batches(dataset, tokenizer, batch_size, max_seq_length, yield batch, lengths_info, None +def _save_adapter_artifacts(model, path, tensors, adapter_config=None): + path = Path(path) + path.mkdir(parents=True, exist_ok=True) + + if tensors: + mx.save_safetensors(str(path / "adapters.safetensors"), tensors) + + adapter_config = _enrich_mlx_adapter_config(model, adapter_config or {}) + if adapter_config: + with open(path / "adapter_config.json", "w") as f: + json.dump(adapter_config, f, indent=2) + + +def save_trainable_adapters(model, path, adapter_config=None): + """Save the current trainable parameter tree for training checkpoints.""" + trainable = dict(mlx.utils.tree_flatten(model.trainable_parameters())) + _save_adapter_artifacts(model, path, trainable, adapter_config=adapter_config) + + def 
save_lora_adapters(model, path, adapter_config=None): """Save LoRA adapter weights to disk. @@ -2549,19 +2568,21 @@ def save_lora_adapters(model, path, adapter_config=None): path: Directory to save adapters. adapter_config: Optional dict with LoRA config metadata. """ - path = Path(path) - path.mkdir(parents=True, exist_ok=True) - - # Collect only trainable (LoRA) parameters — flatten nested dict for safetensors - trainable = dict(mlx.utils.tree_flatten(model.trainable_parameters())) + parameters = dict(mlx.utils.tree_flatten(model.parameters())) + adapter_tensors = { + name: value + for name, value in parameters.items() + if "lora_" in name.lower() + } - if trainable: - mx.save_safetensors(str(path / "adapters.safetensors"), trainable) + if not adapter_tensors: + raise ValueError( + "Unsloth: no MLX LoRA adapter tensors were found to save." + ) - adapter_config = _enrich_mlx_adapter_config(model, adapter_config or {}) - if adapter_config: - with open(path / "adapter_config.json", "w") as f: - json.dump(adapter_config, f, indent=2) + _save_adapter_artifacts( + model, path, adapter_tensors, adapter_config=adapter_config + ) def _infer_snapshot_commit(path): From 969a23d54d6d16f691e28543ea798c224fc7a77b Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sun, 24 May 2026 12:04:01 +0000 Subject: [PATCH 11/13] [CI-validation] Run unsloth-zoo MLX PRs 679/682/692 on real CI runners Combines three open MLX-only PRs against unslothai/unsloth-zoo into one staging branch and wires up real ubuntu-latest / macos-14 / windows-latest GitHub Actions runs to validate them together: - unslothai/unsloth-zoo#679 fix(mlx): persist LoRA adapter metadata on save - unslothai/unsloth-zoo#682 fix(mlx): handle zero-token and invalid labels in CCE - unslothai/unsloth-zoo#692 fix(mlx): save only LoRA adapter tensors This branch is intentionally throwaway; do not merge into staging/main. It is the iteration unit for the three workflow files below. 
PR #679 + PR #692 both touch unsloth_zoo/mlx/{utils,trainer}.py. Git's ort strategy auto-resolved cleanly: - utils.py keeps PR #692's _save_adapter_artifacts helper + lora_-name filter in save_lora_adapters, PLUS PR #679's _get_mlx_dropout_probability, _infer_mlx_lora_rank, and the expanded _enrich_mlx_adapter_config that writes lora_parameters / rank / scale / dropout / peft_type=LORA. - trainer.py imports both save_trainable_adapters (PR #692) and the helpers from PR #679, with checkpoint saves switched to the new save_trainable_adapters and final adapter export still calling save_lora_adapters. New scaffolding: - tests/test_mlx_save_lora_adapters_filter.py: four tests over the combined PR #692 + PR #679 surface (LoRA-only filter, metadata fields, no-adapter ValueError, trainable-checkpoint preserves everything). Closes the PR #692 coverage gap Copilot flagged. Uses mlx_simulation so it runs on Linux + Windows too. - tests/_zoo_aggressive_cuda_spoof.py: deeper torch.cuda spoof copied from danielhanchen/unsloth-staging-2, kept available for harder import paths that escape tests/conftest.py's device-type preload. - .github/workflows/mlx-pr-mac.yml: macos-14, real MLX install, PR-specific pytest set. Primary green signal. - .github/workflows/mlx-pr-linux.yml: ubuntu-latest, CPU torch + no-MLX install, import smoke + the new save_lora_adapters_filter shim test. - .github/workflows/mlx-pr-windows.yml: same as Linux but pinned to shell: bash everywhere; no triton. All three workflows trigger only on push to this staging branch with paths: filters and cancel-in-progress so force-pushes during iteration do not queue. 
--- .github/workflows/mlx-pr-linux.yml | 96 ++++++++ .github/workflows/mlx-pr-mac.yml | 115 ++++++++++ .github/workflows/mlx-pr-windows.yml | 86 +++++++ tests/_zoo_aggressive_cuda_spoof.py | 214 ++++++++++++++++++ tests/test_mlx_save_lora_adapters_filter.py | 238 ++++++++++++++++++++ 5 files changed, 749 insertions(+) create mode 100644 .github/workflows/mlx-pr-linux.yml create mode 100644 .github/workflows/mlx-pr-mac.yml create mode 100644 .github/workflows/mlx-pr-windows.yml create mode 100644 tests/_zoo_aggressive_cuda_spoof.py create mode 100644 tests/test_mlx_save_lora_adapters_filter.py diff --git a/.github/workflows/mlx-pr-linux.yml b/.github/workflows/mlx-pr-linux.yml new file mode 100644 index 000000000..070c94f1a --- /dev/null +++ b/.github/workflows/mlx-pr-linux.yml @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. + +# Linux import-smoke guardrail for the three MLX-only PRs validated by this +# staging branch. MLX wheels do not install on Linux, so this job CANNOT +# exercise Metal kernels -- it confirms (a) the PR diffs do not break +# non-MLX imports of unsloth_zoo, and (b) the new mlx_simulation-backed +# adapter-save test catches the LoRA filter / metadata regressions on a +# CPU runner. 
+ +name: MLX PR validation (Linux import smoke) + +on: + push: + branches: [staging/mlx-prs-679-682-692] + paths: + - 'unsloth_zoo/**.py' + - 'tests/test_mlx_save_lora_adapters_filter.py' + - 'tests/mlx_simulation/**' + - 'tests/conftest.py' + - 'tests/_zoo_aggressive_cuda_spoof.py' + - 'pyproject.toml' + - '.github/workflows/mlx-pr-linux.yml' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + mlx-pr-linux: + name: MLX PR import smoke on Linux + runs-on: ubuntu-latest + timeout-minutes: 20 + env: + UNSLOTH_IS_PRESENT: '1' + UNSLOTH_COMPILE_DISABLE: '1' + PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python + steps: + - name: Harden runner (audit) + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: '3.12' + cache: 'pip' + + - name: Install CPU-only torch + unsloth_zoo[core] + # MLX extras intentionally skipped -- mlx/mlx-lm/mlx-vlm have no + # Linux wheel. The package's pyproject already gates these on + # darwin+arm64 so `.[core]` resolves cleanly here. + run: | + python -m pip install --upgrade pip + pip install --index-url https://download.pytorch.org/whl/cpu \ + "torch>=2.4.0,<2.11.0" + pip install -e .[core] + pip install pytest==9.0.3 safetensors + + - name: Import smoke — unsloth_zoo top-level + MLX runtime probe + # tests/conftest.py already preloads unsloth_zoo.device_type under a + # spoofed torch.cuda.is_available(); this step proves the diff did + # not move the goal posts. 
+ run: | + python -c " + import sys; sys.path.insert(0, 'tests') + import _zoo_aggressive_cuda_spoof as s; s.apply() + import unsloth_zoo + from unsloth_zoo.mlx import is_mlx_available + assert is_mlx_available() is False, 'MLX should be unavailable on Linux' + print('OK: unsloth_zoo imports cleanly; MLX correctly reports unavailable') + " + + - name: PR #692 + PR #679 — save_lora_adapters filter + metadata (shim) + # Runs against the mlx_simulation shim; this is the only PR-specific + # signal the Linux job can produce, but it's a real one. + run: python -m pytest tests/test_mlx_save_lora_adapters_filter.py -v + + - name: Source-level MLX tests (no mlx.core needed) + # These read MLX module source as text and assert on signatures / + # constants. They run identically on every platform and catch + # accidental drift introduced by the merged PRs. + continue-on-error: true + run: | + python -m pytest -v \ + tests/test_mlx_baseline_loss_parity.py \ + tests/test_mlx_get_peft_model_seed_ordering.py \ + tests/test_mlx_max_grad_value_none.py diff --git a/.github/workflows/mlx-pr-mac.yml b/.github/workflows/mlx-pr-mac.yml new file mode 100644 index 000000000..0c9c61df2 --- /dev/null +++ b/.github/workflows/mlx-pr-mac.yml @@ -0,0 +1,115 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. + +# Throwaway staging-fork CI validation for three open MLX-only PRs against +# unslothai/unsloth-zoo: +# - PR #679 fix(mlx): persist LoRA adapter metadata on save +# - PR #682 fix(mlx): handle zero-token and invalid labels in CCE +# - PR #692 fix(mlx): save only LoRA adapter tensors +# +# macos-14 is the only platform where MLX wheels resolve, so this is the +# only workflow that exercises real MLX kernels + the real LoRA save/reload +# round-trip these PRs touch. +# +# Push-only trigger on the staging branch -- avoids burning macOS minutes on +# unrelated upstream PR events. 
cancel-in-progress so iterating with +# force-pushes does not queue up multiple concurrent runs. + +name: MLX PR validation (macOS Apple Silicon) + +on: + push: + branches: [staging/mlx-prs-679-682-692] + paths: + - 'unsloth_zoo/mlx/**' + - 'tests/test_mlx_*.py' + - 'tests/mlx_simulation/**' + - 'tests/conftest.py' + - 'pyproject.toml' + - '.github/workflows/mlx-pr-mac.yml' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + mlx-pr-mac: + name: MLX PR validation on Apple Silicon + runs-on: macos-14 + timeout-minutes: 30 + steps: + - name: Harden runner (audit) + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: '3.12' + cache: 'pip' + + - name: Confirm runner is real Apple Silicon + run: | + python -c " + import platform + assert platform.system() == 'Darwin', platform.system() + assert platform.machine() == 'arm64', platform.machine() + print('OK: macOS arm64 confirmed') + " + + - name: Install unsloth_zoo with MLX extras + run: | + python -m pip install --upgrade pip + pip install -e .[mlx] + pip install pytest==9.0.3 safetensors + + - name: Smoke-import MLX submodules (PRs land here) + run: | + python -c " + import importlib + for name in [ + 'unsloth_zoo.mlx.loader', + 'unsloth_zoo.mlx.trainer', + 'unsloth_zoo.mlx.utils', + 'unsloth_zoo.mlx.compile', + 'unsloth_zoo.mlx.runtime', + 'unsloth_zoo.mlx.cce', + 'unsloth_zoo.mlx.cce.runtime_cce', + ]: + importlib.import_module(name) + print('OK:', name) + import mlx.core as mx + print('OK: mlx.core', mx.__version__ if hasattr(mx, '__version__') else '(version unknown)') + " + + - name: PR #692 + PR #679 — save_lora_adapters 
filter + metadata + run: python -m pytest tests/test_mlx_save_lora_adapters_filter.py -v + + - name: PR #682 — CCE zero-token + invalid-label + compile-mode + run: python -m pytest tests/test_mlx_runtime_cce_compile.py -v + + - name: PR #679 — get_peft_model finetune_last_n_layers passthrough + # Pre-existing FakeModel/trainable_parameters issue on upstream main + # for one case -- continue-on-error so we still get the rest of the + # signal, and remove this once upstream lands a fix. + continue-on-error: true + run: python -m pytest tests/test_mlx_finetune_last_n_layers.py -v + + - name: Regression breadth — remaining MLX-related tests + continue-on-error: true + run: | + python -m pytest -v \ + tests/test_mlx_torch_shim_smoke.py \ + tests/test_mlx_baseline_loss_parity.py \ + tests/test_mlx_batch_padding.py \ + tests/test_mlx_dtype_downcast_warning.py \ + tests/test_mlx_max_grad_value_none.py \ + tests/test_mlx_get_peft_model_seed_ordering.py diff --git a/.github/workflows/mlx-pr-windows.yml b/.github/workflows/mlx-pr-windows.yml new file mode 100644 index 000000000..e5f36852e --- /dev/null +++ b/.github/workflows/mlx-pr-windows.yml @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. + +# Windows import-smoke guardrail for the three MLX-only PRs. Same purpose +# as the Linux job: prove the diffs do not break non-MLX imports + run the +# shim-backed adapter-save test. Two Windows-specific differences from +# Linux: +# - shell: bash on every step (Git Bash; PowerShell would force +# rewriting the python -c heredocs). +# - no triton dep (no Windows wheel). 
+ +name: MLX PR validation (Windows import smoke) + +on: + push: + branches: [staging/mlx-prs-679-682-692] + paths: + - 'unsloth_zoo/**.py' + - 'tests/test_mlx_save_lora_adapters_filter.py' + - 'tests/mlx_simulation/**' + - 'tests/conftest.py' + - 'tests/_zoo_aggressive_cuda_spoof.py' + - 'pyproject.toml' + - '.github/workflows/mlx-pr-windows.yml' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + mlx-pr-windows: + name: MLX PR import smoke on Windows + runs-on: windows-latest + timeout-minutes: 25 + env: + UNSLOTH_IS_PRESENT: '1' + UNSLOTH_COMPILE_DISABLE: '1' + PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python + defaults: + run: + shell: bash + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: '3.12' + cache: 'pip' + + - name: Install CPU-only torch + unsloth_zoo[core] (no triton) + # pyproject already gates triton on linux, so [core] resolves + # cleanly on Windows without dragging it in. 
+ run: | + python -m pip install --upgrade pip + pip install --index-url https://download.pytorch.org/whl/cpu \ + "torch>=2.4.0,<2.11.0" + pip install -e .[core] + pip install pytest==9.0.3 safetensors + + - name: Import smoke — unsloth_zoo top-level + MLX runtime probe + run: | + python -c " + import sys; sys.path.insert(0, 'tests') + import _zoo_aggressive_cuda_spoof as s; s.apply() + import unsloth_zoo + from unsloth_zoo.mlx import is_mlx_available + assert is_mlx_available() is False, 'MLX should be unavailable on Windows' + print('OK: unsloth_zoo imports cleanly; MLX correctly reports unavailable') + " + + - name: PR #692 + PR #679 — save_lora_adapters filter + metadata (shim) + run: python -m pytest tests/test_mlx_save_lora_adapters_filter.py -v + + - name: Source-level MLX tests (no mlx.core needed) + continue-on-error: true + run: | + python -m pytest -v \ + tests/test_mlx_baseline_loss_parity.py \ + tests/test_mlx_get_peft_model_seed_ordering.py \ + tests/test_mlx_max_grad_value_none.py diff --git a/tests/_zoo_aggressive_cuda_spoof.py b/tests/_zoo_aggressive_cuda_spoof.py new file mode 100644 index 000000000..eaafe445f --- /dev/null +++ b/tests/_zoo_aggressive_cuda_spoof.py @@ -0,0 +1,214 @@ +# Auto-generated by .github/workflows/consolidated-tests-ci.yml. +# Aggressive CUDA spoof for the consolidated CPU-only CI job. Extends +# tests/conftest.py:84-141's import-time harness with deeper patches that +# unblock more patch_* functions and unsloth_zoo init paths on a GPU-less +# runner. Imported by every shim test file in this workflow before any +# unsloth / unsloth_zoo / transformers import. +# +# Design: only no-op or value-returning patches. We do NOT replace tensor +# allocators. The single exception is `pin_memory=True` kwarg dropping, +# which converts a hard CUDA-required call into a CPU-OK call -- the +# intent of pin_memory is a CUDA-host fast-copy, which simply has no +# meaning on this runner; downgrading silently is the right behavior here. 
+ +from __future__ import annotations + +import sys +import types +from typing import Any + + +def apply() -> None: + """Apply the spoof. Idempotent: calling again has no effect.""" + import torch + + if getattr(torch.cuda, "_unsloth_consolidated_spoof", False): + return + + # ----- device probes (cheap, value-returning) ------------------------- + torch.cuda.is_available = lambda: True + torch.cuda.device_count = lambda: 1 + torch.cuda.current_device = lambda: 0 + torch.cuda.is_initialized = lambda: True + torch.cuda.set_device = lambda *a, **k: None + torch.cuda.synchronize = lambda *a, **k: None + torch.cuda.empty_cache = lambda *a, **k: None + torch.cuda.get_device_name = lambda *a, **k: "NVIDIA A100-SPOOFED" + torch.cuda.get_device_capability = lambda *a, **k: (8, 0) + torch.cuda.is_bf16_supported = lambda *a, **k: True + torch.cuda._is_in_bad_fork = lambda *a, **k: False # type: ignore[attr-defined] + + class _Props: + name = "NVIDIA A100-SPOOFED" + major = 8 + minor = 0 + total_memory = 80 * 1024**3 + multi_processor_count = 108 + is_integrated = False + is_multi_gpu_board = False + + torch.cuda.get_device_properties = lambda *a, **k: _Props() # type: ignore[assignment] + + # ----- cudart() wrapper ----------------------------------------------- + class _CudaRt: + @staticmethod + def cudaMemGetInfo(device: int = 0): + return (0, 80 * 1024**3) + + @staticmethod + def cudaGetDeviceCount(*_a, **_k): + return 0 # Not used on the spoof path + + @staticmethod + def cudaSetDevice(*_a, **_k): + return 0 + + torch.cuda.cudart = lambda: _CudaRt() # type: ignore[assignment] + + # ----- memory module -------------------------------------------------- + try: + import torch.cuda.memory as _cuda_memory # type: ignore + + _cuda_memory.mem_get_info = lambda *a, **k: (0, 80 * 1024**3) + _cuda_memory.memory_stats = lambda *a, **k: {} + _cuda_memory.memory_allocated = lambda *a, **k: 0 + _cuda_memory.max_memory_allocated = lambda *a, **k: 0 + _cuda_memory.memory_reserved = 
lambda *a, **k: 0 + _cuda_memory.max_memory_reserved = lambda *a, **k: 0 + _cuda_memory.reset_peak_memory_stats = lambda *a, **k: None + except Exception: + pass + + # ----- nvtx no-op stub ------------------------------------------------ + nvtx_stub = types.ModuleType("torch.cuda.nvtx") + nvtx_stub.range_push = lambda *a, **k: None # type: ignore[attr-defined] + nvtx_stub.range_pop = lambda *a, **k: None # type: ignore[attr-defined] + nvtx_stub.mark = lambda *a, **k: None # type: ignore[attr-defined] + sys.modules.setdefault("torch.cuda.nvtx", nvtx_stub) + torch.cuda.nvtx = nvtx_stub # type: ignore[attr-defined] + + # ----- random API ---------------------------------------------------- + # CRITICAL: torch.manual_seed() internally calls torch.cuda.manual_seed_all(), + # so routing the cuda seed APIs back through torch.manual_seed would + # infinite-recurse (observed as RecursionError in run #8 cells 2/3 of the + # consolidated CI matrix). No-op them: callers that explicitly seed CUDA + # have already paid the cost of seeding CPU via torch.manual_seed; the + # CUDA-side seeding has no meaning on a GPU-less runner. + torch.cuda.manual_seed = lambda *a, **k: None # type: ignore[assignment] + torch.cuda.manual_seed_all = lambda *a, **k: None # type: ignore[assignment] + # rng_state APIs: return a CPU-shaped placeholder and accept anything for + # set; do NOT route through torch.set_rng_state / get_rng_state -- those + # operate on the CPU RNG directly and are independent of the cuda surface. 
+ import torch as _t + + _empty_rng_state = _t.empty(0, dtype = _t.uint8) + torch.cuda.get_rng_state = lambda *a, **k: _empty_rng_state.clone() # type: ignore[assignment] + torch.cuda.set_rng_state = lambda *a, **k: None # type: ignore[assignment] + torch.cuda.get_rng_state_all = lambda *a, **k: [_empty_rng_state.clone()] # type: ignore[attr-defined] + torch.cuda.set_rng_state_all = lambda *a, **k: None # type: ignore[attr-defined] + torch.cuda.initial_seed = lambda *a, **k: 0 # type: ignore[assignment] + torch.cuda.seed = lambda *a, **k: None # type: ignore[assignment] + torch.cuda.seed_all = lambda *a, **k: None # type: ignore[assignment] + + # ----- Stream / Event no-op classes ----------------------------------- + class _NoopStream: + def __init__(self, *a, **k): ... + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + def synchronize(self, *a, **k): ... + def wait_stream(self, *a, **k): ... + def query(self): + return True + + class _NoopEvent: + def __init__(self, *a, **k): ... + def record(self, *a, **k): ... + def wait(self, *a, **k): ... + def query(self): + return True + + def synchronize(self, *a, **k): ... + def elapsed_time(self, *a, **k): + return 0.0 + + torch.cuda.Stream = _NoopStream # type: ignore[assignment] + torch.cuda.Event = _NoopEvent # type: ignore[assignment] + torch.cuda.stream = lambda s: s if s is not None else _NoopStream() # type: ignore[assignment] + torch.cuda.current_stream = lambda *a, **k: _NoopStream() # type: ignore[assignment] + torch.cuda.default_stream = lambda *a, **k: _NoopStream() # type: ignore[assignment] + + # ----- pin_memory drop ------------------------------------------------- + # `torch.empty(..., pin_memory=True)` and friends raise on a CPU-only + # build. Strip the kwarg — pin_memory has no meaning here. 
+ for _name in ( + "empty", + "zeros", + "ones", + "empty_like", + "zeros_like", + "ones_like", + "rand", + "randn", + "randint", + ): + _orig = getattr(torch, _name, None) + if _orig is None: + continue + + def _wrap(*args: Any, _orig = _orig, **kwargs: Any): + kwargs.pop("pin_memory", None) + return _orig(*args, **kwargs) + + setattr(torch, _name, _wrap) + + # Tensor.pin_memory() instance method: also a no-op (return self). + if hasattr(torch.Tensor, "pin_memory"): + torch.Tensor.pin_memory = lambda self, *a, **k: self # type: ignore[assignment] + if hasattr(torch.Tensor, "is_pinned"): + torch.Tensor.is_pinned = lambda self, *a, **k: False # type: ignore[assignment] + + # ----- amp.GradScaler: use the real one if torch ships a CPU-friendly + # path, else stub. Newer torch ships torch.amp.GradScaler that handles + # CPU; torch.cuda.amp.GradScaler is a wrapper. Both should work; just + # guard against import error. + try: + import torch.cuda.amp # type: ignore + except Exception: + cuda_amp = types.ModuleType("torch.cuda.amp") + + class _StubScaler: + def __init__(self, *a, **k): ... + def scale(self, x): + return x + + def step(self, opt): + opt.step() + + def update(self, *a, **k): ... + def unscale_(self, *a, **k): ... + def get_scale(self): + return 1.0 + + def is_enabled(self): + return False + + def state_dict(self): + return {} + + def load_state_dict(self, *a, **k): ... 
+ + cuda_amp.GradScaler = _StubScaler # type: ignore[attr-defined] + sys.modules.setdefault("torch.cuda.amp", cuda_amp) + torch.cuda.amp = cuda_amp # type: ignore[attr-defined] + + # ----- Sentinel ------------------------------------------------------ + torch.cuda._unsloth_consolidated_spoof = True # type: ignore[attr-defined] + + +if __name__ == "__main__": + apply() + print("CUDA spoof applied.") diff --git a/tests/test_mlx_save_lora_adapters_filter.py b/tests/test_mlx_save_lora_adapters_filter.py new file mode 100644 index 000000000..b227081fa --- /dev/null +++ b/tests/test_mlx_save_lora_adapters_filter.py @@ -0,0 +1,238 @@ +# Unsloth Zoo - Utilities for Unsloth +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see https://www.gnu.org/licenses/. + +"""Combined coverage for unslothai/unsloth-zoo PR #692 + PR #679. + +PR #692 (fix-mlx-export-adapters) makes ``save_lora_adapters`` keep only +tensors whose flattened name contains ``lora_``. PR #679 +(fix/mlx-lora-adapter-metadata) makes the same save path persist live +``rank`` / ``scale`` / ``dropout`` plus ``peft_type=LORA`` into +``adapter_config.json``. + +Neither PR shipped a test that exercises the combined surface; this +file closes that gap. Uses the ``mlx_simulation`` shim so it runs on +non-Apple CI (Linux/Windows) as well as macOS. 
+""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +import torch + + +@pytest.fixture(autouse=True, scope="module") +def _install_shim(): + from mlx_simulation import simulate_mlx_on_torch + simulate_mlx_on_torch() + + +# --------------------------------------------------------------------------- +# Minimal mock LoRA module: exposes the attributes _enrich_mlx_adapter_config +# inspects (``lora_a``, ``lora_b``, ``scale``, ``dropout``) and the iteration +# helpers ``save_lora_adapters`` walks (``parameters``, ``named_modules``). +# --------------------------------------------------------------------------- +class _MockDropout: + """Mirrors MLX's nn.Dropout: stores keep-probability as ``_p_1``. + + _get_mlx_dropout_probability reads ``.p`` first, then falls back to + ``1.0 - _p_1``; this fixture exercises the fallback branch (PR #679 + edge case). + """ + + def __init__(self, p: float): + self._p_1 = 1.0 - p + + +class _MockLoRALinear: + """LoRA-wrapped Linear. Shape convention matches mlx-lm's LoRALinear: + ``lora_a`` is ``(in_features, rank)`` and ``lora_b`` is + ``(rank, out_features)`` so the matmul partners pair up as + ``lora_a_shape[-1] == lora_b_shape[0] == rank`` -- which is what + PR #679's ``_infer_mlx_lora_rank`` reads. + """ + + def __init__(self, in_features: int, out_features: int, rank: int, scale: float, dropout: float): + self.weight = torch.zeros(out_features, in_features) + self.lora_a = torch.zeros(in_features, rank) + self.lora_b = torch.zeros(rank, out_features) + self.scale = scale + self.dropout = _MockDropout(dropout) + + +class _MockPlainLinear: + def __init__(self, in_features: int, out_features: int): + self.weight = torch.zeros(out_features, in_features) + + +class _MockModel: + """A tiny model with one LoRA-wrapped attention proj + one plain MLP proj. 
+ + Module attribute names are picked to **not** contain ``lora_`` so the + substring filter in ``save_lora_adapters`` is exercised against the + realistic case where only the leaf parameter (``...lora_a`` / + ``...lora_b``) carries the prefix -- mirroring real MLX models where + LoRA is attached on e.g. ``model.layers.N.self_attn.q_proj.lora_*``. + + ``parameters()`` returns the flat name→tensor map ``save_lora_adapters`` + consumes via ``mlx.utils.tree_flatten``. ``named_modules()`` is the + iteration ``_enrich_mlx_adapter_config`` walks looking for objects with + ``lora_a`` + ``lora_b`` to record rank/scale/dropout. + """ + + def __init__(self): + self.q_proj = _MockLoRALinear( + in_features=8, out_features=16, rank=4, scale=2.5, dropout=0.25, + ) + self.up_proj = _MockPlainLinear(in_features=16, out_features=32) + # _enrich_mlx_adapter_config probes these; supplying None keeps the + # config helper on the cheap fast path. + self._hf_repo = "unsloth/tiny-test-model" + self._config = None + self._unsloth_quantization_config = None + self._unsloth_quantization_policy = None + self._unsloth_quantized_source = None + self._unsloth_base_revision = None + self._unsloth_base_commit_hash = None + self._src_path = None + + def parameters(self): + return { + "q_proj.weight": self.q_proj.weight, + "q_proj.lora_a": self.q_proj.lora_a, + "q_proj.lora_b": self.q_proj.lora_b, + "up_proj.weight": self.up_proj.weight, + } + + def trainable_parameters(self): + return self.parameters() + + def named_modules(self): + yield "", self + yield "q_proj", self.q_proj + yield "up_proj", self.up_proj + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +def test_save_lora_adapters_filters_to_lora_only(tmp_path: Path): + """PR #692: adapter export keeps only ``lora_*`` keys. 
+ + Both ``q_proj.weight`` (base weight inside the LoRA module) and + ``up_proj.weight`` (plain Linear) must be excluded — only the two + ``lora_a`` / ``lora_b`` tensors survive. + """ + from unsloth_zoo.mlx.utils import save_lora_adapters + + model = _MockModel() + out_dir = tmp_path / "adapter_lora_only" + save_lora_adapters(model, out_dir) + + safe_path = out_dir / "adapters.safetensors" + assert safe_path.is_file(), "adapters.safetensors must be written" + + from safetensors.torch import load_file + saved = load_file(str(safe_path)) + keys = set(saved.keys()) + + assert keys == {"q_proj.lora_a", "q_proj.lora_b"}, ( + f"adapter export leaked non-LoRA tensors: {sorted(keys)}" + ) + + +def test_save_lora_adapters_writes_pr679_metadata(tmp_path: Path): + """PR #679: adapter_config.json carries live rank/scale/dropout + + peft_type=LORA + the keep-probability dropout fallback. + """ + from unsloth_zoo.mlx.utils import save_lora_adapters + + model = _MockModel() + out_dir = tmp_path / "adapter_metadata" + save_lora_adapters(model, out_dir) + + cfg_path = out_dir / "adapter_config.json" + assert cfg_path.is_file(), "adapter_config.json must be written" + cfg = json.loads(cfg_path.read_text()) + + assert cfg.get("peft_type") == "LORA", cfg + assert cfg.get("rank") == 4, cfg + assert cfg.get("scale") == pytest.approx(2.5), cfg + # PR #679 fix: read dropout via the ``_p_1`` keep-prob fallback when + # ``.p`` is absent (real MLX nn.Dropout stores it that way). + assert cfg.get("dropout") == pytest.approx(0.25), cfg + + params = cfg.get("lora_parameters") or {} + assert params.get("rank") == 4, params + assert params.get("scale") == pytest.approx(2.5), params + assert params.get("dropout") == pytest.approx(0.25), params + + # PR #679 also records the LoRA topology so reload reconstructs the + # same attachment surface. 
+ assert "q_proj" in (cfg.get("unsloth_mlx_lora_module_paths") or []), cfg + + +def test_save_lora_adapters_raises_when_no_lora_tensors_present(tmp_path: Path): + """PR #692: explicit ValueError when nothing matched the ``lora_`` filter. + + Guards against silently saving an empty/garbage adapter export when the + model has no LoRA layers (e.g. merged adapter state). + """ + from unsloth_zoo.mlx.utils import save_lora_adapters + + class _NoLoRAModel(_MockModel): + def parameters(self): + return {"up_proj.weight": self.up_proj.weight} + + def named_modules(self): + yield "", self + yield "up_proj", self.up_proj + + out_dir = tmp_path / "adapter_empty" + with pytest.raises(ValueError, match="LoRA adapter tensors"): + save_lora_adapters(_NoLoRAModel(), out_dir) + + +def test_save_trainable_adapters_keeps_all_trainable(tmp_path: Path): + """PR #692 separation: ``save_trainable_adapters`` (used by mid-training + checkpoints) keeps ALL trainable tensors, including base weights — it + must NOT inherit the LoRA-only filter. 
+ """ + from unsloth_zoo.mlx.utils import save_trainable_adapters + + model = _MockModel() + out_dir = tmp_path / "adapter_trainable" + save_trainable_adapters(model, out_dir) + + safe_path = out_dir / "adapters.safetensors" + assert safe_path.is_file(), "adapters.safetensors must be written" + + from safetensors.torch import load_file + saved = load_file(str(safe_path)) + keys = set(saved.keys()) + + assert keys == { + "q_proj.weight", + "q_proj.lora_a", + "q_proj.lora_b", + "up_proj.weight", + }, ( + "training checkpoint should preserve every trainable tensor, " + f"got {sorted(keys)}" + ) From c35d0c33460445904ec8f7409d20db0cb582da1b Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sun, 24 May 2026 12:05:46 +0000 Subject: [PATCH 12/13] [CI-validation] Drop inherited upstream workflows from staging branch The seven upstream workflows (consolidated-tests-ci, lint-ci, mlx-ci, security-audit, stale, studio-export-fix-ci, wheel-smoke) would fire on every push and PR-event to this throwaway staging branch and burn runner minutes that have nothing to do with validating MLX PRs #679, #682, #692. Keep only the three mlx-pr-* workflows on this branch. They stay in upstream main / origin/main untouched -- this deletion is scoped to the staging branch only. 
--- .github/workflows/consolidated-tests-ci.yml | 255 -------------------- .github/workflows/lint-ci.yml | 122 ---------- .github/workflows/mlx-ci.yml | 70 ------ .github/workflows/security-audit.yml | 226 ----------------- .github/workflows/stale.yml | 37 --- .github/workflows/studio-export-fix-ci.yml | 62 ----- .github/workflows/wheel-smoke.yml | 118 --------- 7 files changed, 890 deletions(-) delete mode 100644 .github/workflows/consolidated-tests-ci.yml delete mode 100644 .github/workflows/lint-ci.yml delete mode 100644 .github/workflows/mlx-ci.yml delete mode 100644 .github/workflows/security-audit.yml delete mode 100644 .github/workflows/stale.yml delete mode 100644 .github/workflows/studio-export-fix-ci.yml delete mode 100644 .github/workflows/wheel-smoke.yml diff --git a/.github/workflows/consolidated-tests-ci.yml b/.github/workflows/consolidated-tests-ci.yml deleted file mode 100644 index 6ab589c20..000000000 --- a/.github/workflows/consolidated-tests-ci.yml +++ /dev/null @@ -1,255 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. - -# Python compatibility + repo test gate. Adapted from unsloth's consolidated-tests-ci.yml. -# Jobs: python-version-collect (pytest --collect-only on 3.10-3.13), repo-tests-cpu -# (tests/security HARD GATE + CPU-pure zoo tests), core-upstream-matrix (HF/TRL/peft -# drift detector across 3 cells -- the high-value zoo coverage). - -name: Tests CI - -on: - pull_request: - push: - branches: [main] - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - # Python compatibility: pytest --collect-only per interpreter. 
- python-version-collect: - name: (Python ${{ matrix.python-version }}) - runs-on: ubuntu-latest - timeout-minutes: 15 - strategy: - fail-fast: false - matrix: - python-version: ['3.10', '3.11', '3.12', '3.13'] - steps: - - name: Harden runner (audit) - uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 - with: - egress-policy: audit - - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - - - name: Install CPU-only torch + zoo runtime deps - # CPU index avoids the multi-GB CUDA wheel set. `--no-deps unsloth` - # satisfies the find_spec("unsloth") guard at unsloth_zoo/__init__.py:128. - run: | - python -m pip install --upgrade pip - pip install --index-url https://download.pytorch.org/whl/cpu \ - "torch>=2.4.0,<2.11.0" - pip install -e .[core] - pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true - pip install pytest==9.0.3 - - - name: pytest --collect-only - continue-on-error: true - run: python -m pytest tests/ --collect-only -q - - # CPU-only repo tests. HARD GATE on tests/security. - repo-tests-cpu: - name: Repo tests (CPU) - runs-on: ubuntu-latest - timeout-minutes: 20 - steps: - - name: Harden runner (audit) - uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 - with: - egress-policy: audit - - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: '3.12' - cache: 'pip' - - - name: Install runtime + test deps - # --no-deps unsloth satisfies the find_spec("unsloth") guard at unsloth_zoo/__init__.py:128. 
- run: | - python -m pip install --upgrade pip - pip install --index-url https://download.pytorch.org/whl/cpu \ - "torch>=2.4.0,<2.11.0" - pip install -e .[core] - pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true - pip install pytest==9.0.3 pyyaml==6.0.2 - - - name: pytest tests/security (HARD GATE) - run: python -m pytest tests/security -v - - - name: pytest tests/test_pr_a_imports + zoo-specific CPU tests - # Run as SEPARATE pytest invocation: tests/security/conftest.py installs a - # session-scoped network_blocker autouse fixture that would otherwise block - # test_pypi_version_sync from reaching pypi.org. - continue-on-error: true - run: | - python -m pytest \ - tests/test_pr_a_imports.py \ - tests/test_rl_replacements_cpu.py \ - tests/test_temporary_patches_imports.py \ - tests/test_zoo_history_regressions.py \ - tests/test_pypi_version_sync.py \ - -v - - # Core (HF/TRL/peft) drift matrix. Three cells: HF=4.57.6+TRL<1, HF=latest+TRL=latest, - # and pyproject defaults. fail-fast=false; drift in one cell shouldn't cancel others. 
- core-upstream-matrix: - name: "Core (${{ matrix.combo.label }})" - runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - fail-fast: false - matrix: - combo: - - id: t4576-trl0latest - label: "HF=4.57.6 + TRL<1" - transformers_spec: "transformers==4.57.6" - trl_spec: "trl>=0.18.2,<1.0.0" - peft_spec: "peft>=0.18,<0.20" - - id: tlatest5-trl1latest - label: "HF=latest + TRL=latest" - transformers_spec: "transformers>=5,<6" - trl_spec: "trl>=1,<2" - peft_spec: "peft" - - id: pyproject - label: "HF=default + TRL=default" - transformers_spec: "__from_pyproject__" - trl_spec: "__from_pyproject__" - peft_spec: "__from_pyproject__" - env: - MATRIX_TRANSFORMERS_SPEC: ${{ matrix.combo.transformers_spec }} - MATRIX_TRL_SPEC: ${{ matrix.combo.trl_spec }} - MATRIX_PEFT_SPEC: ${{ matrix.combo.peft_spec }} - MATRIX_COMBO_ID: ${{ matrix.combo.id }} - # Pure-Python protobuf parser; transformers' bundled *_pb2.py is rejected by C++ protobuf 4+/5+. - PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python - UNSLOTH_COMPILE_DISABLE: '1' - # Secondary handshake after find_spec("unsloth") guard at unsloth_zoo/__init__.py:128. - UNSLOTH_IS_PRESENT: '1' - steps: - - name: Harden runner (audit) - # audit (not block): matrix pulls arbitrary transformers/TRL/peft pins. - uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 - with: - egress-policy: audit - - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: '3.12' - cache: 'pip' - - - name: Resolve matrix specs (handle __from_pyproject__ sentinel) - # Resolve transformers/trl/peft from pyproject.toml when the sentinel is used. 
- run: | - set -euxo pipefail - python <<'PY' >> "$GITHUB_ENV" - import os, re, tomllib - spec_t = os.environ["MATRIX_TRANSFORMERS_SPEC"] - spec_r = os.environ["MATRIX_TRL_SPEC"] - spec_p = os.environ["MATRIX_PEFT_SPEC"] - - def _pkg_name(spec: str) -> str: - m = re.match(r"\s*([A-Za-z0-9_.-]+)", spec) - return (m.group(1).lower() if m else "") - - if "__from_pyproject__" in (spec_t, spec_r, spec_p): - with open("pyproject.toml", "rb") as f: - doc = tomllib.load(f) - proj = doc.get("project", {}) - all_deps: list[str] = list(proj.get("dependencies", [])) - for _name, dep_list in proj.get("optional-dependencies", {}).items(): - all_deps.extend(dep_list) - - # Strip environment markers so the resolved spec is pip-installable. - def _strip_marker(s: str) -> str: - return s.split(";", 1)[0].strip() - - if spec_t == "__from_pyproject__": - spec_t = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "transformers"), - "transformers") - if spec_r == "__from_pyproject__": - spec_r = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "trl"), - "trl") - if spec_p == "__from_pyproject__": - spec_p = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "peft"), - "peft") - print(f"RESOLVED_TRANSFORMERS_SPEC={spec_t}") - print(f"RESOLVED_TRL_SPEC={spec_r}") - print(f"RESOLVED_PEFT_SPEC={spec_p}") - PY - grep RESOLVED_ "$GITHUB_ENV" || true - - - name: Install torch CPU + zoo + matrix-specified upstream libs - # Two-phase: `-e .[core]` for pyproject defaults, then `-U ` to override. - # The -U is critical so pip will downgrade transformers (e.g. cell-1 pin 4.57.6). - # --no-deps unsloth satisfies the find_spec guard at unsloth_zoo/__init__.py:128. - run: | - set -euxo pipefail - python -m pip install --upgrade pip - pip install --index-url https://download.pytorch.org/whl/cpu \ - "torch>=2.4.0,<2.11.0" "torchvision<0.26" - # torchvision: transitive import of transformers.models.qwen2_vl - # / qwen2_5_vl image processors. 
The Qwen2_VL image-processor - # zoo references chains through `from torchvision...` at module - # top, so a missing torchvision turns the existence-probe drift - # tests RED on "ModuleNotFoundError: No module named 'torchvision'". - # CPU build is plenty; we don't need the CUDA variant. - pip install -e .[core] - pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true - # Override with matrix-resolved specs. - pip install -U "$RESOLVED_TRANSFORMERS_SPEC" "$RESOLVED_TRL_SPEC" "$RESOLVED_PEFT_SPEC" - # bitsandbytes: imported at module scope in saving_utils.py (_active_merge_device path). - pip install 'bitsandbytes>=0.45' - # IPython + ipywidgets: logging_utils.py:50 imports transformers.utils.notebook. - # Required so drift detector only fires on real drift, not missing CI deps. - pip install 'ipython>=8' 'ipywidgets>=8' - pip install pytest==9.0.3 packaging - echo "::group::Installed transformers + trl + peft + torch versions" - pip show transformers - pip show trl - pip show peft - pip show torch - echo "::endgroup::" - - - name: pytest upstream-regression suite (94 pinned + 117 expanded) - # 626 drift-detector tests / cell across 12 files. HARD GATE: a red cell - # means real upstream drift (transformers/trl/peft/vllm/datasets renamed - # or removed a symbol zoo references). Zoo PRs #4 through #635 mined. 
- run: | - python -m pytest -v --tb=short -rs \ - tests/test_upstream_pinned_symbols_transformers.py \ - tests/test_upstream_pinned_symbols_trl_vllm.py \ - tests/test_upstream_pinned_symbols_accelerator.py \ - tests/test_zoo_history_regressions_deep.py \ - tests/test_upstream_import_fixes_drift.py \ - tests/test_zoo_source_upstream_refs.py \ - tests/test_upstream_signatures.py \ - tests/test_extended_dep_api_pins.py \ - tests/test_upstream_source_patterns.py \ - tests/test_compiler_rewriter_exhaustive.py \ - tests/test_compiler_dynamic_exec.py \ - tests/test_temporary_patches_exhaustive.py \ - tests/test_unsloth_zoo_lora_merge.py \ - tests/test_peft_paramwrapper_layout_drift.py \ - tests/test_transformers_moe_structure_drift.py \ - tests/test_moe_merge_e2e_cpu.py diff --git a/.github/workflows/lint-ci.yml b/.github/workflows/lint-ci.yml deleted file mode 100644 index 75446a499..000000000 --- a/.github/workflows/lint-ci.yml +++ /dev/null @@ -1,122 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. - -# Whole-repo Python source-lint gate. Adapted from unsloth's lint-ci.yml: -# Python (compileall + narrow ruff) + YAML/JSON round-trip. Dropped vs unsloth: -# shell lint (zoo has no committed *.sh), TypeScript/Rust (Studio/Tauri are unsloth-side). 
- -name: Lint CI - -on: - pull_request: - push: - branches: [main] - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - source-lint: - name: Source lint (Python + YAML + JSON) - runs-on: ubuntu-latest - timeout-minutes: 5 - steps: - - name: Harden runner (audit) - uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 - with: - egress-policy: audit - - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: '3.12' - cache: 'pip' - - - run: pip install 'ruff==0.15.12' 'pyyaml>=6' - - - name: Python AST/syntax check (every committed .py must compile) - # continue-on-error during CI bootstrap: pyproject.toml declares - # `requires-python = ">=3.9,<3.15"` but temporary_patches/gpt_oss.py - # uses a 3.10+ `match` statement. Tracked as a separate cleanup PR. - continue-on-error: true - run: | - python -m compileall -q -j 0 unsloth_zoo tests scripts - - - name: Python ruff check (narrow gate) - # E9 / F63 / F7 / F82: syntax errors, broken comparisons, undefined names. - # continue-on-error during CI bootstrap: first run on main surfaced 13 - # latent findings (rl_replacements.py L1128 F821, gpt_oss match-on-3.9). - continue-on-error: true - run: | - ruff check --select E9,F63,F7,F82 unsloth_zoo tests scripts - - - name: No leftover debugger / pdb / breakpoint calls - # Catches `import pdb`, `pdb.set_trace()`, `breakpoint()`, `import ipdb`. - # continue-on-error during bootstrap: rl_replacements.py has a - # `#breakpoint()` comment the regex matches (# is [^A-Za-z_]). 
- continue-on-error: true - run: | - set -e - if grep -rnE '(^|[^A-Za-z_])(pdb\.set_trace|breakpoint)\(|^import (pdb|ipdb)$|^from (pdb|ipdb) import' \ - --include='*.py' unsloth_zoo scripts; then - echo "::error::Leftover debugger call found above. Remove it." >&2 - exit 1 - fi - - - name: YAML round-trip for every committed YAML - run: | - python <<'PY' - import pathlib, sys, yaml - fails = [] - for p in pathlib.Path(".").rglob("*.yml"): - if any(part.startswith(".") and part not in (".github",) for part in p.parts): - continue - try: - yaml.safe_load(p.read_text()) - except Exception as exc: - fails.append(f"{p}: {exc}") - for p in pathlib.Path(".").rglob("*.yaml"): - if any(part.startswith(".") and part not in (".github",) for part in p.parts): - continue - try: - yaml.safe_load(p.read_text()) - except Exception as exc: - fails.append(f"{p}: {exc}") - if fails: - for f in fails: - print("::error::", f) - sys.exit(1) - print(f"YAML round-trip OK") - PY - - - name: JSON round-trip for every committed JSON - run: | - python <<'PY' - import pathlib, json, sys - fails = [] - for p in pathlib.Path(".").rglob("*.json"): - if any(part in (".git", "node_modules", "__pycache__", "build", "dist") for part in p.parts): - continue - try: - json.loads(p.read_text()) - except Exception as exc: - fails.append(f"{p}: {exc}") - if fails: - for f in fails: - print("::error::", f) - sys.exit(1) - print("JSON round-trip OK") - PY - - - name: enforce kwargs spacing - # Style rule mirrored from unsloth: kwargs use `name = value` not `name=value`. - continue-on-error: true - run: | - python3 scripts/enforce_kwargs_spacing.py unsloth_zoo diff --git a/.github/workflows/mlx-ci.yml b/.github/workflows/mlx-ci.yml deleted file mode 100644 index 3df8be9d9..000000000 --- a/.github/workflows/mlx-ci.yml +++ /dev/null @@ -1,70 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. 
- -# MLX-specific CI on macOS arm64 (Apple Silicon) so mlx / mlx-lm / mlx-vlm wheels -# resolve. Installs `unsloth_zoo[mlx]`, smoke-imports unsloth_zoo/mlx_*.py modules, -# runs tests/test_mlx_torch_shim_smoke.py. Opt-in via `mlx` label to save macOS minutes. - -name: MLX CI on Mac M1 - -on: - pull_request: - types: [opened, synchronize, reopened, labeled] - workflow_dispatch: - schedule: - # Daily @ 04:23 UTC -- off the security-audit cron rush at 04:13. - - cron: '23 4 * * *' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - mlx-smoke: - name: MLX install + import smoke (Apple Silicon) - # Opt-in: schedule / workflow_dispatch always run; PR runs only with `mlx` label. - if: >- - github.event_name == 'schedule' || - github.event_name == 'workflow_dispatch' || - contains(github.event.pull_request.labels.*.name, 'mlx') - runs-on: macos-14 # Apple Silicon (M1) hosted runner - timeout-minutes: 30 - steps: - # harden-runner block-mode is Linux-only; stay in audit on macOS for parity. - - name: Harden runner (audit) - uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 - with: - egress-policy: audit - - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: '3.12' - cache: 'pip' - - - name: Install zoo with MLX extras - # pyproject gates MLX deps on darwin+arm64; `.[mlx]` picks them up - # without the torch-on-Linux-CUDA path. 
- run: | - python -m pip install --upgrade pip - pip install -e .[mlx] - pip install pytest==9.0.3 - - - name: MLX module import smoke - run: | - python -c "import unsloth_zoo.mlx_loader; print('mlx_loader OK')" - python -c "import unsloth_zoo.mlx_compile; print('mlx_compile OK')" - python -c "import unsloth_zoo.mlx_utils; print('mlx_utils OK')" - python -c "import unsloth_zoo.mlx_trainer; print('mlx_trainer OK')" - python -c "import unsloth_zoo.mlx_cce; print('mlx_cce OK')" - - - name: tests/test_mlx_torch_shim_smoke.py - # Exercises the MLX-on-torch shim end-to-end against the real mlx runtime - # on Apple Silicon; on Linux runners it would run against tests/mlx_simulation/ stubs. - run: python -m pytest tests/test_mlx_torch_shim_smoke.py -v diff --git a/.github/workflows/security-audit.yml b/.github/workflows/security-audit.yml deleted file mode 100644 index 28a73eed0..000000000 --- a/.github/workflows/security-audit.yml +++ /dev/null @@ -1,226 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. - -# Pure-Python supply-chain audit for unsloth_zoo. Mirrors unslothai/unsloth's -# security-audit.yml with npm/Cargo/Studio jobs stripped (zoo is pure Python). -# Jobs: advisory-audit (pip-audit + trufflehog), pip-scan-packages (transitive -# closure pattern scan), workflow-trigger-lint, tests-security (HARD GATE). - -name: Security audit - -on: - pull_request: - paths: - - 'pyproject.toml' - - 'scripts/scan_packages.py' - - 'scripts/lint_workflow_triggers.py' - - 'tests/security/**' - - '.github/workflows/security-audit.yml' - push: - branches: [main] - schedule: - - cron: '13 4 * * *' # 04:13 UTC daily, off the cron rush - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - # Advisory-DB audit: pip-audit + trufflehog. Non-blocking while baseline settles. 
- advisory-audit: - name: advisory audit (pip + secrets) - runs-on: ubuntu-latest - timeout-minutes: 15 - steps: - - name: Harden runner (egress block) - uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 - with: - egress-policy: block - disable-sudo: true - allowed-endpoints: > - api.github.com:443 - github.com:443 - codeload.github.com:443 - objects.githubusercontent.com:443 - pypi.org:443 - files.pythonhosted.org:443 - - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - fetch-depth: 0 # trufflehog needs full history for diff scans - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: '3.12' - - - name: Install pip-audit - run: python -m pip install --upgrade pip pip-audit - - - name: Build filtered requirements set - # Reads pyproject.toml deps + extras into a flat requirements file. - # git+ specs are skipped (advisory-DB can't resolve them). - run: | - mkdir -p audit-reqs - python <<'PY' > audit-reqs/zoo-deps.txt - import tomllib - with open("pyproject.toml", "rb") as f: - d = tomllib.load(f) - core = d["project"]["dependencies"] - all_extras = [] - for extra_name, specs in d["project"].get("optional-dependencies", {}).items(): - # Skip self-referential extras like "huggingface = ['unsloth_zoo[core]']". 
- all_extras += [s for s in specs if "unsloth_zoo" not in s] - print("# Auto-generated from pyproject.toml by security-audit.yml.") - for spec in core + all_extras: - if "git+" in spec: - print(f"# [security-audit] skipped git+ spec: {spec}") - continue - print(spec) - PY - - - name: pip-audit (advisory DB lookup) - continue-on-error: true - run: pip-audit --requirement audit-reqs/zoo-deps.txt --disable-pip --strict || true - - - name: Trufflehog secret scan - continue-on-error: true - uses: trufflesecurity/trufflehog@17456f8c7d042d8c82c9a8ca9e937231f9f42e26 # v3.95.2 - with: - base: ${{ github.event.repository.default_branch }} - head: HEAD - extra_args: --only-verified - - # pip-scan-packages: downloads every PyPI archive in zoo's transitive closure and - # pattern-scans (catches the malicious-upload class that precedes CVE publication). - pip-scan-packages: - name: pip scan-packages (zoo transitive closure) - runs-on: ubuntu-latest - timeout-minutes: 25 - steps: - - name: Harden runner (egress block) - uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 - with: - egress-policy: block - disable-sudo: true - allowed-endpoints: > - api.github.com:443 - github.com:443 - codeload.github.com:443 - objects.githubusercontent.com:443 - pypi.org:443 - files.pythonhosted.org:443 - - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: '3.12' - cache: 'pip' - - - name: Install scan_packages.py runtime deps - # requests + packaging for PyPI's JSON API. Scanned packages are - # downloaded raw and inspected, never `pip install`-ed. 
- run: python -m pip install --upgrade pip requests packaging - - - name: Build filtered requirements set - run: | - mkdir -p audit-reqs - python <<'PY' > audit-reqs/zoo-deps.txt - import tomllib - with open("pyproject.toml", "rb") as f: - d = tomllib.load(f) - core = d["project"]["dependencies"] - all_extras = [] - for extra_name, specs in d["project"].get("optional-dependencies", {}).items(): - all_extras += [s for s in specs if "unsloth_zoo" not in s] - print("# Auto-generated from pyproject.toml by security-audit.yml.") - for spec in core + all_extras: - if "git+" in spec: - print(f"# [security-audit] skipped git+ spec: {spec}") - continue - print(spec) - PY - - - name: scan-packages (with deps) - continue-on-error: true - # --with-deps makes scan transitive. Archives are downloaded and - # pattern-scanned WITHOUT installing -- malicious wheels cannot execute. - run: python3 scripts/scan_packages.py --requirements audit-reqs/zoo-deps.txt --with-deps - - # workflow-trigger-lint: refuses pull_request_target with PR-head checkout, - # restricted workflow_run without justification, and cache-key collisions. 
- workflow-trigger-lint: - name: workflow-trigger lint (pull_request_target / cache-poisoning) - runs-on: ubuntu-latest - timeout-minutes: 5 - steps: - - name: Harden runner (egress block) - uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 - with: - egress-policy: block - disable-sudo: true - allowed-endpoints: > - api.github.com:443 - github.com:443 - codeload.github.com:443 - objects.githubusercontent.com:443 - pypi.org:443 - files.pythonhosted.org:443 - - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: '3.12' - - - name: Install PyYAML - run: pip install pyyaml==6.0.2 - - - name: Run workflow-trigger lint - run: python3 scripts/lint_workflow_triggers.py - - # HARD GATE: regression tests for scanner + lint scripts. Drift in IOC tables - # or scanner exit semantics fails this PR at review time. - tests-security: - name: pytest tests/security - runs-on: ubuntu-latest - timeout-minutes: 10 - steps: - - name: Harden runner (egress block) - uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 - with: - egress-policy: block - disable-sudo: true - allowed-endpoints: > - api.github.com:443 - github.com:443 - codeload.github.com:443 - objects.githubusercontent.com:443 - pypi.org:443 - files.pythonhosted.org:443 - - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: '3.12' - - - name: Install pytest + PyYAML - # PyYAML needed by scripts/lint_workflow_triggers.py, exercised via subprocess - # by tests/security/test_lint_workflow_triggers.py. (See unsloth PR #5397: without - # pyyaml the lint script exits 2.) 
- run: pip install pytest==9.0.3 pyyaml==6.0.2 - - - name: Run security regression tests - run: python3 -m pytest tests/security -v diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml deleted file mode 100644 index 1a4cf841d..000000000 --- a/.github/workflows/stale.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: 'Inactive Issue Pinger' - -on: - schedule: - - cron: '30 5 * * *' # Runs at 5:30 UTC every day - -jobs: - stale: - runs-on: ubuntu-latest - permissions: - issues: write - - steps: - - uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0 - with: - # The message to post on stale issues. - # This message will ping the issue author. - # Note: The stale bot action does not currently support a direct placeholder for the last commenter. - # As a workaround, this message encourages any participant to reply. - stale-issue-message: > - Is this issue still important to you? - Apologies in advance we might have missed this issue as well. - For faster response times, please post on our Reddit server - https://www.reddit.com/r/unsloth or our Discord - https://discord.com/invite/unsloth - - # The number of days of inactivity before an issue is considered stale. - days-before-issue-stale: 9999 - - # Set to -1 to never close stale issues. - days-before-issue-close: -1 - - # A label to apply to stale issues. - stale-issue-label: 'inactive' - - # The number of operations to perform per run to avoid rate limiting. 
- operations-per-run: 500 - - enable-statistics: false diff --git a/.github/workflows/studio-export-fix-ci.yml b/.github/workflows/studio-export-fix-ci.yml deleted file mode 100644 index 699b78d16..000000000 --- a/.github/workflows/studio-export-fix-ci.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: studio-export-fix-ci - -on: - push: - branches: [main, nightly] - paths: - - "unsloth_zoo/llama_cpp.py" - - "tests/test_quantize_gguf_q2_k_l.py" - - "tests/test_convert_hf_to_gguf_patcher.py" - - ".github/workflows/studio-export-fix-ci.yml" - pull_request: - paths: - - "unsloth_zoo/llama_cpp.py" - - "tests/test_quantize_gguf_q2_k_l.py" - - "tests/test_convert_hf_to_gguf_patcher.py" - - ".github/workflows/studio-export-fix-ci.yml" - -concurrency: - group: studio-export-fix-${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - studio-export-fix: - name: ${{ matrix.os }} - strategy: - fail-fast: false - # Cap matrix at 3 in flight so Windows stays under the repo-level - # 5-concurrent-Windows-runner limit when this job runs alongside others. - max-parallel: 3 - matrix: - os: [ubuntu-latest, macos-14, windows-latest] - runs-on: ${{ matrix.os }} - timeout-minutes: 15 - env: - # 5000/h vs 60/h on raw.githubusercontent.com for the live-upstream tests. - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - UNSLOTH_COMPILE_DISABLE: '1' - PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - cache: pip - - - name: Install minimal test deps - run: | - python -m pip install --upgrade pip - # Pure-Python tests: monkeypatch subprocess + AST-parse upstream files. - # No torch / transformers needed. Keep slim so Windows cold start stays under a minute. 
- python -m pip install pytest psutil requests tqdm - - - name: Run patcher + q2_k_l unit tests - shell: bash - run: | - pytest -v \ - tests/test_quantize_gguf_q2_k_l.py \ - tests/test_convert_hf_to_gguf_patcher.py diff --git a/.github/workflows/wheel-smoke.yml b/.github/workflows/wheel-smoke.yml deleted file mode 100644 index 626e8dccb..000000000 --- a/.github/workflows/wheel-smoke.yml +++ /dev/null @@ -1,118 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. - -# Build PyPI wheel + sdist, verify content sanity, import-smoke in a clean venv. -# Adapted from unsloth's wheel-smoke.yml; zoo's content checks: package present, -# no tests/ shipped, no stray .pyc, real version string, import smoke succeeds. - -name: Wheel CI - -on: - pull_request: - paths: - - 'pyproject.toml' - - 'unsloth_zoo/**' - - 'tests/**' - - '.github/workflows/wheel-smoke.yml' - push: - branches: [main] - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - wheel: - name: Wheel build + content sanity + import smoke - runs-on: ubuntu-latest - timeout-minutes: 15 - steps: - - name: Harden runner (audit) - uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 - with: - egress-policy: audit - - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: '3.12' - - - name: Build wheel + sdist - run: | - python -m pip install --upgrade pip build - rm -rf dist build ./*.egg-info - python -m build - - - name: Wheel content sanity - run: | - python - <<'PY' - import zipfile, glob, sys, re - wheels = glob.glob("dist/unsloth_zoo-*.whl") - if not wheels: - print("FAIL: no wheel produced"); sys.exit(2) - w = wheels[0] - print(f"wheel: {w}") - # Version sanity: 
dynamic metadata pulls from unsloth_zoo.__init__.__version__. - m = re.match(r"dist/unsloth_zoo-([^-]+)-py3-none-any\.whl", w) - version = m.group(1) if m else None - print(f"wheel version: {version}") - with zipfile.ZipFile(w) as z: - n = z.namelist() - # Hard checks: must hold for any zoo release wheel. - hard_checks = { - "unsloth_zoo/__init__.py shipped": any(s == "unsloth_zoo/__init__.py" for s in n), - "unsloth_zoo/rl_replacements.py shipped": any(s == "unsloth_zoo/rl_replacements.py" for s in n), - "unsloth_zoo/temporary_patches/__init__.py shipped": any(s == "unsloth_zoo/temporary_patches/__init__.py" for s in n), - "no .pyc files": not any(s.endswith(".pyc") for s in n), - "no .git tree": not any(s.startswith(".git/") for s in n), - "version is not 0.0.0": version is not None and version != "0.0.0", - "METADATA present": any(s.endswith(".dist-info/METADATA") for s in n), - } - # Soft checks (warn only). Zoo's pyproject doesn't exclude tests/scripts; - # tightening the packaging config is a separate follow-up. - soft_checks = { - "no tests/ shipped": not any(s.startswith("tests/") for s in n), - "no scripts/ shipped": not any(s.startswith("scripts/") for s in n), - } - print("Hard checks:") - for k, v in hard_checks.items(): - print(f" [{'PASS' if v else 'FAIL'}] {k}") - print() - print("Soft checks (warnings):") - for k, v in soft_checks.items(): - status = "PASS" if v else "WARN" - print(f" [{status}] {k}") - # Exit non-zero ONLY if a hard check failed. - sys.exit(0 if all(hard_checks.values()) else 1) - PY - - - name: Import smoke (clean venv) - # unsloth_zoo/__init__.py:128 raises ImportError when parent `unsloth` is - # absent (deliberate guardrail). A bare `import unsloth_zoo` in a wheel-only - # venv will fail by design, so the smoke pivots to reading the version - # string from dist-info METADATA via importlib.metadata. 
- run: | - python -m venv /tmp/v - /tmp/v/bin/pip install --upgrade pip - /tmp/v/bin/pip install dist/unsloth_zoo-*.whl - # Read version from dist-info METADATA via importlib.metadata. - WHEEL_VERSION=$(/tmp/v/bin/python -c " - from importlib.metadata import version - print(version('unsloth_zoo')) - ") - echo "installed unsloth_zoo version: $WHEEL_VERSION" - test -n "$WHEEL_VERSION" && test "$WHEEL_VERSION" != "0.0.0" - - - name: Upload wheel on failure - if: failure() - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 - with: - name: unsloth-zoo-wheel - path: dist/ - retention-days: 7 From cc4278df80407b6a6597adfbf281eec8ccc0a724 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Sun, 24 May 2026 12:08:09 +0000 Subject: [PATCH 13/13] [CI-validation] fix install steps for first-round failures macOS: pip install -e .[mlx] does not pull torch (correct in production since MLX replaces torch on Apple Silicon), but the new test_mlx_save_lora_adapters_filter.py uses torch via the shim. Add an explicit torch==2.10.0 install from the PyTorch CPU index (same pattern as danielhanchen/unsloth-staging-2/.github/workflows/mlx-ci.yml). Linux + Windows: unsloth_zoo/__init__.py:198 has a find_spec("unsloth") hard gate that fires before UNSLOTH_IS_PRESENT is read. Install unsloth --no-deps from git main so the import survives without dragging in unsloth's heavy CUDA-only deps. Mirror of upstream consolidated-tests-ci.yml. 
--- .github/workflows/mlx-pr-linux.yml | 7 ++++++- .github/workflows/mlx-pr-mac.yml | 9 ++++++++- .github/workflows/mlx-pr-windows.yml | 7 ++++++- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mlx-pr-linux.yml b/.github/workflows/mlx-pr-linux.yml index 070c94f1a..c8b69c230 100644 --- a/.github/workflows/mlx-pr-linux.yml +++ b/.github/workflows/mlx-pr-linux.yml @@ -54,15 +54,20 @@ jobs: python-version: '3.12' cache: 'pip' - - name: Install CPU-only torch + unsloth_zoo[core] + - name: Install CPU-only torch + unsloth_zoo[core] + unsloth shim # MLX extras intentionally skipped -- mlx/mlx-lm/mlx-vlm have no # Linux wheel. The package's pyproject already gates these on # darwin+arm64 so `.[core]` resolves cleanly here. + # + # unsloth --no-deps satisfies the find_spec("unsloth") guard at + # unsloth_zoo/__init__.py:198 without dragging in unsloth's heavy + # deps. Mirror of the upstream consolidated-tests-ci.yml pattern. run: | python -m pip install --upgrade pip pip install --index-url https://download.pytorch.org/whl/cpu \ "torch>=2.4.0,<2.11.0" pip install -e .[core] + pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true pip install pytest==9.0.3 safetensors - name: Import smoke — unsloth_zoo top-level + MLX runtime probe diff --git a/.github/workflows/mlx-pr-mac.yml b/.github/workflows/mlx-pr-mac.yml index 0c9c61df2..f2f7ba814 100644 --- a/.github/workflows/mlx-pr-mac.yml +++ b/.github/workflows/mlx-pr-mac.yml @@ -65,9 +65,16 @@ jobs: print('OK: macOS arm64 confirmed') " - - name: Install unsloth_zoo with MLX extras + - name: Install unsloth_zoo with MLX extras (+ torch for tests) + # The .[mlx] extras intentionally skip torch (pyproject gates torch + # off on darwin+arm64 since MLX replaces it). But the new + # save_lora_adapters_filter test imports torch via the shim, so we + # explicitly pull the CPU-index Apple Silicon wheel (the same one + # danielhanchen/unsloth-staging-2/mlx-ci.yml uses). 
run: | python -m pip install --upgrade pip + pip install --index-url https://download.pytorch.org/whl/cpu \ + 'torch==2.10.0' pip install -e .[mlx] pip install pytest==9.0.3 safetensors diff --git a/.github/workflows/mlx-pr-windows.yml b/.github/workflows/mlx-pr-windows.yml index e5f36852e..407382770 100644 --- a/.github/workflows/mlx-pr-windows.yml +++ b/.github/workflows/mlx-pr-windows.yml @@ -53,14 +53,19 @@ jobs: python-version: '3.12' cache: 'pip' - - name: Install CPU-only torch + unsloth_zoo[core] (no triton) + - name: Install CPU-only torch + unsloth_zoo[core] + unsloth shim # pyproject already gates triton on linux, so [core] resolves # cleanly on Windows without dragging it in. + # + # unsloth --no-deps satisfies the find_spec("unsloth") guard at + # unsloth_zoo/__init__.py:198 without dragging in unsloth's heavy + # deps. Mirror of the upstream consolidated-tests-ci.yml pattern. run: | python -m pip install --upgrade pip pip install --index-url https://download.pytorch.org/whl/cpu \ "torch>=2.4.0,<2.11.0" pip install -e .[core] + pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true pip install pytest==9.0.3 safetensors - name: Import smoke — unsloth_zoo top-level + MLX runtime probe