From 9e25ba57e6482c56a423efef79b556d1b777fcdd Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 19 Apr 2026 07:52:55 +0000
Subject: [PATCH 01/13] Add .gemini/config.yaml for gemini-code-assist bot
 configuration

---
 .gemini/config.yaml | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 .gemini/config.yaml

diff --git a/.gemini/config.yaml b/.gemini/config.yaml
new file mode 100644
index 000000000..9cd83708f
--- /dev/null
+++ b/.gemini/config.yaml
@@ -0,0 +1,13 @@
+have_fun: false
+memory_config:
+  disabled: false
+code_review:
+  disable: false
+  comment_severity_threshold: LOW
+  max_review_comments: -1
+  pull_request_opened:
+    help: false
+    summary: false
+    code_review: false
+    include_drafts: false
+ignore_patterns: []

From 3614701ab8334168a8d4ca6db9cd1ec24880274c Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 19 Apr 2026 07:52:56 +0000
Subject: [PATCH 02/13] Add .gitattributes with merge=ours for staging-only
 files

---
 .gitattributes | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 .gitattributes

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 000000000..e805e6c08
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,6 @@
+# Normalize Python files to LF line endings
+*.py text eol=lf
+
+# Preserve staging-only files during upstream merges (needs `git config merge.ours.driver true`; "ours" is not a built-in driver)
+.gemini/**  merge=ours
+.gitattributes  merge=ours

From 9f155fc5ccb1bf3ffc77304e86395e334f7b5a21 Mon Sep 17 00:00:00 2001
From: Lyxot <longyixing331@gmail.com>
Date: Wed, 20 May 2026 01:00:56 +0800
Subject: [PATCH 03/13] fix: persist MLX LoRA adapter metadata

---
 unsloth_zoo/mlx/utils.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/unsloth_zoo/mlx/utils.py b/unsloth_zoo/mlx/utils.py
index e8b7e1881..e8c56c4ce 100644
--- a/unsloth_zoo/mlx/utils.py
+++ b/unsloth_zoo/mlx/utils.py
@@ -2723,15 +2723,34 @@ def _enrich_mlx_adapter_config(model, adapter_config):
         requires_runtime = True
     adapter_config["requires_unsloth_mlx_runtime_quantization"] = bool(requires_runtime)
 
-    # why: record LoRA module paths so reload recreates vision/projector LoRA
-    # layers (mlx-lm.load_adapters only knows the language tower).
+    # why: record LoRA module paths and parameters so reload recreates the same
+    # adapter topology. Without scale metadata, reload falls back to scale=1.0
+    # even when training used alpha/r > 1, changing post-reload logits.
     try:
         lora_paths = []
+        lora_rank = None
+        lora_scale = None
+        lora_dropout = None
         for name, module in model.named_modules():
             if hasattr(module, "lora_a") and hasattr(module, "lora_b"):
                 lora_paths.append(name)
+                if lora_rank is None:
+                    lora_rank = int(module.lora_a.shape[-1])
+                    lora_scale = float(getattr(module, "scale", 1.0))
+                    drop = getattr(module, "dropout", None)
+                    lora_dropout = float(getattr(drop, "p", 0.0) if drop else 0.0)
         if lora_paths:
             adapter_config["unsloth_mlx_lora_module_paths"] = lora_paths
+        if lora_rank is not None:
+            lora_parameters = dict(adapter_config.get("lora_parameters") or {})
+            lora_parameters.setdefault("rank", lora_rank)
+            lora_parameters.setdefault("scale", lora_scale)
+            lora_parameters.setdefault("dropout", lora_dropout)
+            adapter_config["lora_parameters"] = lora_parameters
+            adapter_config.setdefault("rank", lora_parameters["rank"])
+            adapter_config.setdefault("scale", lora_parameters["scale"])
+            adapter_config.setdefault("dropout", lora_parameters["dropout"])
+            adapter_config.setdefault("peft_type", "LORA")
     except Exception:
         pass
     return adapter_config

From a0bce35e43ffed6ff146d8223ba2e26f17afcdb9 Mon Sep 17 00:00:00 2001
From: Lyxot <longyixing331@gmail.com>
Date: Wed, 20 May 2026 01:17:56 +0800
Subject: [PATCH 04/13] fix: preserve MLX LoRA dropout metadata

---
 unsloth_zoo/mlx/utils.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/unsloth_zoo/mlx/utils.py b/unsloth_zoo/mlx/utils.py
index e8c56c4ce..c9fa8553d 100644
--- a/unsloth_zoo/mlx/utils.py
+++ b/unsloth_zoo/mlx/utils.py
@@ -2738,7 +2738,13 @@ def _enrich_mlx_adapter_config(model, adapter_config):
                     lora_rank = int(module.lora_a.shape[-1])
                     lora_scale = float(getattr(module, "scale", 1.0))
                     drop = getattr(module, "dropout", None)
-                    lora_dropout = float(getattr(drop, "p", 0.0) if drop else 0.0)
+                    if drop is None:
+                        lora_dropout = 0.0
+                    elif hasattr(drop, "p"):
+                        lora_dropout = float(drop.p)
+                    else:
+                        keep_probability = getattr(drop, "_p_1", 1.0)
+                        lora_dropout = float(1.0 - keep_probability)
         if lora_paths:
             adapter_config["unsloth_mlx_lora_module_paths"] = lora_paths
         if lora_rank is not None:

From f0dd9009bdc1689dd1b276b9ab473897cc3cbe39 Mon Sep 17 00:00:00 2001
From: Lyxot <longyixing331@gmail.com>
Date: Wed, 20 May 2026 01:49:02 +0800
Subject: [PATCH 05/13] fix: sync MLX LoRA adapter config fields

---
 unsloth_zoo/mlx/utils.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/unsloth_zoo/mlx/utils.py b/unsloth_zoo/mlx/utils.py
index c9fa8553d..729177c88 100644
--- a/unsloth_zoo/mlx/utils.py
+++ b/unsloth_zoo/mlx/utils.py
@@ -2745,17 +2745,23 @@ def _enrich_mlx_adapter_config(model, adapter_config):
                     else:
                         keep_probability = getattr(drop, "_p_1", 1.0)
                         lora_dropout = float(1.0 - keep_probability)
-        if lora_paths:
+        if lora_paths and "unsloth_mlx_lora_module_paths" not in adapter_config:
             adapter_config["unsloth_mlx_lora_module_paths"] = lora_paths
         if lora_rank is not None:
             lora_parameters = dict(adapter_config.get("lora_parameters") or {})
-            lora_parameters.setdefault("rank", lora_rank)
-            lora_parameters.setdefault("scale", lora_scale)
-            lora_parameters.setdefault("dropout", lora_dropout)
+            inferred_lora_parameters = {
+                "rank": lora_rank,
+                "scale": lora_scale,
+                "dropout": lora_dropout,
+            }
+            for key, value in inferred_lora_parameters.items():
+                if key not in lora_parameters:
+                    explicit_value = adapter_config.get(key)
+                    lora_parameters[key] = value if explicit_value is None else explicit_value
             adapter_config["lora_parameters"] = lora_parameters
-            adapter_config.setdefault("rank", lora_parameters["rank"])
-            adapter_config.setdefault("scale", lora_parameters["scale"])
-            adapter_config.setdefault("dropout", lora_parameters["dropout"])
+            adapter_config["rank"] = lora_parameters["rank"]
+            adapter_config["scale"] = lora_parameters["scale"]
+            adapter_config["dropout"] = lora_parameters["dropout"]
             adapter_config.setdefault("peft_type", "LORA")
     except Exception:
         pass

From fc1297939d636643cb1643aaa88e856750a70409 Mon Sep 17 00:00:00 2001
From: Lyxot <longyixing331@gmail.com>
Date: Wed, 20 May 2026 02:04:24 +0800
Subject: [PATCH 06/13] fix: prefer live MLX LoRA metadata

---
 unsloth_zoo/mlx/trainer.py | 9 ++++++++-
 unsloth_zoo/mlx/utils.py   | 4 +---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/unsloth_zoo/mlx/trainer.py b/unsloth_zoo/mlx/trainer.py
index fd2390932..71c685938 100644
--- a/unsloth_zoo/mlx/trainer.py
+++ b/unsloth_zoo/mlx/trainer.py
@@ -1397,7 +1397,14 @@ def save_model(self, output_dir=None):
                     _lora_scale = getattr(m, "scale", 1.0)
 
                     _drop = getattr(m, "dropout", None)
-                    _lora_dropout = getattr(_drop, "p", 0.0) if _drop else 0.0
+                    if _drop is None:
+                        _lora_dropout = 0.0
+                    elif hasattr(_drop, "p"):
+                        _lora_dropout = float(_drop.p)
+                    else:
+                        _lora_dropout = float(
+                            1.0 - getattr(_drop, "_p_1", 1.0)
+                        )
                     break
 
 
diff --git a/unsloth_zoo/mlx/utils.py b/unsloth_zoo/mlx/utils.py
index 729177c88..d70f9d52e 100644
--- a/unsloth_zoo/mlx/utils.py
+++ b/unsloth_zoo/mlx/utils.py
@@ -2755,9 +2755,7 @@ def _enrich_mlx_adapter_config(model, adapter_config):
                 "dropout": lora_dropout,
             }
             for key, value in inferred_lora_parameters.items():
-                if key not in lora_parameters:
-                    explicit_value = adapter_config.get(key)
-                    lora_parameters[key] = value if explicit_value is None else explicit_value
+                lora_parameters[key] = value
             adapter_config["lora_parameters"] = lora_parameters
             adapter_config["rank"] = lora_parameters["rank"]
             adapter_config["scale"] = lora_parameters["scale"]

From 9f3d11c80fd4d11cc393e6cfb301cc1ab699e9e9 Mon Sep 17 00:00:00 2001
From: Lyxot <longyixing331@gmail.com>
Date: Wed, 20 May 2026 02:17:03 +0800
Subject: [PATCH 07/13] fix: handle MLX adapter reload edge cases

---
 unsloth_zoo/mlx/loader.py  |  9 +++++++++
 unsloth_zoo/mlx/trainer.py | 21 +++++++++------------
 unsloth_zoo/mlx/utils.py   | 38 +++++++++++++++++++++++++++++---------
 3 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/unsloth_zoo/mlx/loader.py b/unsloth_zoo/mlx/loader.py
index 1cfc0f742..a23b13769 100644
--- a/unsloth_zoo/mlx/loader.py
+++ b/unsloth_zoo/mlx/loader.py
@@ -1103,6 +1103,14 @@ def _apply_lora_at_paths(model, module_paths, adapter_cfg):
             setattr(parent, leaf, wrapped)
 
 
+def _eval_mlx_model_after_adapter_reload(model):
+    try:
+        model.eval()
+    except Exception:
+        pass
+    return model
+
+
 def _adapter_actual_quant_config(adapter_cfg, resolved_map):
     expected = _global_quant_params(adapter_cfg.get("base_quantization_config"))
     if expected is not None:
@@ -2435,6 +2443,7 @@ def from_pretrained(
                     else:
                         from mlx_lm.tuner.utils import load_adapters
                         model = load_adapters(model, local_path)
+                    model = _eval_mlx_model_after_adapter_reload(model)
                     loaded_model_config = getattr(model, "_config", None)
                     is_vlm_model = bool(getattr(model, "_is_vlm_model", False))
                     processor = getattr(model, "_processor", None)
diff --git a/unsloth_zoo/mlx/trainer.py b/unsloth_zoo/mlx/trainer.py
index 71c685938..d2a12dec5 100644
--- a/unsloth_zoo/mlx/trainer.py
+++ b/unsloth_zoo/mlx/trainer.py
@@ -1380,7 +1380,11 @@ def _prepare_data(self, is_vlm):
 
     def save_model(self, output_dir=None):
         """Save LoRA adapters or full merged model (if no LoRA)."""
-        from .utils import save_merged_model
+        from .utils import (
+            _get_mlx_dropout_probability,
+            _infer_mlx_lora_rank,
+            save_merged_model,
+        )
         output_dir = output_dir or self.args.output_dir
 
         trainable = dict(tree_flatten(self.model.trainable_parameters()))
@@ -1393,18 +1397,11 @@ def save_model(self, output_dir=None):
             _lora_rank, _lora_scale, _lora_dropout = 8, 1.0, 0.0
             for _, m in self.model.named_modules():
                 if hasattr(m, "lora_a"):
-                    _lora_rank = m.lora_a.shape[-1]
+                    _lora_rank = _infer_mlx_lora_rank(m) or _lora_rank
                     _lora_scale = getattr(m, "scale", 1.0)
-
-                    _drop = getattr(m, "dropout", None)
-                    if _drop is None:
-                        _lora_dropout = 0.0
-                    elif hasattr(_drop, "p"):
-                        _lora_dropout = float(_drop.p)
-                    else:
-                        _lora_dropout = float(
-                            1.0 - getattr(_drop, "_p_1", 1.0)
-                        )
+                    _lora_dropout = _get_mlx_dropout_probability(
+                        getattr(m, "dropout", None)
+                    )
                     break
 
 
diff --git a/unsloth_zoo/mlx/utils.py b/unsloth_zoo/mlx/utils.py
index d70f9d52e..23a1a55c7 100644
--- a/unsloth_zoo/mlx/utils.py
+++ b/unsloth_zoo/mlx/utils.py
@@ -2664,6 +2664,31 @@ def _get_mlx_config_quantization(model):
     return config.get("quantization") or config.get("quantization_config")
 
 
+def _get_mlx_dropout_probability(drop):
+    if drop is None:
+        return 0.0
+    if hasattr(drop, "p"):
+        return float(drop.p)
+    keep_probability = getattr(drop, "_p_1", 1.0)
+    return float(1.0 - keep_probability)
+
+
+def _infer_mlx_lora_rank(module):
+    lora_a = getattr(module, "lora_a", None)
+    lora_b = getattr(module, "lora_b", None)
+    lora_a_shape = tuple(getattr(lora_a, "shape", ()) or ())
+    lora_b_shape = tuple(getattr(lora_b, "shape", ()) or ())
+    if len(lora_a_shape) >= 3:
+        rank = lora_a_shape[-2]
+        if not lora_b_shape or lora_b_shape[-1] == rank:
+            return int(rank)
+    if lora_a_shape and lora_b_shape and lora_a_shape[-1] == lora_b_shape[0]:
+        return int(lora_a_shape[-1])
+    if lora_a_shape:
+        return int(lora_a_shape[-1])
+    return None
+
+
 def _enrich_mlx_adapter_config(model, adapter_config):
     adapter_config = dict(adapter_config or {})
     hf_repo = getattr(model, "_hf_repo", None) or adapter_config.get("base_model_name_or_path")
@@ -2735,16 +2760,11 @@ def _enrich_mlx_adapter_config(model, adapter_config):
             if hasattr(module, "lora_a") and hasattr(module, "lora_b"):
                 lora_paths.append(name)
                 if lora_rank is None:
-                    lora_rank = int(module.lora_a.shape[-1])
+                    lora_rank = _infer_mlx_lora_rank(module)
                     lora_scale = float(getattr(module, "scale", 1.0))
-                    drop = getattr(module, "dropout", None)
-                    if drop is None:
-                        lora_dropout = 0.0
-                    elif hasattr(drop, "p"):
-                        lora_dropout = float(drop.p)
-                    else:
-                        keep_probability = getattr(drop, "_p_1", 1.0)
-                        lora_dropout = float(1.0 - keep_probability)
+                    lora_dropout = _get_mlx_dropout_probability(
+                        getattr(module, "dropout", None)
+                    )
         if lora_paths and "unsloth_mlx_lora_module_paths" not in adapter_config:
             adapter_config["unsloth_mlx_lora_module_paths"] = lora_paths
         if lora_rank is not None:

From 91535bc6eef043c9581a7a3f1fb4c15a273d73a1 Mon Sep 17 00:00:00 2001
From: Lyxot <longyixing331@gmail.com>
Date: Thu, 21 May 2026 00:14:43 +0800
Subject: [PATCH 08/13] fix: handle zero-token MLX CCE inputs

---
 tests/test_mlx_runtime_cce_compile.py | 73 +++++++++++++++++++++++++++
 unsloth_zoo/mlx/cce/runtime_cce.py    | 13 +++++
 2 files changed, 86 insertions(+)

diff --git a/tests/test_mlx_runtime_cce_compile.py b/tests/test_mlx_runtime_cce_compile.py
index 9168cfe0f..e646412d9 100644
--- a/tests/test_mlx_runtime_cce_compile.py
+++ b/tests/test_mlx_runtime_cce_compile.py
@@ -38,6 +38,79 @@ def _skip_torch_shim():
         pytest.skip("requires real MLX runtime")
 
 
+def test_runtime_cce_zero_tokens_returns_empty_losses_and_zero_gradients():
+    _skip_torch_shim()
+    from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
+
+    runtime_cce, _ = make_chunked_cross_entropy_loss(
+        ignore_index=-100,
+        chunk_size=16,
+    )
+    hidden = mx.zeros((0, 16), dtype=mx.float32)
+    weight = mx.zeros((32, 16), dtype=mx.float32)
+    targets = mx.zeros((0,), dtype=mx.int32)
+
+    losses = runtime_cce(hidden, weight, targets)
+    mx.eval(losses)
+    assert losses.shape == (0,)
+
+    def loss_fn(h, w):
+        return runtime_cce(h, w, targets).astype(mx.float32).sum()
+
+    loss, grads = mx.value_and_grad(loss_fn, argnums=(0, 1))(hidden, weight)
+    mx.eval(loss, *grads)
+
+    assert loss.item() == pytest.approx(0.0)
+    assert grads[0].shape == hidden.shape
+    assert grads[1].shape == weight.shape
+    assert mx.sum(mx.abs(grads[0]).astype(mx.float32)).item() == pytest.approx(0.0)
+    assert mx.sum(mx.abs(grads[1]).astype(mx.float32)).item() == pytest.approx(0.0)
+
+
+def test_quantized_runtime_cce_zero_tokens_returns_empty_losses_and_zero_gradients():
+    _skip_torch_shim()
+    import mlx.nn as nn
+
+    from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
+
+    linear = nn.Linear(32, 32, bias=False)
+    qlinear = nn.QuantizedLinear.from_linear(linear, group_size=32, bits=4)
+    runtime_cce, _ = make_chunked_cross_entropy_loss(
+        ignore_index=-100,
+        chunk_size=16,
+        quantized=True,
+        group_size=qlinear.group_size,
+        bits=qlinear.bits,
+    )
+    hidden = mx.zeros((0, 32), dtype=mx.float32)
+    targets = mx.zeros((0,), dtype=mx.int32)
+
+    losses = runtime_cce(
+        hidden,
+        qlinear.weight,
+        qlinear.scales,
+        qlinear.biases,
+        targets,
+    )
+    mx.eval(losses)
+    assert losses.shape == (0,)
+
+    def loss_fn(h):
+        return runtime_cce(
+            h,
+            qlinear.weight,
+            qlinear.scales,
+            qlinear.biases,
+            targets,
+        ).astype(mx.float32).sum()
+
+    loss, grad = mx.value_and_grad(loss_fn)(hidden)
+    mx.eval(loss, grad)
+
+    assert loss.item() == pytest.approx(0.0)
+    assert grad.shape == hidden.shape
+    assert mx.sum(mx.abs(grad).astype(mx.float32)).item() == pytest.approx(0.0)
+
 def test_compiled_runtime_cce_preserves_aux_lse_for_gradients():
     _skip_torch_shim()
     from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py
index 464051c97..f65668f21 100644
--- a/unsloth_zoo/mlx/cce/runtime_cce.py
+++ b/unsloth_zoo/mlx/cce/runtime_cce.py
@@ -464,6 +464,9 @@ def _forward_chunked_fused_finalize(
 
     n, _ = hidden_compute.shape
     vocab_size = weight_compute.shape[0]
+    if n == 0:
+        empty = mx.zeros((0,), dtype=mx.float32)
+        return empty, empty
     compute_bytes = 2 if hidden_compute.dtype in (mx.float16, mx.bfloat16) else 4
     chunk_size = _resolve_chunk_size(
         chunk_size,
@@ -680,6 +683,14 @@ def runtime_cce_loss_vjp(primals, cotangents, outputs):
             hidden_compute = hidden
             weight_compute = weight
             targets32 = targets.astype(mx.int32)
+            if hidden_compute.shape[0] == 0:
+                return (
+                    mx.zeros_like(hidden),
+                    mx.zeros_like(weight),
+                    mx.zeros_like(scales),
+                    mx.zeros_like(biases),
+                    mx.zeros_like(targets),
+                )
             if grad_output is None:
                 grad_output = mx.zeros_like(outputs[0])
             grad_output32 = grad_output.astype(mx.float32)
@@ -805,6 +816,8 @@ def runtime_cce_loss_vjp(primals, cotangents, outputs):
         hidden_compute = hidden
         weight_compute = weight
         targets32 = targets.astype(mx.int32)
+        if hidden_compute.shape[0] == 0:
+            return mx.zeros_like(hidden), mx.zeros_like(weight), mx.zeros_like(targets)
         if grad_output is None:
             grad_output = mx.zeros_like(outputs[0])
         grad_output32 = grad_output.astype(mx.float32)

From 41d5d411b73c7b78475923d4232c7bdc88407f43 Mon Sep 17 00:00:00 2001
From: Lyxot <longyixing331@gmail.com>
Date: Thu, 21 May 2026 00:17:06 +0800
Subject: [PATCH 09/13] fix: poison invalid MLX CCE labels

---
 tests/test_mlx_runtime_cce_compile.py | 85 +++++++++++++++++++++++++++
 unsloth_zoo/mlx/cce/runtime_cce.py    | 25 +++++++-
 2 files changed, 109 insertions(+), 1 deletion(-)

diff --git a/tests/test_mlx_runtime_cce_compile.py b/tests/test_mlx_runtime_cce_compile.py
index e646412d9..cbe8e9d64 100644
--- a/tests/test_mlx_runtime_cce_compile.py
+++ b/tests/test_mlx_runtime_cce_compile.py
@@ -9,6 +9,7 @@
 
 from __future__ import annotations
 
+import math
 import sys
 
 import pytest
@@ -111,6 +112,90 @@ def loss_fn(h):
     assert grad.shape == hidden.shape
     assert mx.sum(mx.abs(grad).astype(mx.float32)).item() == pytest.approx(0.0)
 
+@pytest.mark.parametrize("bad_target", [-1, 32])
+def test_runtime_cce_invalid_labels_poison_loss_and_gradients(bad_target):
+    _skip_torch_shim()
+    from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
+
+    runtime_cce, _ = make_chunked_cross_entropy_loss(
+        ignore_index=-100,
+        chunk_size=16,
+    )
+    hidden = mx.ones((3, 16), dtype=mx.float32)
+    weight = mx.ones((32, 16), dtype=mx.float32)
+    targets = mx.array([0, bad_target, -100], dtype=mx.int32)
+
+    losses = runtime_cce(hidden, weight, targets)
+    mx.eval(losses)
+
+    assert losses[0].item() == pytest.approx(math.log(32.0), rel=1e-5)
+    assert math.isnan(losses[1].item())
+    assert losses[2].item() == pytest.approx(0.0)
+
+    def loss_fn(h, w):
+        return runtime_cce(h, w, targets).astype(mx.float32).sum()
+
+    loss, grads = mx.value_and_grad(loss_fn, argnums=(0, 1))(hidden, weight)
+    grad_norm = _stable_norm(grads)
+    mx.eval(loss, grad_norm)
+
+    assert math.isnan(loss.item())
+    assert math.isnan(grad_norm.item())
+
+
+def test_compiled_runtime_cce_invalid_labels_poison_loss():
+    _skip_torch_shim()
+    from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
+
+    runtime_cce, _ = make_chunked_cross_entropy_loss(
+        ignore_index=-100,
+        chunk_size=16,
+    )
+    hidden = mx.ones((2, 16), dtype=mx.float32)
+    weight = mx.ones((32, 16), dtype=mx.float32)
+    targets = mx.array([0, 32], dtype=mx.int32)
+
+    def losses_fn(h, w, t):
+        return runtime_cce(h, w, t)
+
+    losses = mx.compile(losses_fn)(hidden, weight, targets)
+    mx.eval(losses)
+
+    assert losses[0].item() == pytest.approx(math.log(32.0), rel=1e-5)
+    assert math.isnan(losses[1].item())
+
+
+def test_quantized_runtime_cce_invalid_labels_poison_loss():
+    _skip_torch_shim()
+    import mlx.nn as nn
+
+    from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
+
+    linear = nn.Linear(32, 32, bias=False)
+    linear.weight = mx.ones((32, 32), dtype=mx.float32)
+    qlinear = nn.QuantizedLinear.from_linear(linear, group_size=32, bits=4)
+    runtime_cce, _ = make_chunked_cross_entropy_loss(
+        ignore_index=-100,
+        chunk_size=16,
+        quantized=True,
+        group_size=qlinear.group_size,
+        bits=qlinear.bits,
+    )
+    hidden = mx.ones((2, 32), dtype=mx.float32)
+    targets = mx.array([0, 32], dtype=mx.int32)
+
+    losses = runtime_cce(
+        hidden,
+        qlinear.weight,
+        qlinear.scales,
+        qlinear.biases,
+        targets,
+    )
+    mx.eval(losses)
+
+    assert losses[0].item() == pytest.approx(math.log(32.0), rel=1e-5)
+    assert math.isnan(losses[1].item())
+
 def test_compiled_runtime_cce_preserves_aux_lse_for_gradients():
     _skip_torch_shim()
     from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py
index f65668f21..99081e5fe 100644
--- a/unsloth_zoo/mlx/cce/runtime_cce.py
+++ b/unsloth_zoo/mlx/cce/runtime_cce.py
@@ -115,6 +115,24 @@ def _apply_softcap(logits: mx.array, logit_softcap: float) -> mx.array:
     return softcap * mx.tanh(logits / softcap)
 
 
+def _target_validity_masks(
+    targets: mx.array,
+    vocab_size: int,
+    ignore_index: int,
+) -> tuple[mx.array, mx.array]:
+    in_vocab = (targets >= 0) & (targets < vocab_size)
+    not_ignored = targets != ignore_index
+    return not_ignored & in_vocab, not_ignored & ~in_vocab
+
+
+def _poison_invalid_targets(values: mx.array, invalid: mx.array) -> mx.array:
+    return mx.where(
+        invalid,
+        mx.full(values.shape, float("nan"), dtype=values.dtype),
+        values,
+    )
+
+
 def _chunk_matmul(
     x: mx.array,
     weight: mx.array,
@@ -509,8 +527,10 @@ def _forward_chunked_fused_finalize(
             target_logit = mx.where(in_chunk, chunk_target, target_logit)
 
         lse = running_max + mx.log(running_sum_exp + 1e-9)
-        valid = targets != ignore_index
+        valid, invalid = _target_validity_masks(targets, vocab_size, ignore_index)
         loss = mx.where(valid, lse - target_logit, mx.zeros_like(lse))
+        loss = _poison_invalid_targets(loss, invalid)
+        lse = _poison_invalid_targets(lse, invalid)
         return loss, lse
 
     ignore_arr = mx.array([ignore_index], dtype=mx.int32)
@@ -551,6 +571,9 @@ def _forward_chunked_fused_finalize(
                 grid=(n * 256, 1, 1),
                 threadgroup=(256, 1, 1),
             )
+            _, invalid = _target_validity_masks(targets, vocab_size, ignore_index)
+            loss = _poison_invalid_targets(loss, invalid)
+            lse = _poison_invalid_targets(lse, invalid)
             return loss, lse
 
         running_max, running_sum_exp, target_logit = forward_update_kernel(

From 76cc2dad6f94c36cc87b6773558d3ff5e5f7ea6b Mon Sep 17 00:00:00 2001
From: Lyxot <longyixing331@gmail.com>
Date: Sat, 23 May 2026 02:27:23 +0800
Subject: [PATCH 10/13] fix(mlx): save only adapter tensors

---
 unsloth_zoo/mlx/trainer.py |  4 ++--
 unsloth_zoo/mlx/utils.py   | 43 ++++++++++++++++++++++++++++----------
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/unsloth_zoo/mlx/trainer.py b/unsloth_zoo/mlx/trainer.py
index fd2390932..79d8b80ab 100644
--- a/unsloth_zoo/mlx/trainer.py
+++ b/unsloth_zoo/mlx/trainer.py
@@ -64,7 +64,7 @@
     normalize_mlx_chat_template,
     normalize_vlm_processor_chat_template,
     collect_mlx_texts,
-    save_lora_adapters,
+    save_trainable_adapters,
     apply_gradient_checkpointing,
     remove_gradient_checkpointing,
     _is_vlm_model,
@@ -1235,7 +1235,7 @@ def step_fn(batch_data, prev_state, do_update):
             # Checkpointing
             if args.save_steps > 0 and current_step % args.save_steps == 0:
                 ckpt_dir = f"{args.output_dir}/checkpoint-{current_step}"
-                save_lora_adapters(model, ckpt_dir)
+                save_trainable_adapters(model, ckpt_dir)
                 print(f"  Saved checkpoint to {ckpt_dir}")
 
         total_time = time.perf_counter() - start_time
diff --git a/unsloth_zoo/mlx/utils.py b/unsloth_zoo/mlx/utils.py
index e8b7e1881..4b8444177 100644
--- a/unsloth_zoo/mlx/utils.py
+++ b/unsloth_zoo/mlx/utils.py
@@ -2541,6 +2541,25 @@ def iterate_training_batches(dataset, tokenizer, batch_size, max_seq_length,
         yield batch, lengths_info, None
 
 
+def _save_adapter_artifacts(model, path, tensors, adapter_config=None):
+    path = Path(path)
+    path.mkdir(parents=True, exist_ok=True)
+
+    if tensors:
+        mx.save_safetensors(str(path / "adapters.safetensors"), tensors)
+
+    adapter_config = _enrich_mlx_adapter_config(model, adapter_config or {})
+    if adapter_config:
+        with open(path / "adapter_config.json", "w") as f:
+            json.dump(adapter_config, f, indent=2)
+
+
+def save_trainable_adapters(model, path, adapter_config=None):
+    """Save the current trainable parameter tree for training checkpoints."""
+    trainable = dict(mlx.utils.tree_flatten(model.trainable_parameters()))
+    _save_adapter_artifacts(model, path, trainable, adapter_config=adapter_config)
+
+
 def save_lora_adapters(model, path, adapter_config=None):
     """Save LoRA adapter weights to disk.
 
@@ -2549,19 +2568,21 @@ def save_lora_adapters(model, path, adapter_config=None):
         path: Directory to save adapters.
         adapter_config: Optional dict with LoRA config metadata.
     """
-    path = Path(path)
-    path.mkdir(parents=True, exist_ok=True)
-
-    # Collect only trainable (LoRA) parameters — flatten nested dict for safetensors
-    trainable = dict(mlx.utils.tree_flatten(model.trainable_parameters()))
+    parameters = dict(mlx.utils.tree_flatten(model.parameters()))
+    adapter_tensors = {
+        name: value
+        for name, value in parameters.items()
+        if "lora_" in name.lower()
+    }
 
-    if trainable:
-        mx.save_safetensors(str(path / "adapters.safetensors"), trainable)
+    if not adapter_tensors:
+        raise ValueError(
+            "Unsloth: no MLX LoRA adapter tensors were found to save."
+        )
 
-    adapter_config = _enrich_mlx_adapter_config(model, adapter_config or {})
-    if adapter_config:
-        with open(path / "adapter_config.json", "w") as f:
-            json.dump(adapter_config, f, indent=2)
+    _save_adapter_artifacts(
+        model, path, adapter_tensors, adapter_config=adapter_config
+    )
 
 
 def _infer_snapshot_commit(path):

From 969a23d54d6d16f691e28543ea798c224fc7a77b Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 12:04:01 +0000
Subject: [PATCH 11/13] [CI-validation] Run unsloth-zoo MLX PRs 679/682/692 on
 real CI runners

Combines three open MLX-only PRs against unslothai/unsloth-zoo into one
staging branch and wires up real ubuntu-latest / macos-14 / windows-latest
GitHub Actions runs to validate them together:

  - unslothai/unsloth-zoo#679 fix(mlx): persist LoRA adapter metadata on save
  - unslothai/unsloth-zoo#682 fix(mlx): handle zero-token and invalid labels in CCE
  - unslothai/unsloth-zoo#692 fix(mlx): save only LoRA adapter tensors

This branch is intentionally throwaway; do not merge into staging/main.
It is the iteration unit for the three workflow files below.

PR #679 + PR #692 both touch unsloth_zoo/mlx/{utils,trainer}.py. Git's
ort strategy auto-resolved cleanly:
  - utils.py keeps PR #692's _save_adapter_artifacts helper + lora_-name
    filter in save_lora_adapters, PLUS PR #679's _get_mlx_dropout_probability,
    _infer_mlx_lora_rank, and the expanded _enrich_mlx_adapter_config that
    writes lora_parameters / rank / scale / dropout / peft_type=LORA.
  - trainer.py imports both save_trainable_adapters (PR #692) and the
    helpers from PR #679, with checkpoint saves switched to the new
    save_trainable_adapters and final adapter export still calling
    save_lora_adapters.

New scaffolding:
  - tests/test_mlx_save_lora_adapters_filter.py: four tests over the
    combined PR #692 + PR #679 surface (LoRA-only filter, metadata fields,
    no-adapter ValueError, trainable-checkpoint preserves everything).
    Closes the PR #692 coverage gap Copilot flagged. Uses mlx_simulation
    so it runs on Linux + Windows too.
  - tests/_zoo_aggressive_cuda_spoof.py: deeper torch.cuda spoof copied
    from danielhanchen/unsloth-staging-2, kept available for harder
    import paths that escape tests/conftest.py's device-type preload.
  - .github/workflows/mlx-pr-mac.yml: macos-14, real MLX install,
    PR-specific pytest set. Primary green signal.
  - .github/workflows/mlx-pr-linux.yml: ubuntu-latest, CPU torch + no-MLX
    install, import smoke + the new save_lora_adapters_filter shim test.
  - .github/workflows/mlx-pr-windows.yml: same as Linux but pinned to
    shell: bash everywhere; no triton.

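All three workflows trigger on push to this staging branch only, gated by
`paths:` filters, and use cancel-in-progress concurrency so that
force-pushes during iteration do not queue redundant runs. The Linux
workflow can additionally be started manually via workflow_dispatch.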
---
 .github/workflows/mlx-pr-linux.yml          |  96 ++++++++
 .github/workflows/mlx-pr-mac.yml            | 115 ++++++++++
 .github/workflows/mlx-pr-windows.yml        |  86 +++++++
 tests/_zoo_aggressive_cuda_spoof.py         | 214 ++++++++++++++++++
 tests/test_mlx_save_lora_adapters_filter.py | 238 ++++++++++++++++++++
 5 files changed, 749 insertions(+)
 create mode 100644 .github/workflows/mlx-pr-linux.yml
 create mode 100644 .github/workflows/mlx-pr-mac.yml
 create mode 100644 .github/workflows/mlx-pr-windows.yml
 create mode 100644 tests/_zoo_aggressive_cuda_spoof.py
 create mode 100644 tests/test_mlx_save_lora_adapters_filter.py

diff --git a/.github/workflows/mlx-pr-linux.yml b/.github/workflows/mlx-pr-linux.yml
new file mode 100644
index 000000000..070c94f1a
--- /dev/null
+++ b/.github/workflows/mlx-pr-linux.yml
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+
+# Linux import-smoke guardrail for the three MLX-only PRs validated by this
+# staging branch. MLX wheels do not install on Linux, so this job CANNOT
+# exercise Metal kernels -- it confirms (a) the PR diffs do not break
+# non-MLX imports of unsloth_zoo, and (b) the new mlx_simulation-backed
+# adapter-save test catches the LoRA filter / metadata regressions on a
+# CPU runner.
+
+name: MLX PR validation (Linux import smoke)
+
+on:
+  push:
+    branches: [staging/mlx-prs-679-682-692]
+    paths:
+      - 'unsloth_zoo/**.py'
+      - 'tests/test_mlx_save_lora_adapters_filter.py'
+      - 'tests/mlx_simulation/**'
+      - 'tests/conftest.py'
+      - 'tests/_zoo_aggressive_cuda_spoof.py'
+      - 'pyproject.toml'
+      - '.github/workflows/mlx-pr-linux.yml'
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  mlx-pr-linux:
+    name: MLX PR import smoke on Linux
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    env:
+      UNSLOTH_IS_PRESENT: '1'
+      UNSLOTH_COMPILE_DISABLE: '1'
+      PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python
+    steps:
+      - name: Harden runner (audit)
+        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - name: Install CPU-only torch + unsloth_zoo[core]
+        # MLX extras intentionally skipped -- mlx/mlx-lm/mlx-vlm have no
+        # Linux wheel. The package's pyproject already gates these on
+        # darwin+arm64 so `.[core]` resolves cleanly here.
+        run: |
+          python -m pip install --upgrade pip
+          pip install --index-url https://download.pytorch.org/whl/cpu \
+            "torch>=2.4.0,<2.11.0"
+          pip install -e .[core]
+          pip install pytest==9.0.3 safetensors
+
+      - name: Import smoke — unsloth_zoo top-level + MLX runtime probe
+        # tests/conftest.py already preloads unsloth_zoo.device_type under a
+        # spoofed torch.cuda.is_available() for pytest runs; this step proves
+        # the diff did not break a plain import under the aggressive spoof.
+        run: |
+          python -c "
+          import sys; sys.path.insert(0, 'tests')
+          import _zoo_aggressive_cuda_spoof as s; s.apply()
+          import unsloth_zoo
+          from unsloth_zoo.mlx import is_mlx_available
+          assert is_mlx_available() is False, 'MLX should be unavailable on Linux'
+          print('OK: unsloth_zoo imports cleanly; MLX correctly reports unavailable')
+          "
+
+      - name: PR #692 + PR #679 — save_lora_adapters filter + metadata (shim)
+        # Runs against the mlx_simulation shim; this is the only PR-specific
+        # signal the Linux job can produce, but it's a real one.
+        run: python -m pytest tests/test_mlx_save_lora_adapters_filter.py -v
+
+      - name: Source-level MLX tests (no mlx.core needed)
+        # These read MLX module source as text and assert on signatures /
+        # constants. They run identically on every platform and catch
+        # accidental drift introduced by the merged PRs.
+        continue-on-error: true
+        run: |
+          python -m pytest -v \
+            tests/test_mlx_baseline_loss_parity.py \
+            tests/test_mlx_get_peft_model_seed_ordering.py \
+            tests/test_mlx_max_grad_value_none.py
diff --git a/.github/workflows/mlx-pr-mac.yml b/.github/workflows/mlx-pr-mac.yml
new file mode 100644
index 000000000..0c9c61df2
--- /dev/null
+++ b/.github/workflows/mlx-pr-mac.yml
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+
+# Throwaway staging-fork CI validation for three open MLX-only PRs against
+# unslothai/unsloth-zoo:
+#   - PR #679  fix(mlx): persist LoRA adapter metadata on save
+#   - PR #682  fix(mlx): handle zero-token and invalid labels in CCE
+#   - PR #692  fix(mlx): save only LoRA adapter tensors
+#
+# macos-14 is the only platform where MLX wheels resolve, so this is the
+# only workflow that exercises real MLX kernels + the real LoRA save/reload
+# round-trip these PRs touch.
+#
+# Triggers only on pushes to the staging branch (plus manual
+# workflow_dispatch) -- avoids burning macOS minutes on unrelated upstream PR
+# events. cancel-in-progress so force-push iteration does not queue up runs.
+
+name: MLX PR validation (macOS Apple Silicon)
+
+on:
+  push:
+    branches: [staging/mlx-prs-679-682-692]
+    paths:
+      - 'unsloth_zoo/mlx/**'
+      - 'tests/test_mlx_*.py'
+      - 'tests/mlx_simulation/**'
+      - 'tests/conftest.py'
+      - 'pyproject.toml'
+      - '.github/workflows/mlx-pr-mac.yml'
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  mlx-pr-mac:
+    name: MLX PR validation on Apple Silicon
+    runs-on: macos-14
+    timeout-minutes: 30
+    steps:
+      - name: Harden runner (audit)
+        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - name: Confirm runner is real Apple Silicon
+        run: |
+          python -c "
+          import platform
+          assert platform.system() == 'Darwin', platform.system()
+          assert platform.machine() == 'arm64', platform.machine()
+          print('OK: macOS arm64 confirmed')
+          "
+
+      - name: Install unsloth_zoo with MLX extras
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .[mlx]
+          pip install pytest==9.0.3 safetensors
+
+      - name: Smoke-import MLX submodules (PRs land here)
+        run: |
+          python -c "
+          import importlib
+          for name in [
+              'unsloth_zoo.mlx.loader',
+              'unsloth_zoo.mlx.trainer',
+              'unsloth_zoo.mlx.utils',
+              'unsloth_zoo.mlx.compile',
+              'unsloth_zoo.mlx.runtime',
+              'unsloth_zoo.mlx.cce',
+              'unsloth_zoo.mlx.cce.runtime_cce',
+          ]:
+              importlib.import_module(name)
+              print('OK:', name)
+          import mlx.core as mx
+          print('OK: mlx.core', mx.__version__ if hasattr(mx, '__version__') else '(version unknown)')
+          "
+
+      - name: PR #692 + PR #679 — save_lora_adapters filter + metadata
+        run: python -m pytest tests/test_mlx_save_lora_adapters_filter.py -v
+
+      - name: PR #682 — CCE zero-token + invalid-label + compile-mode
+        run: python -m pytest tests/test_mlx_runtime_cce_compile.py -v
+
+      - name: PR #679 — get_peft_model finetune_last_n_layers passthrough
+        # Pre-existing FakeModel/trainable_parameters issue on upstream main
+        # for one case -- continue-on-error so we still get the rest of the
+        # signal, and remove this once upstream lands a fix.
+        continue-on-error: true
+        run: python -m pytest tests/test_mlx_finetune_last_n_layers.py -v
+
+      - name: Regression breadth — remaining MLX-related tests
+        continue-on-error: true
+        run: |
+          python -m pytest -v \
+            tests/test_mlx_torch_shim_smoke.py \
+            tests/test_mlx_baseline_loss_parity.py \
+            tests/test_mlx_batch_padding.py \
+            tests/test_mlx_dtype_downcast_warning.py \
+            tests/test_mlx_max_grad_value_none.py \
+            tests/test_mlx_get_peft_model_seed_ordering.py
diff --git a/.github/workflows/mlx-pr-windows.yml b/.github/workflows/mlx-pr-windows.yml
new file mode 100644
index 000000000..e5f36852e
--- /dev/null
+++ b/.github/workflows/mlx-pr-windows.yml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+
+# Windows import-smoke guardrail for the three MLX-only PRs. Same purpose
+# as the Linux job: prove the diffs do not break non-MLX imports + run the
+# shim-backed adapter-save test. Two Windows-specific differences from
+# Linux:
+#   - shell: bash on every step (Git Bash; PowerShell would force
+#     rewriting the multi-line python -c snippets).
+#   - no triton dep (no Windows wheel).
+
+name: MLX PR validation (Windows import smoke)
+
+on:
+  push:
+    branches: [staging/mlx-prs-679-682-692]
+    paths:
+      - 'unsloth_zoo/**.py'
+      - 'tests/test_mlx_save_lora_adapters_filter.py'
+      - 'tests/mlx_simulation/**'
+      - 'tests/conftest.py'
+      - 'tests/_zoo_aggressive_cuda_spoof.py'
+      - 'pyproject.toml'
+      - '.github/workflows/mlx-pr-windows.yml'
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  mlx-pr-windows:
+    name: MLX PR import smoke on Windows
+    runs-on: windows-latest
+    timeout-minutes: 25
+    env:
+      UNSLOTH_IS_PRESENT: '1'
+      UNSLOTH_COMPILE_DISABLE: '1'
+      PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - name: Install CPU-only torch + unsloth_zoo[core] (no triton)
+        # pyproject already gates triton on linux, so [core] resolves
+        # cleanly on Windows without dragging it in.
+        run: |
+          python -m pip install --upgrade pip
+          pip install --index-url https://download.pytorch.org/whl/cpu \
+            "torch>=2.4.0,<2.11.0"
+          pip install -e .[core]
+          pip install pytest==9.0.3 safetensors
+
+      - name: Import smoke — unsloth_zoo top-level + MLX runtime probe
+        run: |
+          python -c "
+          import sys; sys.path.insert(0, 'tests')
+          import _zoo_aggressive_cuda_spoof as s; s.apply()
+          import unsloth_zoo
+          from unsloth_zoo.mlx import is_mlx_available
+          assert is_mlx_available() is False, 'MLX should be unavailable on Windows'
+          print('OK: unsloth_zoo imports cleanly; MLX correctly reports unavailable')
+          "
+
+      - name: PR #692 + PR #679 — save_lora_adapters filter + metadata (shim)
+        run: python -m pytest tests/test_mlx_save_lora_adapters_filter.py -v
+
+      - name: Source-level MLX tests (no mlx.core needed)
+        continue-on-error: true
+        run: |
+          python -m pytest -v \
+            tests/test_mlx_baseline_loss_parity.py \
+            tests/test_mlx_get_peft_model_seed_ordering.py \
+            tests/test_mlx_max_grad_value_none.py
diff --git a/tests/_zoo_aggressive_cuda_spoof.py b/tests/_zoo_aggressive_cuda_spoof.py
new file mode 100644
index 000000000..eaafe445f
--- /dev/null
+++ b/tests/_zoo_aggressive_cuda_spoof.py
@@ -0,0 +1,214 @@
+# Originally generated by .github/workflows/consolidated-tests-ci.yml; kept
+# as a checked-in copy for the mlx-pr-* import-smoke steps on this branch.
+# Aggressive CUDA spoof for CPU-only CI jobs. Extends
+# tests/conftest.py:84-141's import-time harness with deeper patches that
+# unblock more patch_* functions and unsloth_zoo init paths on a GPU-less
+# runner. Call apply() before any unsloth / unsloth_zoo / transformers import.
+#
+# Design: only no-op or value-returning patches. We do NOT replace tensor
+# allocators. The single exception is `pin_memory=True` kwarg dropping,
+# which converts a hard CUDA-required call into a CPU-OK call -- the
+# intent of pin_memory is a CUDA-host fast-copy, which simply has no
+# meaning on this runner; downgrading silently is the right behavior here.
+
+from __future__ import annotations
+
+import sys
+import types
+from typing import Any
+
+
+def apply() -> None:
+    """Apply the spoof. Idempotent: calling again has no effect."""
+    import torch
+
+    if getattr(torch.cuda, "_unsloth_consolidated_spoof", False):
+        return
+
+    # ----- device probes (cheap, value-returning) -------------------------
+    torch.cuda.is_available = lambda: True
+    torch.cuda.device_count = lambda: 1
+    torch.cuda.current_device = lambda: 0
+    torch.cuda.is_initialized = lambda: True
+    torch.cuda.set_device = lambda *a, **k: None
+    torch.cuda.synchronize = lambda *a, **k: None
+    torch.cuda.empty_cache = lambda *a, **k: None
+    torch.cuda.get_device_name = lambda *a, **k: "NVIDIA A100-SPOOFED"
+    torch.cuda.get_device_capability = lambda *a, **k: (8, 0)
+    torch.cuda.is_bf16_supported = lambda *a, **k: True
+    torch.cuda._is_in_bad_fork = lambda *a, **k: False  # type: ignore[attr-defined]
+
+    class _Props:
+        name = "NVIDIA A100-SPOOFED"
+        major = 8
+        minor = 0
+        total_memory = 80 * 1024**3
+        multi_processor_count = 108
+        is_integrated = False
+        is_multi_gpu_board = False
+
+    torch.cuda.get_device_properties = lambda *a, **k: _Props()  # type: ignore[assignment]
+
+    # ----- cudart() wrapper -----------------------------------------------
+    class _CudaRt:
+        @staticmethod
+        def cudaMemGetInfo(device: int = 0):
+            return (0, 80 * 1024**3)
+
+        @staticmethod
+        def cudaGetDeviceCount(*_a, **_k):
+            return 0  # Not used on the spoof path
+
+        @staticmethod
+        def cudaSetDevice(*_a, **_k):
+            return 0
+
+    torch.cuda.cudart = lambda: _CudaRt()  # type: ignore[assignment]
+
+    # ----- memory module --------------------------------------------------
+    try:
+        import torch.cuda.memory as _cuda_memory  # type: ignore
+
+        _cuda_memory.mem_get_info = lambda *a, **k: (0, 80 * 1024**3)
+        _cuda_memory.memory_stats = lambda *a, **k: {}
+        _cuda_memory.memory_allocated = lambda *a, **k: 0
+        _cuda_memory.max_memory_allocated = lambda *a, **k: 0
+        _cuda_memory.memory_reserved = lambda *a, **k: 0
+        _cuda_memory.max_memory_reserved = lambda *a, **k: 0
+        _cuda_memory.reset_peak_memory_stats = lambda *a, **k: None
+    except Exception:
+        pass
+
+    # ----- nvtx no-op stub ------------------------------------------------
+    nvtx_stub = types.ModuleType("torch.cuda.nvtx")
+    nvtx_stub.range_push = lambda *a, **k: None  # type: ignore[attr-defined]
+    nvtx_stub.range_pop = lambda *a, **k: None  # type: ignore[attr-defined]
+    nvtx_stub.mark = lambda *a, **k: None  # type: ignore[attr-defined]
+    sys.modules.setdefault("torch.cuda.nvtx", nvtx_stub)
+    torch.cuda.nvtx = nvtx_stub  # type: ignore[attr-defined]
+
+    # ----- random API ----------------------------------------------------
+    # CRITICAL: torch.manual_seed() internally calls torch.cuda.manual_seed_all(),
+    # so routing the cuda seed APIs back through torch.manual_seed would
+    # infinite-recurse (observed as RecursionError in run #8 cells 2/3 of the
+    # consolidated CI matrix). No-op them: callers that explicitly seed CUDA
+    # have already paid the cost of seeding CPU via torch.manual_seed; the
+    # CUDA-side seeding has no meaning on a GPU-less runner.
+    torch.cuda.manual_seed = lambda *a, **k: None  # type: ignore[assignment]
+    torch.cuda.manual_seed_all = lambda *a, **k: None  # type: ignore[assignment]
+    # rng_state APIs: return a CPU-shaped placeholder and accept anything for
+    # set; do NOT route through torch.set_rng_state / get_rng_state -- those
+    # operate on the CPU RNG directly and are independent of the cuda surface.
+    import torch as _t
+
+    _empty_rng_state = _t.empty(0, dtype = _t.uint8)
+    torch.cuda.get_rng_state = lambda *a, **k: _empty_rng_state.clone()  # type: ignore[assignment]
+    torch.cuda.set_rng_state = lambda *a, **k: None  # type: ignore[assignment]
+    torch.cuda.get_rng_state_all = lambda *a, **k: [_empty_rng_state.clone()]  # type: ignore[attr-defined]
+    torch.cuda.set_rng_state_all = lambda *a, **k: None  # type: ignore[attr-defined]
+    torch.cuda.initial_seed = lambda *a, **k: 0  # type: ignore[assignment]
+    torch.cuda.seed = lambda *a, **k: None  # type: ignore[assignment]
+    torch.cuda.seed_all = lambda *a, **k: None  # type: ignore[assignment]
+
+    # ----- Stream / Event no-op classes -----------------------------------
+    class _NoopStream:
+        def __init__(self, *a, **k): ...
+        def __enter__(self):
+            return self
+
+        def __exit__(self, *a):
+            return False
+
+        def synchronize(self, *a, **k): ...
+        def wait_stream(self, *a, **k): ...
+        def query(self):
+            return True
+
+    class _NoopEvent:
+        def __init__(self, *a, **k): ...
+        def record(self, *a, **k): ...
+        def wait(self, *a, **k): ...
+        def query(self):
+            return True
+
+        def synchronize(self, *a, **k): ...
+        def elapsed_time(self, *a, **k):
+            return 0.0
+
+    torch.cuda.Stream = _NoopStream  # type: ignore[assignment]
+    torch.cuda.Event = _NoopEvent  # type: ignore[assignment]
+    torch.cuda.stream = lambda s: s if s is not None else _NoopStream()  # type: ignore[assignment]
+    torch.cuda.current_stream = lambda *a, **k: _NoopStream()  # type: ignore[assignment]
+    torch.cuda.default_stream = lambda *a, **k: _NoopStream()  # type: ignore[assignment]
+
+    # ----- pin_memory drop -------------------------------------------------
+    # `torch.empty(..., pin_memory=True)` and friends raise on a CPU-only
+    # build. Strip the kwarg — pin_memory has no meaning here.
+    for _name in (
+        "empty",
+        "zeros",
+        "ones",
+        "empty_like",
+        "zeros_like",
+        "ones_like",
+        "rand",
+        "randn",
+        "randint",
+    ):
+        _orig = getattr(torch, _name, None)
+        if _orig is None:
+            continue
+
+        def _wrap(*args: Any, _orig = _orig, **kwargs: Any):
+            kwargs.pop("pin_memory", None)
+            return _orig(*args, **kwargs)
+
+        setattr(torch, _name, _wrap)
+
+    # Tensor.pin_memory() instance method: also a no-op (return self).
+    if hasattr(torch.Tensor, "pin_memory"):
+        torch.Tensor.pin_memory = lambda self, *a, **k: self  # type: ignore[assignment]
+    if hasattr(torch.Tensor, "is_pinned"):
+        torch.Tensor.is_pinned = lambda self, *a, **k: False  # type: ignore[assignment]
+
+    # ----- amp.GradScaler: use the real one if torch ships a CPU-friendly
+    # path, else stub. Newer torch ships torch.amp.GradScaler that handles
+    # CPU; torch.cuda.amp.GradScaler is a wrapper. Both should work; just
+    # guard against import error.
+    try:
+        import torch.cuda.amp  # type: ignore
+    except Exception:
+        cuda_amp = types.ModuleType("torch.cuda.amp")
+
+        class _StubScaler:
+            def __init__(self, *a, **k): ...
+            def scale(self, x):
+                return x
+
+            def step(self, opt):
+                opt.step()
+
+            def update(self, *a, **k): ...
+            def unscale_(self, *a, **k): ...
+            def get_scale(self):
+                return 1.0
+
+            def is_enabled(self):
+                return False
+
+            def state_dict(self):
+                return {}
+
+            def load_state_dict(self, *a, **k): ...
+
+        cuda_amp.GradScaler = _StubScaler  # type: ignore[attr-defined]
+        sys.modules.setdefault("torch.cuda.amp", cuda_amp)
+        torch.cuda.amp = cuda_amp  # type: ignore[attr-defined]
+
+    # ----- Sentinel ------------------------------------------------------
+    torch.cuda._unsloth_consolidated_spoof = True  # type: ignore[attr-defined]
+
+
+if __name__ == "__main__":
+    apply()
+    print("CUDA spoof applied.")
diff --git a/tests/test_mlx_save_lora_adapters_filter.py b/tests/test_mlx_save_lora_adapters_filter.py
new file mode 100644
index 000000000..b227081fa
--- /dev/null
+++ b/tests/test_mlx_save_lora_adapters_filter.py
@@ -0,0 +1,238 @@
+# Unsloth Zoo - Utilities for Unsloth
+# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+"""Combined coverage for unslothai/unsloth-zoo PR #692 + PR #679.
+
+PR #692 (fix-mlx-export-adapters) makes ``save_lora_adapters`` keep only
+tensors whose flattened name contains ``lora_``. PR #679
+(fix/mlx-lora-adapter-metadata) makes the same save path persist live
+``rank`` / ``scale`` / ``dropout`` plus ``peft_type=LORA`` into
+``adapter_config.json``.
+
+Neither PR shipped a test that exercises the combined surface; this
+file closes that gap. Uses the ``mlx_simulation`` shim so it runs on
+non-Apple CI (Linux/Windows) as well as macOS.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+import torch
+
+
+@pytest.fixture(autouse=True, scope="module")
+def _install_shim():
+    from mlx_simulation import simulate_mlx_on_torch
+    simulate_mlx_on_torch()
+
+
+# ---------------------------------------------------------------------------
+# Minimal mock LoRA module: exposes the attributes _enrich_mlx_adapter_config
+# inspects (``lora_a``, ``lora_b``, ``scale``, ``dropout``) and the iteration
+# helpers ``save_lora_adapters`` walks (``parameters``, ``named_modules``).
+# ---------------------------------------------------------------------------
+class _MockDropout:
+    """Mirrors MLX's nn.Dropout: stores keep-probability as ``_p_1``.
+
+    _get_mlx_dropout_probability reads ``.p`` first, then falls back to
+    ``1.0 - _p_1``; this fixture exercises the fallback branch (PR #679
+    edge case).
+    """
+
+    def __init__(self, p: float):
+        self._p_1 = 1.0 - p
+
+
+class _MockLoRALinear:
+    """LoRA-wrapped Linear. Shape convention matches mlx-lm's LoRALinear:
+    ``lora_a`` is ``(in_features, rank)`` and ``lora_b`` is
+    ``(rank, out_features)`` so the matmul partners pair up as
+    ``lora_a_shape[-1] == lora_b_shape[0] == rank`` -- which is what
+    PR #679's ``_infer_mlx_lora_rank`` reads.
+    """
+
+    def __init__(self, in_features: int, out_features: int, rank: int, scale: float, dropout: float):
+        self.weight = torch.zeros(out_features, in_features)
+        self.lora_a = torch.zeros(in_features, rank)
+        self.lora_b = torch.zeros(rank, out_features)
+        self.scale = scale
+        self.dropout = _MockDropout(dropout)
+
+
+class _MockPlainLinear:
+    def __init__(self, in_features: int, out_features: int):
+        self.weight = torch.zeros(out_features, in_features)
+
+
+class _MockModel:
+    """A tiny model with one LoRA-wrapped attention proj + one plain MLP proj.
+
+    Module attribute names are picked to **not** contain ``lora_`` so the
+    substring filter in ``save_lora_adapters`` is exercised against the
+    realistic case where only the leaf parameter (``...lora_a`` /
+    ``...lora_b``) carries the prefix -- mirroring real MLX models where
+    LoRA is attached on e.g. ``model.layers.N.self_attn.q_proj.lora_*``.
+
+    ``parameters()`` returns the flat name→tensor map ``save_lora_adapters``
+    consumes via ``mlx.utils.tree_flatten``. ``named_modules()`` is the
+    iteration ``_enrich_mlx_adapter_config`` walks looking for objects with
+    ``lora_a`` + ``lora_b`` to record rank/scale/dropout.
+    """
+
+    def __init__(self):
+        self.q_proj = _MockLoRALinear(
+            in_features=8, out_features=16, rank=4, scale=2.5, dropout=0.25,
+        )
+        self.up_proj = _MockPlainLinear(in_features=16, out_features=32)
+        # _enrich_mlx_adapter_config probes these; leaving everything except
+        # ``_hf_repo`` as None keeps the config helper on the cheap fast path.
+        self._hf_repo = "unsloth/tiny-test-model"
+        self._config = None
+        self._unsloth_quantization_config = None
+        self._unsloth_quantization_policy = None
+        self._unsloth_quantized_source = None
+        self._unsloth_base_revision = None
+        self._unsloth_base_commit_hash = None
+        self._src_path = None
+
+    def parameters(self):
+        return {
+            "q_proj.weight": self.q_proj.weight,
+            "q_proj.lora_a": self.q_proj.lora_a,
+            "q_proj.lora_b": self.q_proj.lora_b,
+            "up_proj.weight": self.up_proj.weight,
+        }
+
+    def trainable_parameters(self):
+        return self.parameters()
+
+    def named_modules(self):
+        yield "", self
+        yield "q_proj", self.q_proj
+        yield "up_proj", self.up_proj
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+def test_save_lora_adapters_filters_to_lora_only(tmp_path: Path):
+    """PR #692: adapter export keeps only ``lora_*`` keys.
+
+    Both ``q_proj.weight`` (base weight inside the LoRA module) and
+    ``up_proj.weight`` (plain Linear) must be excluded — only the two
+    ``q_proj.lora_a`` / ``q_proj.lora_b`` tensors survive.
+    """
+    from unsloth_zoo.mlx.utils import save_lora_adapters
+
+    model = _MockModel()
+    out_dir = tmp_path / "adapter_lora_only"
+    save_lora_adapters(model, out_dir)
+
+    safe_path = out_dir / "adapters.safetensors"
+    assert safe_path.is_file(), "adapters.safetensors must be written"
+
+    from safetensors.torch import load_file
+    saved = load_file(str(safe_path))
+    keys = set(saved.keys())
+
+    assert keys == {"q_proj.lora_a", "q_proj.lora_b"}, (
+        f"adapter export leaked non-LoRA tensors: {sorted(keys)}"
+    )
+
+
+def test_save_lora_adapters_writes_pr679_metadata(tmp_path: Path):
+    """PR #679: adapter_config.json carries live rank/scale/dropout +
+    peft_type=LORA + the keep-probability dropout fallback.
+    """
+    from unsloth_zoo.mlx.utils import save_lora_adapters
+
+    model = _MockModel()
+    out_dir = tmp_path / "adapter_metadata"
+    save_lora_adapters(model, out_dir)
+
+    cfg_path = out_dir / "adapter_config.json"
+    assert cfg_path.is_file(), "adapter_config.json must be written"
+    cfg = json.loads(cfg_path.read_text())
+
+    assert cfg.get("peft_type") == "LORA", cfg
+    assert cfg.get("rank") == 4, cfg
+    assert cfg.get("scale") == pytest.approx(2.5), cfg
+    # PR #679 fix: read dropout via the ``_p_1`` keep-prob fallback when
+    # ``.p`` is absent (real MLX nn.Dropout stores it that way).
+    assert cfg.get("dropout") == pytest.approx(0.25), cfg
+
+    params = cfg.get("lora_parameters") or {}
+    assert params.get("rank") == 4, params
+    assert params.get("scale") == pytest.approx(2.5), params
+    assert params.get("dropout") == pytest.approx(0.25), params
+
+    # PR #679 also records the LoRA topology so reload reconstructs the
+    # same attachment surface.
+    assert "q_proj" in (cfg.get("unsloth_mlx_lora_module_paths") or []), cfg
+
+
+def test_save_lora_adapters_raises_when_no_lora_tensors_present(tmp_path: Path):
+    """PR #692: explicit ValueError when nothing matched the ``lora_`` filter.
+
+    Guards against silently saving an empty/garbage adapter export when the
+    model has no LoRA layers (e.g. merged adapter state).
+    """
+    from unsloth_zoo.mlx.utils import save_lora_adapters
+
+    class _NoLoRAModel(_MockModel):
+        def parameters(self):
+            return {"up_proj.weight": self.up_proj.weight}
+
+        def named_modules(self):
+            yield "", self
+            yield "up_proj", self.up_proj
+
+    out_dir = tmp_path / "adapter_empty"
+    with pytest.raises(ValueError, match="LoRA adapter tensors"):
+        save_lora_adapters(_NoLoRAModel(), out_dir)
+
+
+def test_save_trainable_adapters_keeps_all_trainable(tmp_path: Path):
+    """PR #692 separation: ``save_trainable_adapters`` (used by mid-training
+    checkpoints) keeps ALL trainable tensors, including base weights — it
+    must NOT inherit the LoRA-only filter.
+    """
+    from unsloth_zoo.mlx.utils import save_trainable_adapters
+
+    model = _MockModel()
+    out_dir = tmp_path / "adapter_trainable"
+    save_trainable_adapters(model, out_dir)
+
+    safe_path = out_dir / "adapters.safetensors"
+    assert safe_path.is_file(), "adapters.safetensors must be written"
+
+    from safetensors.torch import load_file
+    saved = load_file(str(safe_path))
+    keys = set(saved.keys())
+
+    assert keys == {
+        "q_proj.weight",
+        "q_proj.lora_a",
+        "q_proj.lora_b",
+        "up_proj.weight",
+    }, (
+        "training checkpoint should preserve every trainable tensor, "
+        f"got {sorted(keys)}"
+    )

From c35d0c33460445904ec8f7409d20db0cb582da1b Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 12:05:46 +0000
Subject: [PATCH 12/13] [CI-validation] Drop inherited upstream workflows from
 staging branch

The seven upstream workflows (consolidated-tests-ci, lint-ci, mlx-ci,
security-audit, stale, studio-export-fix-ci, wheel-smoke) would fire on
every push and PR-event to this throwaway staging branch and burn runner
minutes that have nothing to do with validating MLX PRs #679, #682, #692.

Keep only the three mlx-pr-* workflows on this branch. They stay in
upstream main / origin/main untouched -- this deletion is scoped to the
staging branch only.
---
 .github/workflows/consolidated-tests-ci.yml | 255 --------------------
 .github/workflows/lint-ci.yml               | 122 ----------
 .github/workflows/mlx-ci.yml                |  70 ------
 .github/workflows/security-audit.yml        | 226 -----------------
 .github/workflows/stale.yml                 |  37 ---
 .github/workflows/studio-export-fix-ci.yml  |  62 -----
 .github/workflows/wheel-smoke.yml           | 118 ---------
 7 files changed, 890 deletions(-)
 delete mode 100644 .github/workflows/consolidated-tests-ci.yml
 delete mode 100644 .github/workflows/lint-ci.yml
 delete mode 100644 .github/workflows/mlx-ci.yml
 delete mode 100644 .github/workflows/security-audit.yml
 delete mode 100644 .github/workflows/stale.yml
 delete mode 100644 .github/workflows/studio-export-fix-ci.yml
 delete mode 100644 .github/workflows/wheel-smoke.yml

diff --git a/.github/workflows/consolidated-tests-ci.yml b/.github/workflows/consolidated-tests-ci.yml
deleted file mode 100644
index 6ab589c20..000000000
--- a/.github/workflows/consolidated-tests-ci.yml
+++ /dev/null
@@ -1,255 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# Python compatibility + repo test gate. Adapted from unsloth's consolidated-tests-ci.yml.
-# Jobs: python-version-collect (pytest --collect-only on 3.10-3.13), repo-tests-cpu
-# (tests/security HARD GATE + CPU-pure zoo tests), core-upstream-matrix (HF/TRL/peft
-# drift detector across 3 cells -- the high-value zoo coverage).
-
-name: Tests CI
-
-on:
-  pull_request:
-  push:
-    branches: [main]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  # Python compatibility: pytest --collect-only per interpreter.
-  python-version-collect:
-    name: (Python ${{ matrix.python-version }})
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ['3.10', '3.11', '3.12', '3.13']
-    steps:
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: ${{ matrix.python-version }}
-          cache: 'pip'
-
-      - name: Install CPU-only torch + zoo runtime deps
-        # CPU index avoids the multi-GB CUDA wheel set. `--no-deps unsloth`
-        # satisfies the find_spec("unsloth") guard at unsloth_zoo/__init__.py:128.
-        run: |
-          python -m pip install --upgrade pip
-          pip install --index-url https://download.pytorch.org/whl/cpu \
-            "torch>=2.4.0,<2.11.0"
-          pip install -e .[core]
-          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
-          pip install pytest==9.0.3
-
-      - name: pytest --collect-only
-        continue-on-error: true
-        run: python -m pytest tests/ --collect-only -q
-
-  # CPU-only repo tests. HARD GATE on tests/security.
-  repo-tests-cpu:
-    name: Repo tests (CPU)
-    runs-on: ubuntu-latest
-    timeout-minutes: 20
-    steps:
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Install runtime + test deps
-        # --no-deps unsloth satisfies the find_spec("unsloth") guard at unsloth_zoo/__init__.py:128.
-        run: |
-          python -m pip install --upgrade pip
-          pip install --index-url https://download.pytorch.org/whl/cpu \
-            "torch>=2.4.0,<2.11.0"
-          pip install -e .[core]
-          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
-          pip install pytest==9.0.3 pyyaml==6.0.2
-
-      - name: pytest tests/security (HARD GATE)
-        run: python -m pytest tests/security -v
-
-      - name: pytest tests/test_pr_a_imports + zoo-specific CPU tests
-        # Run as SEPARATE pytest invocation: tests/security/conftest.py installs a
-        # session-scoped network_blocker autouse fixture that would otherwise block
-        # test_pypi_version_sync from reaching pypi.org.
-        continue-on-error: true
-        run: |
-          python -m pytest \
-            tests/test_pr_a_imports.py \
-            tests/test_rl_replacements_cpu.py \
-            tests/test_temporary_patches_imports.py \
-            tests/test_zoo_history_regressions.py \
-            tests/test_pypi_version_sync.py \
-            -v
-
-  # Core (HF/TRL/peft) drift matrix. Three cells: HF=4.57.6+TRL<1, HF=latest+TRL=latest,
-  # and pyproject defaults. fail-fast=false; drift in one cell shouldn't cancel others.
-  core-upstream-matrix:
-    name: "Core (${{ matrix.combo.label }})"
-    runs-on: ubuntu-latest
-    timeout-minutes: 30
-    strategy:
-      fail-fast: false
-      matrix:
-        combo:
-          - id: t4576-trl0latest
-            label: "HF=4.57.6 + TRL<1"
-            transformers_spec: "transformers==4.57.6"
-            trl_spec: "trl>=0.18.2,<1.0.0"
-            peft_spec: "peft>=0.18,<0.20"
-          - id: tlatest5-trl1latest
-            label: "HF=latest + TRL=latest"
-            transformers_spec: "transformers>=5,<6"
-            trl_spec: "trl>=1,<2"
-            peft_spec: "peft"
-          - id: pyproject
-            label: "HF=default + TRL=default"
-            transformers_spec: "__from_pyproject__"
-            trl_spec: "__from_pyproject__"
-            peft_spec: "__from_pyproject__"
-    env:
-      MATRIX_TRANSFORMERS_SPEC: ${{ matrix.combo.transformers_spec }}
-      MATRIX_TRL_SPEC: ${{ matrix.combo.trl_spec }}
-      MATRIX_PEFT_SPEC: ${{ matrix.combo.peft_spec }}
-      MATRIX_COMBO_ID: ${{ matrix.combo.id }}
-      # Pure-Python protobuf parser; transformers' bundled *_pb2.py is rejected by C++ protobuf 4+/5+.
-      PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python
-      UNSLOTH_COMPILE_DISABLE: '1'
-      # Secondary handshake after find_spec("unsloth") guard at unsloth_zoo/__init__.py:128.
-      UNSLOTH_IS_PRESENT: '1'
-    steps:
-      - name: Harden runner (audit)
-        # audit (not block): matrix pulls arbitrary transformers/TRL/peft pins.
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Resolve matrix specs (handle __from_pyproject__ sentinel)
-        # Resolve transformers/trl/peft from pyproject.toml when the sentinel is used.
-        run: |
-          set -euxo pipefail
-          python <<'PY' >> "$GITHUB_ENV"
-          import os, re, tomllib
-          spec_t = os.environ["MATRIX_TRANSFORMERS_SPEC"]
-          spec_r = os.environ["MATRIX_TRL_SPEC"]
-          spec_p = os.environ["MATRIX_PEFT_SPEC"]
-
-          def _pkg_name(spec: str) -> str:
-              m = re.match(r"\s*([A-Za-z0-9_.-]+)", spec)
-              return (m.group(1).lower() if m else "")
-
-          if "__from_pyproject__" in (spec_t, spec_r, spec_p):
-              with open("pyproject.toml", "rb") as f:
-                  doc = tomllib.load(f)
-              proj = doc.get("project", {})
-              all_deps: list[str] = list(proj.get("dependencies", []))
-              for _name, dep_list in proj.get("optional-dependencies", {}).items():
-                  all_deps.extend(dep_list)
-
-              # Strip environment markers so the resolved spec is pip-installable.
-              def _strip_marker(s: str) -> str:
-                  return s.split(";", 1)[0].strip()
-
-              if spec_t == "__from_pyproject__":
-                  spec_t = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "transformers"),
-                                "transformers")
-              if spec_r == "__from_pyproject__":
-                  spec_r = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "trl"),
-                                "trl")
-              if spec_p == "__from_pyproject__":
-                  spec_p = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "peft"),
-                                "peft")
-          print(f"RESOLVED_TRANSFORMERS_SPEC={spec_t}")
-          print(f"RESOLVED_TRL_SPEC={spec_r}")
-          print(f"RESOLVED_PEFT_SPEC={spec_p}")
-          PY
-          grep RESOLVED_ "$GITHUB_ENV" || true
-
-      - name: Install torch CPU + zoo + matrix-specified upstream libs
-        # Two-phase: `-e .[core]` for pyproject defaults, then `-U <RESOLVED_*>` to override.
-        # The -U is critical so pip will downgrade transformers (e.g. cell-1 pin 4.57.6).
-        # --no-deps unsloth satisfies the find_spec guard at unsloth_zoo/__init__.py:128.
-        run: |
-          set -euxo pipefail
-          python -m pip install --upgrade pip
-          pip install --index-url https://download.pytorch.org/whl/cpu \
-            "torch>=2.4.0,<2.11.0" "torchvision<0.26"
-          # torchvision: transitive import of transformers.models.qwen2_vl
-          # / qwen2_5_vl image processors. The Qwen2_VL image-processor
-          # zoo references chains through `from torchvision...` at module
-          # top, so a missing torchvision turns the existence-probe drift
-          # tests RED on "ModuleNotFoundError: No module named 'torchvision'".
-          # CPU build is plenty; we don't need the CUDA variant.
-          pip install -e .[core]
-          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
-          # Override with matrix-resolved specs.
-          pip install -U "$RESOLVED_TRANSFORMERS_SPEC" "$RESOLVED_TRL_SPEC" "$RESOLVED_PEFT_SPEC"
-          # bitsandbytes: imported at module scope in saving_utils.py (_active_merge_device path).
-          pip install 'bitsandbytes>=0.45'
-          # IPython + ipywidgets: logging_utils.py:50 imports transformers.utils.notebook.
-          # Required so drift detector only fires on real drift, not missing CI deps.
-          pip install 'ipython>=8' 'ipywidgets>=8'
-          pip install pytest==9.0.3 packaging
-          echo "::group::Installed transformers + trl + peft + torch versions"
-          pip show transformers
-          pip show trl
-          pip show peft
-          pip show torch
-          echo "::endgroup::"
-
-      - name: pytest upstream-regression suite (94 pinned + 117 expanded)
-        # 626 drift-detector tests / cell across 12 files. HARD GATE: a red cell
-        # means real upstream drift (transformers/trl/peft/vllm/datasets renamed
-        # or removed a symbol zoo references). Zoo PRs #4 through #635 mined.
-        run: |
-          python -m pytest -v --tb=short -rs \
-            tests/test_upstream_pinned_symbols_transformers.py \
-            tests/test_upstream_pinned_symbols_trl_vllm.py \
-            tests/test_upstream_pinned_symbols_accelerator.py \
-            tests/test_zoo_history_regressions_deep.py \
-            tests/test_upstream_import_fixes_drift.py \
-            tests/test_zoo_source_upstream_refs.py \
-            tests/test_upstream_signatures.py \
-            tests/test_extended_dep_api_pins.py \
-            tests/test_upstream_source_patterns.py \
-            tests/test_compiler_rewriter_exhaustive.py \
-            tests/test_compiler_dynamic_exec.py \
-            tests/test_temporary_patches_exhaustive.py \
-            tests/test_unsloth_zoo_lora_merge.py \
-            tests/test_peft_paramwrapper_layout_drift.py \
-            tests/test_transformers_moe_structure_drift.py \
-            tests/test_moe_merge_e2e_cpu.py
diff --git a/.github/workflows/lint-ci.yml b/.github/workflows/lint-ci.yml
deleted file mode 100644
index 75446a499..000000000
--- a/.github/workflows/lint-ci.yml
+++ /dev/null
@@ -1,122 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# Whole-repo Python source-lint gate. Adapted from unsloth's lint-ci.yml:
-# Python (compileall + narrow ruff) + YAML/JSON round-trip. Dropped vs unsloth:
-# shell lint (zoo has no committed *.sh), TypeScript/Rust (Studio/Tauri are unsloth-side).
-
-name: Lint CI
-
-on:
-  pull_request:
-  push:
-    branches: [main]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  source-lint:
-    name: Source lint (Python + YAML + JSON)
-    runs-on: ubuntu-latest
-    timeout-minutes: 5
-    steps:
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - run: pip install 'ruff==0.15.12' 'pyyaml>=6'
-
-      - name: Python AST/syntax check (every committed .py must compile)
-        # continue-on-error during CI bootstrap: pyproject.toml declares
-        # `requires-python = ">=3.9,<3.15"` but temporary_patches/gpt_oss.py
-        # uses a 3.10+ `match` statement. Tracked as a separate cleanup PR.
-        continue-on-error: true
-        run: |
-          python -m compileall -q -j 0 unsloth_zoo tests scripts
-
-      - name: Python ruff check (narrow gate)
-        # E9 / F63 / F7 / F82: syntax errors, broken comparisons, undefined names.
-        # continue-on-error during CI bootstrap: first run on main surfaced 13
-        # latent findings (rl_replacements.py L1128 F821, gpt_oss match-on-3.9).
-        continue-on-error: true
-        run: |
-          ruff check --select E9,F63,F7,F82 unsloth_zoo tests scripts
-
-      - name: No leftover debugger / pdb / breakpoint calls
-        # Catches `import pdb`, `pdb.set_trace()`, `breakpoint()`, `import ipdb`.
-        # continue-on-error during bootstrap: rl_replacements.py has a
-        # `#breakpoint()` comment the regex matches (# is [^A-Za-z_]).
-        continue-on-error: true
-        run: |
-          set -e
-          if grep -rnE '(^|[^A-Za-z_])(pdb\.set_trace|breakpoint)\(|^import (pdb|ipdb)$|^from (pdb|ipdb) import' \
-              --include='*.py' unsloth_zoo scripts; then
-            echo "::error::Leftover debugger call found above. Remove it." >&2
-            exit 1
-          fi
-
-      - name: YAML round-trip for every committed YAML
-        run: |
-          python <<'PY'
-          import pathlib, sys, yaml
-          fails = []
-          for p in pathlib.Path(".").rglob("*.yml"):
-              if any(part.startswith(".") and part not in (".github",) for part in p.parts):
-                  continue
-              try:
-                  yaml.safe_load(p.read_text())
-              except Exception as exc:
-                  fails.append(f"{p}: {exc}")
-          for p in pathlib.Path(".").rglob("*.yaml"):
-              if any(part.startswith(".") and part not in (".github",) for part in p.parts):
-                  continue
-              try:
-                  yaml.safe_load(p.read_text())
-              except Exception as exc:
-                  fails.append(f"{p}: {exc}")
-          if fails:
-              for f in fails:
-                  print("::error::", f)
-              sys.exit(1)
-          print(f"YAML round-trip OK")
-          PY
-
-      - name: JSON round-trip for every committed JSON
-        run: |
-          python <<'PY'
-          import pathlib, json, sys
-          fails = []
-          for p in pathlib.Path(".").rglob("*.json"):
-              if any(part in (".git", "node_modules", "__pycache__", "build", "dist") for part in p.parts):
-                  continue
-              try:
-                  json.loads(p.read_text())
-              except Exception as exc:
-                  fails.append(f"{p}: {exc}")
-          if fails:
-              for f in fails:
-                  print("::error::", f)
-              sys.exit(1)
-          print("JSON round-trip OK")
-          PY
-
-      - name: enforce kwargs spacing
-        # Style rule mirrored from unsloth: kwargs use `name = value` not `name=value`.
-        continue-on-error: true
-        run: |
-          python3 scripts/enforce_kwargs_spacing.py unsloth_zoo
diff --git a/.github/workflows/mlx-ci.yml b/.github/workflows/mlx-ci.yml
deleted file mode 100644
index 3df8be9d9..000000000
--- a/.github/workflows/mlx-ci.yml
+++ /dev/null
@@ -1,70 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# MLX-specific CI on macOS arm64 (Apple Silicon) so mlx / mlx-lm / mlx-vlm wheels
-# resolve. Installs `unsloth_zoo[mlx]`, smoke-imports unsloth_zoo/mlx_*.py modules,
-# runs tests/test_mlx_torch_shim_smoke.py. Opt-in via `mlx` label to save macOS minutes.
-
-name: MLX CI on Mac M1
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened, labeled]
-  workflow_dispatch:
-  schedule:
-    # Daily @ 04:23 UTC -- off the security-audit cron rush at 04:13.
-    - cron: '23 4 * * *'
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  mlx-smoke:
-    name: MLX install + import smoke (Apple Silicon)
-    # Opt-in: schedule / workflow_dispatch always run; PR runs only with `mlx` label.
-    if: >-
-      github.event_name == 'schedule' ||
-      github.event_name == 'workflow_dispatch' ||
-      contains(github.event.pull_request.labels.*.name, 'mlx')
-    runs-on: macos-14   # Apple Silicon (M1) hosted runner
-    timeout-minutes: 30
-    steps:
-      # harden-runner block-mode is Linux-only; stay in audit on macOS for parity.
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Install zoo with MLX extras
-        # pyproject gates MLX deps on darwin+arm64; `.[mlx]` picks them up
-        # without the torch-on-Linux-CUDA path.
-        run: |
-          python -m pip install --upgrade pip
-          pip install -e .[mlx]
-          pip install pytest==9.0.3
-
-      - name: MLX module import smoke
-        run: |
-          python -c "import unsloth_zoo.mlx_loader; print('mlx_loader OK')"
-          python -c "import unsloth_zoo.mlx_compile; print('mlx_compile OK')"
-          python -c "import unsloth_zoo.mlx_utils; print('mlx_utils OK')"
-          python -c "import unsloth_zoo.mlx_trainer; print('mlx_trainer OK')"
-          python -c "import unsloth_zoo.mlx_cce; print('mlx_cce OK')"
-
-      - name: tests/test_mlx_torch_shim_smoke.py
-        # Exercises the MLX-on-torch shim end-to-end against the real mlx runtime
-        # on Apple Silicon; on Linux runners it would run against tests/mlx_simulation/ stubs.
-        run: python -m pytest tests/test_mlx_torch_shim_smoke.py -v
diff --git a/.github/workflows/security-audit.yml b/.github/workflows/security-audit.yml
deleted file mode 100644
index 28a73eed0..000000000
--- a/.github/workflows/security-audit.yml
+++ /dev/null
@@ -1,226 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# Pure-Python supply-chain audit for unsloth_zoo. Mirrors unslothai/unsloth's
-# security-audit.yml with npm/Cargo/Studio jobs stripped (zoo is pure Python).
-# Jobs: advisory-audit (pip-audit + trufflehog), pip-scan-packages (transitive
-# closure pattern scan), workflow-trigger-lint, tests-security (HARD GATE).
-
-name: Security audit
-
-on:
-  pull_request:
-    paths:
-      - 'pyproject.toml'
-      - 'scripts/scan_packages.py'
-      - 'scripts/lint_workflow_triggers.py'
-      - 'tests/security/**'
-      - '.github/workflows/security-audit.yml'
-  push:
-    branches: [main]
-  schedule:
-    - cron: '13 4 * * *'   # 04:13 UTC daily, off the cron rush
-  workflow_dispatch:
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  # Advisory-DB audit: pip-audit + trufflehog. Non-blocking while baseline settles.
-  advisory-audit:
-    name: advisory audit (pip + secrets)
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    steps:
-      - name: Harden runner (egress block)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: block
-          disable-sudo: true
-          allowed-endpoints: >
-            api.github.com:443
-            github.com:443
-            codeload.github.com:443
-            objects.githubusercontent.com:443
-            pypi.org:443
-            files.pythonhosted.org:443
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          fetch-depth: 0   # trufflehog needs full history for diff scans
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-
-      - name: Install pip-audit
-        run: python -m pip install --upgrade pip pip-audit
-
-      - name: Build filtered requirements set
-        # Reads pyproject.toml deps + extras into a flat requirements file.
-        # git+ specs are skipped (advisory-DB can't resolve them).
-        run: |
-          mkdir -p audit-reqs
-          python <<'PY' > audit-reqs/zoo-deps.txt
-          import tomllib
-          with open("pyproject.toml", "rb") as f:
-              d = tomllib.load(f)
-          core = d["project"]["dependencies"]
-          all_extras = []
-          for extra_name, specs in d["project"].get("optional-dependencies", {}).items():
-              # Skip self-referential extras like "huggingface = ['unsloth_zoo[core]']".
-              all_extras += [s for s in specs if "unsloth_zoo" not in s]
-          print("# Auto-generated from pyproject.toml by security-audit.yml.")
-          for spec in core + all_extras:
-              if "git+" in spec:
-                  print(f"# [security-audit] skipped git+ spec: {spec}")
-                  continue
-              print(spec)
-          PY
-
-      - name: pip-audit (advisory DB lookup)
-        continue-on-error: true
-        run: pip-audit --requirement audit-reqs/zoo-deps.txt --disable-pip --strict || true
-
-      - name: Trufflehog secret scan
-        continue-on-error: true
-        uses: trufflesecurity/trufflehog@17456f8c7d042d8c82c9a8ca9e937231f9f42e26  # v3.95.2
-        with:
-          base: ${{ github.event.repository.default_branch }}
-          head: HEAD
-          extra_args: --only-verified
-
-  # pip-scan-packages: downloads every PyPI archive in zoo's transitive closure and
-  # pattern-scans (catches the malicious-upload class that precedes CVE publication).
-  pip-scan-packages:
-    name: pip scan-packages (zoo transitive closure)
-    runs-on: ubuntu-latest
-    timeout-minutes: 25
-    steps:
-      - name: Harden runner (egress block)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: block
-          disable-sudo: true
-          allowed-endpoints: >
-            api.github.com:443
-            github.com:443
-            codeload.github.com:443
-            objects.githubusercontent.com:443
-            pypi.org:443
-            files.pythonhosted.org:443
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Install scan_packages.py runtime deps
-        # requests + packaging for PyPI's JSON API. Scanned packages are
-        # downloaded raw and inspected, never `pip install`-ed.
-        run: python -m pip install --upgrade pip requests packaging
-
-      - name: Build filtered requirements set
-        run: |
-          mkdir -p audit-reqs
-          python <<'PY' > audit-reqs/zoo-deps.txt
-          import tomllib
-          with open("pyproject.toml", "rb") as f:
-              d = tomllib.load(f)
-          core = d["project"]["dependencies"]
-          all_extras = []
-          for extra_name, specs in d["project"].get("optional-dependencies", {}).items():
-              all_extras += [s for s in specs if "unsloth_zoo" not in s]
-          print("# Auto-generated from pyproject.toml by security-audit.yml.")
-          for spec in core + all_extras:
-              if "git+" in spec:
-                  print(f"# [security-audit] skipped git+ spec: {spec}")
-                  continue
-              print(spec)
-          PY
-
-      - name: scan-packages (with deps)
-        continue-on-error: true
-        # --with-deps makes scan transitive. Archives are downloaded and
-        # pattern-scanned WITHOUT installing -- malicious wheels cannot execute.
-        run: python3 scripts/scan_packages.py --requirements audit-reqs/zoo-deps.txt --with-deps
-
-  # workflow-trigger-lint: refuses pull_request_target with PR-head checkout,
-  # restricted workflow_run without justification, and cache-key collisions.
-  workflow-trigger-lint:
-    name: workflow-trigger lint (pull_request_target / cache-poisoning)
-    runs-on: ubuntu-latest
-    timeout-minutes: 5
-    steps:
-      - name: Harden runner (egress block)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: block
-          disable-sudo: true
-          allowed-endpoints: >
-            api.github.com:443
-            github.com:443
-            codeload.github.com:443
-            objects.githubusercontent.com:443
-            pypi.org:443
-            files.pythonhosted.org:443
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-
-      - name: Install PyYAML
-        run: pip install pyyaml==6.0.2
-
-      - name: Run workflow-trigger lint
-        run: python3 scripts/lint_workflow_triggers.py
-
-  # HARD GATE: regression tests for scanner + lint scripts. Drift in IOC tables
-  # or scanner exit semantics fails this PR at review time.
-  tests-security:
-    name: pytest tests/security
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-    steps:
-      - name: Harden runner (egress block)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: block
-          disable-sudo: true
-          allowed-endpoints: >
-            api.github.com:443
-            github.com:443
-            codeload.github.com:443
-            objects.githubusercontent.com:443
-            pypi.org:443
-            files.pythonhosted.org:443
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-
-      - name: Install pytest + PyYAML
-        # PyYAML needed by scripts/lint_workflow_triggers.py, exercised via subprocess
-        # by tests/security/test_lint_workflow_triggers.py. (See unsloth PR #5397: without
-        # pyyaml the lint script exits 2.)
-        run: pip install pytest==9.0.3 pyyaml==6.0.2
-
-      - name: Run security regression tests
-        run: python3 -m pytest tests/security -v
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
deleted file mode 100644
index 1a4cf841d..000000000
--- a/.github/workflows/stale.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-name: 'Inactive Issue Pinger'
-
-on:
-  schedule:
-    - cron: '30 5 * * *' # Runs at 5:30 UTC every day
-
-jobs:
-  stale:
-    runs-on: ubuntu-latest
-    permissions:
-      issues: write
-
-    steps:
-      - uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f  # v10.2.0
-        with:
-          # The message to post on stale issues.
-          # This message will ping the issue author.
-          # Note: The stale bot action does not currently support a direct placeholder for the last commenter.
-          # As a workaround, this message encourages any participant to reply.
-          stale-issue-message: >
-            Is this issue still important to you?
-            Apologies in advance we might have missed this issue as well.
-            For faster response times, please post on our Reddit server - https://www.reddit.com/r/unsloth or our Discord - https://discord.com/invite/unsloth 
-
-          # The number of days of inactivity before an issue is considered stale.
-          days-before-issue-stale: 9999
-
-          # Set to -1 to never close stale issues.
-          days-before-issue-close: -1
-
-          # A label to apply to stale issues.
-          stale-issue-label: 'inactive'
-
-          # The number of operations to perform per run to avoid rate limiting.
-          operations-per-run: 500
-
-          enable-statistics: false
diff --git a/.github/workflows/studio-export-fix-ci.yml b/.github/workflows/studio-export-fix-ci.yml
deleted file mode 100644
index 699b78d16..000000000
--- a/.github/workflows/studio-export-fix-ci.yml
+++ /dev/null
@@ -1,62 +0,0 @@
-name: studio-export-fix-ci
-
-on:
-  push:
-    branches: [main, nightly]
-    paths:
-      - "unsloth_zoo/llama_cpp.py"
-      - "tests/test_quantize_gguf_q2_k_l.py"
-      - "tests/test_convert_hf_to_gguf_patcher.py"
-      - ".github/workflows/studio-export-fix-ci.yml"
-  pull_request:
-    paths:
-      - "unsloth_zoo/llama_cpp.py"
-      - "tests/test_quantize_gguf_q2_k_l.py"
-      - "tests/test_convert_hf_to_gguf_patcher.py"
-      - ".github/workflows/studio-export-fix-ci.yml"
-
-concurrency:
-  group: studio-export-fix-${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  studio-export-fix:
-    name: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      # Cap matrix at 3 in flight so Windows stays under the repo-level
-      # 5-concurrent-Windows-runner limit when this job runs alongside others.
-      max-parallel: 3
-      matrix:
-        os: [ubuntu-latest, macos-14, windows-latest]
-    runs-on: ${{ matrix.os }}
-    timeout-minutes: 15
-    env:
-      # 5000/h vs 60/h on raw.githubusercontent.com for the live-upstream tests.
-      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      UNSLOTH_COMPILE_DISABLE: '1'
-      PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python
-    steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-          cache: pip
-
-      - name: Install minimal test deps
-        run: |
-          python -m pip install --upgrade pip
-          # Pure-Python tests: monkeypatch subprocess + AST-parse upstream files.
-          # No torch / transformers needed. Keep slim so Windows cold start stays under a minute.
-          python -m pip install pytest psutil requests tqdm
-
-      - name: Run patcher + q2_k_l unit tests
-        shell: bash
-        run: |
-          pytest -v \
-            tests/test_quantize_gguf_q2_k_l.py \
-            tests/test_convert_hf_to_gguf_patcher.py
diff --git a/.github/workflows/wheel-smoke.yml b/.github/workflows/wheel-smoke.yml
deleted file mode 100644
index 626e8dccb..000000000
--- a/.github/workflows/wheel-smoke.yml
+++ /dev/null
@@ -1,118 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# Build PyPI wheel + sdist, verify content sanity, import-smoke in a clean venv.
-# Adapted from unsloth's wheel-smoke.yml; zoo's content checks: package present,
-# no tests/ shipped, no stray .pyc, real version string, import smoke succeeds.
-
-name: Wheel CI
-
-on:
-  pull_request:
-    paths:
-      - 'pyproject.toml'
-      - 'unsloth_zoo/**'
-      - 'tests/**'
-      - '.github/workflows/wheel-smoke.yml'
-  push:
-    branches: [main]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  wheel:
-    name: Wheel build + content sanity + import smoke
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    steps:
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-
-      - name: Build wheel + sdist
-        run: |
-          python -m pip install --upgrade pip build
-          rm -rf dist build ./*.egg-info
-          python -m build
-
-      - name: Wheel content sanity
-        run: |
-          python - <<'PY'
-          import zipfile, glob, sys, re
-          wheels = glob.glob("dist/unsloth_zoo-*.whl")
-          if not wheels:
-              print("FAIL: no wheel produced"); sys.exit(2)
-          w = wheels[0]
-          print(f"wheel: {w}")
-          # Version sanity: dynamic metadata pulls from unsloth_zoo.__init__.__version__.
-          m = re.match(r"dist/unsloth_zoo-([^-]+)-py3-none-any\.whl", w)
-          version = m.group(1) if m else None
-          print(f"wheel version: {version}")
-          with zipfile.ZipFile(w) as z:
-              n = z.namelist()
-              # Hard checks: must hold for any zoo release wheel.
-              hard_checks = {
-                "unsloth_zoo/__init__.py shipped":      any(s == "unsloth_zoo/__init__.py" for s in n),
-                "unsloth_zoo/rl_replacements.py shipped": any(s == "unsloth_zoo/rl_replacements.py" for s in n),
-                "unsloth_zoo/temporary_patches/__init__.py shipped": any(s == "unsloth_zoo/temporary_patches/__init__.py" for s in n),
-                "no .pyc files":                        not any(s.endswith(".pyc") for s in n),
-                "no .git tree":                         not any(s.startswith(".git/") for s in n),
-                "version is not 0.0.0":                 version is not None and version != "0.0.0",
-                "METADATA present":                     any(s.endswith(".dist-info/METADATA") for s in n),
-              }
-              # Soft checks (warn only). Zoo's pyproject doesn't exclude tests/scripts;
-              # tightening the packaging config is a separate follow-up.
-              soft_checks = {
-                "no tests/ shipped":                    not any(s.startswith("tests/") for s in n),
-                "no scripts/ shipped":                  not any(s.startswith("scripts/") for s in n),
-              }
-              print("Hard checks:")
-              for k, v in hard_checks.items():
-                  print(f"  [{'PASS' if v else 'FAIL'}] {k}")
-              print()
-              print("Soft checks (warnings):")
-              for k, v in soft_checks.items():
-                  status = "PASS" if v else "WARN"
-                  print(f"  [{status}] {k}")
-              # Exit non-zero ONLY if a hard check failed.
-              sys.exit(0 if all(hard_checks.values()) else 1)
-          PY
-
-      - name: Import smoke (clean venv)
-        # unsloth_zoo/__init__.py:128 raises ImportError when parent `unsloth` is
-        # absent (deliberate guardrail). A bare `import unsloth_zoo` in a wheel-only
-        # venv will fail by design, so the smoke pivots to reading the version
-        # string from dist-info METADATA via importlib.metadata.
-        run: |
-          python -m venv /tmp/v
-          /tmp/v/bin/pip install --upgrade pip
-          /tmp/v/bin/pip install dist/unsloth_zoo-*.whl
-          # Read version from dist-info METADATA via importlib.metadata.
-          WHEEL_VERSION=$(/tmp/v/bin/python -c "
-          from importlib.metadata import version
-          print(version('unsloth_zoo'))
-          ")
-          echo "installed unsloth_zoo version: $WHEEL_VERSION"
-          test -n "$WHEEL_VERSION" && test "$WHEEL_VERSION" != "0.0.0"
-
-      - name: Upload wheel on failure
-        if: failure()
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
-        with:
-          name: unsloth-zoo-wheel
-          path: dist/
-          retention-days: 7

From cc4278df80407b6a6597adfbf281eec8ccc0a724 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 12:08:09 +0000
Subject: [PATCH 13/13] [CI-validation] fix install steps for first-round
 failures

macOS: pip install -e .[mlx] does not pull torch (correct in production
since MLX replaces torch on Apple Silicon), but the new
test_mlx_save_lora_adapters_filter.py uses torch via the shim. Add an
explicit torch==2.10.0 install from the PyTorch CPU index (same pattern
as danielhanchen/unsloth-staging-2/.github/workflows/mlx-ci.yml).

Linux + Windows: unsloth_zoo/__init__.py:198 has a find_spec("unsloth")
hard gate that fires before UNSLOTH_IS_PRESENT is read. Install
unsloth --no-deps from git main so the import survives without dragging
in unsloth's heavy CUDA-only deps. Mirror of upstream
consolidated-tests-ci.yml.
---
 .github/workflows/mlx-pr-linux.yml   | 7 ++++++-
 .github/workflows/mlx-pr-mac.yml     | 9 ++++++++-
 .github/workflows/mlx-pr-windows.yml | 7 ++++++-
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/mlx-pr-linux.yml b/.github/workflows/mlx-pr-linux.yml
index 070c94f1a..c8b69c230 100644
--- a/.github/workflows/mlx-pr-linux.yml
+++ b/.github/workflows/mlx-pr-linux.yml
@@ -54,15 +54,20 @@ jobs:
           python-version: '3.12'
           cache: 'pip'
 
-      - name: Install CPU-only torch + unsloth_zoo[core]
+      - name: Install CPU-only torch + unsloth_zoo[core] + unsloth shim
         # MLX extras intentionally skipped -- mlx/mlx-lm/mlx-vlm have no
         # Linux wheel. The package's pyproject already gates these on
         # darwin+arm64 so `.[core]` resolves cleanly here.
+        #
+        # Best-effort (note the `|| true`): unsloth --no-deps satisfies the
+        # find_spec("unsloth") guard at unsloth_zoo/__init__.py:198 without
+        # its heavy deps. Mirrors upstream consolidated-tests-ci.yml.
         run: |
           python -m pip install --upgrade pip
           pip install --index-url https://download.pytorch.org/whl/cpu \
             "torch>=2.4.0,<2.11.0"
           pip install -e .[core]
+          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
           pip install pytest==9.0.3 safetensors
 
       - name: Import smoke — unsloth_zoo top-level + MLX runtime probe
diff --git a/.github/workflows/mlx-pr-mac.yml b/.github/workflows/mlx-pr-mac.yml
index 0c9c61df2..f2f7ba814 100644
--- a/.github/workflows/mlx-pr-mac.yml
+++ b/.github/workflows/mlx-pr-mac.yml
@@ -65,9 +65,16 @@ jobs:
           print('OK: macOS arm64 confirmed')
           "
 
-      - name: Install unsloth_zoo with MLX extras
+      - name: Install unsloth_zoo with MLX extras (+ torch for tests)
+        # The .[mlx] extras intentionally skip torch (pyproject gates torch
+        # off on darwin+arm64 since MLX replaces it). However, the new
+        # test_mlx_save_lora_adapters_filter.py imports torch via the shim,
+        # so we explicitly install the Apple Silicon wheel from the PyTorch CPU
+        # index (as danielhanchen/unsloth-staging-2's mlx-ci.yml does).
         run: |
           python -m pip install --upgrade pip
+          pip install --index-url https://download.pytorch.org/whl/cpu \
+            'torch==2.10.0'
           pip install -e .[mlx]
           pip install pytest==9.0.3 safetensors
 
diff --git a/.github/workflows/mlx-pr-windows.yml b/.github/workflows/mlx-pr-windows.yml
index e5f36852e..407382770 100644
--- a/.github/workflows/mlx-pr-windows.yml
+++ b/.github/workflows/mlx-pr-windows.yml
@@ -53,14 +53,19 @@ jobs:
           python-version: '3.12'
           cache: 'pip'
 
-      - name: Install CPU-only torch + unsloth_zoo[core] (no triton)
+      - name: Install CPU-only torch + unsloth_zoo[core] + unsloth shim
         # pyproject already gates triton on linux, so [core] resolves
         # cleanly on Windows without dragging it in.
+        #
+        # Best-effort (note the `|| true`): unsloth --no-deps satisfies the
+        # find_spec("unsloth") guard at unsloth_zoo/__init__.py:198 without
+        # its heavy deps. Mirrors upstream consolidated-tests-ci.yml.
         run: |
           python -m pip install --upgrade pip
           pip install --index-url https://download.pytorch.org/whl/cpu \
             "torch>=2.4.0,<2.11.0"
           pip install -e .[core]
+          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
           pip install pytest==9.0.3 safetensors
 
       - name: Import smoke — unsloth_zoo top-level + MLX runtime probe