From 91535bc6eef043c9581a7a3f1fb4c15a273d73a1 Mon Sep 17 00:00:00 2001
From: Lyxot <longyixing331@gmail.com>
Date: Thu, 21 May 2026 00:14:43 +0800
Subject: [PATCH 01/12] fix: handle zero-token MLX CCE inputs

---
 tests/test_mlx_runtime_cce_compile.py | 73 +++++++++++++++++++++++++++
 unsloth_zoo/mlx/cce/runtime_cce.py    | 13 +++++
 2 files changed, 86 insertions(+)

diff --git a/tests/test_mlx_runtime_cce_compile.py b/tests/test_mlx_runtime_cce_compile.py
index 9168cfe0f..e646412d9 100644
--- a/tests/test_mlx_runtime_cce_compile.py
+++ b/tests/test_mlx_runtime_cce_compile.py
@@ -38,6 +38,79 @@ def _skip_torch_shim():
         pytest.skip("requires real MLX runtime")
 
 
+def test_runtime_cce_zero_tokens_returns_empty_losses_and_zero_gradients():
+    _skip_torch_shim()
+    from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
+
+    runtime_cce, _ = make_chunked_cross_entropy_loss(
+        ignore_index=-100,
+        chunk_size=16,
+    )
+    hidden = mx.zeros((0, 16), dtype=mx.float32)
+    weight = mx.zeros((32, 16), dtype=mx.float32)
+    targets = mx.zeros((0,), dtype=mx.int32)
+
+    losses = runtime_cce(hidden, weight, targets)
+    mx.eval(losses)
+    assert losses.shape == (0,)
+
+    def loss_fn(h, w):
+        return runtime_cce(h, w, targets).astype(mx.float32).sum()
+
+    loss, grads = mx.value_and_grad(loss_fn, argnums=(0, 1))(hidden, weight)
+    mx.eval(loss, *grads)
+
+    assert loss.item() == pytest.approx(0.0)
+    assert grads[0].shape == hidden.shape
+    assert grads[1].shape == weight.shape
+    assert mx.sum(mx.abs(grads[0]).astype(mx.float32)).item() == pytest.approx(0.0)
+    assert mx.sum(mx.abs(grads[1]).astype(mx.float32)).item() == pytest.approx(0.0)
+
+
+def test_quantized_runtime_cce_zero_tokens_returns_empty_losses_and_zero_gradients():
+    _skip_torch_shim()
+    import mlx.nn as nn
+
+    from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
+
+    linear = nn.Linear(32, 32, bias=False)
+    qlinear = nn.QuantizedLinear.from_linear(linear, group_size=32, bits=4)
+    runtime_cce, _ = make_chunked_cross_entropy_loss(
+        ignore_index=-100,
+        chunk_size=16,
+        quantized=True,
+        group_size=qlinear.group_size,
+        bits=qlinear.bits,
+    )
+    hidden = mx.zeros((0, 32), dtype=mx.float32)
+    targets = mx.zeros((0,), dtype=mx.int32)
+
+    losses = runtime_cce(
+        hidden,
+        qlinear.weight,
+        qlinear.scales,
+        qlinear.biases,
+        targets,
+    )
+    mx.eval(losses)
+    assert losses.shape == (0,)
+
+    def loss_fn(h):
+        return runtime_cce(
+            h,
+            qlinear.weight,
+            qlinear.scales,
+            qlinear.biases,
+            targets,
+        ).astype(mx.float32).sum()
+
+    loss, grad = mx.value_and_grad(loss_fn)(hidden)
+    mx.eval(loss, grad)
+
+    assert loss.item() == pytest.approx(0.0)
+    assert grad.shape == hidden.shape
+    assert mx.sum(mx.abs(grad).astype(mx.float32)).item() == pytest.approx(0.0)
+
 def test_compiled_runtime_cce_preserves_aux_lse_for_gradients():
     _skip_torch_shim()
     from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py
index 464051c97..f65668f21 100644
--- a/unsloth_zoo/mlx/cce/runtime_cce.py
+++ b/unsloth_zoo/mlx/cce/runtime_cce.py
@@ -464,6 +464,9 @@ def _forward_chunked_fused_finalize(
 
     n, _ = hidden_compute.shape
     vocab_size = weight_compute.shape[0]
+    if n == 0:
+        empty = mx.zeros((0,), dtype=mx.float32)
+        return empty, empty
     compute_bytes = 2 if hidden_compute.dtype in (mx.float16, mx.bfloat16) else 4
     chunk_size = _resolve_chunk_size(
         chunk_size,
@@ -680,6 +683,14 @@ def runtime_cce_loss_vjp(primals, cotangents, outputs):
             hidden_compute = hidden
             weight_compute = weight
             targets32 = targets.astype(mx.int32)
+            if hidden_compute.shape[0] == 0:
+                return (
+                    mx.zeros_like(hidden),
+                    mx.zeros_like(weight),
+                    mx.zeros_like(scales),
+                    mx.zeros_like(biases),
+                    mx.zeros_like(targets),
+                )
             if grad_output is None:
                 grad_output = mx.zeros_like(outputs[0])
             grad_output32 = grad_output.astype(mx.float32)
@@ -805,6 +816,8 @@ def runtime_cce_loss_vjp(primals, cotangents, outputs):
         hidden_compute = hidden
         weight_compute = weight
         targets32 = targets.astype(mx.int32)
+        if hidden_compute.shape[0] == 0:
+            return mx.zeros_like(hidden), mx.zeros_like(weight), mx.zeros_like(targets)
         if grad_output is None:
             grad_output = mx.zeros_like(outputs[0])
         grad_output32 = grad_output.astype(mx.float32)

From 41d5d411b73c7b78475923d4232c7bdc88407f43 Mon Sep 17 00:00:00 2001
From: Lyxot <longyixing331@gmail.com>
Date: Thu, 21 May 2026 00:17:06 +0800
Subject: [PATCH 02/12] fix: poison invalid MLX CCE labels

---
 tests/test_mlx_runtime_cce_compile.py | 85 +++++++++++++++++++++++++++
 unsloth_zoo/mlx/cce/runtime_cce.py    | 25 +++++++-
 2 files changed, 109 insertions(+), 1 deletion(-)

diff --git a/tests/test_mlx_runtime_cce_compile.py b/tests/test_mlx_runtime_cce_compile.py
index e646412d9..cbe8e9d64 100644
--- a/tests/test_mlx_runtime_cce_compile.py
+++ b/tests/test_mlx_runtime_cce_compile.py
@@ -9,6 +9,7 @@
 
 from __future__ import annotations
 
+import math
 import sys
 
 import pytest
@@ -111,6 +112,90 @@ def loss_fn(h):
     assert grad.shape == hidden.shape
     assert mx.sum(mx.abs(grad).astype(mx.float32)).item() == pytest.approx(0.0)
 
+@pytest.mark.parametrize("bad_target", [-1, 32])
+def test_runtime_cce_invalid_labels_poison_loss_and_gradients(bad_target):
+    _skip_torch_shim()
+    from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
+
+    runtime_cce, _ = make_chunked_cross_entropy_loss(
+        ignore_index=-100,
+        chunk_size=16,
+    )
+    hidden = mx.ones((3, 16), dtype=mx.float32)
+    weight = mx.ones((32, 16), dtype=mx.float32)
+    targets = mx.array([0, bad_target, -100], dtype=mx.int32)
+
+    losses = runtime_cce(hidden, weight, targets)
+    mx.eval(losses)
+
+    assert losses[0].item() == pytest.approx(math.log(32.0), rel=1e-5)
+    assert math.isnan(losses[1].item())
+    assert losses[2].item() == pytest.approx(0.0)
+
+    def loss_fn(h, w):
+        return runtime_cce(h, w, targets).astype(mx.float32).sum()
+
+    loss, grads = mx.value_and_grad(loss_fn, argnums=(0, 1))(hidden, weight)
+    grad_norm = _stable_norm(grads)
+    mx.eval(loss, grad_norm)
+
+    assert math.isnan(loss.item())
+    assert math.isnan(grad_norm.item())
+
+
+def test_compiled_runtime_cce_invalid_labels_poison_loss():
+    _skip_torch_shim()
+    from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
+
+    runtime_cce, _ = make_chunked_cross_entropy_loss(
+        ignore_index=-100,
+        chunk_size=16,
+    )
+    hidden = mx.ones((2, 16), dtype=mx.float32)
+    weight = mx.ones((32, 16), dtype=mx.float32)
+    targets = mx.array([0, 32], dtype=mx.int32)
+
+    def losses_fn(h, w, t):
+        return runtime_cce(h, w, t)
+
+    losses = mx.compile(losses_fn)(hidden, weight, targets)
+    mx.eval(losses)
+
+    assert losses[0].item() == pytest.approx(math.log(32.0), rel=1e-5)
+    assert math.isnan(losses[1].item())
+
+
+def test_quantized_runtime_cce_invalid_labels_poison_loss():
+    _skip_torch_shim()
+    import mlx.nn as nn
+
+    from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
+
+    linear = nn.Linear(32, 32, bias=False)
+    linear.weight = mx.ones((32, 32), dtype=mx.float32)
+    qlinear = nn.QuantizedLinear.from_linear(linear, group_size=32, bits=4)
+    runtime_cce, _ = make_chunked_cross_entropy_loss(
+        ignore_index=-100,
+        chunk_size=16,
+        quantized=True,
+        group_size=qlinear.group_size,
+        bits=qlinear.bits,
+    )
+    hidden = mx.ones((2, 32), dtype=mx.float32)
+    targets = mx.array([0, 32], dtype=mx.int32)
+
+    losses = runtime_cce(
+        hidden,
+        qlinear.weight,
+        qlinear.scales,
+        qlinear.biases,
+        targets,
+    )
+    mx.eval(losses)
+
+    assert losses[0].item() == pytest.approx(math.log(32.0), rel=1e-5)
+    assert math.isnan(losses[1].item())
+
 def test_compiled_runtime_cce_preserves_aux_lse_for_gradients():
     _skip_torch_shim()
     from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py
index f65668f21..99081e5fe 100644
--- a/unsloth_zoo/mlx/cce/runtime_cce.py
+++ b/unsloth_zoo/mlx/cce/runtime_cce.py
@@ -115,6 +115,24 @@ def _apply_softcap(logits: mx.array, logit_softcap: float) -> mx.array:
     return softcap * mx.tanh(logits / softcap)
 
 
+def _target_validity_masks(
+    targets: mx.array,
+    vocab_size: int,
+    ignore_index: int,
+) -> tuple[mx.array, mx.array]:
+    in_vocab = (targets >= 0) & (targets < vocab_size)
+    not_ignored = targets != ignore_index
+    return not_ignored & in_vocab, not_ignored & ~in_vocab
+
+
+def _poison_invalid_targets(values: mx.array, invalid: mx.array) -> mx.array:
+    return mx.where(
+        invalid,
+        mx.full(values.shape, float("nan"), dtype=values.dtype),
+        values,
+    )
+
+
 def _chunk_matmul(
     x: mx.array,
     weight: mx.array,
@@ -509,8 +527,10 @@ def _forward_chunked_fused_finalize(
             target_logit = mx.where(in_chunk, chunk_target, target_logit)
 
         lse = running_max + mx.log(running_sum_exp + 1e-9)
-        valid = targets != ignore_index
+        valid, invalid = _target_validity_masks(targets, vocab_size, ignore_index)
         loss = mx.where(valid, lse - target_logit, mx.zeros_like(lse))
+        loss = _poison_invalid_targets(loss, invalid)
+        lse = _poison_invalid_targets(lse, invalid)
         return loss, lse
 
     ignore_arr = mx.array([ignore_index], dtype=mx.int32)
@@ -551,6 +571,9 @@ def _forward_chunked_fused_finalize(
                 grid=(n * 256, 1, 1),
                 threadgroup=(256, 1, 1),
             )
+            _, invalid = _target_validity_masks(targets, vocab_size, ignore_index)
+            loss = _poison_invalid_targets(loss, invalid)
+            lse = _poison_invalid_targets(lse, invalid)
             return loss, lse
 
         running_max, running_sum_exp, target_logit = forward_update_kernel(

From 51fc49ec2c73e34e918fbf1af98872234d3d569c Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 13:54:30 +0000
Subject: [PATCH 03/12] Tighten CCE invalid-target guards and broaden compile
 coverage for PR #682

Three follow-ups from review feedback:

1. _poison_invalid_targets used mx.full(values.shape, NaN, ...) which
   allocates an O(n) tensor on every forward call, even when the invalid
   mask is all False. Replaced with a scalar NaN broadcast through
   mx.where so the normal path costs nothing extra.

2. The zero-token forward early return now raises ValueError when
   hidden has zero rows but targets is non-empty. The previous version
   silently returned an empty loss/LSE, which masked an upstream shape
   mismatch instead of surfacing it.

3. The compiled invalid-label regression test is now parametrized over
   the bad_target values -1 and 32 (= vocab_size). Negative labels and
   labels >= vocab_size take different lookup paths under mx.compile,
   so single-sided coverage was insufficient. Also added a test for
   the new ValueError raised when hidden has 0 rows but targets is
   non-empty.
---
 tests/test_mlx_runtime_cce_compile.py | 24 ++++++++++++++++++++++--
 unsloth_zoo/mlx/cce/runtime_cce.py    | 13 ++++++++-----
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/tests/test_mlx_runtime_cce_compile.py b/tests/test_mlx_runtime_cce_compile.py
index cbe8e9d64..72a13f12f 100644
--- a/tests/test_mlx_runtime_cce_compile.py
+++ b/tests/test_mlx_runtime_cce_compile.py
@@ -39,6 +39,24 @@ def _skip_torch_shim():
         pytest.skip("requires real MLX runtime")
 
 
+def test_runtime_cce_zero_tokens_with_non_empty_targets_raises():
+    # hidden=0 but targets!=0 indicates an upstream shape mismatch we
+    # want to surface, not silently drop labels.
+    _skip_torch_shim()
+    from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
+
+    runtime_cce, _ = make_chunked_cross_entropy_loss(
+        ignore_index=-100,
+        chunk_size=16,
+    )
+    hidden = mx.zeros((0, 16), dtype=mx.float32)
+    weight = mx.zeros((32, 16), dtype=mx.float32)
+    targets = mx.array([0, 1, 2], dtype=mx.int32)
+
+    with pytest.raises(ValueError, match="hidden has 0 tokens"):
+        runtime_cce(hidden, weight, targets)
+
+
 def test_runtime_cce_zero_tokens_returns_empty_losses_and_zero_gradients():
     _skip_torch_shim()
     from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
@@ -143,7 +161,9 @@ def loss_fn(h, w):
     assert math.isnan(grad_norm.item())
 
 
-def test_compiled_runtime_cce_invalid_labels_poison_loss():
+@pytest.mark.parametrize("bad_target", [-1, 32])
+def test_compiled_runtime_cce_invalid_labels_poison_loss(bad_target):
+    # cover both negative and >= vocab_size labels under mx.compile.
     _skip_torch_shim()
     from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
 
@@ -153,7 +173,7 @@ def test_compiled_runtime_cce_invalid_labels_poison_loss():
     )
     hidden = mx.ones((2, 16), dtype=mx.float32)
     weight = mx.ones((32, 16), dtype=mx.float32)
-    targets = mx.array([0, 32], dtype=mx.int32)
+    targets = mx.array([0, bad_target], dtype=mx.int32)
 
     def losses_fn(h, w, t):
         return runtime_cce(h, w, t)
diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py
index 99081e5fe..446c8e282 100644
--- a/unsloth_zoo/mlx/cce/runtime_cce.py
+++ b/unsloth_zoo/mlx/cce/runtime_cce.py
@@ -126,11 +126,8 @@ def _target_validity_masks(
 
 
 def _poison_invalid_targets(values: mx.array, invalid: mx.array) -> mx.array:
-    return mx.where(
-        invalid,
-        mx.full(values.shape, float("nan"), dtype=values.dtype),
-        values,
-    )
+    # scalar NaN broadcasts; avoids O(n) alloc on every forward call.
+    return mx.where(invalid, mx.array(float("nan"), dtype=values.dtype), values)
 
 
 def _chunk_matmul(
@@ -483,6 +480,12 @@ def _forward_chunked_fused_finalize(
     n, _ = hidden_compute.shape
     vocab_size = weight_compute.shape[0]
     if n == 0:
+        # surface upstream shape mismatch instead of silently dropping labels.
+        if targets.shape[0] != 0:
+            raise ValueError(
+                "MLX CCE: hidden has 0 tokens but targets is non-empty "
+                f"(targets.shape={targets.shape})."
+            )
         empty = mx.zeros((0,), dtype=mx.float32)
         return empty, empty
     compute_bytes = 2 if hidden_compute.dtype in (mx.float16, mx.bfloat16) else 4

From 037e63b559082f6f65636c00415689ff16be7321 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 14:10:29 +0000
Subject: [PATCH 04/12] Revert scalar NaN broadcast in _poison_invalid_targets
 for PR #682

The scalar mx.array(float('nan'), ...) broadcast got baked into the
Metal kernel as the literal token 'nan', which the Metal C++
tokenizer rejects ('use of undeclared identifier nan'). Restoring
the original mx.full(values.shape, ...) form brings back the O(n)
allocation but keeps Metal-kernel compilation working. The two-sided
compile-test parametrization and the defensive ValueError check for
hidden=0 with non-empty targets are kept.
---
 unsloth_zoo/mlx/cce/runtime_cce.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py
index 446c8e282..45d649738 100644
--- a/unsloth_zoo/mlx/cce/runtime_cce.py
+++ b/unsloth_zoo/mlx/cce/runtime_cce.py
@@ -126,8 +126,14 @@ def _target_validity_masks(
 
 
 def _poison_invalid_targets(values: mx.array, invalid: mx.array) -> mx.array:
-    # scalar NaN broadcasts; avoids O(n) alloc on every forward call.
-    return mx.where(invalid, mx.array(float("nan"), dtype=values.dtype), values)
+    # mx.full produces a real tensor; a 0-d scalar gets baked into the
+    # Metal kernel as the literal token `nan` which the Metal C++
+    # tokenizer rejects (use of undeclared identifier 'nan').
+    return mx.where(
+        invalid,
+        mx.full(values.shape, float("nan"), dtype=values.dtype),
+        values,
+    )
 
 
 def _chunk_matmul(

From c72840a88e2b974df3b610bdc4214dc62957e0f5 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 14:30:45 +0000
Subject: [PATCH 05/12] Scrub .github/workflows for staging push (matches
 staging base)

---
 .github/workflows/consolidated-tests-ci.yml | 255 --------------------
 .github/workflows/lint-ci.yml               | 122 ----------
 .github/workflows/mlx-ci.yml                |  70 ------
 .github/workflows/security-audit.yml        | 226 -----------------
 .github/workflows/stale.yml                 |  37 ---
 .github/workflows/studio-export-fix-ci.yml  |  62 -----
 .github/workflows/wheel-smoke.yml           | 118 ---------
 7 files changed, 890 deletions(-)
 delete mode 100644 .github/workflows/consolidated-tests-ci.yml
 delete mode 100644 .github/workflows/lint-ci.yml
 delete mode 100644 .github/workflows/mlx-ci.yml
 delete mode 100644 .github/workflows/security-audit.yml
 delete mode 100644 .github/workflows/stale.yml
 delete mode 100644 .github/workflows/studio-export-fix-ci.yml
 delete mode 100644 .github/workflows/wheel-smoke.yml

diff --git a/.github/workflows/consolidated-tests-ci.yml b/.github/workflows/consolidated-tests-ci.yml
deleted file mode 100644
index 6ab589c20..000000000
--- a/.github/workflows/consolidated-tests-ci.yml
+++ /dev/null
@@ -1,255 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# Python compatibility + repo test gate. Adapted from unsloth's consolidated-tests-ci.yml.
-# Jobs: python-version-collect (pytest --collect-only on 3.10-3.13), repo-tests-cpu
-# (tests/security HARD GATE + CPU-pure zoo tests), core-upstream-matrix (HF/TRL/peft
-# drift detector across 3 cells -- the high-value zoo coverage).
-
-name: Tests CI
-
-on:
-  pull_request:
-  push:
-    branches: [main]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  # Python compatibility: pytest --collect-only per interpreter.
-  python-version-collect:
-    name: (Python ${{ matrix.python-version }})
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ['3.10', '3.11', '3.12', '3.13']
-    steps:
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: ${{ matrix.python-version }}
-          cache: 'pip'
-
-      - name: Install CPU-only torch + zoo runtime deps
-        # CPU index avoids the multi-GB CUDA wheel set. `--no-deps unsloth`
-        # satisfies the find_spec("unsloth") guard at unsloth_zoo/__init__.py:128.
-        run: |
-          python -m pip install --upgrade pip
-          pip install --index-url https://download.pytorch.org/whl/cpu \
-            "torch>=2.4.0,<2.11.0"
-          pip install -e .[core]
-          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
-          pip install pytest==9.0.3
-
-      - name: pytest --collect-only
-        continue-on-error: true
-        run: python -m pytest tests/ --collect-only -q
-
-  # CPU-only repo tests. HARD GATE on tests/security.
-  repo-tests-cpu:
-    name: Repo tests (CPU)
-    runs-on: ubuntu-latest
-    timeout-minutes: 20
-    steps:
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Install runtime + test deps
-        # --no-deps unsloth satisfies the find_spec("unsloth") guard at unsloth_zoo/__init__.py:128.
-        run: |
-          python -m pip install --upgrade pip
-          pip install --index-url https://download.pytorch.org/whl/cpu \
-            "torch>=2.4.0,<2.11.0"
-          pip install -e .[core]
-          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
-          pip install pytest==9.0.3 pyyaml==6.0.2
-
-      - name: pytest tests/security (HARD GATE)
-        run: python -m pytest tests/security -v
-
-      - name: pytest tests/test_pr_a_imports + zoo-specific CPU tests
-        # Run as SEPARATE pytest invocation: tests/security/conftest.py installs a
-        # session-scoped network_blocker autouse fixture that would otherwise block
-        # test_pypi_version_sync from reaching pypi.org.
-        continue-on-error: true
-        run: |
-          python -m pytest \
-            tests/test_pr_a_imports.py \
-            tests/test_rl_replacements_cpu.py \
-            tests/test_temporary_patches_imports.py \
-            tests/test_zoo_history_regressions.py \
-            tests/test_pypi_version_sync.py \
-            -v
-
-  # Core (HF/TRL/peft) drift matrix. Three cells: HF=4.57.6+TRL<1, HF=latest+TRL=latest,
-  # and pyproject defaults. fail-fast=false; drift in one cell shouldn't cancel others.
-  core-upstream-matrix:
-    name: "Core (${{ matrix.combo.label }})"
-    runs-on: ubuntu-latest
-    timeout-minutes: 30
-    strategy:
-      fail-fast: false
-      matrix:
-        combo:
-          - id: t4576-trl0latest
-            label: "HF=4.57.6 + TRL<1"
-            transformers_spec: "transformers==4.57.6"
-            trl_spec: "trl>=0.18.2,<1.0.0"
-            peft_spec: "peft>=0.18,<0.20"
-          - id: tlatest5-trl1latest
-            label: "HF=latest + TRL=latest"
-            transformers_spec: "transformers>=5,<6"
-            trl_spec: "trl>=1,<2"
-            peft_spec: "peft"
-          - id: pyproject
-            label: "HF=default + TRL=default"
-            transformers_spec: "__from_pyproject__"
-            trl_spec: "__from_pyproject__"
-            peft_spec: "__from_pyproject__"
-    env:
-      MATRIX_TRANSFORMERS_SPEC: ${{ matrix.combo.transformers_spec }}
-      MATRIX_TRL_SPEC: ${{ matrix.combo.trl_spec }}
-      MATRIX_PEFT_SPEC: ${{ matrix.combo.peft_spec }}
-      MATRIX_COMBO_ID: ${{ matrix.combo.id }}
-      # Pure-Python protobuf parser; transformers' bundled *_pb2.py is rejected by C++ protobuf 4+/5+.
-      PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python
-      UNSLOTH_COMPILE_DISABLE: '1'
-      # Secondary handshake after find_spec("unsloth") guard at unsloth_zoo/__init__.py:128.
-      UNSLOTH_IS_PRESENT: '1'
-    steps:
-      - name: Harden runner (audit)
-        # audit (not block): matrix pulls arbitrary transformers/TRL/peft pins.
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Resolve matrix specs (handle __from_pyproject__ sentinel)
-        # Resolve transformers/trl/peft from pyproject.toml when the sentinel is used.
-        run: |
-          set -euxo pipefail
-          python <<'PY' >> "$GITHUB_ENV"
-          import os, re, tomllib
-          spec_t = os.environ["MATRIX_TRANSFORMERS_SPEC"]
-          spec_r = os.environ["MATRIX_TRL_SPEC"]
-          spec_p = os.environ["MATRIX_PEFT_SPEC"]
-
-          def _pkg_name(spec: str) -> str:
-              m = re.match(r"\s*([A-Za-z0-9_.-]+)", spec)
-              return (m.group(1).lower() if m else "")
-
-          if "__from_pyproject__" in (spec_t, spec_r, spec_p):
-              with open("pyproject.toml", "rb") as f:
-                  doc = tomllib.load(f)
-              proj = doc.get("project", {})
-              all_deps: list[str] = list(proj.get("dependencies", []))
-              for _name, dep_list in proj.get("optional-dependencies", {}).items():
-                  all_deps.extend(dep_list)
-
-              # Strip environment markers so the resolved spec is pip-installable.
-              def _strip_marker(s: str) -> str:
-                  return s.split(";", 1)[0].strip()
-
-              if spec_t == "__from_pyproject__":
-                  spec_t = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "transformers"),
-                                "transformers")
-              if spec_r == "__from_pyproject__":
-                  spec_r = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "trl"),
-                                "trl")
-              if spec_p == "__from_pyproject__":
-                  spec_p = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "peft"),
-                                "peft")
-          print(f"RESOLVED_TRANSFORMERS_SPEC={spec_t}")
-          print(f"RESOLVED_TRL_SPEC={spec_r}")
-          print(f"RESOLVED_PEFT_SPEC={spec_p}")
-          PY
-          grep RESOLVED_ "$GITHUB_ENV" || true
-
-      - name: Install torch CPU + zoo + matrix-specified upstream libs
-        # Two-phase: `-e .[core]` for pyproject defaults, then `-U <RESOLVED_*>` to override.
-        # The -U is critical so pip will downgrade transformers (e.g. cell-1 pin 4.57.6).
-        # --no-deps unsloth satisfies the find_spec guard at unsloth_zoo/__init__.py:128.
-        run: |
-          set -euxo pipefail
-          python -m pip install --upgrade pip
-          pip install --index-url https://download.pytorch.org/whl/cpu \
-            "torch>=2.4.0,<2.11.0" "torchvision<0.26"
-          # torchvision: transitive import of transformers.models.qwen2_vl
-          # / qwen2_5_vl image processors. The Qwen2_VL image-processor
-          # zoo references chains through `from torchvision...` at module
-          # top, so a missing torchvision turns the existence-probe drift
-          # tests RED on "ModuleNotFoundError: No module named 'torchvision'".
-          # CPU build is plenty; we don't need the CUDA variant.
-          pip install -e .[core]
-          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
-          # Override with matrix-resolved specs.
-          pip install -U "$RESOLVED_TRANSFORMERS_SPEC" "$RESOLVED_TRL_SPEC" "$RESOLVED_PEFT_SPEC"
-          # bitsandbytes: imported at module scope in saving_utils.py (_active_merge_device path).
-          pip install 'bitsandbytes>=0.45'
-          # IPython + ipywidgets: logging_utils.py:50 imports transformers.utils.notebook.
-          # Required so drift detector only fires on real drift, not missing CI deps.
-          pip install 'ipython>=8' 'ipywidgets>=8'
-          pip install pytest==9.0.3 packaging
-          echo "::group::Installed transformers + trl + peft + torch versions"
-          pip show transformers
-          pip show trl
-          pip show peft
-          pip show torch
-          echo "::endgroup::"
-
-      - name: pytest upstream-regression suite (94 pinned + 117 expanded)
-        # 626 drift-detector tests / cell across 12 files. HARD GATE: a red cell
-        # means real upstream drift (transformers/trl/peft/vllm/datasets renamed
-        # or removed a symbol zoo references). Zoo PRs #4 through #635 mined.
-        run: |
-          python -m pytest -v --tb=short -rs \
-            tests/test_upstream_pinned_symbols_transformers.py \
-            tests/test_upstream_pinned_symbols_trl_vllm.py \
-            tests/test_upstream_pinned_symbols_accelerator.py \
-            tests/test_zoo_history_regressions_deep.py \
-            tests/test_upstream_import_fixes_drift.py \
-            tests/test_zoo_source_upstream_refs.py \
-            tests/test_upstream_signatures.py \
-            tests/test_extended_dep_api_pins.py \
-            tests/test_upstream_source_patterns.py \
-            tests/test_compiler_rewriter_exhaustive.py \
-            tests/test_compiler_dynamic_exec.py \
-            tests/test_temporary_patches_exhaustive.py \
-            tests/test_unsloth_zoo_lora_merge.py \
-            tests/test_peft_paramwrapper_layout_drift.py \
-            tests/test_transformers_moe_structure_drift.py \
-            tests/test_moe_merge_e2e_cpu.py
diff --git a/.github/workflows/lint-ci.yml b/.github/workflows/lint-ci.yml
deleted file mode 100644
index 75446a499..000000000
--- a/.github/workflows/lint-ci.yml
+++ /dev/null
@@ -1,122 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# Whole-repo Python source-lint gate. Adapted from unsloth's lint-ci.yml:
-# Python (compileall + narrow ruff) + YAML/JSON round-trip. Dropped vs unsloth:
-# shell lint (zoo has no committed *.sh), TypeScript/Rust (Studio/Tauri are unsloth-side).
-
-name: Lint CI
-
-on:
-  pull_request:
-  push:
-    branches: [main]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  source-lint:
-    name: Source lint (Python + YAML + JSON)
-    runs-on: ubuntu-latest
-    timeout-minutes: 5
-    steps:
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - run: pip install 'ruff==0.15.12' 'pyyaml>=6'
-
-      - name: Python AST/syntax check (every committed .py must compile)
-        # continue-on-error during CI bootstrap: pyproject.toml declares
-        # `requires-python = ">=3.9,<3.15"` but temporary_patches/gpt_oss.py
-        # uses a 3.10+ `match` statement. Tracked as a separate cleanup PR.
-        continue-on-error: true
-        run: |
-          python -m compileall -q -j 0 unsloth_zoo tests scripts
-
-      - name: Python ruff check (narrow gate)
-        # E9 / F63 / F7 / F82: syntax errors, broken comparisons, undefined names.
-        # continue-on-error during CI bootstrap: first run on main surfaced 13
-        # latent findings (rl_replacements.py L1128 F821, gpt_oss match-on-3.9).
-        continue-on-error: true
-        run: |
-          ruff check --select E9,F63,F7,F82 unsloth_zoo tests scripts
-
-      - name: No leftover debugger / pdb / breakpoint calls
-        # Catches `import pdb`, `pdb.set_trace()`, `breakpoint()`, `import ipdb`.
-        # continue-on-error during bootstrap: rl_replacements.py has a
-        # `#breakpoint()` comment the regex matches (# is [^A-Za-z_]).
-        continue-on-error: true
-        run: |
-          set -e
-          if grep -rnE '(^|[^A-Za-z_])(pdb\.set_trace|breakpoint)\(|^import (pdb|ipdb)$|^from (pdb|ipdb) import' \
-              --include='*.py' unsloth_zoo scripts; then
-            echo "::error::Leftover debugger call found above. Remove it." >&2
-            exit 1
-          fi
-
-      - name: YAML round-trip for every committed YAML
-        run: |
-          python <<'PY'
-          import pathlib, sys, yaml
-          fails = []
-          for p in pathlib.Path(".").rglob("*.yml"):
-              if any(part.startswith(".") and part not in (".github",) for part in p.parts):
-                  continue
-              try:
-                  yaml.safe_load(p.read_text())
-              except Exception as exc:
-                  fails.append(f"{p}: {exc}")
-          for p in pathlib.Path(".").rglob("*.yaml"):
-              if any(part.startswith(".") and part not in (".github",) for part in p.parts):
-                  continue
-              try:
-                  yaml.safe_load(p.read_text())
-              except Exception as exc:
-                  fails.append(f"{p}: {exc}")
-          if fails:
-              for f in fails:
-                  print("::error::", f)
-              sys.exit(1)
-          print(f"YAML round-trip OK")
-          PY
-
-      - name: JSON round-trip for every committed JSON
-        run: |
-          python <<'PY'
-          import pathlib, json, sys
-          fails = []
-          for p in pathlib.Path(".").rglob("*.json"):
-              if any(part in (".git", "node_modules", "__pycache__", "build", "dist") for part in p.parts):
-                  continue
-              try:
-                  json.loads(p.read_text())
-              except Exception as exc:
-                  fails.append(f"{p}: {exc}")
-          if fails:
-              for f in fails:
-                  print("::error::", f)
-              sys.exit(1)
-          print("JSON round-trip OK")
-          PY
-
-      - name: enforce kwargs spacing
-        # Style rule mirrored from unsloth: kwargs use `name = value` not `name=value`.
-        continue-on-error: true
-        run: |
-          python3 scripts/enforce_kwargs_spacing.py unsloth_zoo
diff --git a/.github/workflows/mlx-ci.yml b/.github/workflows/mlx-ci.yml
deleted file mode 100644
index 3df8be9d9..000000000
--- a/.github/workflows/mlx-ci.yml
+++ /dev/null
@@ -1,70 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# MLX-specific CI on macOS arm64 (Apple Silicon) so mlx / mlx-lm / mlx-vlm wheels
-# resolve. Installs `unsloth_zoo[mlx]`, smoke-imports unsloth_zoo/mlx_*.py modules,
-# runs tests/test_mlx_torch_shim_smoke.py. Opt-in via `mlx` label to save macOS minutes.
-
-name: MLX CI on Mac M1
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened, labeled]
-  workflow_dispatch:
-  schedule:
-    # Daily @ 04:23 UTC -- off the security-audit cron rush at 04:13.
-    - cron: '23 4 * * *'
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  mlx-smoke:
-    name: MLX install + import smoke (Apple Silicon)
-    # Opt-in: schedule / workflow_dispatch always run; PR runs only with `mlx` label.
-    if: >-
-      github.event_name == 'schedule' ||
-      github.event_name == 'workflow_dispatch' ||
-      contains(github.event.pull_request.labels.*.name, 'mlx')
-    runs-on: macos-14   # Apple Silicon (M1) hosted runner
-    timeout-minutes: 30
-    steps:
-      # harden-runner block-mode is Linux-only; stay in audit on macOS for parity.
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Install zoo with MLX extras
-        # pyproject gates MLX deps on darwin+arm64; `.[mlx]` picks them up
-        # without the torch-on-Linux-CUDA path.
-        run: |
-          python -m pip install --upgrade pip
-          pip install -e .[mlx]
-          pip install pytest==9.0.3
-
-      - name: MLX module import smoke
-        run: |
-          python -c "import unsloth_zoo.mlx_loader; print('mlx_loader OK')"
-          python -c "import unsloth_zoo.mlx_compile; print('mlx_compile OK')"
-          python -c "import unsloth_zoo.mlx_utils; print('mlx_utils OK')"
-          python -c "import unsloth_zoo.mlx_trainer; print('mlx_trainer OK')"
-          python -c "import unsloth_zoo.mlx_cce; print('mlx_cce OK')"
-
-      - name: tests/test_mlx_torch_shim_smoke.py
-        # Exercises the MLX-on-torch shim end-to-end against the real mlx runtime
-        # on Apple Silicon; on Linux runners it would run against tests/mlx_simulation/ stubs.
-        run: python -m pytest tests/test_mlx_torch_shim_smoke.py -v
diff --git a/.github/workflows/security-audit.yml b/.github/workflows/security-audit.yml
deleted file mode 100644
index 28a73eed0..000000000
--- a/.github/workflows/security-audit.yml
+++ /dev/null
@@ -1,226 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# Pure-Python supply-chain audit for unsloth_zoo. Mirrors unslothai/unsloth's
-# security-audit.yml with npm/Cargo/Studio jobs stripped (zoo is pure Python).
-# Jobs: advisory-audit (pip-audit + trufflehog), pip-scan-packages (transitive
-# closure pattern scan), workflow-trigger-lint, tests-security (HARD GATE).
-
-name: Security audit
-
-on:
-  pull_request:
-    paths:
-      - 'pyproject.toml'
-      - 'scripts/scan_packages.py'
-      - 'scripts/lint_workflow_triggers.py'
-      - 'tests/security/**'
-      - '.github/workflows/security-audit.yml'
-  push:
-    branches: [main]
-  schedule:
-    - cron: '13 4 * * *'   # 04:13 UTC daily, off the cron rush
-  workflow_dispatch:
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  # Advisory-DB audit: pip-audit + trufflehog. Non-blocking while baseline settles.
-  advisory-audit:
-    name: advisory audit (pip + secrets)
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    steps:
-      - name: Harden runner (egress block)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: block
-          disable-sudo: true
-          allowed-endpoints: >
-            api.github.com:443
-            github.com:443
-            codeload.github.com:443
-            objects.githubusercontent.com:443
-            pypi.org:443
-            files.pythonhosted.org:443
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          fetch-depth: 0   # trufflehog needs full history for diff scans
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-
-      - name: Install pip-audit
-        run: python -m pip install --upgrade pip pip-audit
-
-      - name: Build filtered requirements set
-        # Reads pyproject.toml deps + extras into a flat requirements file.
-        # git+ specs are skipped (advisory-DB can't resolve them).
-        run: |
-          mkdir -p audit-reqs
-          python <<'PY' > audit-reqs/zoo-deps.txt
-          import tomllib
-          with open("pyproject.toml", "rb") as f:
-              d = tomllib.load(f)
-          core = d["project"]["dependencies"]
-          all_extras = []
-          for extra_name, specs in d["project"].get("optional-dependencies", {}).items():
-              # Skip self-referential extras like "huggingface = ['unsloth_zoo[core]']".
-              all_extras += [s for s in specs if "unsloth_zoo" not in s]
-          print("# Auto-generated from pyproject.toml by security-audit.yml.")
-          for spec in core + all_extras:
-              if "git+" in spec:
-                  print(f"# [security-audit] skipped git+ spec: {spec}")
-                  continue
-              print(spec)
-          PY
-
-      - name: pip-audit (advisory DB lookup)
-        continue-on-error: true
-        run: pip-audit --requirement audit-reqs/zoo-deps.txt --disable-pip --strict || true
-
-      - name: Trufflehog secret scan
-        continue-on-error: true
-        uses: trufflesecurity/trufflehog@17456f8c7d042d8c82c9a8ca9e937231f9f42e26  # v3.95.2
-        with:
-          base: ${{ github.event.repository.default_branch }}
-          head: HEAD
-          extra_args: --only-verified
-
-  # pip-scan-packages: downloads every PyPI archive in zoo's transitive closure and
-  # pattern-scans (catches the malicious-upload class that precedes CVE publication).
-  pip-scan-packages:
-    name: pip scan-packages (zoo transitive closure)
-    runs-on: ubuntu-latest
-    timeout-minutes: 25
-    steps:
-      - name: Harden runner (egress block)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: block
-          disable-sudo: true
-          allowed-endpoints: >
-            api.github.com:443
-            github.com:443
-            codeload.github.com:443
-            objects.githubusercontent.com:443
-            pypi.org:443
-            files.pythonhosted.org:443
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Install scan_packages.py runtime deps
-        # requests + packaging for PyPI's JSON API. Scanned packages are
-        # downloaded raw and inspected, never `pip install`-ed.
-        run: python -m pip install --upgrade pip requests packaging
-
-      - name: Build filtered requirements set
-        run: |
-          mkdir -p audit-reqs
-          python <<'PY' > audit-reqs/zoo-deps.txt
-          import tomllib
-          with open("pyproject.toml", "rb") as f:
-              d = tomllib.load(f)
-          core = d["project"]["dependencies"]
-          all_extras = []
-          for extra_name, specs in d["project"].get("optional-dependencies", {}).items():
-              all_extras += [s for s in specs if "unsloth_zoo" not in s]
-          print("# Auto-generated from pyproject.toml by security-audit.yml.")
-          for spec in core + all_extras:
-              if "git+" in spec:
-                  print(f"# [security-audit] skipped git+ spec: {spec}")
-                  continue
-              print(spec)
-          PY
-
-      - name: scan-packages (with deps)
-        continue-on-error: true
-        # --with-deps makes scan transitive. Archives are downloaded and
-        # pattern-scanned WITHOUT installing -- malicious wheels cannot execute.
-        run: python3 scripts/scan_packages.py --requirements audit-reqs/zoo-deps.txt --with-deps
-
-  # workflow-trigger-lint: refuses pull_request_target with PR-head checkout,
-  # restricted workflow_run without justification, and cache-key collisions.
-  workflow-trigger-lint:
-    name: workflow-trigger lint (pull_request_target / cache-poisoning)
-    runs-on: ubuntu-latest
-    timeout-minutes: 5
-    steps:
-      - name: Harden runner (egress block)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: block
-          disable-sudo: true
-          allowed-endpoints: >
-            api.github.com:443
-            github.com:443
-            codeload.github.com:443
-            objects.githubusercontent.com:443
-            pypi.org:443
-            files.pythonhosted.org:443
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-
-      - name: Install PyYAML
-        run: pip install pyyaml==6.0.2
-
-      - name: Run workflow-trigger lint
-        run: python3 scripts/lint_workflow_triggers.py
-
-  # HARD GATE: regression tests for scanner + lint scripts. Drift in IOC tables
-  # or scanner exit semantics fails this PR at review time.
-  tests-security:
-    name: pytest tests/security
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-    steps:
-      - name: Harden runner (egress block)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: block
-          disable-sudo: true
-          allowed-endpoints: >
-            api.github.com:443
-            github.com:443
-            codeload.github.com:443
-            objects.githubusercontent.com:443
-            pypi.org:443
-            files.pythonhosted.org:443
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-
-      - name: Install pytest + PyYAML
-        # PyYAML needed by scripts/lint_workflow_triggers.py, exercised via subprocess
-        # by tests/security/test_lint_workflow_triggers.py. (See unsloth PR #5397: without
-        # pyyaml the lint script exits 2.)
-        run: pip install pytest==9.0.3 pyyaml==6.0.2
-
-      - name: Run security regression tests
-        run: python3 -m pytest tests/security -v
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
deleted file mode 100644
index 1a4cf841d..000000000
--- a/.github/workflows/stale.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-name: 'Inactive Issue Pinger'
-
-on:
-  schedule:
-    - cron: '30 5 * * *' # Runs at 5:30 UTC every day
-
-jobs:
-  stale:
-    runs-on: ubuntu-latest
-    permissions:
-      issues: write
-
-    steps:
-      - uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f  # v10.2.0
-        with:
-          # The message to post on stale issues.
-          # This message will ping the issue author.
-          # Note: The stale bot action does not currently support a direct placeholder for the last commenter.
-          # As a workaround, this message encourages any participant to reply.
-          stale-issue-message: >
-            Is this issue still important to you?
-            Apologies in advance we might have missed this issue as well.
-            For faster response times, please post on our Reddit server - https://www.reddit.com/r/unsloth or our Discord - https://discord.com/invite/unsloth 
-
-          # The number of days of inactivity before an issue is considered stale.
-          days-before-issue-stale: 9999
-
-          # Set to -1 to never close stale issues.
-          days-before-issue-close: -1
-
-          # A label to apply to stale issues.
-          stale-issue-label: 'inactive'
-
-          # The number of operations to perform per run to avoid rate limiting.
-          operations-per-run: 500
-
-          enable-statistics: false
diff --git a/.github/workflows/studio-export-fix-ci.yml b/.github/workflows/studio-export-fix-ci.yml
deleted file mode 100644
index 699b78d16..000000000
--- a/.github/workflows/studio-export-fix-ci.yml
+++ /dev/null
@@ -1,62 +0,0 @@
-name: studio-export-fix-ci
-
-on:
-  push:
-    branches: [main, nightly]
-    paths:
-      - "unsloth_zoo/llama_cpp.py"
-      - "tests/test_quantize_gguf_q2_k_l.py"
-      - "tests/test_convert_hf_to_gguf_patcher.py"
-      - ".github/workflows/studio-export-fix-ci.yml"
-  pull_request:
-    paths:
-      - "unsloth_zoo/llama_cpp.py"
-      - "tests/test_quantize_gguf_q2_k_l.py"
-      - "tests/test_convert_hf_to_gguf_patcher.py"
-      - ".github/workflows/studio-export-fix-ci.yml"
-
-concurrency:
-  group: studio-export-fix-${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  studio-export-fix:
-    name: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      # Cap matrix at 3 in flight so Windows stays under the repo-level
-      # 5-concurrent-Windows-runner limit when this job runs alongside others.
-      max-parallel: 3
-      matrix:
-        os: [ubuntu-latest, macos-14, windows-latest]
-    runs-on: ${{ matrix.os }}
-    timeout-minutes: 15
-    env:
-      # 5000/h vs 60/h on raw.githubusercontent.com for the live-upstream tests.
-      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      UNSLOTH_COMPILE_DISABLE: '1'
-      PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python
-    steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-          cache: pip
-
-      - name: Install minimal test deps
-        run: |
-          python -m pip install --upgrade pip
-          # Pure-Python tests: monkeypatch subprocess + AST-parse upstream files.
-          # No torch / transformers needed. Keep slim so Windows cold start stays under a minute.
-          python -m pip install pytest psutil requests tqdm
-
-      - name: Run patcher + q2_k_l unit tests
-        shell: bash
-        run: |
-          pytest -v \
-            tests/test_quantize_gguf_q2_k_l.py \
-            tests/test_convert_hf_to_gguf_patcher.py
diff --git a/.github/workflows/wheel-smoke.yml b/.github/workflows/wheel-smoke.yml
deleted file mode 100644
index 626e8dccb..000000000
--- a/.github/workflows/wheel-smoke.yml
+++ /dev/null
@@ -1,118 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# Build PyPI wheel + sdist, verify content sanity, import-smoke in a clean venv.
-# Adapted from unsloth's wheel-smoke.yml; zoo's content checks: package present,
-# no tests/ shipped, no stray .pyc, real version string, import smoke succeeds.
-
-name: Wheel CI
-
-on:
-  pull_request:
-    paths:
-      - 'pyproject.toml'
-      - 'unsloth_zoo/**'
-      - 'tests/**'
-      - '.github/workflows/wheel-smoke.yml'
-  push:
-    branches: [main]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  wheel:
-    name: Wheel build + content sanity + import smoke
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    steps:
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-
-      - name: Build wheel + sdist
-        run: |
-          python -m pip install --upgrade pip build
-          rm -rf dist build ./*.egg-info
-          python -m build
-
-      - name: Wheel content sanity
-        run: |
-          python - <<'PY'
-          import zipfile, glob, sys, re
-          wheels = glob.glob("dist/unsloth_zoo-*.whl")
-          if not wheels:
-              print("FAIL: no wheel produced"); sys.exit(2)
-          w = wheels[0]
-          print(f"wheel: {w}")
-          # Version sanity: dynamic metadata pulls from unsloth_zoo.__init__.__version__.
-          m = re.match(r"dist/unsloth_zoo-([^-]+)-py3-none-any\.whl", w)
-          version = m.group(1) if m else None
-          print(f"wheel version: {version}")
-          with zipfile.ZipFile(w) as z:
-              n = z.namelist()
-              # Hard checks: must hold for any zoo release wheel.
-              hard_checks = {
-                "unsloth_zoo/__init__.py shipped":      any(s == "unsloth_zoo/__init__.py" for s in n),
-                "unsloth_zoo/rl_replacements.py shipped": any(s == "unsloth_zoo/rl_replacements.py" for s in n),
-                "unsloth_zoo/temporary_patches/__init__.py shipped": any(s == "unsloth_zoo/temporary_patches/__init__.py" for s in n),
-                "no .pyc files":                        not any(s.endswith(".pyc") for s in n),
-                "no .git tree":                         not any(s.startswith(".git/") for s in n),
-                "version is not 0.0.0":                 version is not None and version != "0.0.0",
-                "METADATA present":                     any(s.endswith(".dist-info/METADATA") for s in n),
-              }
-              # Soft checks (warn only). Zoo's pyproject doesn't exclude tests/scripts;
-              # tightening the packaging config is a separate follow-up.
-              soft_checks = {
-                "no tests/ shipped":                    not any(s.startswith("tests/") for s in n),
-                "no scripts/ shipped":                  not any(s.startswith("scripts/") for s in n),
-              }
-              print("Hard checks:")
-              for k, v in hard_checks.items():
-                  print(f"  [{'PASS' if v else 'FAIL'}] {k}")
-              print()
-              print("Soft checks (warnings):")
-              for k, v in soft_checks.items():
-                  status = "PASS" if v else "WARN"
-                  print(f"  [{status}] {k}")
-              # Exit non-zero ONLY if a hard check failed.
-              sys.exit(0 if all(hard_checks.values()) else 1)
-          PY
-
-      - name: Import smoke (clean venv)
-        # unsloth_zoo/__init__.py:128 raises ImportError when parent `unsloth` is
-        # absent (deliberate guardrail). A bare `import unsloth_zoo` in a wheel-only
-        # venv will fail by design, so the smoke pivots to reading the version
-        # string from dist-info METADATA via importlib.metadata.
-        run: |
-          python -m venv /tmp/v
-          /tmp/v/bin/pip install --upgrade pip
-          /tmp/v/bin/pip install dist/unsloth_zoo-*.whl
-          # Read version from dist-info METADATA via importlib.metadata.
-          WHEEL_VERSION=$(/tmp/v/bin/python -c "
-          from importlib.metadata import version
-          print(version('unsloth_zoo'))
-          ")
-          echo "installed unsloth_zoo version: $WHEEL_VERSION"
-          test -n "$WHEEL_VERSION" && test "$WHEEL_VERSION" != "0.0.0"
-
-      - name: Upload wheel on failure
-        if: failure()
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
-        with:
-          name: unsloth-zoo-wheel
-          path: dist/
-          retention-days: 7

From 5d8a382995cf88c3caf112c0f93da69af9bdedf8 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 15:18:08 +0000
Subject: [PATCH 06/12] Validate target length against hidden token count in
 MLX CCE

Extend the existing n==0 shape-mismatch guard in
_forward_chunked_fused_finalize so that any mismatch between
targets.shape[0] and n raises a ValueError before chunk planning,
fallback indexing, or Metal kernel launch. Previously, short non-empty
targets were broadcast against the n-row loss and lse tensors, silently
returning wrong-but-finite losses, while longer targets reached kernels
that index targets[row] for row in [0, n) and produced cryptic backend
errors. The new check follows the PR intent of surfacing upstream shape
bugs early.
---
 unsloth_zoo/mlx/cce/runtime_cce.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py
index 45d649738..194df35ea 100644
--- a/unsloth_zoo/mlx/cce/runtime_cce.py
+++ b/unsloth_zoo/mlx/cce/runtime_cce.py
@@ -494,6 +494,11 @@ def _forward_chunked_fused_finalize(
             )
         empty = mx.zeros((0,), dtype=mx.float32)
         return empty, empty
+    if targets.shape[0] != n:
+        raise ValueError(
+            "MLX CCE: targets length does not match hidden token count "
+            f"(hidden.shape={hidden_compute.shape}, targets.shape={targets.shape})."
+        )
     compute_bytes = 2 if hidden_compute.dtype in (mx.float16, mx.bfloat16) else 4
     chunk_size = _resolve_chunk_size(
         chunk_size,

From 5f700d0bc8ea6fdf3d698ca1fd4b48c4d697f996 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 15:40:00 +0000
Subject: [PATCH 07/12] Sync .github/workflows with upstream author branch

---
 .github/workflows/consolidated-tests-ci.yml | 255 ++++++++++++++++++++
 .github/workflows/lint-ci.yml               | 122 ++++++++++
 .github/workflows/mlx-ci.yml                |  70 ++++++
 .github/workflows/security-audit.yml        | 226 +++++++++++++++++
 .github/workflows/stale.yml                 |  37 +++
 .github/workflows/studio-export-fix-ci.yml  |  62 +++++
 .github/workflows/wheel-smoke.yml           | 118 +++++++++
 7 files changed, 890 insertions(+)
 create mode 100644 .github/workflows/consolidated-tests-ci.yml
 create mode 100644 .github/workflows/lint-ci.yml
 create mode 100644 .github/workflows/mlx-ci.yml
 create mode 100644 .github/workflows/security-audit.yml
 create mode 100644 .github/workflows/stale.yml
 create mode 100644 .github/workflows/studio-export-fix-ci.yml
 create mode 100644 .github/workflows/wheel-smoke.yml

diff --git a/.github/workflows/consolidated-tests-ci.yml b/.github/workflows/consolidated-tests-ci.yml
new file mode 100644
index 000000000..6ab589c20
--- /dev/null
+++ b/.github/workflows/consolidated-tests-ci.yml
@@ -0,0 +1,255 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# Python compatibility + repo test gate. Adapted from unsloth's consolidated-tests-ci.yml.
+# Jobs: python-version-collect (pytest --collect-only on 3.10-3.13), repo-tests-cpu
+# (tests/security HARD GATE + CPU-pure zoo tests), core-upstream-matrix (HF/TRL/peft
+# drift detector across 3 cells -- the high-value zoo coverage).
+
+name: Tests CI
+
+on:
+  pull_request:
+  push:
+    branches: [main]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  # Python compatibility: pytest --collect-only per interpreter.
+  python-version-collect:
+    name: (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ['3.10', '3.11', '3.12', '3.13']
+    steps:
+      - name: Harden runner (audit)
+        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'
+
+      - name: Install CPU-only torch + zoo runtime deps
+        # CPU index avoids the multi-GB CUDA wheel set. `--no-deps unsloth`
+        # satisfies the find_spec("unsloth") guard at unsloth_zoo/__init__.py:128.
+        run: |
+          python -m pip install --upgrade pip
+          pip install --index-url https://download.pytorch.org/whl/cpu \
+            "torch>=2.4.0,<2.11.0"
+          pip install -e .[core]
+          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
+          pip install pytest==9.0.3
+
+      - name: pytest --collect-only
+        continue-on-error: true
+        run: python -m pytest tests/ --collect-only -q
+
+  # CPU-only repo tests. HARD GATE on tests/security.
+  repo-tests-cpu:
+    name: Repo tests (CPU)
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+      - name: Harden runner (audit)
+        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - name: Install runtime + test deps
+        # --no-deps unsloth satisfies the find_spec("unsloth") guard at unsloth_zoo/__init__.py:128.
+        run: |
+          python -m pip install --upgrade pip
+          pip install --index-url https://download.pytorch.org/whl/cpu \
+            "torch>=2.4.0,<2.11.0"
+          pip install -e .[core]
+          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
+          pip install pytest==9.0.3 pyyaml==6.0.2
+
+      - name: pytest tests/security (HARD GATE)
+        run: python -m pytest tests/security -v
+
+      - name: pytest tests/test_pr_a_imports + zoo-specific CPU tests
+        # Run as SEPARATE pytest invocation: tests/security/conftest.py installs a
+        # session-scoped network_blocker autouse fixture that would otherwise block
+        # test_pypi_version_sync from reaching pypi.org.
+        continue-on-error: true
+        run: |
+          python -m pytest \
+            tests/test_pr_a_imports.py \
+            tests/test_rl_replacements_cpu.py \
+            tests/test_temporary_patches_imports.py \
+            tests/test_zoo_history_regressions.py \
+            tests/test_pypi_version_sync.py \
+            -v
+
+  # Core (HF/TRL/peft) drift matrix. Three cells: HF=4.57.6+TRL<1, HF=latest+TRL=latest,
+  # and pyproject defaults. fail-fast=false; drift in one cell shouldn't cancel others.
+  core-upstream-matrix:
+    name: "Core (${{ matrix.combo.label }})"
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    strategy:
+      fail-fast: false
+      matrix:
+        combo:
+          - id: t4576-trl0latest
+            label: "HF=4.57.6 + TRL<1"
+            transformers_spec: "transformers==4.57.6"
+            trl_spec: "trl>=0.18.2,<1.0.0"
+            peft_spec: "peft>=0.18,<0.20"
+          - id: tlatest5-trl1latest
+            label: "HF=latest + TRL=latest"
+            transformers_spec: "transformers>=5,<6"
+            trl_spec: "trl>=1,<2"
+            peft_spec: "peft"
+          - id: pyproject
+            label: "HF=default + TRL=default"
+            transformers_spec: "__from_pyproject__"
+            trl_spec: "__from_pyproject__"
+            peft_spec: "__from_pyproject__"
+    env:
+      MATRIX_TRANSFORMERS_SPEC: ${{ matrix.combo.transformers_spec }}
+      MATRIX_TRL_SPEC: ${{ matrix.combo.trl_spec }}
+      MATRIX_PEFT_SPEC: ${{ matrix.combo.peft_spec }}
+      MATRIX_COMBO_ID: ${{ matrix.combo.id }}
+      # Pure-Python protobuf parser; transformers' bundled *_pb2.py is rejected by C++ protobuf 4+/5+.
+      PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python
+      UNSLOTH_COMPILE_DISABLE: '1'
+      # Secondary handshake after find_spec("unsloth") guard at unsloth_zoo/__init__.py:128.
+      UNSLOTH_IS_PRESENT: '1'
+    steps:
+      - name: Harden runner (audit)
+        # audit (not block): matrix pulls arbitrary transformers/TRL/peft pins.
+        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - name: Resolve matrix specs (handle __from_pyproject__ sentinel)
+        # Resolve transformers/trl/peft from pyproject.toml when the sentinel is used.
+        run: |
+          set -euxo pipefail
+          python <<'PY' >> "$GITHUB_ENV"
+          import os, re, tomllib
+          spec_t = os.environ["MATRIX_TRANSFORMERS_SPEC"]
+          spec_r = os.environ["MATRIX_TRL_SPEC"]
+          spec_p = os.environ["MATRIX_PEFT_SPEC"]
+
+          def _pkg_name(spec: str) -> str:
+              m = re.match(r"\s*([A-Za-z0-9_.-]+)", spec)
+              return (m.group(1).lower() if m else "")
+
+          if "__from_pyproject__" in (spec_t, spec_r, spec_p):
+              with open("pyproject.toml", "rb") as f:
+                  doc = tomllib.load(f)
+              proj = doc.get("project", {})
+              all_deps: list[str] = list(proj.get("dependencies", []))
+              for _name, dep_list in proj.get("optional-dependencies", {}).items():
+                  all_deps.extend(dep_list)
+
+              # Strip environment markers so the resolved spec is pip-installable.
+              def _strip_marker(s: str) -> str:
+                  return s.split(";", 1)[0].strip()
+
+              if spec_t == "__from_pyproject__":
+                  spec_t = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "transformers"),
+                                "transformers")
+              if spec_r == "__from_pyproject__":
+                  spec_r = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "trl"),
+                                "trl")
+              if spec_p == "__from_pyproject__":
+                  spec_p = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "peft"),
+                                "peft")
+          print(f"RESOLVED_TRANSFORMERS_SPEC={spec_t}")
+          print(f"RESOLVED_TRL_SPEC={spec_r}")
+          print(f"RESOLVED_PEFT_SPEC={spec_p}")
+          PY
+          grep RESOLVED_ "$GITHUB_ENV" || true
+
+      - name: Install torch CPU + zoo + matrix-specified upstream libs
+        # Two-phase: `-e .[core]` for pyproject defaults, then `-U <RESOLVED_*>` to override.
+        # The -U is critical so pip will downgrade transformers (e.g. cell-1 pin 4.57.6).
+        # --no-deps unsloth satisfies the find_spec guard at unsloth_zoo/__init__.py:128.
+        run: |
+          set -euxo pipefail
+          python -m pip install --upgrade pip
+          pip install --index-url https://download.pytorch.org/whl/cpu \
+            "torch>=2.4.0,<2.11.0" "torchvision<0.26"
+          # torchvision: transitive import of transformers.models.qwen2_vl
+          # / qwen2_5_vl image processors. The Qwen2_VL image processor that
+          # zoo references imports `from torchvision...` at module top, so a
+          # missing torchvision turns the existence-probe drift tests RED
+          # with "ModuleNotFoundError: No module named 'torchvision'".
+          # CPU build is plenty; we don't need the CUDA variant.
+          pip install -e .[core]
+          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
+          # Override with matrix-resolved specs.
+          pip install -U "$RESOLVED_TRANSFORMERS_SPEC" "$RESOLVED_TRL_SPEC" "$RESOLVED_PEFT_SPEC"
+          # bitsandbytes: imported at module scope in saving_utils.py (_active_merge_device path).
+          pip install 'bitsandbytes>=0.45'
+          # IPython + ipywidgets: logging_utils.py:50 imports transformers.utils.notebook.
+          # Required so drift detector only fires on real drift, not missing CI deps.
+          pip install 'ipython>=8' 'ipywidgets>=8'
+          pip install pytest==9.0.3 packaging
+          echo "::group::Installed transformers + trl + peft + torch versions"
+          pip show transformers
+          pip show trl
+          pip show peft
+          pip show torch
+          echo "::endgroup::"
+
+      - name: pytest upstream-regression suite (94 pinned + 117 expanded)
+        # 626 drift-detector tests / cell across the 16 files listed below. HARD GATE:
+        # a red cell means real upstream drift (transformers/trl/peft/vllm/datasets
+        # renamed or removed a symbol zoo references). Mined from zoo PRs #4 through #635.
+        run: |
+          python -m pytest -v --tb=short -rs \
+            tests/test_upstream_pinned_symbols_transformers.py \
+            tests/test_upstream_pinned_symbols_trl_vllm.py \
+            tests/test_upstream_pinned_symbols_accelerator.py \
+            tests/test_zoo_history_regressions_deep.py \
+            tests/test_upstream_import_fixes_drift.py \
+            tests/test_zoo_source_upstream_refs.py \
+            tests/test_upstream_signatures.py \
+            tests/test_extended_dep_api_pins.py \
+            tests/test_upstream_source_patterns.py \
+            tests/test_compiler_rewriter_exhaustive.py \
+            tests/test_compiler_dynamic_exec.py \
+            tests/test_temporary_patches_exhaustive.py \
+            tests/test_unsloth_zoo_lora_merge.py \
+            tests/test_peft_paramwrapper_layout_drift.py \
+            tests/test_transformers_moe_structure_drift.py \
+            tests/test_moe_merge_e2e_cpu.py
diff --git a/.github/workflows/lint-ci.yml b/.github/workflows/lint-ci.yml
new file mode 100644
index 000000000..75446a499
--- /dev/null
+++ b/.github/workflows/lint-ci.yml
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# Whole-repo Python source-lint gate. Adapted from unsloth's lint-ci.yml:
+# Python (compileall + narrow ruff) + YAML/JSON round-trip. Dropped vs unsloth:
+# shell lint (zoo has no committed *.sh), TypeScript/Rust (Studio/Tauri are unsloth-side).
+
+name: Lint CI
+
+on:
+  pull_request:
+  push:
+    branches: [main]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  source-lint:
+    name: Source lint (Python + YAML + JSON)
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Harden runner (audit)
+        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - run: pip install 'ruff==0.15.12' 'pyyaml>=6'
+
+      - name: Python AST/syntax check (every committed .py must compile)
+        # continue-on-error during CI bootstrap: pyproject.toml declares
+        # `requires-python = ">=3.9,<3.15"` but temporary_patches/gpt_oss.py
+        # uses a 3.10+ `match` statement. Tracked as a separate cleanup PR.
+        continue-on-error: true
+        run: |
+          python -m compileall -q -j 0 unsloth_zoo tests scripts
+
+      - name: Python ruff check (narrow gate)
+        # E9 / F63 / F7 / F82: syntax errors, broken comparisons, undefined names.
+        # continue-on-error during CI bootstrap: first run on main surfaced 13
+        # latent findings (rl_replacements.py L1128 F821, gpt_oss match-on-3.9).
+        continue-on-error: true
+        run: |
+          ruff check --select E9,F63,F7,F82 unsloth_zoo tests scripts
+
+      - name: No leftover debugger / pdb / breakpoint calls
+        # Catches `import pdb`, `pdb.set_trace()`, `breakpoint()`, `import ipdb`.
+        # continue-on-error during bootstrap: rl_replacements.py has a
+        # `#breakpoint()` comment the regex matches (# is [^A-Za-z_]).
+        continue-on-error: true
+        run: |
+          set -e
+          if grep -rnE '(^|[^A-Za-z_])(pdb\.set_trace|breakpoint)\(|^import (pdb|ipdb)$|^from (pdb|ipdb) import' \
+              --include='*.py' unsloth_zoo scripts; then
+            echo "::error::Leftover debugger call found above. Remove it." >&2
+            exit 1
+          fi
+
+      - name: YAML round-trip for every committed YAML
+        run: |
+          python <<'PY'
+          import pathlib, sys, yaml
+          fails = []
+          for p in pathlib.Path(".").rglob("*.yml"):
+              if any(part.startswith(".") and part not in (".github",) for part in p.parts):
+                  continue
+              try:
+                  yaml.safe_load(p.read_text())
+              except Exception as exc:
+                  fails.append(f"{p}: {exc}")
+          for p in pathlib.Path(".").rglob("*.yaml"):
+              if any(part.startswith(".") and part not in (".github",) for part in p.parts):
+                  continue
+              try:
+                  yaml.safe_load(p.read_text())
+              except Exception as exc:
+                  fails.append(f"{p}: {exc}")
+          if fails:
+              for f in fails:
+                  print("::error::", f)
+              sys.exit(1)
+          print(f"YAML round-trip OK")
+          PY
+
+      - name: JSON round-trip for every committed JSON
+        run: |
+          python <<'PY'
+          import pathlib, json, sys
+          fails = []
+          for p in pathlib.Path(".").rglob("*.json"):
+              if any(part in (".git", "node_modules", "__pycache__", "build", "dist") for part in p.parts):
+                  continue
+              try:
+                  json.loads(p.read_text())
+              except Exception as exc:
+                  fails.append(f"{p}: {exc}")
+          if fails:
+              for f in fails:
+                  print("::error::", f)
+              sys.exit(1)
+          print("JSON round-trip OK")
+          PY
+
+      - name: enforce kwargs spacing
+        # Style rule mirrored from unsloth: kwargs use `name = value` not `name=value`.
+        continue-on-error: true
+        run: |
+          python3 scripts/enforce_kwargs_spacing.py unsloth_zoo
diff --git a/.github/workflows/mlx-ci.yml b/.github/workflows/mlx-ci.yml
new file mode 100644
index 000000000..3df8be9d9
--- /dev/null
+++ b/.github/workflows/mlx-ci.yml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# MLX-specific CI on macOS arm64 (Apple Silicon) so mlx / mlx-lm / mlx-vlm wheels
+# resolve. Installs `unsloth_zoo[mlx]`, smoke-imports unsloth_zoo/mlx_*.py modules,
+# runs tests/test_mlx_torch_shim_smoke.py. Opt-in via `mlx` label to save macOS minutes.
+
+name: MLX CI on Mac M1
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, labeled]
+  workflow_dispatch:
+  schedule:
+    # Daily @ 04:23 UTC -- off the security-audit cron rush at 04:13.
+    - cron: '23 4 * * *'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  mlx-smoke:
+    name: MLX install + import smoke (Apple Silicon)
+    # Opt-in: schedule / workflow_dispatch always run; PR runs only with `mlx` label.
+    if: >-
+      github.event_name == 'schedule' ||
+      github.event_name == 'workflow_dispatch' ||
+      contains(github.event.pull_request.labels.*.name, 'mlx')
+    runs-on: macos-14   # Apple Silicon (M1) hosted runner
+    timeout-minutes: 30
+    steps:
+      # harden-runner block-mode is Linux-only; stay in audit on macOS for parity.
+      - name: Harden runner (audit)
+        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - name: Install zoo with MLX extras
+        # pyproject gates MLX deps on darwin+arm64; `.[mlx]` picks them up
+        # without the torch-on-Linux-CUDA path.
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .[mlx]
+          pip install pytest==9.0.3
+
+      - name: MLX module import smoke
+        run: |
+          python -c "import unsloth_zoo.mlx_loader; print('mlx_loader OK')"
+          python -c "import unsloth_zoo.mlx_compile; print('mlx_compile OK')"
+          python -c "import unsloth_zoo.mlx_utils; print('mlx_utils OK')"
+          python -c "import unsloth_zoo.mlx_trainer; print('mlx_trainer OK')"
+          python -c "import unsloth_zoo.mlx_cce; print('mlx_cce OK')"
+
+      - name: tests/test_mlx_torch_shim_smoke.py
+        # Exercises the MLX-on-torch shim end-to-end against the real mlx runtime
+        # on Apple Silicon; on Linux runners it would run against tests/mlx_simulation/ stubs.
+        run: python -m pytest tests/test_mlx_torch_shim_smoke.py -v
diff --git a/.github/workflows/security-audit.yml b/.github/workflows/security-audit.yml
new file mode 100644
index 000000000..28a73eed0
--- /dev/null
+++ b/.github/workflows/security-audit.yml
@@ -0,0 +1,226 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# Pure-Python supply-chain audit for unsloth_zoo. Mirrors unslothai/unsloth's
+# security-audit.yml with npm/Cargo/Studio jobs stripped (zoo is pure Python).
+# Jobs: advisory-audit (pip-audit + trufflehog), pip-scan-packages (transitive
+# closure pattern scan), workflow-trigger-lint, tests-security (HARD GATE).
+
+name: Security audit
+
+on:
+  pull_request:
+    paths:
+      - 'pyproject.toml'
+      - 'scripts/scan_packages.py'
+      - 'scripts/lint_workflow_triggers.py'
+      - 'tests/security/**'
+      - '.github/workflows/security-audit.yml'
+  push:
+    branches: [main]
+  schedule:
+    - cron: '13 4 * * *'   # 04:13 UTC daily, off the cron rush
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  # Advisory-DB audit: pip-audit + trufflehog. Non-blocking while baseline settles.
+  advisory-audit:
+    name: advisory audit (pip + secrets)
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - name: Harden runner (egress block)
+        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
+        with:
+          egress-policy: block
+          disable-sudo: true
+          allowed-endpoints: >
+            api.github.com:443
+            github.com:443
+            codeload.github.com:443
+            objects.githubusercontent.com:443
+            pypi.org:443
+            files.pythonhosted.org:443
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          fetch-depth: 0   # trufflehog needs full history for diff scans
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+
+      - name: Install pip-audit
+        run: python -m pip install --upgrade pip pip-audit
+
+      - name: Build filtered requirements set
+        # Reads pyproject.toml deps + extras into a flat requirements file.
+        # git+ specs are skipped (advisory-DB can't resolve them).
+        run: |
+          mkdir -p audit-reqs
+          python <<'PY' > audit-reqs/zoo-deps.txt
+          import tomllib
+          with open("pyproject.toml", "rb") as f:
+              d = tomllib.load(f)
+          core = d["project"]["dependencies"]
+          all_extras = []
+          for extra_name, specs in d["project"].get("optional-dependencies", {}).items():
+              # Skip self-referential extras like "huggingface = ['unsloth_zoo[core]']".
+              all_extras += [s for s in specs if "unsloth_zoo" not in s]
+          print("# Auto-generated from pyproject.toml by security-audit.yml.")
+          for spec in core + all_extras:
+              if "git+" in spec:
+                  print(f"# [security-audit] skipped git+ spec: {spec}")
+                  continue
+              print(spec)
+          PY
+
+      - name: pip-audit (advisory DB lookup)
+        continue-on-error: true
+        run: pip-audit --requirement audit-reqs/zoo-deps.txt --disable-pip --strict || true
+
+      - name: Trufflehog secret scan
+        continue-on-error: true
+        uses: trufflesecurity/trufflehog@17456f8c7d042d8c82c9a8ca9e937231f9f42e26  # v3.95.2
+        with:
+          base: ${{ github.event.repository.default_branch }}
+          head: HEAD
+          extra_args: --only-verified
+
+  # pip-scan-packages: downloads every PyPI archive in zoo's transitive closure and
+  # pattern-scans (catches the malicious-upload class that precedes CVE publication).
+  pip-scan-packages:
+    name: pip scan-packages (zoo transitive closure)
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    steps:
+      - name: Harden runner (egress block)
+        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
+        with:
+          egress-policy: block
+          disable-sudo: true
+          allowed-endpoints: >
+            api.github.com:443
+            github.com:443
+            codeload.github.com:443
+            objects.githubusercontent.com:443
+            pypi.org:443
+            files.pythonhosted.org:443
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - name: Install scan_packages.py runtime deps
+        # requests + packaging for PyPI's JSON API. Scanned packages are
+        # downloaded raw and inspected, never `pip install`-ed.
+        run: python -m pip install --upgrade pip requests packaging
+
+      - name: Build filtered requirements set
+        run: |
+          mkdir -p audit-reqs
+          python <<'PY' > audit-reqs/zoo-deps.txt
+          import tomllib
+          with open("pyproject.toml", "rb") as f:
+              d = tomllib.load(f)
+          core = d["project"]["dependencies"]
+          all_extras = []
+          for extra_name, specs in d["project"].get("optional-dependencies", {}).items():
+              all_extras += [s for s in specs if "unsloth_zoo" not in s]
+          print("# Auto-generated from pyproject.toml by security-audit.yml.")
+          for spec in core + all_extras:
+              if "git+" in spec:
+                  print(f"# [security-audit] skipped git+ spec: {spec}")
+                  continue
+              print(spec)
+          PY
+
+      - name: scan-packages (with deps)
+        continue-on-error: true
+        # --with-deps makes scan transitive. Archives are downloaded and
+        # pattern-scanned WITHOUT installing -- malicious wheels cannot execute.
+        run: python3 scripts/scan_packages.py --requirements audit-reqs/zoo-deps.txt --with-deps
+
+  # workflow-trigger-lint: refuses pull_request_target with PR-head checkout,
+  # restricted workflow_run without justification, and cache-key collisions.
+  workflow-trigger-lint:
+    name: workflow-trigger lint (pull_request_target / cache-poisoning)
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Harden runner (egress block)
+        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
+        with:
+          egress-policy: block
+          disable-sudo: true
+          allowed-endpoints: >
+            api.github.com:443
+            github.com:443
+            codeload.github.com:443
+            objects.githubusercontent.com:443
+            pypi.org:443
+            files.pythonhosted.org:443
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+
+      - name: Install PyYAML
+        run: pip install pyyaml==6.0.2
+
+      - name: Run workflow-trigger lint
+        run: python3 scripts/lint_workflow_triggers.py
+
+  # HARD GATE: regression tests for scanner + lint scripts. Drift in IOC tables
+  # or scanner exit semantics fails this PR at review time.
+  tests-security:
+    name: pytest tests/security
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - name: Harden runner (egress block)
+        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
+        with:
+          egress-policy: block
+          disable-sudo: true
+          allowed-endpoints: >
+            api.github.com:443
+            github.com:443
+            codeload.github.com:443
+            objects.githubusercontent.com:443
+            pypi.org:443
+            files.pythonhosted.org:443
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+
+      - name: Install pytest + PyYAML
+        # PyYAML needed by scripts/lint_workflow_triggers.py, exercised via subprocess
+        # by tests/security/test_lint_workflow_triggers.py. (See unsloth PR #5397: without
+        # pyyaml the lint script exits 2.)
+        run: pip install pytest==9.0.3 pyyaml==6.0.2
+
+      - name: Run security regression tests
+        run: python3 -m pytest tests/security -v
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 000000000..1a4cf841d
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,37 @@
+name: 'Inactive Issue Pinger'
+
+on:
+  schedule:
+    - cron: '30 5 * * *' # Runs at 5:30 UTC every day
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+
+    steps:
+      - uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f  # v10.2.0
+        with:
+          # The message to post on stale issues.
+          # This message will ping the issue author.
+          # Note: The stale bot action does not currently support a direct placeholder for the last commenter.
+          # As a workaround, this message encourages any participant to reply.
+          stale-issue-message: >
+            Is this issue still important to you?
+            Apologies in advance we might have missed this issue as well.
+            For faster response times, please post on our Reddit server - https://www.reddit.com/r/unsloth or our Discord - https://discord.com/invite/unsloth 
+
+          # The number of days of inactivity before an issue is considered stale.
+          days-before-issue-stale: 9999
+
+          # Set to -1 to never close stale issues.
+          days-before-issue-close: -1
+
+          # A label to apply to stale issues.
+          stale-issue-label: 'inactive'
+
+          # The number of operations to perform per run to avoid rate limiting.
+          operations-per-run: 500
+
+          enable-statistics: false
diff --git a/.github/workflows/studio-export-fix-ci.yml b/.github/workflows/studio-export-fix-ci.yml
new file mode 100644
index 000000000..699b78d16
--- /dev/null
+++ b/.github/workflows/studio-export-fix-ci.yml
@@ -0,0 +1,62 @@
+name: studio-export-fix-ci
+
+on:
+  push:
+    branches: [main, nightly]
+    paths:
+      - "unsloth_zoo/llama_cpp.py"
+      - "tests/test_quantize_gguf_q2_k_l.py"
+      - "tests/test_convert_hf_to_gguf_patcher.py"
+      - ".github/workflows/studio-export-fix-ci.yml"
+  pull_request:
+    paths:
+      - "unsloth_zoo/llama_cpp.py"
+      - "tests/test_quantize_gguf_q2_k_l.py"
+      - "tests/test_convert_hf_to_gguf_patcher.py"
+      - ".github/workflows/studio-export-fix-ci.yml"
+
+concurrency:
+  group: studio-export-fix-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  studio-export-fix:
+    name: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      # Cap matrix at 3 in flight so Windows stays under the repo-level
+      # 5-concurrent-Windows-runner limit when this job runs alongside others.
+      max-parallel: 3
+      matrix:
+        os: [ubuntu-latest, macos-14, windows-latest]
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 15
+    env:
+      # 5000/h vs 60/h on raw.githubusercontent.com for the live-upstream tests.
+      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      UNSLOTH_COMPILE_DISABLE: '1'
+      PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install minimal test deps
+        run: |
+          python -m pip install --upgrade pip
+          # Pure-Python tests: monkeypatch subprocess + AST-parse upstream files.
+          # No torch / transformers needed. Keep slim so Windows cold start stays under a minute.
+          python -m pip install pytest psutil requests tqdm
+
+      - name: Run patcher + q2_k_l unit tests
+        shell: bash
+        run: |
+          pytest -v \
+            tests/test_quantize_gguf_q2_k_l.py \
+            tests/test_convert_hf_to_gguf_patcher.py
diff --git a/.github/workflows/wheel-smoke.yml b/.github/workflows/wheel-smoke.yml
new file mode 100644
index 000000000..626e8dccb
--- /dev/null
+++ b/.github/workflows/wheel-smoke.yml
@@ -0,0 +1,118 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# Build PyPI wheel + sdist, verify content sanity, import-smoke in a clean venv.
+# Adapted from unsloth's wheel-smoke.yml; zoo's content checks: package present,
+# no tests/ shipped, no stray .pyc, real version string, import smoke succeeds.
+
+name: Wheel CI
+
+on:
+  pull_request:
+    paths:
+      - 'pyproject.toml'
+      - 'unsloth_zoo/**'
+      - 'tests/**'
+      - '.github/workflows/wheel-smoke.yml'
+  push:
+    branches: [main]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  wheel:
+    name: Wheel build + content sanity + import smoke
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - name: Harden runner (audit)
+        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+
+      - name: Build wheel + sdist
+        run: |
+          python -m pip install --upgrade pip build
+          rm -rf dist build ./*.egg-info
+          python -m build
+
+      - name: Wheel content sanity
+        run: |
+          python - <<'PY'
+          import zipfile, glob, sys, re
+          wheels = glob.glob("dist/unsloth_zoo-*.whl")
+          if not wheels:
+              print("FAIL: no wheel produced"); sys.exit(2)
+          w = wheels[0]
+          print(f"wheel: {w}")
+          # Version sanity: dynamic metadata pulls from unsloth_zoo.__init__.__version__.
+          m = re.match(r"dist/unsloth_zoo-([^-]+)-py3-none-any\.whl", w)
+          version = m.group(1) if m else None
+          print(f"wheel version: {version}")
+          with zipfile.ZipFile(w) as z:
+              n = z.namelist()
+              # Hard checks: must hold for any zoo release wheel.
+              hard_checks = {
+                "unsloth_zoo/__init__.py shipped":      any(s == "unsloth_zoo/__init__.py" for s in n),
+                "unsloth_zoo/rl_replacements.py shipped": any(s == "unsloth_zoo/rl_replacements.py" for s in n),
+                "unsloth_zoo/temporary_patches/__init__.py shipped": any(s == "unsloth_zoo/temporary_patches/__init__.py" for s in n),
+                "no .pyc files":                        not any(s.endswith(".pyc") for s in n),
+                "no .git tree":                         not any(s.startswith(".git/") for s in n),
+                "version is not 0.0.0":                 version is not None and version != "0.0.0",
+                "METADATA present":                     any(s.endswith(".dist-info/METADATA") for s in n),
+              }
+              # Soft checks (warn only). Zoo's pyproject doesn't exclude tests/scripts;
+              # tightening the packaging config is a separate follow-up.
+              soft_checks = {
+                "no tests/ shipped":                    not any(s.startswith("tests/") for s in n),
+                "no scripts/ shipped":                  not any(s.startswith("scripts/") for s in n),
+              }
+              print("Hard checks:")
+              for k, v in hard_checks.items():
+                  print(f"  [{'PASS' if v else 'FAIL'}] {k}")
+              print()
+              print("Soft checks (warnings):")
+              for k, v in soft_checks.items():
+                  status = "PASS" if v else "WARN"
+                  print(f"  [{status}] {k}")
+              # Exit non-zero ONLY if a hard check failed.
+              sys.exit(0 if all(hard_checks.values()) else 1)
+          PY
+
+      - name: Import smoke (clean venv)
+        # unsloth_zoo/__init__.py:128 raises ImportError when parent `unsloth` is
+        # absent (deliberate guardrail). A bare `import unsloth_zoo` in a wheel-only
+        # venv will fail by design, so the smoke pivots to reading the version
+        # string from dist-info METADATA via importlib.metadata.
+        run: |
+          python -m venv /tmp/v
+          /tmp/v/bin/pip install --upgrade pip
+          /tmp/v/bin/pip install dist/unsloth_zoo-*.whl
+          # Read version from dist-info METADATA via importlib.metadata.
+          WHEEL_VERSION=$(/tmp/v/bin/python -c "
+          from importlib.metadata import version
+          print(version('unsloth_zoo'))
+          ")
+          echo "installed unsloth_zoo version: $WHEEL_VERSION"
+          test -n "$WHEEL_VERSION" && test "$WHEEL_VERSION" != "0.0.0"
+
+      - name: Upload wheel on failure
+        if: failure()
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
+        with:
+          name: unsloth-zoo-wheel
+          path: dist/
+          retention-days: 7

From 5f1b3052df324f3b08b0bc519698e72b959a12ef Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 15:46:28 +0000
Subject: [PATCH 08/12] Guard Metal backward kernel against NaN-poisoned lse
 for PR #682

The Metal dlogits kernel uses fast::exp(capped - lse[row]). When
_poison_invalid_targets sets lse[row] to NaN for invalid labels, the
gradient is only NaN if that NaN propagates through fast::exp. But
fast:: math is not IEEE 754 strict (MSL spec 6.5.1) and may return
a finite value, leaving the forward loss NaN but the gradient
silently wrong. Add an isnan(lse[row]) guard that emits NaN via
0.0f/0.0f (Metal C++ rejects the literal 'nan' token).

Also tighten the invalid-labels gradient test. It previously asserted
only that _stable_norm(grads) is NaN, which is trivially satisfied
because grad_weight is all-NaN whenever any invalid row exists.
It now explicitly checks for a finite grad_hidden on the valid row,
NaN on the invalid row, and zero on the ignore_index row.
---
 tests/test_mlx_runtime_cce_compile.py | 12 +++++++++++-
 unsloth_zoo/mlx/cce/runtime_cce.py    | 10 ++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/tests/test_mlx_runtime_cce_compile.py b/tests/test_mlx_runtime_cce_compile.py
index 72a13f12f..30c3cbf42 100644
--- a/tests/test_mlx_runtime_cce_compile.py
+++ b/tests/test_mlx_runtime_cce_compile.py
@@ -154,11 +154,21 @@ def loss_fn(h, w):
         return runtime_cce(h, w, targets).astype(mx.float32).sum()
 
     loss, grads = mx.value_and_grad(loss_fn, argnums=(0, 1))(hidden, weight)
+    grad_hidden, grad_weight = grads
     grad_norm = _stable_norm(grads)
-    mx.eval(loss, grad_norm)
+    mx.eval(loss, grad_norm, grad_hidden)
 
     assert math.isnan(loss.item())
     assert math.isnan(grad_norm.item())
+    # explicit: invalid-label row poisons its OWN grad_hidden row with NaN
+    # while valid and ignore_index rows must stay finite. Without this,
+    # an NaN leak from _poison_invalid_targets into valid rows' lse would
+    # be hidden by grad_weight's NaN contamination of grad_norm.
+    grad_hidden_rows = mx.sum(mx.abs(grad_hidden).astype(mx.float32), axis=1)
+    mx.eval(grad_hidden_rows)
+    assert math.isfinite(grad_hidden_rows[0].item()), "valid row must have finite grad_hidden"
+    assert math.isnan(grad_hidden_rows[1].item()), "invalid row must have NaN grad_hidden"
+    assert grad_hidden_rows[2].item() == pytest.approx(0.0), "ignore_index row must zero-grad"
 
 
 @pytest.mark.parametrize("bad_target", [-1, 32])
diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py
index 194df35ea..f67be9982 100644
--- a/unsloth_zoo/mlx/cce/runtime_cce.py
+++ b/unsloth_zoo/mlx/cce/runtime_cce.py
@@ -418,6 +418,16 @@ def _build_dlogits_kernel() -> Callable:
                 continue;
             }
 
+            // invalid-label rows arrive with lse[row]=NaN from
+            // _poison_invalid_targets. fast::exp() is not IEEE-754 strict
+            // (MSL spec 6.5.1) so NaN propagation is not guaranteed; emit
+            // an explicit NaN gradient via 0/0 (Metal C++ rejects the
+            // literal token 'nan').
+            if (isnan(lse[row])) {
+                d_logits[elem] = 0.0f / 0.0f;
+                continue;
+            }
+
             int global_v = v_start + int(col);
             float raw = logits[elem];
             float capped = raw;

From 8e32c6c31628d82ab22c9aa85a45192afd893cce Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 17:37:09 +0000
Subject: [PATCH 09/12] Reject non-flat targets up front for PR #682

Rank-2 (n, 1) or scalar targets slipped past the length check and crashed
deep inside the kernels with backend-specific errors. Add a 1D guard at
the top of _runtime_cce_loss_and_aux so callers get a clear ValueError.
---
 tests/test_mlx_runtime_cce_compile.py | 21 +++++++++++++++++++++
 unsloth_zoo/mlx/cce/runtime_cce.py    |  7 +++++++
 2 files changed, 28 insertions(+)

diff --git a/tests/test_mlx_runtime_cce_compile.py b/tests/test_mlx_runtime_cce_compile.py
index 30c3cbf42..e7625f86c 100644
--- a/tests/test_mlx_runtime_cce_compile.py
+++ b/tests/test_mlx_runtime_cce_compile.py
@@ -57,6 +57,27 @@ def test_runtime_cce_zero_tokens_with_non_empty_targets_raises():
         runtime_cce(hidden, weight, targets)
 
 
+def test_runtime_cce_rejects_non_flat_targets():
+    # rank-2 (n, 1) targets would slip past the length check and crash inside
+    # the kernels; reject up front with a clear ValueError instead.
+    _skip_torch_shim()
+    from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
+
+    runtime_cce, _ = make_chunked_cross_entropy_loss(
+        ignore_index=-100,
+        chunk_size=16,
+    )
+    hidden = mx.zeros((4, 16), dtype=mx.float32)
+    weight = mx.zeros((32, 16), dtype=mx.float32)
+    targets_2d = mx.zeros((4, 1), dtype=mx.int32)
+    targets_scalar = mx.array(0, dtype=mx.int32)
+
+    with pytest.raises(ValueError, match="flat 1D vector"):
+        runtime_cce(hidden, weight, targets_2d)
+    with pytest.raises(ValueError, match="flat 1D vector"):
+        runtime_cce(hidden, weight, targets_scalar)
+
+
 def test_runtime_cce_zero_tokens_returns_empty_losses_and_zero_gradients():
     _skip_torch_shim()
     from unsloth_zoo.mlx.cce import make_chunked_cross_entropy_loss
diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py
index f67be9982..4fbca9013 100644
--- a/unsloth_zoo/mlx/cce/runtime_cce.py
+++ b/unsloth_zoo/mlx/cce/runtime_cce.py
@@ -495,6 +495,13 @@ def _forward_chunked_fused_finalize(
 
     n, _ = hidden_compute.shape
     vocab_size = weight_compute.shape[0]
+    # targets must be a flat 1D vector; rank-2 inputs like (n, 1) would slip
+    # past the length check and explode later inside kernels.
+    if len(targets.shape) != 1:
+        raise ValueError(
+            "MLX CCE: targets must be a flat 1D vector "
+            f"(hidden.shape={hidden_compute.shape}, targets.shape={targets.shape})."
+        )
     if n == 0:
         # surface upstream shape mismatch instead of silently dropping labels.
         if targets.shape[0] != 0:

From 69115d82092bfbb1b901deb32deb9357fbd04af4 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 18:16:37 +0000
Subject: [PATCH 10/12] Scrub .github/workflows for staging push (matches
 staging base)

---
 .github/workflows/consolidated-tests-ci.yml | 255 --------------------
 .github/workflows/lint-ci.yml               | 122 ----------
 .github/workflows/mlx-ci.yml                |  70 ------
 .github/workflows/security-audit.yml        | 226 -----------------
 .github/workflows/stale.yml                 |  37 ---
 .github/workflows/studio-export-fix-ci.yml  |  62 -----
 .github/workflows/wheel-smoke.yml           | 118 ---------
 7 files changed, 890 deletions(-)
 delete mode 100644 .github/workflows/consolidated-tests-ci.yml
 delete mode 100644 .github/workflows/lint-ci.yml
 delete mode 100644 .github/workflows/mlx-ci.yml
 delete mode 100644 .github/workflows/security-audit.yml
 delete mode 100644 .github/workflows/stale.yml
 delete mode 100644 .github/workflows/studio-export-fix-ci.yml
 delete mode 100644 .github/workflows/wheel-smoke.yml

diff --git a/.github/workflows/consolidated-tests-ci.yml b/.github/workflows/consolidated-tests-ci.yml
deleted file mode 100644
index 6ab589c20..000000000
--- a/.github/workflows/consolidated-tests-ci.yml
+++ /dev/null
@@ -1,255 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# Python compatibility + repo test gate. Adapted from unsloth's consolidated-tests-ci.yml.
-# Jobs: python-version-collect (pytest --collect-only on 3.10-3.13), repo-tests-cpu
-# (tests/security HARD GATE + CPU-pure zoo tests), core-upstream-matrix (HF/TRL/peft
-# drift detector across 3 cells -- the high-value zoo coverage).
-
-name: Tests CI
-
-on:
-  pull_request:
-  push:
-    branches: [main]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  # Python compatibility: pytest --collect-only per interpreter.
-  python-version-collect:
-    name: (Python ${{ matrix.python-version }})
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ['3.10', '3.11', '3.12', '3.13']
-    steps:
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: ${{ matrix.python-version }}
-          cache: 'pip'
-
-      - name: Install CPU-only torch + zoo runtime deps
-        # CPU index avoids the multi-GB CUDA wheel set. `--no-deps unsloth`
-        # satisfies the find_spec("unsloth") guard at unsloth_zoo/__init__.py:128.
-        run: |
-          python -m pip install --upgrade pip
-          pip install --index-url https://download.pytorch.org/whl/cpu \
-            "torch>=2.4.0,<2.11.0"
-          pip install -e .[core]
-          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
-          pip install pytest==9.0.3
-
-      - name: pytest --collect-only
-        continue-on-error: true
-        run: python -m pytest tests/ --collect-only -q
-
-  # CPU-only repo tests. HARD GATE on tests/security.
-  repo-tests-cpu:
-    name: Repo tests (CPU)
-    runs-on: ubuntu-latest
-    timeout-minutes: 20
-    steps:
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Install runtime + test deps
-        # --no-deps unsloth satisfies the find_spec("unsloth") guard at unsloth_zoo/__init__.py:128.
-        run: |
-          python -m pip install --upgrade pip
-          pip install --index-url https://download.pytorch.org/whl/cpu \
-            "torch>=2.4.0,<2.11.0"
-          pip install -e .[core]
-          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
-          pip install pytest==9.0.3 pyyaml==6.0.2
-
-      - name: pytest tests/security (HARD GATE)
-        run: python -m pytest tests/security -v
-
-      - name: pytest tests/test_pr_a_imports + zoo-specific CPU tests
-        # Run as SEPARATE pytest invocation: tests/security/conftest.py installs a
-        # session-scoped network_blocker autouse fixture that would otherwise block
-        # test_pypi_version_sync from reaching pypi.org.
-        continue-on-error: true
-        run: |
-          python -m pytest \
-            tests/test_pr_a_imports.py \
-            tests/test_rl_replacements_cpu.py \
-            tests/test_temporary_patches_imports.py \
-            tests/test_zoo_history_regressions.py \
-            tests/test_pypi_version_sync.py \
-            -v
-
-  # Core (HF/TRL/peft) drift matrix. Three cells: HF=4.57.6+TRL<1, HF=latest+TRL=latest,
-  # and pyproject defaults. fail-fast=false; drift in one cell shouldn't cancel others.
-  core-upstream-matrix:
-    name: "Core (${{ matrix.combo.label }})"
-    runs-on: ubuntu-latest
-    timeout-minutes: 30
-    strategy:
-      fail-fast: false
-      matrix:
-        combo:
-          - id: t4576-trl0latest
-            label: "HF=4.57.6 + TRL<1"
-            transformers_spec: "transformers==4.57.6"
-            trl_spec: "trl>=0.18.2,<1.0.0"
-            peft_spec: "peft>=0.18,<0.20"
-          - id: tlatest5-trl1latest
-            label: "HF=latest + TRL=latest"
-            transformers_spec: "transformers>=5,<6"
-            trl_spec: "trl>=1,<2"
-            peft_spec: "peft"
-          - id: pyproject
-            label: "HF=default + TRL=default"
-            transformers_spec: "__from_pyproject__"
-            trl_spec: "__from_pyproject__"
-            peft_spec: "__from_pyproject__"
-    env:
-      MATRIX_TRANSFORMERS_SPEC: ${{ matrix.combo.transformers_spec }}
-      MATRIX_TRL_SPEC: ${{ matrix.combo.trl_spec }}
-      MATRIX_PEFT_SPEC: ${{ matrix.combo.peft_spec }}
-      MATRIX_COMBO_ID: ${{ matrix.combo.id }}
-      # Pure-Python protobuf parser; transformers' bundled *_pb2.py is rejected by C++ protobuf 4+/5+.
-      PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python
-      UNSLOTH_COMPILE_DISABLE: '1'
-      # Secondary handshake after find_spec("unsloth") guard at unsloth_zoo/__init__.py:128.
-      UNSLOTH_IS_PRESENT: '1'
-    steps:
-      - name: Harden runner (audit)
-        # audit (not block): matrix pulls arbitrary transformers/TRL/peft pins.
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Resolve matrix specs (handle __from_pyproject__ sentinel)
-        # Resolve transformers/trl/peft from pyproject.toml when the sentinel is used.
-        run: |
-          set -euxo pipefail
-          python <<'PY' >> "$GITHUB_ENV"
-          import os, re, tomllib
-          spec_t = os.environ["MATRIX_TRANSFORMERS_SPEC"]
-          spec_r = os.environ["MATRIX_TRL_SPEC"]
-          spec_p = os.environ["MATRIX_PEFT_SPEC"]
-
-          def _pkg_name(spec: str) -> str:
-              m = re.match(r"\s*([A-Za-z0-9_.-]+)", spec)
-              return (m.group(1).lower() if m else "")
-
-          if "__from_pyproject__" in (spec_t, spec_r, spec_p):
-              with open("pyproject.toml", "rb") as f:
-                  doc = tomllib.load(f)
-              proj = doc.get("project", {})
-              all_deps: list[str] = list(proj.get("dependencies", []))
-              for _name, dep_list in proj.get("optional-dependencies", {}).items():
-                  all_deps.extend(dep_list)
-
-              # Strip environment markers so the resolved spec is pip-installable.
-              def _strip_marker(s: str) -> str:
-                  return s.split(";", 1)[0].strip()
-
-              if spec_t == "__from_pyproject__":
-                  spec_t = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "transformers"),
-                                "transformers")
-              if spec_r == "__from_pyproject__":
-                  spec_r = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "trl"),
-                                "trl")
-              if spec_p == "__from_pyproject__":
-                  spec_p = next((_strip_marker(x) for x in all_deps if _pkg_name(x) == "peft"),
-                                "peft")
-          print(f"RESOLVED_TRANSFORMERS_SPEC={spec_t}")
-          print(f"RESOLVED_TRL_SPEC={spec_r}")
-          print(f"RESOLVED_PEFT_SPEC={spec_p}")
-          PY
-          grep RESOLVED_ "$GITHUB_ENV" || true
-
-      - name: Install torch CPU + zoo + matrix-specified upstream libs
-        # Two-phase: `-e .[core]` for pyproject defaults, then `-U <RESOLVED_*>` to override.
-        # The -U is critical so pip will downgrade transformers (e.g. cell-1 pin 4.57.6).
-        # --no-deps unsloth satisfies the find_spec guard at unsloth_zoo/__init__.py:128.
-        run: |
-          set -euxo pipefail
-          python -m pip install --upgrade pip
-          pip install --index-url https://download.pytorch.org/whl/cpu \
-            "torch>=2.4.0,<2.11.0" "torchvision<0.26"
-          # torchvision: transitive import of transformers.models.qwen2_vl
-          # / qwen2_5_vl image processors. The Qwen2_VL image-processor
-          # zoo references chains through `from torchvision...` at module
-          # top, so a missing torchvision turns the existence-probe drift
-          # tests RED on "ModuleNotFoundError: No module named 'torchvision'".
-          # CPU build is plenty; we don't need the CUDA variant.
-          pip install -e .[core]
-          pip install --no-deps "unsloth @ git+https://github.com/unslothai/unsloth@main" || true
-          # Override with matrix-resolved specs.
-          pip install -U "$RESOLVED_TRANSFORMERS_SPEC" "$RESOLVED_TRL_SPEC" "$RESOLVED_PEFT_SPEC"
-          # bitsandbytes: imported at module scope in saving_utils.py (_active_merge_device path).
-          pip install 'bitsandbytes>=0.45'
-          # IPython + ipywidgets: logging_utils.py:50 imports transformers.utils.notebook.
-          # Required so drift detector only fires on real drift, not missing CI deps.
-          pip install 'ipython>=8' 'ipywidgets>=8'
-          pip install pytest==9.0.3 packaging
-          echo "::group::Installed transformers + trl + peft + torch versions"
-          pip show transformers
-          pip show trl
-          pip show peft
-          pip show torch
-          echo "::endgroup::"
-
-      - name: pytest upstream-regression suite (94 pinned + 117 expanded)
-        # 626 drift-detector tests / cell across 12 files. HARD GATE: a red cell
-        # means real upstream drift (transformers/trl/peft/vllm/datasets renamed
-        # or removed a symbol zoo references). Zoo PRs #4 through #635 mined.
-        run: |
-          python -m pytest -v --tb=short -rs \
-            tests/test_upstream_pinned_symbols_transformers.py \
-            tests/test_upstream_pinned_symbols_trl_vllm.py \
-            tests/test_upstream_pinned_symbols_accelerator.py \
-            tests/test_zoo_history_regressions_deep.py \
-            tests/test_upstream_import_fixes_drift.py \
-            tests/test_zoo_source_upstream_refs.py \
-            tests/test_upstream_signatures.py \
-            tests/test_extended_dep_api_pins.py \
-            tests/test_upstream_source_patterns.py \
-            tests/test_compiler_rewriter_exhaustive.py \
-            tests/test_compiler_dynamic_exec.py \
-            tests/test_temporary_patches_exhaustive.py \
-            tests/test_unsloth_zoo_lora_merge.py \
-            tests/test_peft_paramwrapper_layout_drift.py \
-            tests/test_transformers_moe_structure_drift.py \
-            tests/test_moe_merge_e2e_cpu.py
diff --git a/.github/workflows/lint-ci.yml b/.github/workflows/lint-ci.yml
deleted file mode 100644
index 75446a499..000000000
--- a/.github/workflows/lint-ci.yml
+++ /dev/null
@@ -1,122 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# Whole-repo Python source-lint gate. Adapted from unsloth's lint-ci.yml:
-# Python (compileall + narrow ruff) + YAML/JSON round-trip. Dropped vs unsloth:
-# shell lint (zoo has no committed *.sh), TypeScript/Rust (Studio/Tauri are unsloth-side).
-
-name: Lint CI
-
-on:
-  pull_request:
-  push:
-    branches: [main]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  source-lint:
-    name: Source lint (Python + YAML + JSON)
-    runs-on: ubuntu-latest
-    timeout-minutes: 5
-    steps:
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - run: pip install 'ruff==0.15.12' 'pyyaml>=6'
-
-      - name: Python AST/syntax check (every committed .py must compile)
-        # continue-on-error during CI bootstrap: pyproject.toml declares
-        # `requires-python = ">=3.9,<3.15"` but temporary_patches/gpt_oss.py
-        # uses a 3.10+ `match` statement. Tracked as a separate cleanup PR.
-        continue-on-error: true
-        run: |
-          python -m compileall -q -j 0 unsloth_zoo tests scripts
-
-      - name: Python ruff check (narrow gate)
-        # E9 / F63 / F7 / F82: syntax errors, broken comparisons, undefined names.
-        # continue-on-error during CI bootstrap: first run on main surfaced 13
-        # latent findings (rl_replacements.py L1128 F821, gpt_oss match-on-3.9).
-        continue-on-error: true
-        run: |
-          ruff check --select E9,F63,F7,F82 unsloth_zoo tests scripts
-
-      - name: No leftover debugger / pdb / breakpoint calls
-        # Catches `import pdb`, `pdb.set_trace()`, `breakpoint()`, `import ipdb`.
-        # continue-on-error during bootstrap: rl_replacements.py has a
-        # `#breakpoint()` comment the regex matches (# is [^A-Za-z_]).
-        continue-on-error: true
-        run: |
-          set -e
-          if grep -rnE '(^|[^A-Za-z_])(pdb\.set_trace|breakpoint)\(|^import (pdb|ipdb)$|^from (pdb|ipdb) import' \
-              --include='*.py' unsloth_zoo scripts; then
-            echo "::error::Leftover debugger call found above. Remove it." >&2
-            exit 1
-          fi
-
-      - name: YAML round-trip for every committed YAML
-        run: |
-          python <<'PY'
-          import pathlib, sys, yaml
-          fails = []
-          for p in pathlib.Path(".").rglob("*.yml"):
-              if any(part.startswith(".") and part not in (".github",) for part in p.parts):
-                  continue
-              try:
-                  yaml.safe_load(p.read_text())
-              except Exception as exc:
-                  fails.append(f"{p}: {exc}")
-          for p in pathlib.Path(".").rglob("*.yaml"):
-              if any(part.startswith(".") and part not in (".github",) for part in p.parts):
-                  continue
-              try:
-                  yaml.safe_load(p.read_text())
-              except Exception as exc:
-                  fails.append(f"{p}: {exc}")
-          if fails:
-              for f in fails:
-                  print("::error::", f)
-              sys.exit(1)
-          print(f"YAML round-trip OK")
-          PY
-
-      - name: JSON round-trip for every committed JSON
-        run: |
-          python <<'PY'
-          import pathlib, json, sys
-          fails = []
-          for p in pathlib.Path(".").rglob("*.json"):
-              if any(part in (".git", "node_modules", "__pycache__", "build", "dist") for part in p.parts):
-                  continue
-              try:
-                  json.loads(p.read_text())
-              except Exception as exc:
-                  fails.append(f"{p}: {exc}")
-          if fails:
-              for f in fails:
-                  print("::error::", f)
-              sys.exit(1)
-          print("JSON round-trip OK")
-          PY
-
-      - name: enforce kwargs spacing
-        # Style rule mirrored from unsloth: kwargs use `name = value` not `name=value`.
-        continue-on-error: true
-        run: |
-          python3 scripts/enforce_kwargs_spacing.py unsloth_zoo
diff --git a/.github/workflows/mlx-ci.yml b/.github/workflows/mlx-ci.yml
deleted file mode 100644
index 3df8be9d9..000000000
--- a/.github/workflows/mlx-ci.yml
+++ /dev/null
@@ -1,70 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# MLX-specific CI on macOS arm64 (Apple Silicon) so mlx / mlx-lm / mlx-vlm wheels
-# resolve. Installs `unsloth_zoo[mlx]`, smoke-imports unsloth_zoo/mlx_*.py modules,
-# runs tests/test_mlx_torch_shim_smoke.py. Opt-in via `mlx` label to save macOS minutes.
-
-name: MLX CI on Mac M1
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened, labeled]
-  workflow_dispatch:
-  schedule:
-    # Daily @ 04:23 UTC -- off the security-audit cron rush at 04:13.
-    - cron: '23 4 * * *'
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  mlx-smoke:
-    name: MLX install + import smoke (Apple Silicon)
-    # Opt-in: schedule / workflow_dispatch always run; PR runs only with `mlx` label.
-    if: >-
-      github.event_name == 'schedule' ||
-      github.event_name == 'workflow_dispatch' ||
-      contains(github.event.pull_request.labels.*.name, 'mlx')
-    runs-on: macos-14   # Apple Silicon (M1) hosted runner
-    timeout-minutes: 30
-    steps:
-      # harden-runner block-mode is Linux-only; stay in audit on macOS for parity.
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Install zoo with MLX extras
-        # pyproject gates MLX deps on darwin+arm64; `.[mlx]` picks them up
-        # without the torch-on-Linux-CUDA path.
-        run: |
-          python -m pip install --upgrade pip
-          pip install -e .[mlx]
-          pip install pytest==9.0.3
-
-      - name: MLX module import smoke
-        run: |
-          python -c "import unsloth_zoo.mlx_loader; print('mlx_loader OK')"
-          python -c "import unsloth_zoo.mlx_compile; print('mlx_compile OK')"
-          python -c "import unsloth_zoo.mlx_utils; print('mlx_utils OK')"
-          python -c "import unsloth_zoo.mlx_trainer; print('mlx_trainer OK')"
-          python -c "import unsloth_zoo.mlx_cce; print('mlx_cce OK')"
-
-      - name: tests/test_mlx_torch_shim_smoke.py
-        # Exercises the MLX-on-torch shim end-to-end against the real mlx runtime
-        # on Apple Silicon; on Linux runners it would run against tests/mlx_simulation/ stubs.
-        run: python -m pytest tests/test_mlx_torch_shim_smoke.py -v
diff --git a/.github/workflows/security-audit.yml b/.github/workflows/security-audit.yml
deleted file mode 100644
index 28a73eed0..000000000
--- a/.github/workflows/security-audit.yml
+++ /dev/null
@@ -1,226 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# Pure-Python supply-chain audit for unsloth_zoo. Mirrors unslothai/unsloth's
-# security-audit.yml with npm/Cargo/Studio jobs stripped (zoo is pure Python).
-# Jobs: advisory-audit (pip-audit + trufflehog), pip-scan-packages (transitive
-# closure pattern scan), workflow-trigger-lint, tests-security (HARD GATE).
-
-name: Security audit
-
-on:
-  pull_request:
-    paths:
-      - 'pyproject.toml'
-      - 'scripts/scan_packages.py'
-      - 'scripts/lint_workflow_triggers.py'
-      - 'tests/security/**'
-      - '.github/workflows/security-audit.yml'
-  push:
-    branches: [main]
-  schedule:
-    - cron: '13 4 * * *'   # 04:13 UTC daily, off the cron rush
-  workflow_dispatch:
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  # Advisory-DB audit: pip-audit + trufflehog. Non-blocking while baseline settles.
-  advisory-audit:
-    name: advisory audit (pip + secrets)
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    steps:
-      - name: Harden runner (egress block)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: block
-          disable-sudo: true
-          allowed-endpoints: >
-            api.github.com:443
-            github.com:443
-            codeload.github.com:443
-            objects.githubusercontent.com:443
-            pypi.org:443
-            files.pythonhosted.org:443
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          fetch-depth: 0   # trufflehog needs full history for diff scans
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-
-      - name: Install pip-audit
-        run: python -m pip install --upgrade pip pip-audit
-
-      - name: Build filtered requirements set
-        # Reads pyproject.toml deps + extras into a flat requirements file.
-        # git+ specs are skipped (advisory-DB can't resolve them).
-        run: |
-          mkdir -p audit-reqs
-          python <<'PY' > audit-reqs/zoo-deps.txt
-          import tomllib
-          with open("pyproject.toml", "rb") as f:
-              d = tomllib.load(f)
-          core = d["project"]["dependencies"]
-          all_extras = []
-          for extra_name, specs in d["project"].get("optional-dependencies", {}).items():
-              # Skip self-referential extras like "huggingface = ['unsloth_zoo[core]']".
-              all_extras += [s for s in specs if "unsloth_zoo" not in s]
-          print("# Auto-generated from pyproject.toml by security-audit.yml.")
-          for spec in core + all_extras:
-              if "git+" in spec:
-                  print(f"# [security-audit] skipped git+ spec: {spec}")
-                  continue
-              print(spec)
-          PY
-
-      - name: pip-audit (advisory DB lookup)
-        continue-on-error: true
-        run: pip-audit --requirement audit-reqs/zoo-deps.txt --disable-pip --strict || true
-
-      - name: Trufflehog secret scan
-        continue-on-error: true
-        uses: trufflesecurity/trufflehog@17456f8c7d042d8c82c9a8ca9e937231f9f42e26  # v3.95.2
-        with:
-          base: ${{ github.event.repository.default_branch }}
-          head: HEAD
-          extra_args: --only-verified
-
-  # pip-scan-packages: downloads every PyPI archive in zoo's transitive closure and
-  # pattern-scans (catches the malicious-upload class that precedes CVE publication).
-  pip-scan-packages:
-    name: pip scan-packages (zoo transitive closure)
-    runs-on: ubuntu-latest
-    timeout-minutes: 25
-    steps:
-      - name: Harden runner (egress block)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: block
-          disable-sudo: true
-          allowed-endpoints: >
-            api.github.com:443
-            github.com:443
-            codeload.github.com:443
-            objects.githubusercontent.com:443
-            pypi.org:443
-            files.pythonhosted.org:443
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-          cache: 'pip'
-
-      - name: Install scan_packages.py runtime deps
-        # requests + packaging for PyPI's JSON API. Scanned packages are
-        # downloaded raw and inspected, never `pip install`-ed.
-        run: python -m pip install --upgrade pip requests packaging
-
-      - name: Build filtered requirements set
-        run: |
-          mkdir -p audit-reqs
-          python <<'PY' > audit-reqs/zoo-deps.txt
-          import tomllib
-          with open("pyproject.toml", "rb") as f:
-              d = tomllib.load(f)
-          core = d["project"]["dependencies"]
-          all_extras = []
-          for extra_name, specs in d["project"].get("optional-dependencies", {}).items():
-              all_extras += [s for s in specs if "unsloth_zoo" not in s]
-          print("# Auto-generated from pyproject.toml by security-audit.yml.")
-          for spec in core + all_extras:
-              if "git+" in spec:
-                  print(f"# [security-audit] skipped git+ spec: {spec}")
-                  continue
-              print(spec)
-          PY
-
-      - name: scan-packages (with deps)
-        continue-on-error: true
-        # --with-deps makes scan transitive. Archives are downloaded and
-        # pattern-scanned WITHOUT installing -- malicious wheels cannot execute.
-        run: python3 scripts/scan_packages.py --requirements audit-reqs/zoo-deps.txt --with-deps
-
-  # workflow-trigger-lint: refuses pull_request_target with PR-head checkout,
-  # restricted workflow_run without justification, and cache-key collisions.
-  workflow-trigger-lint:
-    name: workflow-trigger lint (pull_request_target / cache-poisoning)
-    runs-on: ubuntu-latest
-    timeout-minutes: 5
-    steps:
-      - name: Harden runner (egress block)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: block
-          disable-sudo: true
-          allowed-endpoints: >
-            api.github.com:443
-            github.com:443
-            codeload.github.com:443
-            objects.githubusercontent.com:443
-            pypi.org:443
-            files.pythonhosted.org:443
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-
-      - name: Install PyYAML
-        run: pip install pyyaml==6.0.2
-
-      - name: Run workflow-trigger lint
-        run: python3 scripts/lint_workflow_triggers.py
-
-  # HARD GATE: regression tests for scanner + lint scripts. Drift in IOC tables
-  # or scanner exit semantics fails this PR at review time.
-  tests-security:
-    name: pytest tests/security
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-    steps:
-      - name: Harden runner (egress block)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: block
-          disable-sudo: true
-          allowed-endpoints: >
-            api.github.com:443
-            github.com:443
-            codeload.github.com:443
-            objects.githubusercontent.com:443
-            pypi.org:443
-            files.pythonhosted.org:443
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-
-      - name: Install pytest + PyYAML
-        # PyYAML needed by scripts/lint_workflow_triggers.py, exercised via subprocess
-        # by tests/security/test_lint_workflow_triggers.py. (See unsloth PR #5397: without
-        # pyyaml the lint script exits 2.)
-        run: pip install pytest==9.0.3 pyyaml==6.0.2
-
-      - name: Run security regression tests
-        run: python3 -m pytest tests/security -v
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
deleted file mode 100644
index 1a4cf841d..000000000
--- a/.github/workflows/stale.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-name: 'Inactive Issue Pinger'
-
-on:
-  schedule:
-    - cron: '30 5 * * *' # Runs at 5:30 UTC every day
-
-jobs:
-  stale:
-    runs-on: ubuntu-latest
-    permissions:
-      issues: write
-
-    steps:
-      - uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f  # v10.2.0
-        with:
-          # The message to post on stale issues.
-          # This message will ping the issue author.
-          # Note: The stale bot action does not currently support a direct placeholder for the last commenter.
-          # As a workaround, this message encourages any participant to reply.
-          stale-issue-message: >
-            Is this issue still important to you?
-            Apologies in advance we might have missed this issue as well.
-            For faster response times, please post on our Reddit server - https://www.reddit.com/r/unsloth or our Discord - https://discord.com/invite/unsloth 
-
-          # The number of days of inactivity before an issue is considered stale.
-          days-before-issue-stale: 9999
-
-          # Set to -1 to never close stale issues.
-          days-before-issue-close: -1
-
-          # A label to apply to stale issues.
-          stale-issue-label: 'inactive'
-
-          # The number of operations to perform per run to avoid rate limiting.
-          operations-per-run: 500
-
-          enable-statistics: false
diff --git a/.github/workflows/studio-export-fix-ci.yml b/.github/workflows/studio-export-fix-ci.yml
deleted file mode 100644
index 699b78d16..000000000
--- a/.github/workflows/studio-export-fix-ci.yml
+++ /dev/null
@@ -1,62 +0,0 @@
-name: studio-export-fix-ci
-
-on:
-  push:
-    branches: [main, nightly]
-    paths:
-      - "unsloth_zoo/llama_cpp.py"
-      - "tests/test_quantize_gguf_q2_k_l.py"
-      - "tests/test_convert_hf_to_gguf_patcher.py"
-      - ".github/workflows/studio-export-fix-ci.yml"
-  pull_request:
-    paths:
-      - "unsloth_zoo/llama_cpp.py"
-      - "tests/test_quantize_gguf_q2_k_l.py"
-      - "tests/test_convert_hf_to_gguf_patcher.py"
-      - ".github/workflows/studio-export-fix-ci.yml"
-
-concurrency:
-  group: studio-export-fix-${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  studio-export-fix:
-    name: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      # Cap matrix at 3 in flight so Windows stays under the repo-level
-      # 5-concurrent-Windows-runner limit when this job runs alongside others.
-      max-parallel: 3
-      matrix:
-        os: [ubuntu-latest, macos-14, windows-latest]
-    runs-on: ${{ matrix.os }}
-    timeout-minutes: 15
-    env:
-      # 5000/h vs 60/h on raw.githubusercontent.com for the live-upstream tests.
-      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      UNSLOTH_COMPILE_DISABLE: '1'
-      PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python
-    steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-          cache: pip
-
-      - name: Install minimal test deps
-        run: |
-          python -m pip install --upgrade pip
-          # Pure-Python tests: monkeypatch subprocess + AST-parse upstream files.
-          # No torch / transformers needed. Keep slim so Windows cold start stays under a minute.
-          python -m pip install pytest psutil requests tqdm
-
-      - name: Run patcher + q2_k_l unit tests
-        shell: bash
-        run: |
-          pytest -v \
-            tests/test_quantize_gguf_q2_k_l.py \
-            tests/test_convert_hf_to_gguf_patcher.py
diff --git a/.github/workflows/wheel-smoke.yml b/.github/workflows/wheel-smoke.yml
deleted file mode 100644
index 626e8dccb..000000000
--- a/.github/workflows/wheel-smoke.yml
+++ /dev/null
@@ -1,118 +0,0 @@
-# SPDX-License-Identifier: LGPL-3.0-or-later
-# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
-
-# Build PyPI wheel + sdist, verify content sanity, import-smoke in a clean venv.
-# Adapted from unsloth's wheel-smoke.yml; zoo's content checks: package present,
-# no tests/ shipped, no stray .pyc, real version string, import smoke succeeds.
-
-name: Wheel CI
-
-on:
-  pull_request:
-    paths:
-      - 'pyproject.toml'
-      - 'unsloth_zoo/**'
-      - 'tests/**'
-      - '.github/workflows/wheel-smoke.yml'
-  push:
-    branches: [main]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  wheel:
-    name: Wheel build + content sanity + import smoke
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    steps:
-      - name: Harden runner (audit)
-        uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450  # v2.19.1
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          persist-credentials: false
-
-      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
-        with:
-          python-version: '3.12'
-
-      - name: Build wheel + sdist
-        run: |
-          python -m pip install --upgrade pip build
-          rm -rf dist build ./*.egg-info
-          python -m build
-
-      - name: Wheel content sanity
-        run: |
-          python - <<'PY'
-          import zipfile, glob, sys, re
-          wheels = glob.glob("dist/unsloth_zoo-*.whl")
-          if not wheels:
-              print("FAIL: no wheel produced"); sys.exit(2)
-          w = wheels[0]
-          print(f"wheel: {w}")
-          # Version sanity: dynamic metadata pulls from unsloth_zoo.__init__.__version__.
-          m = re.match(r"dist/unsloth_zoo-([^-]+)-py3-none-any\.whl", w)
-          version = m.group(1) if m else None
-          print(f"wheel version: {version}")
-          with zipfile.ZipFile(w) as z:
-              n = z.namelist()
-              # Hard checks: must hold for any zoo release wheel.
-              hard_checks = {
-                "unsloth_zoo/__init__.py shipped":      any(s == "unsloth_zoo/__init__.py" for s in n),
-                "unsloth_zoo/rl_replacements.py shipped": any(s == "unsloth_zoo/rl_replacements.py" for s in n),
-                "unsloth_zoo/temporary_patches/__init__.py shipped": any(s == "unsloth_zoo/temporary_patches/__init__.py" for s in n),
-                "no .pyc files":                        not any(s.endswith(".pyc") for s in n),
-                "no .git tree":                         not any(s.startswith(".git/") for s in n),
-                "version is not 0.0.0":                 version is not None and version != "0.0.0",
-                "METADATA present":                     any(s.endswith(".dist-info/METADATA") for s in n),
-              }
-              # Soft checks (warn only). Zoo's pyproject doesn't exclude tests/scripts;
-              # tightening the packaging config is a separate follow-up.
-              soft_checks = {
-                "no tests/ shipped":                    not any(s.startswith("tests/") for s in n),
-                "no scripts/ shipped":                  not any(s.startswith("scripts/") for s in n),
-              }
-              print("Hard checks:")
-              for k, v in hard_checks.items():
-                  print(f"  [{'PASS' if v else 'FAIL'}] {k}")
-              print()
-              print("Soft checks (warnings):")
-              for k, v in soft_checks.items():
-                  status = "PASS" if v else "WARN"
-                  print(f"  [{status}] {k}")
-              # Exit non-zero ONLY if a hard check failed.
-              sys.exit(0 if all(hard_checks.values()) else 1)
-          PY
-
-      - name: Import smoke (clean venv)
-        # unsloth_zoo/__init__.py:128 raises ImportError when parent `unsloth` is
-        # absent (deliberate guardrail). A bare `import unsloth_zoo` in a wheel-only
-        # venv will fail by design, so the smoke pivots to reading the version
-        # string from dist-info METADATA via importlib.metadata.
-        run: |
-          python -m venv /tmp/v
-          /tmp/v/bin/pip install --upgrade pip
-          /tmp/v/bin/pip install dist/unsloth_zoo-*.whl
-          # Read version from dist-info METADATA via importlib.metadata.
-          WHEEL_VERSION=$(/tmp/v/bin/python -c "
-          from importlib.metadata import version
-          print(version('unsloth_zoo'))
-          ")
-          echo "installed unsloth_zoo version: $WHEEL_VERSION"
-          test -n "$WHEEL_VERSION" && test "$WHEEL_VERSION" != "0.0.0"
-
-      - name: Upload wheel on failure
-        if: failure()
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
-        with:
-          name: unsloth-zoo-wheel
-          path: dist/
-          retention-days: 7

From 0c4aef94dcf680d447cd4798b26eeed26a0bb32a Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 18:38:38 +0000
Subject: [PATCH 11/12] Document NaN-lse precondition for MLX CCE fallback
 dlogits

_fallback_dlogits relies on lse arriving pre-poisoned with NaN for
invalid-label rows (targets outside [0, vocab_size) and not equal to
ignore_index). It does not re-check vocab bounds; NaN propagation
through exp(capped - NaN) = NaN is what produces NaN gradients for
those rows. Add a comment so future callers do not accidentally pass
unpoisoned lse and silently get finite but wrong gradients.
---
 unsloth_zoo/mlx/cce/runtime_cce.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py
index 4fbca9013..1be83c191 100644
--- a/unsloth_zoo/mlx/cce/runtime_cce.py
+++ b/unsloth_zoo/mlx/cce/runtime_cce.py
@@ -627,6 +627,10 @@ def _forward_chunked_fused_finalize(
     raise RuntimeError("Unreachable: fused finalize path did not return outputs.")
 
 
+# PRECONDITION: lse must arrive pre-poisoned with NaN for invalid-label
+# rows (targets outside [0, vocab_size) and not equal to ignore_index).
+# This function does not re-check vocab bounds; NaN propagation through
+# exp(capped - NaN) = NaN is what produces NaN gradients for those rows.
 def _fallback_dlogits(
     logits: mx.array,
     lse: mx.array,

From 2b3a2cf356afe319b4dab39df77c7f1640a8f3c9 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Sun, 24 May 2026 19:34:17 +0000
Subject: [PATCH 12/12] Document poison precondition for MLX CCE
 forward-finalize kernel

The Metal forward-finalize kernel writes finite lse_out and loss_out
for every row, including out-of-vocab targets it cannot classify
because vocab_size is not a kernel input. The dlogits backward kernel
relies on the Python caller having applied _poison_invalid_targets to
both lse and loss before backward is invoked. Make that invariant
explicit at the kernel definition site so future fused or multi-GPU
paths that pass raw lse_out to the dlogits kernel will not silently
produce finite wrong gradients for invalid labels.
---
 unsloth_zoo/mlx/cce/runtime_cce.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/unsloth_zoo/mlx/cce/runtime_cce.py b/unsloth_zoo/mlx/cce/runtime_cce.py
index 1be83c191..5759aeecc 100644
--- a/unsloth_zoo/mlx/cce/runtime_cce.py
+++ b/unsloth_zoo/mlx/cce/runtime_cce.py
@@ -271,6 +271,13 @@ def _build_forward_update_kernel() -> Callable:
     )
 
 
+# INVARIANT: this kernel emits finite lse_out and loss_out for every row,
+# including out-of-vocab targets (target < 0 or target >= vocab_size,
+# excluding ignore_index). The kernel does not see vocab_size and cannot
+# classify invalid rows. Callers MUST apply _poison_invalid_targets(loss,
+# invalid) and _poison_invalid_targets(lse, invalid) immediately after
+# this kernel returns, before passing lse to the dlogits backward kernel.
+# Skipping the poison step silently produces finite but wrong gradients.
 def _build_forward_update_finalize_kernel() -> Callable:
     source = """
         uint gid = thread_position_in_grid.x;