diff --git a/.github/workflows/studio-backend-ci.yml b/.github/workflows/studio-backend-ci.yml
new file mode 100644
index 0000000000..5a858888e7
--- /dev/null
+++ b/.github/workflows/studio-backend-ci.yml
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# Runs the existing studio/backend/tests/ suite (~860 tests, all CPU-friendly)
+# on every PR that touches the backend or unsloth library. Until this lands,
+# none of those tests run automatically. Verified locally on Python 3.13 with
+# the surgical exclusions below: 861 pass, 4 skipped.
+#
+# Exclusions (partial -- the "Backend tests" step carries the full -k deselection list):
+#   - tests/test_studio_api.py: end-to-end against a live model + GGUF download,
+#     too heavy for free runners. Run separately when GPU CI is available.
+#   - -k 'not llama_cpp_load_progress_live': spawns a real llama.cpp process,
+#     not appropriate for CPU-only runners.
+#
+# ruff is non-blocking initially; remove `|| true` once the backend lints clean.
+
+name: Backend CI
+
+on:
+  pull_request:
+    paths:
+      - 'studio/**'
+      - 'unsloth/**'
+      - 'unsloth_cli/**'
+      - 'tests/**'
+      - 'pyproject.toml'
+      - '.github/workflows/studio-backend-ci.yml'
+  push:
+    branches: [main, pip]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  pytest:
+    name: (Python ${{ matrix.python }})
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    strategy:
+      fail-fast: false
+      matrix:
+        python: ['3.10', '3.11', '3.12', '3.13']
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '${{ matrix.python }}'
+          cache: 'pip'
+
+      - name: Install backend test dependencies (CPU only)
+        run: |
+          python -m pip install --upgrade pip
+          # Studio's declared backend deps:
+          pip install -r studio/backend/requirements/studio.txt
+          # Extras that studio.txt does not list but the import chain needs
+          # (python-multipart for FastAPI form/file uploads, sqlalchemy/cryptography
+          #  for the auth DB, yaml/jinja2 for utils.models.model_config, etc.):
+          pip install \
+            python-multipart aiofiles sqlalchemy cryptography \
+            pyyaml jinja2 mammoth unpdf requests \
+            'numpy<3' pytest pytest-asyncio httpx
+          # Torch CPU + transformers are required by a chunk of the backend test
+          # suite (gpu_selection, kv_cache_estimation, utils). CPU-only torch
+          # keeps the install ~250 MB / ~1 min on a clean runner.
+          pip install --index-url https://download.pytorch.org/whl/cpu 'torch>=2.4,<2.11'
+          pip install 'transformers>=4.51,<5.5'
+
+      - name: Backend tests
+        working-directory: studio/backend
+        # Locally validated against this dep set: 831 passed, 5 skipped, 35 deselected.
+        # Deselections (all environment-specific, would never pass on a GPU-less
+        # `ubuntu-latest` runner regardless of code correctness):
+        #   - llama_cpp_load_progress_live: spawns a real llama.cpp process
+        #   - TestGpuAutoSelection / TestPreSpawnGpuResolution / TestPerGpuFitGuardAllCounts:
+        #       require live transformers config introspection on real GPUs
+        #   - TestTransformersIntrospection: same
+        #   - test_returns_cuda_when_cuda_available / test_calls_cuda_cache_when_cuda:
+        #       assume CUDA-capable GPU
+        run: |
+          python -m pytest tests/ -q --tb=short \
+            --ignore=tests/test_studio_api.py \
+            -k 'not llama_cpp_load_progress_live and not TestGpuAutoSelection and not TestPreSpawnGpuResolution and not TestPerGpuFitGuardAllCounts and not TestTransformersIntrospection and not test_returns_cuda_when_cuda_available and not test_calls_cuda_cache_when_cuda'
+
+  repo-cpu-tests:
+    # Auto-discover everything under tests/ that is not GPU-bound by
+    # design. New tests added in covered directories are picked up
+    # without a workflow edit. Locally validated: 779 passed, 11
+    # skipped, 23 deselected. tests/conftest.py (mirroring unsloth-zoo
+    # PR #624) pre-loads unsloth_zoo.device_type and unsloth.device_type
+    # under a mocked torch.cuda.is_available so the unsloth import
+    # chain succeeds on CPU.
+    name: Repo tests (CPU)
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - name: Install deps (shared shape with backend pytest job)
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r studio/backend/requirements/studio.txt
+          pip install \
+            python-multipart aiofiles sqlalchemy cryptography \
+            pyyaml jinja2 mammoth unpdf requests typer \
+            'numpy<3' pytest pytest-asyncio httpx
+          # torchvision is needed because unsloth_zoo.vision_utils imports
+          # it at module scope and is reached via unsloth.models._utils.
+          pip install --index-url https://download.pytorch.org/whl/cpu \
+            'torch>=2.4,<2.11' 'torchvision<0.26'
+          pip install 'transformers>=4.51,<5.5'
+          # bitsandbytes is a hard import in unsloth/models/_utils.py.
+          # Recent versions ship a CPU build so it installs on a free
+          # Linux runner; the kernels still raise on use, but import
+          # succeeds and the package collects.
+          pip install 'bitsandbytes>=0.45'
+          # unsloth.device_type imports unsloth_zoo.utils.Version at module
+          # scope, so the conftest harness needs unsloth_zoo on the path
+          # even though it is an optional dep of unsloth.
+          pip install 'unsloth_zoo>=2026.5.1'
+          pip install -e . --no-deps
+
+      - name: Repo tests (CPU, auto-discovered)
+        env:
+          # tests/python/* import install_python_stack from studio/.
+          PYTHONPATH: ${{ github.workspace }}/studio
+          # Skip lazy compilation work the unsloth import chain wants to
+          # do at import time on a real GPU.
+          UNSLOTH_COMPILE_DISABLE: '1'
+        # --ignore: GPU-bound directories (qlora and saving need real
+        #   weights / GPU; tests/sh is a shell suite that the "Shell
+        #   installer tests" step runs; tests/utils is a helpers folder, not tests).
+        # State-sensitive hardware-spoofing files are pulled out and run
+        # in isolation in the next step because they mutate
+        # hardware.py module globals (IS_ROCM / DEVICE) and pollute
+        # downstream tests.
+        # -m: honour markers already declared in tests/python/conftest.py
+        #   (`server` = needs studio venv, `e2e` = needs network).
+        # --deselect: two registry tests that hit huggingface_hub for
+        #   live model existence checks; they belong on a network job.
+        run: |
+          python -m pytest tests/ -q --tb=short \
+            --ignore=tests/qlora \
+            --ignore=tests/saving \
+            --ignore=tests/utils \
+            --ignore=tests/sh \
+            --ignore=tests/studio/test_hardware_dispatch_matrix.py \
+            --ignore=tests/studio/test_is_mlx_dispatch_gate.py \
+            -m 'not server and not e2e' \
+            --deselect tests/test_model_registry.py::test_model_registration \
+            --deselect tests/test_model_registry.py::test_all_model_registration
+
+      - name: Hardware-spoof tests (state-sensitive, run in isolation)
+        env:
+          PYTHONPATH: ${{ github.workspace }}/studio
+          UNSLOTH_COMPILE_DISABLE: '1'
+        # These two files mutate hardware.py module globals at runtime
+        # via the spoof fixtures, which leaks state into any other test
+        # that imports hardware. Run them in their own pytest invocation
+        # so the leak does not cross file boundaries.
+        run: |
+          python -m pytest -q --tb=short \
+            tests/studio/test_hardware_dispatch_matrix.py \
+            tests/studio/test_is_mlx_dispatch_gate.py
+
+      - name: Shell installer tests
+        # Subset that does not depend on a writable / pristine install.sh
+        # tree; test_install_host_defaults.sh checks install.ps1 layout
+        # which has drifted (separate followup).
+        run: |
+          set -e
+          for s in \
+              tests/sh/test_get_torch_index_url.sh \
+              tests/sh/test_mac_intel_compat.sh \
+              tests/sh/test_tauri_install_exit_order.sh \
+              tests/sh/test_torch_constraint.sh; do
+              echo "::group::$s"
+              bash "$s"
+              echo "::endgroup::"
+          done
+
+  ruff:
+    name: Backend ruff lint (non-blocking)
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+      - run: pip install ruff
+      - name: ruff check (non-blocking until accumulated drift is cleared)
+        run: ruff check studio/backend || true
diff --git a/.github/workflows/studio-frontend-ci.yml b/.github/workflows/studio-frontend-ci.yml
new file mode 100644
index 0000000000..039bd5dd08
--- /dev/null
+++ b/.github/workflows/studio-frontend-ci.yml
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# Frontend PR gate: lockfile freshness, typecheck, build, and a bundle grep
+# that catches the 2026.5.1 chat-history regression at the JS level.
+#
+# biome runs as non-blocking for now: the codebase currently has accumulated
+# ~470 errors and ~1650 warnings against the existing biome config. Surfacing
+# the count in CI lets us drive it down without forcing a fleet-wide cleanup
+# in the same PR. Drop `continue-on-error` once that number is zero.
+
+name: Frontend CI
+
+on:
+  pull_request:
+    paths:
+      - 'studio/frontend/**'
+      - '.github/workflows/studio-frontend-ci.yml'
+  push:
+    branches: [main, pip]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: Frontend build + bundle sanity
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    defaults:
+      run:
+        working-directory: studio/frontend
+    steps:
+      - uses: actions/checkout@v4
+
+      # FIXME: drop this step once @assistant-ui/* and assistant-stream
+      # leave 0.x -- on 1.x, caret ranges are conventional. Until then,
+      # every 0.minor on this surface is a SemVer-major (this is exactly
+      # how 2026.5.1 shipped a broken chat runtime: ^0.12.19 quietly
+      # resolved to 0.12.28).
+      - name: '@assistant-ui must be pinned exactly (no caret/tilde)'
+        working-directory: ${{ github.workspace }}
+        run: |
+          set -e
+          if grep -nE '"(@assistant-ui/[a-z-]+|assistant-stream)":[[:space:]]*"[\^~]' studio/frontend/package.json; then
+            echo "::error file=studio/frontend/package.json::These packages must be pinned to exact versions until they leave 0.x. Drop the leading ^ or ~."
+            exit 1
+          fi
+          echo "All assistant-ui packages are pinned exactly."
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: 'npm'
+          cache-dependency-path: studio/frontend/package-lock.json
+
+      - name: Lockfile must agree with package.json (npm ci is strict)
+        run: npm ci --no-fund --no-audit
+
+      - name: npm ci must not have modified the working tree
+        working-directory: ${{ github.workspace }}
+        run: |
+          if ! git diff --quiet -- studio/frontend; then
+            echo "::error::npm ci modified files; commit the updated lockfile"
+            git status -- studio/frontend
+            exit 1
+          fi
+
+      - name: Typecheck
+        run: npm run typecheck
+
+      - name: Build
+        run: npm run build
+
+      - name: Built bundle must not contain Studio's unstable_Provider call site
+        run: |
+          set -eo pipefail  # pipefail: if no index-*.js exists, the ls|head pipeline (and the step) fails
+          JS=$(ls dist/assets/index-*.js | head -1)
+          HITS=$(grep -o 'unstable_Provider:' "$JS" | wc -l || true)  # count occurrences, not lines; grep exits 1 on zero hits
+          echo "main bundle: $JS"
+          echo "unstable_Provider: hits=$HITS (assistant-ui internals contribute up to 3)"
+          if [ "$HITS" -gt 3 ]; then
+            echo "::error file=studio/frontend/src/features/chat/runtime-provider.tsx::Studio bundle still passes unstable_Provider through useRemoteThreadListRuntime; this is the 2026.5.1 chat-history regression. Pass adapters directly into useLocalRuntime instead."
+            exit 1
+          fi
+
+      - name: Bundle size budget (75 MB)
+        run: |
+          SIZE=$(du -sb dist | cut -f1)
+          BUDGET=$((75 * 1024 * 1024))
+          echo "dist size: $SIZE bytes ($((SIZE/1024/1024)) MB), budget: $BUDGET bytes (75 MB)"
+          if [ "$SIZE" -gt "$BUDGET" ]; then
+            echo "::error::studio/frontend/dist/ exceeded the 75 MB budget. Drop dead deps (e.g. the unused next dep) or split chunks."
+            exit 1
+          fi
+
+      - name: Biome (non-blocking until accumulated drift is cleared)
+        continue-on-error: true
+        run: npm run biome:check
+
+      - name: Upload built dist on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: studio-frontend-dist
+          path: studio/frontend/dist
+          retention-days: 3
diff --git a/.github/workflows/studio-inference-smoke.yml b/.github/workflows/studio-inference-smoke.yml
new file mode 100644
index 0000000000..8efe072d28
--- /dev/null
+++ b/.github/workflows/studio-inference-smoke.yml
@@ -0,0 +1,185 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# End-to-end smoke: install Studio via install.sh --local --no-torch, download
+# a tiny GGUF, boot Studio, log in, change password, load the model, send a
+# chat completion, assert a non-empty response. Only workflow that tests "the
+# app actually works".
+#
+# Model: Qwen3.5-2B UD-IQ3_XXS (~890 MiB) -- small enough that the cache miss
+# is cheap and inference fits in the 25 min CPU-runner budget. GGUF is cached
+# across runs via actions/cache.
+
+name: Studio GGUF CI
+
+on:
+  pull_request:
+    paths:
+      - 'studio/**'
+      - 'unsloth/**'
+      - 'unsloth_cli/**'
+      - 'install.sh'
+      - 'pyproject.toml'
+      - '.github/workflows/studio-inference-smoke.yml'
+  push:
+    branches: [main, pip]
+  # Manual trigger for pre-warming the GGUF cache on main, or re-running
+  # against an arbitrary branch without pushing a no-op commit.
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  GGUF_REPO: unsloth/Qwen3.5-2B-GGUF
+  GGUF_FILE: Qwen3.5-2B-UD-IQ3_XXS.gguf
+  STUDIO_PORT: '18888'
+
+jobs:
+  inference:
+    name: Studio boots, loads a GGUF, answers a chat completion
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Linux dependencies for llama.cpp prebuilt
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends \
+            libcurl4-openssl-dev libssl-dev jq
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: 'npm'
+          cache-dependency-path: studio/frontend/package-lock.json
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - name: Cache GGUF model file
+        id: cache-gguf
+        uses: actions/cache@v4
+        with:
+          path: gguf-cache
+          key: ${{ runner.os }}-gguf-${{ env.GGUF_REPO }}-${{ env.GGUF_FILE }}-v1
+
+      - name: Download GGUF if cache miss
+        if: steps.cache-gguf.outputs.cache-hit != 'true'
+        run: |
+          # huggingface-cli was deprecated in huggingface_hub 1.13; the new CLI is `hf`.
+          python -m pip install --upgrade huggingface_hub hf_transfer
+          mkdir -p gguf-cache
+          HF_HUB_ENABLE_HF_TRANSFER=1 \
+            hf download "$GGUF_REPO" "$GGUF_FILE" --local-dir gguf-cache
+
+      - name: Install Studio (--local, --no-torch keeps the install lean)
+        run: |
+          mkdir -p logs
+          set -o pipefail
+          bash install.sh --local --no-torch 2>&1 | tee logs/install.log
+
+      - name: Assert llama.cpp prebuilt was installed (no source-build fallback)
+        # ubuntu-latest is CPU-only x86_64, so studio/setup.sh should route
+        # to ggml-org/llama.cpp and grab bin-ubuntu-x64.tar.gz. A source
+        # build here means the routing regressed.
+        run: |
+          if grep -q "falling back to source build" logs/install.log; then
+            echo "::error::llama.cpp prebuilt path failed on ubuntu-latest. studio/setup.sh routing regressed; CPU-only Linux x86_64 should hit ggml-org/llama.cpp's bin-ubuntu-x64.tar.gz."
+            grep -E "llama-prebuilt|llama.cpp" logs/install.log | tail -60
+            exit 1
+          fi
+          if ! grep -qE "prebuilt installed and validated|prebuilt up to date and validated" logs/install.log; then
+            echo "::error::install.log does not contain the success marker for the llama.cpp prebuilt path. Did setup.sh skip the prebuilt install?"
+            grep -E "llama-prebuilt|llama.cpp" logs/install.log | tail -60
+            exit 1
+          fi
+          echo "llama.cpp prebuilt path used successfully"
+
+      - name: Reset auth + start Studio in the background
+        run: |
+          unsloth studio reset-password
+          mkdir -p logs
+          UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \
+            > logs/studio.log 2>&1 &
+          echo "STUDIO_PID=$!" >> "$GITHUB_ENV"
+
+      - name: Wait for /api/health
+        run: |
+          for i in $(seq 1 60); do
+            if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json \
+                && jq -e '.status == "healthy"' /tmp/health.json > /dev/null; then  # keep polling while reachable but not yet healthy
+              echo "ready after ${i}s"
+              cat /tmp/health.json
+              exit 0
+            fi
+            sleep 1
+          done
+          echo "Studio did not become healthy in 60s"
+          tail -200 logs/studio.log
+          exit 1
+
+      - name: Login + change bootstrap password
+        run: |
+          PW=$(cat ~/.unsloth/studio/auth/.bootstrap_password)
+          NEW="CIPasswordSmoke12345!"
+          TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
+            -H 'content-type: application/json' \
+            -d "{\"username\":\"unsloth\",\"password\":\"$PW\"}" | jq -e -r .access_token)  # -e: abort if no token came back
+          curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \
+            -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
+            -d "{\"current_password\":\"$PW\",\"new_password\":\"$NEW\"}" > /dev/null
+          # Re-login to clear must_change_password flag.
+          NEW_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
+            -H 'content-type: application/json' \
+            -d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -e -r .access_token)  # -e: never export an empty/null token
+          echo "TOKEN=$NEW_TOKEN" >> "$GITHUB_ENV"
+
+      - name: Load the GGUF into Studio
+        run: |
+          GGUF_PATH="$GITHUB_WORKSPACE/gguf-cache/${GGUF_FILE}"
+          ls -lh "$GGUF_PATH"
+          curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \
+            -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
+            --max-time 600 \
+            -d "{\"model_path\":\"$GGUF_PATH\",\"is_lora\":false,\"max_seq_length\":2048}" \
+            | jq -e '{status, display_name, is_gguf, context_length}'  # -e: empty body (curl -f failure) exits non-zero instead of passing
+
+      - name: Send a chat completion + assert non-empty response
+        run: |
+          RESP=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/chat/completions" \
+            -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
+            --max-time 900 \
+            -d '{
+              "messages":[{"role":"user","content":"Say hello in one short sentence."}],
+              "max_tokens":40,
+              "stream":false
+            }')
+          echo "raw response: $RESP"
+          CONTENT=$(echo "$RESP" | jq -r '.choices[0].message.content // empty')
+          echo "model response: $CONTENT"
+          if [ -z "$CONTENT" ]; then
+            echo "::error::Empty assistant response from Studio"
+            exit 1
+          fi
+
+      - name: Stop Studio
+        if: always()
+        run: |
+          kill "${STUDIO_PID}" || true
+          sleep 2
+          ss -tln | grep ":${STUDIO_PORT}" || true
+
+      - name: Upload Studio + install logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: studio-inference-log
+          path: |
+            logs/studio.log
+            logs/install.log
+          retention-days: 7
diff --git a/.github/workflows/studio-tauri-smoke.yml b/.github/workflows/studio-tauri-smoke.yml
new file mode 100644
index 0000000000..fcc9c8d963
--- /dev/null
+++ b/.github/workflows/studio-tauri-smoke.yml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# PR-time smoke for the Tauri desktop wrapper. Builds the frontend and the
+# Tauri Linux debug binary, with no codesigning. Catches:
+#   - tauri.conf.json drift
+#   - src-tauri Cargo.toml or rust source breakage
+#   - Tauri CLI version drift (we pin 2.10.1, matching release-desktop.yml)
+#   - frontend output not picked up by Tauri's frontendDist (distDir in Tauri v1)
+#
+# Linux-only on a free `ubuntu-latest` runner. Mac and Windows desktop builds
+# stay in release-desktop.yml (manual `workflow_dispatch`) because they need
+# code-signing secrets and ~30 min of runner time each.
+
+name: Studio Tauri CI
+
+on:
+  pull_request:
+    paths:
+      - 'studio/frontend/**'
+      - 'studio/src-tauri/**'
+      - '.github/workflows/studio-tauri-smoke.yml'
+  push:
+    branches: [main, pip]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  linux-debug-build:
+    name: Tauri Linux debug build (no codesign)
+    runs-on: ubuntu-22.04
+    timeout-minutes: 25
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Linux native deps for Tauri / WebKit2GTK
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y \
+            libwebkit2gtk-4.1-dev libayatana-appindicator3-dev \
+            librsvg2-dev libxdo-dev libssl-dev patchelf
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '24'
+          cache: 'npm'
+          cache-dependency-path: studio/frontend/package-lock.json
+
+      - uses: dtolnay/rust-toolchain@stable
+
+      - uses: swatinem/rust-cache@v2
+        with:
+          workspaces: studio/src-tauri -> target
+
+      - name: Install pinned Tauri CLI (matches release-desktop.yml)
+        run: npm install --save-dev --prefix studio @tauri-apps/cli@2.10.1
+
+      - name: Verify pinned Tauri CLI version
+        run: |
+          out="$(npx --prefix studio tauri --version)"
+          echo "$out"
+          [ "$out" = "tauri-cli 2.10.1" ] || { echo "::error::expected tauri-cli 2.10.1, got $out"; exit 1; }
+
+      - name: Frontend build (npm ci, vite)
+        working-directory: studio/frontend
+        run: |
+          npm ci --no-fund --no-audit
+          npm run build
+          test -f dist/index.html
+
+      - name: Tauri debug build (Linux, no bundle, no codesign)
+        # `--debug` + `--no-bundle` keeps this lean: compiles the Rust crate,
+        # confirms the frontend dist is wired into Tauri, but skips the AppImage
+        # / .deb production. Code signing is irrelevant because we never produce
+        # a distributable artifact.
+        env:
+          TAURI_SIGNING_PRIVATE_KEY: ''
+          TAURI_SIGNING_PRIVATE_KEY_PASSWORD: ''
+        run: npx --prefix studio tauri build --debug --no-bundle
+
+      - name: Inspect produced binary
+        run: |
+          BIN=$(find studio/src-tauri/target/debug -maxdepth 1 -type f -executable 2>/dev/null \
+                | grep -Ev '\.(d|so|dylib|dll)$' \
+                | grep -Ev '/(deps|build|examples)$' \
+                | head -1)
+          echo "binary: $BIN"
+          if [ -z "$BIN" ]; then
+            echo "::error::Tauri debug binary not produced"
+            ls -la studio/src-tauri/target/debug/ || true
+            exit 1
+          fi
+          file "$BIN"
+          du -h "$BIN"
+
+      - uses: actions/upload-artifact@v4
+        if: failure()
+        with:
+          name: tauri-debug-build
+          path: |
+            studio/src-tauri/target/debug
+            studio/frontend/dist
+          retention-days: 3
diff --git a/.github/workflows/wheel-smoke.yml b/.github/workflows/wheel-smoke.yml
new file mode 100644
index 0000000000..080a6bb261
--- /dev/null
+++ b/.github/workflows/wheel-smoke.yml
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# Builds the PyPI wheel from the PR branch, then verifies the built wheel
+# actually contains what we expect to ship and does NOT contain the broken
+# Studio bundle that 2026.5.1 published. This is the single workflow that
+# would have blocked the 2026.5.1 release before twine upload.
+#
+# Verified locally end-to-end against this branch:
+#   - python -m build produces unsloth-<version>-py3-none-any.whl in 13s
+#   - wheel content sanity passes:
+#       lockfile shipped, frontend dist shipped,
+#       no node_modules in wheel, no bun.lock in wheel,
+#       main bundle has unstable_Provider hits=1 (assistant-ui internals only).
+#   - Studio backend imports cleanly from the installed wheel with the
+#     lightweight dep set below.
+
+name: Wheel CI
+
+on:
+  pull_request:
+    paths:
+      - 'pyproject.toml'
+      - 'studio/**'
+      - 'unsloth/**'
+      - 'unsloth_cli/**'
+      - '.github/workflows/wheel-smoke.yml'
+  push:
+    branches: [main, pip]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  wheel:
+    name: Wheel build + content sanity + import smoke
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: 'npm'
+          cache-dependency-path: studio/frontend/package-lock.json
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Build frontend
+        run: |
+          cd studio/frontend
+          npm ci --no-fund --no-audit
+          npm run build
+
+      - name: Build wheel + sdist
+        run: |
+          python -m pip install --upgrade pip build
+          rm -rf dist build ./*.egg-info
+          python -m build
+
+      - name: Wheel content sanity
+        run: |
+          python - <<'PY'
+          import zipfile, glob, sys
+          w = glob.glob("dist/unsloth-*.whl")
+          if not w:
+              print("FAIL: no wheel produced"); sys.exit(2)
+          w = w[0]
+          print(f"wheel: {w}")
+          with zipfile.ZipFile(w) as z:
+              n = z.namelist()
+              checks = {
+                "lockfile shipped":      any(s.endswith("studio/frontend/package-lock.json") for s in n),
+                "frontend dist shipped": any(s.endswith("studio/frontend/dist/index.html")    for s in n),
+                "no node_modules":       not any("studio/frontend/node_modules/" in s for s in n),
+                "no bun.lock":           not any(s.endswith("studio/frontend/bun.lock")       for s in n),
+              }
+              js = [s for s in n
+                    if "studio/frontend/dist/assets/" in s
+                    and s.endswith(".js")
+                    and "/index-" in s]
+              if not js:
+                  print("FAIL: no main bundle index-*.js in wheel"); sys.exit(2)
+              data = z.read(js[0]).decode("utf-8", "replace")
+              hits = data.count("unstable_Provider:")
+              print(f"main bundle: {js[0]}")
+              print(f"unstable_Provider hits: {hits} (>=4 indicates 2026.5.1 regression)")
+              checks["bundle has no Studio unstable_Provider call site"] = (hits < 4)
+
+              print()
+              for k, v in checks.items():
+                  print(f"  [{'PASS' if v else 'FAIL'}] {k}")
+              sys.exit(0 if all(checks.values()) else 1)
+          PY
+
+      - name: Studio backend import smoke
+        # Imports `studio.backend.main:app` from the freshly-installed wheel in
+        # a clean venv. This catches the class of bug that 2026.5.1 shipped with:
+        # frontend dist missing, package-lock.json missing, or the wheel's Python
+        # source tree broken in a way that surfaces only at app construction time.
+        run: |
+          python -m venv /tmp/v
+          /tmp/v/bin/pip install --upgrade pip
+          /tmp/v/bin/pip install -r studio/backend/requirements/studio.txt
+          /tmp/v/bin/pip install \
+            python-multipart aiofiles sqlalchemy cryptography \
+            pyyaml jinja2 mammoth unpdf requests \
+            'numpy<3'
+          /tmp/v/bin/pip install --no-deps dist/unsloth-*.whl
+          # Run from /tmp so Python imports the installed package, not the source tree.
+          cd /tmp
+          /tmp/v/bin/python -c "from studio.backend.main import app; print('Studio backend OK:', app.title)"
+
+      - name: Upload wheel on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: unsloth-wheel
+          path: dist/
+          retention-days: 7
diff --git a/unsloth/models/rl_replacements.py b/unsloth/models/rl_replacements.py
index 0f10847282..0611b63361 100755
--- a/unsloth/models/rl_replacements.py
+++ b/unsloth/models/rl_replacements.py
@@ -1045,6 +1045,7 @@ def _get_per_token_logps_and_entropies(
                 kwargs.get("pixel_attention_mask", None),
                 kwargs.get("image_sizes", None),
             )
+            num_images = kwargs.get("num_images", None)
             # Transformers 5.x needs token_type_ids/mm_token_type_ids for some vision models
             token_type_ids = kwargs.get("token_type_ids", None)
             mm_token_type_ids = kwargs.get("mm_token_type_ids", None)
@@ -1099,65 +1100,95 @@ def _get_per_token_logps_and_entropies(
             else:
                 max_left_pad = 0
 
-            # input_ids_chunks = torch.chunk(input_ids, chunks = B, dim = 0)
-            attention_mask_chunks = torch.chunk(attention_mask, chunks = B, dim = 0)
-
-            def chunk_optional(tensor, chunks):
-                if tensor is None:
-                    return [None] * chunks
-                return torch.chunk(tensor, chunks = chunks, dim = 0)
+            def slice_sample_axis(value, start, end):  # None-safe value[start:end]
+                if value is None:  # optional input absent: pass None through
+                    return None
+                return value[start:end]
 
             import math
 
             total_samples = input_ids.shape[0]
             batch_size = math.ceil(total_samples / B)
+            if isinstance(num_images, torch.Tensor):
+                num_images = num_images.detach().cpu().reshape(-1).tolist()
+            if (
+                image_grid_thw is not None
+                and pixel_values is not None
+                and num_images is not None
+            ):
+                rows_per_image = image_grid_thw.prod(dim = -1)
+                rows_per_sample = torch.split(rows_per_image, num_images)
+                rows_per_sample = torch.stack([s.sum() for s in rows_per_sample])
+                cum_rows = torch.cat(
+                    [
+                        torch.tensor([0], device = rows_per_sample.device),
+                        rows_per_sample.cumsum(0),
+                    ]
+                )
+                cum_imgs = torch.tensor([0] + num_images).cumsum(0)
+            else:
+                cum_rows = None
+                cum_imgs = None
 
             input_ids_chunks = []
             attention_mask_chunks = []
             pixel_values_chunks = []
             image_grid_thw_chunks = []
             pixel_attention_mask_chunks = []
+            image_sizes_chunks = []
+            token_type_ids_chunks = []
+            mm_token_type_ids_chunks = []
 
             current_pixel_idx = 0
             # TRL 0.23.0 batching logic
             for start in range(0, total_samples, batch_size):
-                end = start + batch_size
+                end = min(start + batch_size, total_samples)
 
                 input_ids_chunks.append(input_ids[start:end])
                 attention_mask_chunks.append(attention_mask[start:end])
+                image_sizes_chunks.append(slice_sample_axis(image_sizes, start, end))
+                token_type_ids_chunks.append(
+                    slice_sample_axis(token_type_ids, start, end)
+                )
+                mm_token_type_ids_chunks.append(
+                    slice_sample_axis(mm_token_type_ids, start, end)
+                )
 
                 if image_grid_thw is not None and pixel_values is not None:
-                    grid_slice = image_grid_thw[start:end]
+                    if num_images is None:
+                        grid_slice = image_grid_thw[start:end]
+                        batch_pixel_count = grid_slice.prod(dim = -1).sum().item()
+                        start_pixel_idx = current_pixel_idx
+                        end_pixel_idx = current_pixel_idx + batch_pixel_count
+                        current_pixel_idx = end_pixel_idx
+                    else:
+                        start_pixel_idx = cum_rows[start].item()
+                        end_pixel_idx = cum_rows[end].item()
+                        img_start, img_end = cum_imgs[start], cum_imgs[end]
+                        grid_slice = image_grid_thw[img_start:img_end]
                     image_grid_thw_chunks.append(grid_slice)
 
-                    batch_pixel_count = grid_slice.prod(dim = -1).sum().item()
-
-                    start_pixel_idx = current_pixel_idx
-                    end_pixel_idx = current_pixel_idx + batch_pixel_count
-
                     pixel_values_chunks.append(
                         pixel_values[start_pixel_idx:end_pixel_idx]
                     )
 
                     if pixel_attention_mask is not None:
-                        pixel_attention_mask_chunks.append(
-                            pixel_attention_mask[start_pixel_idx:end_pixel_idx]
-                        )
+                        if pixel_attention_mask.shape[0] == pixel_values.shape[0]:
+                            pixel_attention_mask_chunks.append(
+                                pixel_attention_mask[start_pixel_idx:end_pixel_idx]
+                            )
+                        else:
+                            pixel_attention_mask_chunks.append(
+                                pixel_attention_mask[start:end]
+                            )
                     else:
                         pixel_attention_mask_chunks.append(None)
 
-                    current_pixel_idx = end_pixel_idx
-
                 else:
                     pixel_values_chunks.append(None)
                     image_grid_thw_chunks.append(None)
                     pixel_attention_mask_chunks.append(None)
 
-            if image_sizes is not None and not isinstance(image_sizes, torch.Tensor):
-                image_sizes_chunks = [[size] for size in image_sizes]
-            else:
-                image_sizes_chunks = chunk_optional(image_sizes, B)
-
             temperature = self.temperature
             logit_softcapping = _unsloth_get_final_logit_softcapping(model.config)
             logit_scale_multiply = getattr(model.config, "logit_scale", 0)
@@ -1167,10 +1198,6 @@ def chunk_optional(tensor, chunks):
             if logit_scale_divide is None:
                 logit_scale_divide = 0
 
-            # Transformers 5.x needs token_type_ids/mm_token_type_ids for some vision models
-            token_type_ids_chunks = chunk_optional(token_type_ids, B)
-            mm_token_type_ids_chunks = chunk_optional(mm_token_type_ids, B)
-
             zipped_inputs = zip(
                 input_ids_chunks,
                 attention_mask_chunks,
@@ -1375,6 +1402,7 @@ def compute_loss(
             inputs.get("pixel_attention_mask", None),
             inputs.get("image_sizes", None),
         )
+        num_images = inputs.get("num_images", None)
         # Transformers 5.x needs token_type_ids/mm_token_type_ids for some vision models
         token_type_ids = inputs.get("token_type_ids", None)
         mm_token_type_ids = inputs.get("mm_token_type_ids", None)
@@ -1504,6 +1532,9 @@ def compute_loss(
                     input_ids = _input_ids,
                     pixel_values = pixel_values,
                     image_grid_thw = image_grid_thw,
+                    pixel_attention_mask = pixel_attention_mask,
+                    image_sizes = image_sizes,
+                    num_images = num_images,
                     logits_to_keep = logits_to_keep,
                     completion_mask = completion_mask,
                     advantages = advantages,
@@ -1535,6 +1566,11 @@ def compute_loss(
                     grpo_accumulated_loss(
                         trainer = self,
                         input_ids = _input_ids,
+                        pixel_values = pixel_values,
+                        image_grid_thw = image_grid_thw,
+                        pixel_attention_mask = pixel_attention_mask,
+                        image_sizes = image_sizes,
+                        num_images = num_images,
                         logits_to_keep = logits_to_keep,
                         completion_mask = completion_mask,
                         advantages = advantages,