danielhanchen · danielhanchen · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/.github/workflows/studio-backend-ci.yml b/.github/workflows/studio-backend-ci.yml
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# Runs the existing studio/backend/tests/ suite (~860 tests, all CPU-friendly)
+# on every PR that touches the backend or unsloth library. Until this lands,
+# none of those tests run automatically. Verified locally on Python 3.13 with
+# the surgical exclusions below: 861 pass, 4 skipped.
+#
+# Exclusions (the complete -k deselection list is on the "Backend tests" step):
+#   - tests/test_studio_api.py: end-to-end against a live model + GGUF download,
+#     too heavy for free runners. Run separately when GPU CI is available.
+#   - -k 'not llama_cpp_load_progress_live': spawns a real llama.cpp process,
+#     not appropriate for CPU-only runners.
+#
+# ruff is non-blocking initially; remove `|| true` once the backend lints clean.
+
+name: Backend CI
+
+# Runs for pull requests that touch backend / library / test paths (or this
+# workflow itself) and for every push to the two long-lived branches.
+on:
+  pull_request:
+    paths:
+      - 'studio/**'
+      - 'unsloth/**'
+      - 'unsloth_cli/**'
+      - 'tests/**'
+      - 'pyproject.toml'
+      - '.github/workflows/studio-backend-ci.yml'
+  push:
+    branches: [main, pip]
+
+# Least privilege: every job below only checks out the repository and runs
+# tests or linters, so the GITHUB_TOKEN needs nothing beyond read access.
+permissions:
+  contents: read
+
+# One live run per workflow + ref; a newer push supersedes the older run.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # Backend unit tests, fanned out over the Python versions in the matrix.
+  pytest:
+    name: (Python ${{ matrix.python }})
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    strategy:
+      # Let every interpreter finish so one failing version does not hide
+      # the results of the others.
+      fail-fast: false
+      matrix:
+        # Quoted so YAML keeps '3.10' a string instead of the float 3.1.
+        python: ['3.10', '3.11', '3.12', '3.13']
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '${{ matrix.python }}'
+          cache: 'pip'
+          # Key the pip cache on the inputs that decide what gets installed:
+          # the backend requirements file and the inline package lists in
+          # this workflow. Without this the key ignores both.
+          cache-dependency-path: |
+            studio/backend/requirements/studio.txt
+            .github/workflows/studio-backend-ci.yml
+
+      - name: Install backend test dependencies (CPU only)
+        run: |
+          python -m pip install --upgrade pip
+          # 1. Whatever Studio itself declares for the backend.
+          python -m pip install -r studio/backend/requirements/studio.txt
+          # 2. Packages the import chain needs but studio.txt does not list,
+          #    e.g. python-multipart (FastAPI form/file uploads), sqlalchemy
+          #    and cryptography (auth DB), pyyaml and jinja2
+          #    (utils.models.model_config).
+          python -m pip install \
+            python-multipart \
+            aiofiles \
+            sqlalchemy \
+            cryptography \
+            pyyaml \
+            jinja2 \
+            mammoth \
+            unpdf \
+            requests \
+            'numpy<3' \
+            pytest \
+            pytest-asyncio \
+            httpx
+          # 3. CPU-only torch plus transformers: a chunk of the backend suite
+          #    (gpu_selection, kv_cache_estimation, utils) imports them. The
+          #    CPU wheel index keeps the install ~250 MB / ~1 min on a clean
+          #    runner.
+          python -m pip install \
+            --index-url https://download.pytorch.org/whl/cpu \
+            'torch>=2.4,<2.11'
+          python -m pip install 'transformers>=4.51,<5.5'
+
+      - name: Backend tests
+        working-directory: studio/backend
+        # Locally validated against this dep set: 831 passed, 5 skipped, 35 deselected.
+        # Everything deselected is environment-specific and would never pass on a
+        # GPU-less `ubuntu-latest` runner regardless of code correctness; the
+        # reason for each entry sits next to it in the list below.
+        run: |
+          skip=(
+            # spawns a real llama.cpp process
+            llama_cpp_load_progress_live
+            # require live transformers config introspection on real GPUs
+            TestGpuAutoSelection
+            TestPreSpawnGpuResolution
+            TestPerGpuFitGuardAllCounts
+            TestTransformersIntrospection
+            # assume a CUDA-capable GPU
+            test_returns_cuda_when_cuda_available
+            test_calls_cuda_cache_when_cuda
+          )
+          # Join the entries into one `not A and not B ...` expression; pytest's
+          # -k parser does not accept newlines, so it must stay on one line.
+          kexpr=$(printf ' and not %s' "${skip[@]}")
+          kexpr=${kexpr# and }
+          python -m pytest tests/ -q --tb=short \
+            --ignore=tests/test_studio_api.py \
+            -k "$kexpr"
+
+  repo-cpu-tests:
+    # Auto-discover everything under tests/ that is not GPU-bound by
+    # design. New tests added in covered directories are picked up
+    # without a workflow edit. Locally validated: 779 passed, 11
+    # skipped, 23 deselected. tests/conftest.py (mirroring unsloth-zoo
+    # PR #624) pre-loads unsloth_zoo.device_type and unsloth.device_type
+    # under a mocked torch.cuda.is_available so the unsloth import
+    # chain succeeds on CPU.
+    name: Repo tests (CPU)
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+          # Key the pip cache on the inputs that decide what gets installed:
+          # the backend requirements file and the inline package lists in
+          # this workflow. Without this the key ignores both.
+          cache-dependency-path: |
+            studio/backend/requirements/studio.txt
+            .github/workflows/studio-backend-ci.yml
+
+      - name: Install deps (shared shape with backend pytest job)
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -r studio/backend/requirements/studio.txt
+          python -m pip install \
+            python-multipart \
+            aiofiles \
+            sqlalchemy \
+            cryptography \
+            pyyaml \
+            jinja2 \
+            mammoth \
+            unpdf \
+            requests \
+            typer \
+            'numpy<3' \
+            pytest \
+            pytest-asyncio \
+            httpx
+          # torchvision rides along with torch because
+          # unsloth_zoo.vision_utils imports it at module scope and is
+          # reached via unsloth.models._utils.
+          python -m pip install \
+            --index-url https://download.pytorch.org/whl/cpu \
+            'torch>=2.4,<2.11' \
+            'torchvision<0.26'
+          python -m pip install 'transformers>=4.51,<5.5'
+          # unsloth/models/_utils.py hard-imports bitsandbytes. Recent
+          # versions ship a CPU build, so it installs on a free Linux
+          # runner; the kernels still raise on use, but the import
+          # succeeds and the package collects.
+          python -m pip install 'bitsandbytes>=0.45'
+          # unsloth_zoo is only an optional dep of unsloth, yet
+          # unsloth.device_type imports unsloth_zoo.utils.Version at
+          # module scope, so the conftest harness needs it on the path.
+          python -m pip install 'unsloth_zoo>=2026.5.1'
+          python -m pip install -e . --no-deps
+
+      - name: Repo tests (CPU, auto-discovered)
+        env:
+          # tests/python/* import install_python_stack from studio/.
+          PYTHONPATH: ${{ github.workspace }}/studio
+          # Skip lazy compilation work the unsloth import chain wants to
+          # do at import time on a real GPU.
+          UNSLOTH_COMPILE_DISABLE: '1'
+        # --ignore: GPU-bound directories (qlora and saving need real
+        #   weights / GPU; tests/sh is a shell suite that the "Shell
+        #   installer tests" step further down handles; tests/utils is a
+        #   helpers folder, not tests).
+        # State-sensitive hardware-spoofing files are pulled out and run
+        # in isolation in the next step because they mutate
+        # hardware.py module globals (IS_ROCM / DEVICE) and pollute
+        # downstream tests.
+        # -m: honour markers already declared in tests/python/conftest.py
+        #   (`server` = needs studio venv, `e2e` = needs network).
+        # --deselect: two registry tests that hit huggingface_hub for
+        #   live model existence checks; they belong on a network job.
+        run: |
+          python -m pytest tests/ -q --tb=short \
+            --ignore=tests/qlora \
+            --ignore=tests/saving \
+            --ignore=tests/utils \
+            --ignore=tests/sh \
+            --ignore=tests/studio/test_hardware_dispatch_matrix.py \
+            --ignore=tests/studio/test_is_mlx_dispatch_gate.py \
+            -m 'not server and not e2e' \
+            --deselect tests/test_model_registry.py::test_model_registration \
+            --deselect tests/test_model_registry.py::test_all_model_registration
+
+      - name: Hardware-spoof tests (state-sensitive, run in isolation)
+        env:
+          PYTHONPATH: ${{ github.workspace }}/studio
+          UNSLOTH_COMPILE_DISABLE: '1'
+        # These two files mutate hardware.py module globals at runtime
+        # via the spoof fixtures, which leaks state into any other test
+        # that imports hardware. Each file gets its own pytest process so
+        # the leak cannot cross file boundaries -- not even between the
+        # two of them. Both files always run; the step fails if either
+        # one does.
+        run: |
+          status=0
+          for f in \
+              tests/studio/test_hardware_dispatch_matrix.py \
+              tests/studio/test_is_mlx_dispatch_gate.py; do
+              python -m pytest -q --tb=short "$f" || status=$?
+          done
+          exit "$status"
+
+      - name: Shell installer tests
+        # Subset that does not depend on a writable / pristine install.sh
+        # tree; test_install_host_defaults.sh checks install.ps1 layout
+        # which has drifted (separate followup).
+        # Every script runs even after an earlier one fails, so a single
+        # CI run reports all breakages and each log group is always closed.
+        run: |
+          set -e
+          failed=0
+          for s in \
+              tests/sh/test_get_torch_index_url.sh \
+              tests/sh/test_mac_intel_compat.sh \
+              tests/sh/test_tauri_install_exit_order.sh \
+              tests/sh/test_torch_constraint.sh; do
+              echo "::group::$s"
+              rc=0
+              # `|| rc=$?` keeps errexit from aborting the loop mid-group.
+              bash "$s" || rc=$?
+              echo "::endgroup::"
+              if [ "$rc" -ne 0 ]; then
+                  echo "::error::$s exited with status $rc"
+                  failed=1
+              fi
+          done
+          exit "$failed"
+
+  # Advisory lint over the backend; it never fails the workflow.
+  ruff:
+    name: Backend ruff lint (non-blocking)
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+      - run: pip install ruff
+      - name: ruff check (non-blocking until accumulated drift is cleared)
+        # --output-format=github emits findings as workflow annotations, so
+        # they show up on the run / PR even though `|| true` keeps the step
+        # green. Remove `|| true` once the backend lints clean.
+        run: ruff check --output-format=github studio/backend || true
diff --git a/.github/workflows/studio-frontend-ci.yml b/.github/workflows/studio-frontend-ci.yml
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# Frontend PR gate: lockfile freshness, typecheck, build, and a bundle grep
+# that catches the 2026.5.1 chat-history regression at the JS level.
+#
+# biome is non-blocking for now: the codebase has accumulated ~470 errors and
+# ~1650 warnings against the existing biome config. Surfacing the count in CI
+# lets us drive it down without forcing a repo-wide cleanup in the same PR.
+# Drop `continue-on-error` once that number is zero.
+
+name: Frontend CI
+
+# Runs for pull requests that touch the frontend (or this workflow itself)
+# and for every push to the two long-lived branches.
+on:
+  pull_request:
+    paths:
+      - 'studio/frontend/**'
+      - '.github/workflows/studio-frontend-ci.yml'
+  push:
+    branches: [main, pip]
+
+# Least privilege: the job only checks out the repository, builds it and
+# uploads an artifact, so the GITHUB_TOKEN needs nothing beyond read access.
+permissions:
+  contents: read
+
+# One live run per workflow + ref; a newer push supersedes the older run.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # Single gate job: pin check, install, typecheck, build, bundle checks.
+  build:
+    name: Frontend build + bundle sanity
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    # Default cwd for every `run:` step. Steps that address paths from the
+    # repository root override it with
+    # `working-directory: ${{ github.workspace }}`.
+    defaults:
+      run:
+        working-directory: studio/frontend
+    steps:
+      - uses: actions/checkout@v4
+
+      # FIXME: drop this step once @assistant-ui/* and assistant-stream
+      # leave 0.x -- on 1.x, caret ranges are conventional. Until then,
+      # every 0.minor on this surface is a SemVer-major (this is exactly
+      # how 2026.5.1 shipped a broken chat runtime: ^0.12.19 quietly
+      # resolved to 0.12.28).
+      - name: '@assistant-ui must be pinned exactly (no ^, ~ or other range operator)'
+        working-directory: ${{ github.workspace }}
+        run: |
+          set -e
+          # Reject any specifier that opens with a range operator, not only
+          # caret/tilde: `>=0.12.19`, `<1` or `*` float in exactly the same
+          # way. A leading `=` is left alone because it is still an exact pin.
+          if grep -nE '"(@assistant-ui/[a-z0-9-]+|assistant-stream)":[[:space:]]*"[~^<>*]' studio/frontend/package.json; then
+            echo "::error file=studio/frontend/package.json::These packages must be pinned to exact versions until they leave 0.x. Drop the leading range operator (^, ~, >=, <, *)."
+            exit 1
+          fi
+          echo "All assistant-ui packages are pinned exactly."
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: 'npm'
+          # The lockfile lives under studio/frontend, not at the repo root,
+          # so the cache has to be pointed at it explicitly.
+          cache-dependency-path: studio/frontend/package-lock.json
+
+      # Installs strictly from package-lock.json; the step name records the
+      # intent that a lockfile out of sync with package.json fails here.
+      - name: Lockfile must agree with package.json (npm ci is strict)
+        run: npm ci --no-fund --no-audit
+
+      - name: npm ci must not have modified the working tree
+        working-directory: ${{ github.workspace }}
+        # Guard-clause form: a clean tree ends the step early; anything past
+        # the guard is the failure path.
+        run: |
+          if git diff --quiet -- studio/frontend; then
+            exit 0
+          fi
+          echo "::error::npm ci modified files; commit the updated lockfile"
+          git status -- studio/frontend
+          exit 1
+
+      # Runs the `typecheck` script defined in studio/frontend/package.json.
+      - name: Typecheck
+        run: npm run typecheck
+
+      # Runs the `build` script; the bundle checks below read its output
+      # from dist/.
+      - name: Build
+        run: npm run build
+
+      - name: Built bundle must not contain Studio's unstable_Provider call site
+        run: |
+          set -e
+          JS=$(ls dist/assets/index-*.js | head -1)
+          # Without a bundle there is nothing to inspect; fail instead of
+          # reporting zero hits for a file that does not exist.
+          if [ ! -f "$JS" ]; then
+            echo "::error::no dist/assets/index-*.js bundle was produced by the build"
+            exit 1
+          fi
+          # grep -c prints the count itself (0 included) and exits 1 on zero
+          # matches, so only its exit status needs masking. `|| echo 0` would
+          # append a second "0" and break the integer comparison below.
+          # NOTE(review): grep -c counts matching lines, so several call
+          # sites on one minified line count once -- confirm the threshold
+          # of 3 was calibrated that way.
+          HITS=$(grep -c 'unstable_Provider:' "$JS" || true)
+          echo "main bundle: $JS"
+          echo "unstable_Provider: hits=$HITS (assistant-ui internals contribute up to 3)"
+          if [ "$HITS" -gt 3 ]; then
+            echo "::error file=studio/frontend/src/features/chat/runtime-provider.tsx::Studio bundle still passes unstable_Provider through useRemoteThreadListRuntime; this is the 2026.5.1 chat-history regression. Pass adapters directly into useLocalRuntime instead."
+            exit 1
+          fi
+
+      - name: Bundle size budget (75 MB)
+        # Compares the byte size of dist/ (as reported by `du -sb`) with a
+        # fixed 75 MiB ceiling and fails the step when it is exceeded.
+        run: |
+          budget_bytes=$((75 * 1024 * 1024))
+          dist_bytes=$(du -sb dist | cut -f1)
+          dist_mb=$((dist_bytes/1024/1024))
+          echo "dist size: $dist_bytes bytes ($dist_mb MB), budget: $budget_bytes bytes (75 MB)"
+          if [ "$dist_bytes" -gt "$budget_bytes" ]; then
+            echo "::error::studio/frontend/dist/ exceeded the 75 MB budget. Drop dead deps (e.g. the unused next dep) or split chunks."
+            exit 1
+          fi
+
+      # Advisory only: `continue-on-error` lets the job go on when biome
+      # reports problems.
+      - name: Biome (non-blocking until accumulated drift is cleared)
+        continue-on-error: true
+        run: npm run biome:check
+
+      # Debug aid: when the job has failed, keep the built dist/ for three
+      # days so it can be downloaded and inspected.
+      - name: Upload built dist on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: studio-frontend-dist
+          path: studio/frontend/dist
+          retention-days: 3