diff --git a/.github/workflows/studio-backend-ci.yml b/.github/workflows/studio-backend-ci.yml
new file mode 100644
index 0000000000..5a858888e7
--- /dev/null
+++ b/.github/workflows/studio-backend-ci.yml
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# Runs the existing studio/backend/tests/ suite (~860 tests, all CPU-friendly)
+# on every PR that touches the backend or unsloth library. Until this lands,
+# none of those tests run automatically. Verified locally on Python 3.13 with
+# the surgical exclusions below: 861 passed, 4 skipped.
+#
+# Exclusions (the `Backend tests` step carries the complete -k list):
+#   - tests/test_studio_api.py: end-to-end against a live model + GGUF download,
+#     too heavy for free runners. Run separately when GPU CI is available.
+#   - -k 'not llama_cpp_load_progress_live': spawns a real llama.cpp process,
+#     not appropriate for CPU-only runners.
+#
+# ruff is non-blocking initially; remove `|| true` once the backend lints clean.
+
+name: Backend CI
+
+on:
+  pull_request:
+    paths:
+      - 'studio/**'
+      - 'unsloth/**'
+      - 'unsloth_cli/**'
+      - 'tests/**'
+      - 'pyproject.toml'
+      - '.github/workflows/studio-backend-ci.yml'
+  push:
+    branches: [main, pip]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  pytest:
+    name: (Python ${{ matrix.python }})
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    strategy:
+      fail-fast: false
+      matrix:
+        python: ['3.10', '3.11', '3.12', '3.13']
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '${{ matrix.python }}'
+          cache: 'pip'
+
+      - name: Install backend test dependencies (CPU only)
+        run: |
+          python -m pip install --upgrade pip
+          # Studio's declared backend deps:
+          pip install -r studio/backend/requirements/studio.txt
+          # Extras that studio.txt does not list but the import chain needs
+          # (python-multipart for FastAPI form/file uploads, sqlalchemy/cryptography
+          # for the auth DB, yaml/jinja2 for utils.models.model_config, etc.):
+          pip install \
+            python-multipart aiofiles sqlalchemy cryptography \
+            pyyaml jinja2 mammoth unpdf requests \
+            'numpy<3' pytest pytest-asyncio httpx
+          # Torch CPU + transformers are required by a chunk of the backend test
+          # suite (gpu_selection, kv_cache_estimation, utils). CPU-only torch
+          # keeps the install ~250 MB / ~1 min on a clean runner.
+          pip install --index-url https://download.pytorch.org/whl/cpu 'torch>=2.4,<2.11'
+          pip install 'transformers>=4.51,<5.5'
+
+      - name: Backend tests
+        working-directory: studio/backend
+        # Locally validated against this dep set: 831 passed, 5 skipped, 35 deselected.
+        # Deselections (all environment-specific, would never pass on a GPU-less
+        # `ubuntu-latest` runner regardless of code correctness):
+        #   - llama_cpp_load_progress_live: spawns a real llama.cpp process
+        #   - TestGpuAutoSelection / TestPreSpawnGpuResolution / TestPerGpuFitGuardAllCounts:
+        #     require live transformers config introspection on real GPUs
+        #   - TestTransformersIntrospection: same
+        #   - test_returns_cuda_when_cuda_available / test_calls_cuda_cache_when_cuda:
+        #     assume CUDA-capable GPU
+        run: |
+          python -m pytest tests/ -q --tb=short \
+            --ignore=tests/test_studio_api.py \
+            -k 'not llama_cpp_load_progress_live and not TestGpuAutoSelection and not TestPreSpawnGpuResolution and not TestPerGpuFitGuardAllCounts and not TestTransformersIntrospection and not test_returns_cuda_when_cuda_available and not test_calls_cuda_cache_when_cuda'
+
+  repo-cpu-tests:
+    # Auto-discover everything under tests/ that is not GPU-bound by
+    # design. New tests added in covered directories are picked up
+    # without a workflow edit. Locally validated: 779 passed, 11
+    # skipped, 23 deselected. tests/conftest.py (mirroring unsloth-zoo
+    # PR #624) pre-loads unsloth_zoo.device_type and unsloth.device_type
+    # under a mocked torch.cuda.is_available so the unsloth import
+    # chain succeeds on CPU.
+    name: Repo tests (CPU)
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - name: Install deps (shared shape with backend pytest job)
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r studio/backend/requirements/studio.txt
+          pip install \
+            python-multipart aiofiles sqlalchemy cryptography \
+            pyyaml jinja2 mammoth unpdf requests typer \
+            'numpy<3' pytest pytest-asyncio httpx
+          # torchvision is needed because unsloth_zoo.vision_utils imports
+          # it at module scope and is reached via unsloth.models._utils.
+          pip install --index-url https://download.pytorch.org/whl/cpu \
+            'torch>=2.4,<2.11' 'torchvision<0.26'
+          pip install 'transformers>=4.51,<5.5'
+          # bitsandbytes is a hard import in unsloth/models/_utils.py.
+          # Recent versions ship a CPU build so it installs on a free
+          # Linux runner; the kernels still raise on use, but import
+          # succeeds and the package collects.
+          pip install 'bitsandbytes>=0.45'
+          # unsloth.device_type imports unsloth_zoo.utils.Version at module
+          # scope, so the conftest harness needs unsloth_zoo on the path
+          # even though it is an optional dep of unsloth.
+          pip install 'unsloth_zoo>=2026.5.1'
+          pip install -e . --no-deps
+
+      - name: Repo tests (CPU, auto-discovered)
+        env:
+          # tests/python/* import install_python_stack from studio/.
+          PYTHONPATH: ${{ github.workspace }}/studio
+          # Skip lazy compilation work the unsloth import chain wants to
+          # do at import time on a real GPU.
+          UNSLOTH_COMPILE_DISABLE: '1'
+        # --ignore: GPU-bound directories (qlora and saving need real
+        # weights / GPU; tests/sh is a shell suite a later step
+        # handles; tests/utils is a helpers folder, not tests).
+        # State-sensitive hardware-spoofing files are pulled out and run
+        # in isolation in the next step because they mutate
+        # hardware.py module globals (IS_ROCM / DEVICE) and pollute
+        # downstream tests.
+        # -m: honour markers already declared in tests/python/conftest.py
+        # (`server` = needs studio venv, `e2e` = needs network).
+        # --deselect: two registry tests that hit huggingface_hub for
+        # live model existence checks; they belong on a network job.
+        run: |
+          python -m pytest tests/ -q --tb=short \
+            --ignore=tests/qlora \
+            --ignore=tests/saving \
+            --ignore=tests/utils \
+            --ignore=tests/sh \
+            --ignore=tests/studio/test_hardware_dispatch_matrix.py \
+            --ignore=tests/studio/test_is_mlx_dispatch_gate.py \
+            -m 'not server and not e2e' \
+            --deselect tests/test_model_registry.py::test_model_registration \
+            --deselect tests/test_model_registry.py::test_all_model_registration
+
+      - name: Hardware-spoof tests (state-sensitive, run in isolation)
+        env:
+          PYTHONPATH: ${{ github.workspace }}/studio
+          UNSLOTH_COMPILE_DISABLE: '1'
+        # These two files mutate hardware.py module globals at runtime
+        # via the spoof fixtures, which leaks state into any other test
+        # that imports hardware. Run them in their own pytest invocation
+        # so the leak does not cross file boundaries.
+        run: |
+          python -m pytest -q --tb=short \
+            tests/studio/test_hardware_dispatch_matrix.py \
+            tests/studio/test_is_mlx_dispatch_gate.py
+
+      - name: Shell installer tests
+        # Subset that does not depend on a writable / pristine install.sh
+        # tree; test_install_host_defaults.sh checks install.ps1 layout
+        # which has drifted (separate followup).
+        run: |
+          set -e
+          for s in \
+            tests/sh/test_get_torch_index_url.sh \
+            tests/sh/test_mac_intel_compat.sh \
+            tests/sh/test_tauri_install_exit_order.sh \
+            tests/sh/test_torch_constraint.sh; do
+            echo "::group::$s"
+            bash "$s"
+            echo "::endgroup::"
+          done
+
+  ruff:
+    name: Backend ruff lint (non-blocking)
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+      - run: pip install ruff
+      - name: ruff check (non-blocking until accumulated drift is cleared)
+        run: ruff check studio/backend || true
diff --git a/.github/workflows/studio-frontend-ci.yml b/.github/workflows/studio-frontend-ci.yml
new file mode 100644
index 0000000000..039bd5dd08
--- /dev/null
+++ b/.github/workflows/studio-frontend-ci.yml
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# Frontend PR gate: lockfile freshness, typecheck, build, and a bundle grep
+# that catches the 2026.5.1 chat-history regression at the JS level.
+#
+# biome runs as non-blocking for now: the codebase currently has accumulated
+# ~470 errors and ~1650 warnings against the existing biome config. Surfacing
+# the count in CI lets us drive it down without forcing a fleet-wide cleanup
+# in the same PR. Drop `continue-on-error` once that number is zero.
+
+name: Frontend CI
+
+on:
+  pull_request:
+    paths:
+      - 'studio/frontend/**'
+      - '.github/workflows/studio-frontend-ci.yml'
+  push:
+    branches: [main, pip]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: Frontend build + bundle sanity
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    defaults:
+      run:
+        working-directory: studio/frontend
+    steps:
+      - uses: actions/checkout@v4
+
+      # FIXME: drop this step once @assistant-ui/* and assistant-stream
+      # leave 0.x -- on 1.x, caret ranges are conventional. Until then,
+      # every 0.minor on this surface is a SemVer-major (this is exactly
+      # how 2026.5.1 shipped a broken chat runtime: ^0.12.19 quietly
+      # resolved to 0.12.28).
+      - name: '@assistant-ui must be pinned exactly (no caret/tilde)'
+        working-directory: ${{ github.workspace }}
+        run: |
+          set -e
+          if grep -nE '"(@assistant-ui/[a-z-]+|assistant-stream)":[[:space:]]*"[\^~]' studio/frontend/package.json; then
+            echo "::error file=studio/frontend/package.json::These packages must be pinned to exact versions until they leave 0.x. Drop the leading ^ or ~."
+            exit 1
+          fi
+          echo "All assistant-ui packages are pinned exactly."
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: 'npm'
+          cache-dependency-path: studio/frontend/package-lock.json
+
+      - name: Lockfile must agree with package.json (npm ci is strict)
+        run: npm ci --no-fund --no-audit
+
+      - name: npm ci must not have modified the working tree
+        working-directory: ${{ github.workspace }}
+        run: |
+          if ! git diff --quiet -- studio/frontend; then
+            echo "::error::npm ci modified files; commit the updated lockfile"
+            git status -- studio/frontend
+            exit 1
+          fi
+
+      - name: Typecheck
+        run: npm run typecheck
+
+      - name: Build
+        run: npm run build
+
+      - name: Built bundle must not contain Studio's unstable_Provider call site
+        run: |
+          set -e
+          JS=$(ls dist/assets/index-*.js | head -1)
+          if [ -z "$JS" ]; then
+            echo "::error::no dist/assets/index-*.js main bundle was produced"
+            exit 1
+          fi
+          # grep -c prints the count itself (including 0) and exits 1 on zero
+          # matches; `|| echo 0` would append a second line and break -gt.
+          HITS=$(grep -c 'unstable_Provider:' "$JS" || true)
+          echo "main bundle: $JS"
+          echo "unstable_Provider: hits=$HITS (assistant-ui internals contribute up to 3)"
+          if [ "$HITS" -gt 3 ]; then
+            echo "::error file=studio/frontend/src/features/chat/runtime-provider.tsx::Studio bundle still passes unstable_Provider through useRemoteThreadListRuntime; this is the 2026.5.1 chat-history regression. Pass adapters directly into useLocalRuntime instead."
+            exit 1
+          fi
+
+      - name: Bundle size budget (75 MB)
+        run: |
+          SIZE=$(du -sb dist | cut -f1)
+          BUDGET=$((75 * 1024 * 1024))
+          echo "dist size: $SIZE bytes ($((SIZE/1024/1024)) MB), budget: $BUDGET bytes (75 MB)"
+          if [ "$SIZE" -gt "$BUDGET" ]; then
+            echo "::error::studio/frontend/dist/ exceeded the 75 MB budget. Drop dead deps (e.g. the unused next dep) or split chunks."
+            exit 1
+          fi
+
+      - name: Biome (non-blocking until accumulated drift is cleared)
+        continue-on-error: true
+        run: npm run biome:check
+
+      - name: Upload built dist on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: studio-frontend-dist
+          path: studio/frontend/dist
+          retention-days: 3
diff --git a/.github/workflows/studio-inference-smoke.yml b/.github/workflows/studio-inference-smoke.yml
new file mode 100644
index 0000000000..8efe072d28
--- /dev/null
+++ b/.github/workflows/studio-inference-smoke.yml
@@ -0,0 +1,185 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# End-to-end smoke: install Studio via install.sh --local --no-torch, download
+# a tiny GGUF, boot Studio, log in, change password, load the model, send a
+# chat completion, assert a non-empty response. Only workflow that tests "the
+# app actually works".
+#
+# Model: Qwen3.5-2B UD-IQ3_XXS (~890 MiB) -- small enough that the cache miss
+# is cheap and inference fits in the 25 min CPU-runner budget. GGUF is cached
+# across runs via actions/cache.
+
+name: Studio GGUF CI
+
+on:
+  pull_request:
+    paths:
+      - 'studio/**'
+      - 'unsloth/**'
+      - 'unsloth_cli/**'
+      - 'install.sh'
+      - 'pyproject.toml'
+      - '.github/workflows/studio-inference-smoke.yml'
+  push:
+    branches: [main, pip]
+  # Manual trigger for pre-warming the GGUF cache on main, or re-running
+  # against an arbitrary branch without pushing a no-op commit.
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  GGUF_REPO: unsloth/Qwen3.5-2B-GGUF
+  GGUF_FILE: Qwen3.5-2B-UD-IQ3_XXS.gguf
+  STUDIO_PORT: '18888'
+
+jobs:
+  inference:
+    name: Studio boots, loads a GGUF, answers a chat completion
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Linux dependencies for llama.cpp prebuilt
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends \
+            libcurl4-openssl-dev libssl-dev jq
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: 'npm'
+          cache-dependency-path: studio/frontend/package-lock.json
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - name: Cache GGUF model file
+        id: cache-gguf
+        uses: actions/cache@v4
+        with:
+          path: gguf-cache
+          key: ${{ runner.os }}-gguf-${{ env.GGUF_REPO }}-${{ env.GGUF_FILE }}-v1
+
+      - name: Download GGUF if cache miss
+        if: steps.cache-gguf.outputs.cache-hit != 'true'
+        run: |
+          # huggingface-cli was deprecated in huggingface_hub 1.13; the new CLI is `hf`.
+          python -m pip install --upgrade huggingface_hub hf_transfer
+          mkdir -p gguf-cache
+          HF_HUB_ENABLE_HF_TRANSFER=1 \
+            hf download "$GGUF_REPO" "$GGUF_FILE" --local-dir gguf-cache
+
+      - name: Install Studio (--local, --no-torch keeps the install lean)
+        run: |
+          mkdir -p logs
+          set -o pipefail
+          bash install.sh --local --no-torch 2>&1 | tee logs/install.log
+
+      - name: Assert llama.cpp prebuilt was installed (no source-build fallback)
+        # ubuntu-latest is CPU-only x86_64, so studio/setup.sh should route
+        # to ggml-org/llama.cpp and grab bin-ubuntu-x64.tar.gz. A source
+        # build here means the routing regressed.
+        run: |
+          if grep -q "falling back to source build" logs/install.log; then
+            echo "::error::llama.cpp prebuilt path failed on ubuntu-latest. studio/setup.sh routing regressed; CPU-only Linux x86_64 should hit ggml-org/llama.cpp's bin-ubuntu-x64.tar.gz."
+            grep -E "llama-prebuilt|llama.cpp" logs/install.log | tail -60
+            exit 1
+          fi
+          if ! grep -qE "prebuilt installed and validated|prebuilt up to date and validated" logs/install.log; then
+            echo "::error::install.log does not contain the success marker for the llama.cpp prebuilt path. Did setup.sh skip the prebuilt install?"
+            grep -E "llama-prebuilt|llama.cpp" logs/install.log | tail -60
+            exit 1
+          fi
+          echo "llama.cpp prebuilt path used successfully"
+
+      - name: Reset auth + start Studio in the background
+        run: |
+          unsloth studio reset-password
+          mkdir -p logs
+          UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \
+            > logs/studio.log 2>&1 &
+          echo "STUDIO_PID=$!" >> "$GITHUB_ENV"
+
+      - name: Wait for /api/health
+        run: |
+          for i in $(seq 1 60); do
+            if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json; then
+              echo "ready after ${i}s"
+              cat /tmp/health.json
+              jq -e '.status == "healthy"' /tmp/health.json
+              exit 0
+            fi
+            sleep 1
+          done
+          echo "Studio did not become healthy in 60s"
+          tail -200 logs/studio.log
+          exit 1
+
+      - name: Login + change bootstrap password
+        run: |
+          PW=$(cat ~/.unsloth/studio/auth/.bootstrap_password)
+          NEW="CIPasswordSmoke12345!"
+          TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
+            -H 'content-type: application/json' \
+            -d "{\"username\":\"unsloth\",\"password\":\"$PW\"}" | jq -r .access_token)
+          curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \
+            -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
+            -d "{\"current_password\":\"$PW\",\"new_password\":\"$NEW\"}" > /dev/null
+          # Re-login to clear must_change_password flag.
+          NEW_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \
+            -H 'content-type: application/json' \
+            -d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -r .access_token)
+          echo "TOKEN=$NEW_TOKEN" >> "$GITHUB_ENV"
+
+      - name: Load the GGUF into Studio
+        run: |
+          GGUF_PATH="$GITHUB_WORKSPACE/gguf-cache/${GGUF_FILE}"
+          ls -lh "$GGUF_PATH"
+          # Not `curl | jq`: without pipefail jq's exit 0 would mask a failed load.
+          curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" --max-time 600 -o /tmp/load.json \
+            -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
+            -d "{\"model_path\":\"$GGUF_PATH\",\"is_lora\":false,\"max_seq_length\":2048}"
+          jq '{status, display_name, is_gguf, context_length}' /tmp/load.json
+
+      - name: Send a chat completion + assert non-empty response
+        run: |
+          RESP=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/chat/completions" \
+            -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \
+            --max-time 900 \
+            -d '{
+              "messages":[{"role":"user","content":"Say hello in one short sentence."}],
+              "max_tokens":40,
+              "stream":false
+            }')
+          echo "raw response: $RESP"
+          CONTENT=$(echo "$RESP" | jq -r '.choices[0].message.content // empty')
+          echo "model response: $CONTENT"
+          if [ -z "$CONTENT" ]; then
+            echo "::error::Empty assistant response from Studio"
+            exit 1
+          fi
+
+      - name: Stop Studio
+        if: always()
+        run: |
+          kill "${STUDIO_PID}" || true
+          sleep 2
+          ss -tln | grep ":${STUDIO_PORT}" || true
+
+      - name: Upload Studio + install logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: studio-inference-log
+          path: |
+            logs/studio.log
+            logs/install.log
+          retention-days: 7
diff --git a/.github/workflows/studio-tauri-smoke.yml b/.github/workflows/studio-tauri-smoke.yml
new file mode 100644
index 0000000000..fcc9c8d963
--- /dev/null
+++ b/.github/workflows/studio-tauri-smoke.yml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# PR-time smoke for the Tauri desktop wrapper. Builds the frontend and the
+# Tauri Linux debug binary, with no codesigning. Catches:
+#   - tauri.conf.json drift
+#   - src-tauri Cargo.toml or rust source breakage
+#   - Tauri CLI version drift (we pin 2.10.1, matching release-desktop.yml)
+#   - frontend output not picked up by Tauri's distDir
+#
+# Linux-only on a free `ubuntu-latest` runner. Mac and Windows desktop builds
+# stay in release-desktop.yml (manual `workflow_dispatch`) because they need
+# code-signing secrets and ~30 min of runner time each.
+
+name: Studio Tauri CI
+
+on:
+  pull_request:
+    paths:
+      - 'studio/frontend/**'
+      - 'studio/src-tauri/**'
+      - '.github/workflows/studio-tauri-smoke.yml'
+  push:
+    branches: [main, pip]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  linux-debug-build:
+    name: Tauri Linux debug build (no codesign)
+    runs-on: ubuntu-22.04
+    timeout-minutes: 25
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Linux native deps for Tauri / WebKit2GTK
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y \
+            libwebkit2gtk-4.1-dev libayatana-appindicator3-dev \
+            librsvg2-dev libxdo-dev libssl-dev patchelf
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '24'
+          cache: 'npm'
+          cache-dependency-path: studio/frontend/package-lock.json
+
+      - uses: dtolnay/rust-toolchain@stable
+
+      - uses: swatinem/rust-cache@v2
+        with:
+          workspaces: studio/src-tauri -> target
+
+      - name: Install pinned Tauri CLI (matches release-desktop.yml)
+        run: npm install --save-dev --prefix studio @tauri-apps/cli@2.10.1
+
+      - name: Verify pinned Tauri CLI version
+        run: |
+          out="$(npx --prefix studio tauri --version)"
+          echo "$out"
+          [ "$out" = "tauri-cli 2.10.1" ] || { echo "::error::expected tauri-cli 2.10.1, got $out"; exit 1; }
+
+      - name: Frontend build (npm ci, vite)
+        working-directory: studio/frontend
+        run: |
+          npm ci --no-fund --no-audit
+          npm run build
+          test -f dist/index.html
+
+      - name: Tauri debug build (Linux, no bundle, no codesign)
+        # `--debug` + `--no-bundle` keeps this lean: compiles the Rust crate,
+        # confirms the frontend dist is wired into Tauri, but skips the AppImage
+        # / .deb production. Code signing is irrelevant because we never produce
+        # a distributable artifact.
+        env:
+          TAURI_SIGNING_PRIVATE_KEY: ''
+          TAURI_SIGNING_PRIVATE_KEY_PASSWORD: ''
+        run: npx --prefix studio tauri build --debug --no-bundle
+
+      - name: Inspect produced binary
+        run: |
+          BIN=$(find studio/src-tauri/target/debug -maxdepth 1 -type f -executable 2>/dev/null \
+            | grep -Ev '\.(d|so|dylib|dll)$' \
+            | grep -Ev '/(deps|build|examples)$' \
+            | head -1)
+          echo "binary: $BIN"
+          if [ -z "$BIN" ]; then
+            echo "::error::Tauri debug binary not produced"
+            ls -la studio/src-tauri/target/debug/ || true
+            exit 1
+          fi
+          file "$BIN"
+          du -h "$BIN"
+
+      - uses: actions/upload-artifact@v4
+        if: failure()
+        with:
+          name: tauri-debug-build
+          path: |
+            studio/src-tauri/target/debug
+            studio/frontend/dist
+          retention-days: 3
diff --git a/.github/workflows/wheel-smoke.yml b/.github/workflows/wheel-smoke.yml
new file mode 100644
index 0000000000..080a6bb261
--- /dev/null
+++ b/.github/workflows/wheel-smoke.yml
@@ -0,0 +1,124 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
+
+# Builds the PyPI wheel from the PR branch, then verifies the built wheel
+# actually contains what we expect to ship and does NOT contain the broken
+# Studio bundle that 2026.5.1 published. This is the single workflow that
+# would have blocked the 2026.5.1 release before twine upload.
+#
+# Verified locally end-to-end against this branch:
+#   - python -m build produces unsloth--py3-none-any.whl in 13s
+#   - wheel content sanity passes:
+#       lockfile shipped, frontend dist shipped,
+#       no node_modules in wheel, no bun.lock in wheel,
+#       main bundle has unstable_Provider hits=1 (assistant-ui internals only).
+#   - Studio backend imports cleanly from the installed wheel with the
+#     lightweight dep set below.
+
+name: Wheel CI
+
+on:
+  pull_request:
+    paths:
+      - 'pyproject.toml'
+      - 'studio/**'
+      - 'unsloth/**'
+      - 'unsloth_cli/**'
+      - '.github/workflows/wheel-smoke.yml'
+  push:
+    branches: [main, pip]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  wheel:
+    name: Wheel build + content sanity + import smoke
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: 'npm'
+          cache-dependency-path: studio/frontend/package-lock.json
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Build frontend
+        run: |
+          cd studio/frontend
+          npm ci --no-fund --no-audit
+          npm run build
+
+      - name: Build wheel + sdist
+        run: |
+          python -m pip install --upgrade pip build
+          rm -rf dist build ./*.egg-info
+          python -m build
+
+      - name: Wheel content sanity
+        run: |
+          python - <<'PY'
+          import zipfile, glob, sys
+          w = glob.glob("dist/unsloth-*.whl")
+          if not w:
+              print("FAIL: no wheel produced"); sys.exit(2)
+          w = w[0]
+          print(f"wheel: {w}")
+          with zipfile.ZipFile(w) as z:
+              n = z.namelist()
+              checks = {
+                  "lockfile shipped": any(s.endswith("studio/frontend/package-lock.json") for s in n),
+                  "frontend dist shipped": any(s.endswith("studio/frontend/dist/index.html") for s in n),
+                  "no node_modules": not any("studio/frontend/node_modules/" in s for s in n),
+                  "no bun.lock": not any(s.endswith("studio/frontend/bun.lock") for s in n),
+              }
+              js = [s for s in n
+                    if "studio/frontend/dist/assets/" in s
+                    and s.endswith(".js")
+                    and "/index-" in s]
+              if not js:
+                  print("FAIL: no main bundle index-*.js in wheel"); sys.exit(2)
+              data = z.read(js[0]).decode("utf-8", "replace")
+              hits = data.count("unstable_Provider:")
+              print(f"main bundle: {js[0]}")
+              print(f"unstable_Provider hits: {hits} (>=4 indicates 2026.5.1 regression)")
+              checks["bundle has no Studio unstable_Provider call site"] = (hits < 4)
+
+          print()
+          for k, v in checks.items():
+              print(f" [{'PASS' if v else 'FAIL'}] {k}")
+          sys.exit(0 if all(checks.values()) else 1)
+          PY
+
+      - name: Studio backend import smoke
+        # Imports `studio.backend.main:app` from the freshly-installed wheel in
+        # a clean venv. This catches the class of bug that 2026.5.1 shipped with:
+        # frontend dist missing, package-lock.json missing, or the wheel's Python
+        # source tree broken in a way that surfaces only at app construction time.
+        run: |
+          python -m venv /tmp/v
+          /tmp/v/bin/pip install --upgrade pip
+          /tmp/v/bin/pip install -r studio/backend/requirements/studio.txt
+          /tmp/v/bin/pip install \
+            python-multipart aiofiles sqlalchemy cryptography \
+            pyyaml jinja2 mammoth unpdf requests \
+            'numpy<3'
+          /tmp/v/bin/pip install --no-deps dist/unsloth-*.whl
+          # Run from /tmp so Python imports the installed package, not the source tree.
+          cd /tmp
+          /tmp/v/bin/python -c "from studio.backend.main import app; print('Studio backend OK:', app.title)"
+
+      - name: Upload wheel on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: unsloth-wheel
+          path: dist/
+          retention-days: 7
diff --git a/unsloth/models/rl_replacements.py b/unsloth/models/rl_replacements.py index 0f10847282..0611b63361 100755 --- a/unsloth/models/rl_replacements.py +++ b/unsloth/models/rl_replacements.py @@ -1045,6 +1045,7 @@ def _get_per_token_logps_and_entropies( kwargs.get("pixel_attention_mask", None), kwargs.get("image_sizes", None), ) + num_images = kwargs.get("num_images", None) # Transformers 5.x needs token_type_ids/mm_token_type_ids for some vision models token_type_ids = kwargs.get("token_type_ids", None) mm_token_type_ids = kwargs.get("mm_token_type_ids", None) @@ -1099,65 +1100,95 @@ def _get_per_token_logps_and_entropies( else: max_left_pad = 0 - # input_ids_chunks = torch.chunk(input_ids, chunks = B, dim = 0) - attention_mask_chunks = 
torch.chunk(attention_mask, chunks = B, dim = 0) - - def chunk_optional(tensor, chunks): - if tensor is None: - return [None] * chunks - return torch.chunk(tensor, chunks = chunks, dim = 0) + def slice_sample_axis(value, start, end): + if value is None: + return None + return value[start:end] import math total_samples = input_ids.shape[0] batch_size = math.ceil(total_samples / B) + if isinstance(num_images, torch.Tensor): + num_images = num_images.detach().cpu().reshape(-1).tolist() + if ( + image_grid_thw is not None + and pixel_values is not None + and num_images is not None + ): + rows_per_image = image_grid_thw.prod(dim = -1) + rows_per_sample = torch.split(rows_per_image, num_images) + rows_per_sample = torch.stack([s.sum() for s in rows_per_sample]) + cum_rows = torch.cat( + [ + torch.tensor([0], device = rows_per_sample.device), + rows_per_sample.cumsum(0), + ] + ) + cum_imgs = torch.tensor([0] + num_images).cumsum(0) + else: + cum_rows = None + cum_imgs = None input_ids_chunks = [] attention_mask_chunks = [] pixel_values_chunks = [] image_grid_thw_chunks = [] pixel_attention_mask_chunks = [] + image_sizes_chunks = [] + token_type_ids_chunks = [] + mm_token_type_ids_chunks = [] current_pixel_idx = 0 # TRL 0.23.0 batching logic for start in range(0, total_samples, batch_size): - end = start + batch_size + end = min(start + batch_size, total_samples) input_ids_chunks.append(input_ids[start:end]) attention_mask_chunks.append(attention_mask[start:end]) + image_sizes_chunks.append(slice_sample_axis(image_sizes, start, end)) + token_type_ids_chunks.append( + slice_sample_axis(token_type_ids, start, end) + ) + mm_token_type_ids_chunks.append( + slice_sample_axis(mm_token_type_ids, start, end) + ) if image_grid_thw is not None and pixel_values is not None: - grid_slice = image_grid_thw[start:end] + if num_images is None: + grid_slice = image_grid_thw[start:end] + batch_pixel_count = grid_slice.prod(dim = -1).sum().item() + start_pixel_idx = current_pixel_idx + 
end_pixel_idx = current_pixel_idx + batch_pixel_count + current_pixel_idx = end_pixel_idx + else: + start_pixel_idx = cum_rows[start].item() + end_pixel_idx = cum_rows[end].item() + img_start, img_end = cum_imgs[start], cum_imgs[end] + grid_slice = image_grid_thw[img_start:img_end] image_grid_thw_chunks.append(grid_slice) - batch_pixel_count = grid_slice.prod(dim = -1).sum().item() - - start_pixel_idx = current_pixel_idx - end_pixel_idx = current_pixel_idx + batch_pixel_count - pixel_values_chunks.append( pixel_values[start_pixel_idx:end_pixel_idx] ) if pixel_attention_mask is not None: - pixel_attention_mask_chunks.append( - pixel_attention_mask[start_pixel_idx:end_pixel_idx] - ) + if pixel_attention_mask.shape[0] == pixel_values.shape[0]: + pixel_attention_mask_chunks.append( + pixel_attention_mask[start_pixel_idx:end_pixel_idx] + ) + else: + pixel_attention_mask_chunks.append( + pixel_attention_mask[start:end] + ) else: pixel_attention_mask_chunks.append(None) - current_pixel_idx = end_pixel_idx - else: pixel_values_chunks.append(None) image_grid_thw_chunks.append(None) pixel_attention_mask_chunks.append(None) - if image_sizes is not None and not isinstance(image_sizes, torch.Tensor): - image_sizes_chunks = [[size] for size in image_sizes] - else: - image_sizes_chunks = chunk_optional(image_sizes, B) - temperature = self.temperature logit_softcapping = _unsloth_get_final_logit_softcapping(model.config) logit_scale_multiply = getattr(model.config, "logit_scale", 0) @@ -1167,10 +1198,6 @@ def chunk_optional(tensor, chunks): if logit_scale_divide is None: logit_scale_divide = 0 - # Transformers 5.x needs token_type_ids/mm_token_type_ids for some vision models - token_type_ids_chunks = chunk_optional(token_type_ids, B) - mm_token_type_ids_chunks = chunk_optional(mm_token_type_ids, B) - zipped_inputs = zip( input_ids_chunks, attention_mask_chunks, @@ -1375,6 +1402,7 @@ def compute_loss( inputs.get("pixel_attention_mask", None), inputs.get("image_sizes", None), ) + 
num_images = inputs.get("num_images", None) # Transformers 5.x needs token_type_ids/mm_token_type_ids for some vision models token_type_ids = inputs.get("token_type_ids", None) mm_token_type_ids = inputs.get("mm_token_type_ids", None) @@ -1504,6 +1532,9 @@ def compute_loss( input_ids = _input_ids, pixel_values = pixel_values, image_grid_thw = image_grid_thw, + pixel_attention_mask = pixel_attention_mask, + image_sizes = image_sizes, + num_images = num_images, logits_to_keep = logits_to_keep, completion_mask = completion_mask, advantages = advantages, @@ -1535,6 +1566,11 @@ def compute_loss( grpo_accumulated_loss( trainer = self, input_ids = _input_ids, + pixel_values = pixel_values, + image_grid_thw = image_grid_thw, + pixel_attention_mask = pixel_attention_mask, + image_sizes = image_sizes, + num_images = num_images, logits_to_keep = logits_to_keep, completion_mask = completion_mask, advantages = advantages,