diff --git a/.github/workflows/accelerator-local-llm-validate.yml b/.github/workflows/accelerator-local-llm-validate.yml new file mode 100644 index 0000000000..301e5c07e7 --- /dev/null +++ b/.github/workflows/accelerator-local-llm-validate.yml @@ -0,0 +1,81 @@ +# Accelerator — local-LLM entropy-lever validation (off-leash, accelerator branch). +# +# Proves the claim: a BARE runner + `install.sh` ⇒ working local-LLM substrate +# (operator 2026-05-30 "install.sh is our biggest lever against entropy"). Runs the +# real install graph, asserts the pinned model actually landed + serves, and runs +# a REAL (not mocked) selection through the local model. This is the gate that +# graduates the local-LLM core primitive from off-leash (accelerator) to main. +# +# Pushing this workflow / any local-LLM file to the accelerator branch triggers it. +# Heavy (full install + ~400MB model pull); concurrency cancels superseded runs. + +name: accelerator-local-llm-validate + +on: + workflow_dispatch: + push: + branches: [accelerator/pr-less-git-monster] + paths: + - "tools/setup/manifests/local-llm" + - "tools/setup/common/local-llm.sh" + - "tools/setup/linux.sh" + - "tools/setup/macos.sh" + - "tools/setup/manifests/brew" + - "tools/accelerator/local-llm.ts" + - "tools/accelerator/validate-local-llm.ts" + - ".github/workflows/accelerator-local-llm-validate.yml" + +concurrency: + group: accelerator-local-llm-validate-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + validate-linux: + runs-on: ubuntu-24.04 + timeout-minutes: 25 + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Setup bun + uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0 + + - name: Run install.sh (the entropy lever — bare runner → substrate) + env: + # Authenticated mise (per the mise.sh fix) so the toolchain install + # doesn't hit the unauthenticated GitHub rate limit. 
+ MISE_GITHUB_TOKEN: ${{ github.token }} + run: ./tools/setup/install.sh + + - name: Ensure ollama on PATH + daemon serving + run: | + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + export PATH="$HOME/.local/bin:$PATH" + command -v ollama + if ! curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1; then + (ollama serve >/dev/null 2>&1 &) + for _ in $(seq 1 30); do + curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1 && break + sleep 1 + done + fi + curl -fsS http://127.0.0.1:11434/api/version + + - name: Assert the pinned model landed (declarative manifest) + run: | + export PATH="$HOME/.local/bin:$PATH" + MODEL=$(grep -E '^model' tools/setup/manifests/local-llm | awk '{print $2}') + echo "expected model: $MODEL" + ollama list + ollama list | awk 'NR>1 {print $1}' | grep -qx "$MODEL" + + - name: Mock-backed primitive tests (logic; run anywhere) + run: bun test tools/accelerator/local-llm.test.ts + + - name: REAL local-LLM validation (entropy-lever end-to-end) + run: | + export PATH="$HOME/.local/bin:$PATH" + bun tools/accelerator/validate-local-llm.ts --root "$PWD" diff --git a/.github/workflows/docker-nixos-install-sh-test.yml b/.github/workflows/docker-nixos-install-sh-test.yml index 2740b2adc9..6fd5e198ac 100644 --- a/.github/workflows/docker-nixos-install-sh-test.yml +++ b/.github/workflows/docker-nixos-install-sh-test.yml @@ -47,6 +47,12 @@ on: push: branches: - main + # Off-leash validation: install.sh changes are built on the accelerator + # branch (incl. the local-LLM step) BEFORE harvesting to main (the operator's + # off-leash-first model). This test validates install.sh, so it must + # re-run when install.sh changes there too — otherwise the primary OS is + # only re-validated at harvest time. 
+ - accelerator/pr-less-git-monster paths: - 'tools/setup/**' - '.mise.toml' diff --git a/.github/workflows/docker-ubuntu-install-sh-test.yml b/.github/workflows/docker-ubuntu-install-sh-test.yml new file mode 100644 index 0000000000..a7b5bd33f4 --- /dev/null +++ b/.github/workflows/docker-ubuntu-install-sh-test.yml @@ -0,0 +1,73 @@ +# .github/workflows/docker-ubuntu-install-sh-test.yml +# +# Docker-based install.sh test on Ubuntu — sibling to docker-nixos-install-sh-test +# (operator 2026-05-30: "center our docker tests around ubuntu and nixos and have +# tests for both with install.sh"). The Dockerfile IS the test: it runs install.sh +# on a bare ubuntu image and validates the core local-LLM primitive (ollama + +# pinned model + real chooseIndex probe). A failing install.sh / assert fails the +# build, which fails this job. +# +# Off-leash on the accelerator branch (operator: "accelerator is for off-leash +# testing; once we get it right, main becomes off-leash too"). This is the gate +# that guards graduating the local-LLM install primitive to main. +# +# FIRST CUT uses a direct `docker build` (vs the nixos TS driver) for simplicity. +# FOLLOW-UP (the operator's GHA-cache point): consolidate both OS tests onto a shared TS +# driver + buildx `cache-from/to: type=gha` so the heavy install (1.2GB ollama + +# toolchain) bakes once and iteration runs inside the cached image. +# +# Security: no github.event.* values interpolated into run: lines. 
+ +name: docker-ubuntu-install-sh-test + +on: + workflow_dispatch: + push: + branches: [accelerator/pr-less-git-monster] + paths: + - "tools/ci/dockerfiles/ubuntu-install-sh-test/**" + - "tools/setup/**" + - "tools/accelerator/local-llm.ts" + - "tools/accelerator/validate-local-llm.ts" + - ".mise.toml" + - ".dockerignore" + - ".github/workflows/docker-ubuntu-install-sh-test.yml" + # Run on PRs to main too — after harvest the install-graph lives on main, so a + # PR touching it must be Ubuntu-tested (mirrors docker-nixos-install-sh-test). + # The shield must cover main, not just the accelerator branch: a test that only + # fires off-leash is a hole that reads as covered. + pull_request: + types: [opened, reopened, synchronize, ready_for_review] + paths: + - "tools/ci/dockerfiles/ubuntu-install-sh-test/**" + - "tools/setup/**" + - "tools/accelerator/local-llm.ts" + - "tools/accelerator/validate-local-llm.ts" + - ".mise.toml" + - ".dockerignore" + - ".github/workflows/docker-ubuntu-install-sh-test.yml" + +concurrency: + group: docker-ubuntu-install-sh-test-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + docker-ubuntu-test: + name: docker-ubuntu-install-sh-test + runs-on: ubuntu-24.04 + # Cold build: full install.sh (mise toolchain + lean + jars) + ollama 1.2GB + + # 398MB model pull. Generous bound for the first uncached run. + timeout-minutes: 40 + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: docker build (the test — install.sh + local-LLM validation inside) + run: | + docker build \ + -f tools/ci/dockerfiles/ubuntu-install-sh-test/Dockerfile \ + -t zeta-ubuntu-install-sh-test \ + . 
diff --git a/docs/BACKLOG.md b/docs/BACKLOG.md index c25cd6a5da..fdafef4742 100644 --- a/docs/BACKLOG.md +++ b/docs/BACKLOG.md @@ -889,6 +889,8 @@ are closed (status: closed in frontmatter)._ - [ ] **[B-0934](backlog/P2/B-0934-backlog-index-integrity-required-or-advisory-decision-2026-05-29.md)** Decide whether backlog-index-integrity is required or explicitly advisory - [ ] **[B-0937](backlog/P2/B-0937-redundancy-checks-across-satellites-and-rules-mirror-beacon-rhyme-retirement-hub-over-budget-detector-2026-05-29.md)** Redundancy-checks across satellites + rules — duplicate-content audit, mirror→beacon rhyme-retirement, hub-over-budget detector - [ ] **[B-0939](backlog/P2/B-0939-self-propagating-markdown-compiler-rule-bootstrap-traveler-template-every-md-propagates-or-doesnt-compile-aaron-mika-2026-05-30.md)** Self-propagating-Markdown compiler-rule + bootstrap-traveler template (every .md is a self-propagating pattern through time or it doesn't compile) +- [ ] **[B-0940](backlog/P2/B-0940-evaluate-ubuntu-support-value-nixos-primary-community-reach-aaron-2026-05-30.md)** Evaluate what Ubuntu support brings us — NixOS is primary; Ubuntu's value is community/contributor reach +- [x] **[B-0941](backlog/P2/B-0941-nixos-native-ollama-local-llm-hole-in-the-shield-test-passes-by-skipping-aaron-2026-05-30.md)** NixOS-native ollama for the local-LLM primitive — close the hole in the shield (NixOS test passes by SKIPPING, not validating) - [ ] **[B-0942](backlog/P2/B-0942-co-dominant-git-mirrors-git-native-crdt-coordination-no-host-needed-aaron-2026-05-30.md)** Co-dominant git mirrors + git-native CRDT coordination — no host needed for coordination (local + GitHub + GitLab/Gitea/Forgejo + free-OSS git as co-dominant collaborating mirrors) ## P3 — convenience / deferred diff --git a/docs/backlog/P2/B-0940-evaluate-ubuntu-support-value-nixos-primary-community-reach-aaron-2026-05-30.md 
b/docs/backlog/P2/B-0940-evaluate-ubuntu-support-value-nixos-primary-community-reach-aaron-2026-05-30.md new file mode 100644 index 0000000000..da61fe5289 --- /dev/null +++ b/docs/backlog/P2/B-0940-evaluate-ubuntu-support-value-nixos-primary-community-reach-aaron-2026-05-30.md @@ -0,0 +1,88 @@ +--- +id: B-0940 +priority: P2 +status: open +title: Evaluate what Ubuntu support brings us — NixOS is primary; Ubuntu's value is community/contributor reach +tier: strategic-evaluation +ask: Aaron 2026-05-30 +created: 2026-05-30 +last_updated: 2026-05-30 +decomposition: leaf +composes_with: + - tools/setup/install.sh + - .github/workflows/docker-nixos-install-sh-test.yml + - .claude/rules/dv2-data-split-discipline-activated.md +tags: [install-sh, nixos, ubuntu, ci, docker, three-way-parity, strategic] +type: evaluation +--- + +# B-0940 — Evaluate what Ubuntu support brings us (NixOS primary) + +## Origin + +Aaron 2026-05-30 (during the Docker Ubuntu+NixOS test build): *"i would also say +nixos is our primary we should put on backlog and evaluate what ubuntu is bringing +us, the community of ubuntu is really why i'm thinking ubuntu matters."* + +## The question + +**NixOS is the primary target — declarative BY CONSTRUCTION.** Aaron 2026-05-30 +(the deeper rationale): *"nix is what boots the usb/iso our real hardware boots +cause it's declarative. ubuntu is not on its dependency management — we use +install.sh to make ubuntu work like nixos with declarative dependencies."* + +This is the load-bearing distinction: + +- **NixOS** boots the **real hardware** (the USB/ISO that boots actual machines) + *because* the whole system — OS config + dependency closure — is declarative and + reproducible by construction. No bridge needed; declarativeness is native. +- **Ubuntu** is **imperative** in its dependency management (apt, ad-hoc installs). + It has no native declarative-deps property. 
+- **`install.sh` + the declarative manifests** (manifests/local-llm, .mise.toml, + manifests/apt/brew, …) are the **bridge that retrofits NixOS-like declarative + dependencies ONTO Ubuntu** — i.e. install.sh's job on Ubuntu is literally "make + Ubuntu behave like NixOS." That's the entropy-lever framing applied to a + non-declarative base OS. + +So NixOS is primary not just by preference but by *kind*: it IS the declarative +substrate; Ubuntu is made to *act* declarative via install.sh. The cost of Ubuntu +is maintaining that simulation layer (the install.sh Ubuntu path + apt deps + +floating-binary installs); the value is what the next paragraph weighs. + +**Ubuntu's value is community/contributor reach**, not technical superiority. +Aaron's framing: Ubuntu matters because of its *community* — contributor +familiarity, the default-mental-model for most devs, GitHub-hosted runner +ubiquity (ubuntu-latest is the CI default), and the volume of Ubuntu-targeting +prior art. The question is whether that reach justifies Ubuntu as a *first-class* +install/CI target or whether it's community-convenience only. + +## What to evaluate + +- **Contributor reach**: how many would-be contributors are Ubuntu-default vs + willing to use NixOS? Does first-class Ubuntu lower the contribution barrier + enough to matter? +- **CI ubiquity**: `ubuntu-24.04` is the default GH-hosted runner; NixOS in CI is + container/QEMU-mediated. What does dropping/keeping Ubuntu cost in CI surface? +- **Maintenance cost** of the Ubuntu path: the `apt` manifest, the floating-binary + installs (e.g. the ollama `.tar.zst` linux install in `common/local-llm.sh`), + and the non-reproducibility vs NixOS's pinned closure. +- **Decision**: Ubuntu stays first-class (community justifies it) OR Ubuntu becomes + community-convenience-only (best-effort, NixOS is the supported/reproducible + path) OR some tiered support level. + +## Acceptance + +1. 
A short decision doc (in `docs/research/` or as this row's Resolution) weighing + Ubuntu's community-reach value against its maintenance + non-reproducibility + cost, with NixOS established as primary. +2. A clear support-tier statement for Ubuntu (first-class / community-convenience / + tiered) that the install-graph + CI strategy follow. + +## Notes + +Surfaced alongside the Docker Ubuntu+NixOS install.sh test pair (both OSes run +install.sh in containers; per Aaron's "center our docker tests around ubuntu and +nixos"). This row is the *strategic* counterpart: building the Ubuntu test does not +by itself decide Ubuntu's long-term support tier — this row does. NixOS-primary is +the standing default; Ubuntu is retained pending this evaluation because of its +community reach. diff --git a/docs/backlog/P2/B-0941-nixos-native-ollama-local-llm-hole-in-the-shield-test-passes-by-skipping-aaron-2026-05-30.md b/docs/backlog/P2/B-0941-nixos-native-ollama-local-llm-hole-in-the-shield-test-passes-by-skipping-aaron-2026-05-30.md new file mode 100644 index 0000000000..36d88de772 --- /dev/null +++ b/docs/backlog/P2/B-0941-nixos-native-ollama-local-llm-hole-in-the-shield-test-passes-by-skipping-aaron-2026-05-30.md @@ -0,0 +1,138 @@ +--- +id: B-0941 +priority: P2 +status: closed +title: NixOS-native ollama for the local-LLM primitive — close the hole in the shield (NixOS test passes by SKIPPING, not validating) +tier: install-graph-correctness +ask: Aaron 2026-05-30 +created: 2026-05-30 +last_updated: 2026-05-30 +decomposition: leaf +composes_with: + - tools/setup/common/local-llm.sh + - tools/setup/manifests/local-llm + - .github/workflows/docker-nixos-install-sh-test.yml + - tools/accelerator/validate-local-llm.ts + - docs/backlog/P2/B-0940-evaluate-ubuntu-support-value-nixos-primary-community-reach-aaron-2026-05-30.md +tags: [install-sh, nixos, ollama, local-llm, ci, docker, false-green, entropy-shield] +type: bug +--- + +# B-0941 — NixOS-native ollama: close the hole in the 
shield + +## Origin + +Surfaced 2026-05-30 while validating the local-LLM core primitive (ollama + +qwen2.5:0.5b CPU model) across the Docker Ubuntu+NixOS install.sh test matrix. + +Aaron 2026-05-30, on what actually holds back entropy: *"it's impossible to keep +all the install surfaces in your mind at once — only automation can be sure a +nixos change didn't break ubuntu or mac and vice versa. trying to manually make +sure everything is a losing game to entropy."* And the sharpening: the entropy +shield is not install.sh itself — *"the automated tests around install.sh +honestly — that's the shield."* + +This row is a **hole in that shield.** + +## The bug — false-green on the primary OS + +`tools/setup/common/local-llm.sh` installs ollama on Linux by downloading the +**generic upstream binary** (`ollama-linux-<arch>.tar.zst`) into `~/.local/bin`. +That works on Ubuntu (FHS). It does **NOT** work on NixOS: + +- NixOS is **non-FHS** — a generic dynamically-linked binary dropped into + `~/.local/bin` won't find its loader/libs. The ollama binary won't run. +- `local-llm.sh` is intentionally **graceful** (warn + `exit 0` on any failure) + so install.sh never hard-fails on the local-LLM step. + +Compose those two facts and the result is a **false-green**: on NixOS, +`local-llm.sh` fails to produce a working ollama, skips gracefully, and the +`docker-nixos-install-sh-test` build **passes anyway** — because the NixOS test +validates that *install.sh runs clean*, NOT that *the local-LLM actually works*. + +So the automated test (the shield) reports green on the **primary OS** while the +local-LLM primitive is non-functional there. A shield with a hole is worse than a +known gap, because it reads as covered. + +NixOS is the primary (B-0940: declarative-by-construction; boots the real +hardware via USB/ISO). The local-LLM primitive being silently broken on the +primary — behind a green check — is the exact failure mode the test matrix exists +to prevent. 
+ +## Fix (two halves — both required to close the hole) + +### Half 1 — NixOS-native ollama (declarative) + +NixOS should get ollama the declarative-native way, not via the Ubuntu +generic-binary retrofit: + +- Add `services.ollama.enable = true;` (or `environment.systemPackages = [ pkgs.ollama ];` + + a oneshot model-pull unit) to the appropriate NixOS module + (`full-ai-cluster/nixos/modules/common.nix` or a dedicated `local-llm.nix`). +- Pin the model to `manifests/local-llm` (`qwen2.5:0.5b`) so the declarative + pin stays the single source of truth across all three OSes. +- `local-llm.sh` should **detect NixOS** (`/etc/NIXOS` or `$NIX_PATH`) and + no-op there (ollama comes from the system closure, not the script) — the + generic-binary path stays for Ubuntu only. + +Note: existing `ollama` mentions in `full-ai-cluster/nixos/` are the **big-cluster +GPU-serving** path (worker-gpu via Ollama/vLLM, per control-plane README) — a +different concern from this small-CPU dev/CI/DST local-LLM primitive. This row is +the latter. + +### Half 2 — make the NixOS test ASSERT, not skip + +Turn the false-green into a true signal: the `docker-nixos-install-sh-test` (and +its Dockerfile) must run the same local-LLM validation the Ubuntu test does — +start the daemon, assert the pinned model is present, run the **real** `chooseIndex` +probe (`tools/accelerator/validate-local-llm.ts`), and **fail the build if the +local-LLM is absent**. Graceful-skip is correct for `install.sh` (don't brick a +machine over an optional probe), but the **test** must not inherit that grace — +the test's job is to catch exactly this. + +## Acceptance + +1. On a NixOS image, the local-LLM primitive (ollama + pinned model + working + `chooseIndex`) is functional — installed the declarative-native way. +2. `docker-nixos-install-sh-test` ASSERTS the local-LLM works (real probe), and + **fails** if it doesn't — no more graceful-skip-to-green for the primitive. +3. 
The `manifests/local-llm` model pin remains the single cross-OS source of + truth (Ubuntu generic-binary, macOS brew, NixOS nixpkgs all read it). + +## Why P2 (not P1) + +The local-LLM primitive is a **testing/DST seam** (the move-next selector + the +planned observe.ts auto-classifier), not yet a production-serving path. The hole +is in test-fidelity on the primary OS, which matters before harvest-to-main but +doesn't block live behavior today. Raise to P1 if/when the local-LLM becomes +load-bearing for a shipped path on NixOS hardware. + +## Composes + +- **B-0940** (Ubuntu-value evaluation; NixOS primary) — this row is the concrete + correctness counterpart: NixOS-primary means the NixOS local-LLM must actually + work, not just pass-by-skip. +- The Docker Ubuntu+NixOS(+mac) install.sh test matrix — the shield; this row + patches a hole in it. +- `.claude/rules/dep-pin-search-first-authority.md` — `manifests/local-llm` model + pin as the single declarative source of truth across OSes. + +## Resolution (2026-05-30) + +Closed. Both halves landed: + +1. **NixOS-native ollama** — `common/local-llm.sh` detects `/etc/NIXOS` and installs + ollama via `nix build --out-link` (GC-rooted store path) + an + `LD_LIBRARY_PATH`-clean wrapper (the FHS-mise glibc hack would otherwise override + ollama's RPATH → `__nptl_change_stack_perm` symbol error). FHS-safe in the + container AND on real NixOS; the declarative real-hardware `services.ollama` path + stays a complementary follow-up. +2. **Test ASSERTS, not skips** — `docker-nixos-install-sh-test` now starts the + daemon, asserts the pinned model is present, and runs the real `chooseIndex` + probe (`validate-local-llm.ts`) — fails the build if the local-LLM is absent. 
+ +Verified green-with-assert (runs 26686148178 + 26686797500): +`✓ ollama via nix build (GC-rooted out-link) + wrapper` → model pulled → +`validate-local-llm: backend=ollama:qwen2.5:0.5b raw="0" index=0 fallback=false` +(the local LLM genuinely answered — not skip-to-green). The false-green is closed. +Graduated to main via the narrowed install-graph harvest. diff --git a/tools/accelerator/local-llm.test.ts b/tools/accelerator/local-llm.test.ts new file mode 100644 index 0000000000..e296cd3db5 --- /dev/null +++ b/tools/accelerator/local-llm.test.ts @@ -0,0 +1,82 @@ +// tools/accelerator/local-llm.test.ts +// +// Backend-agnostic tests for the local-LLM primitive — mock the model, so these +// run anywhere with no model/account (the selection + fallback logic is what we +// validate here; the actual on-runner model is exercised by the workflow). + +import { describe, expect, test } from "bun:test"; +import { chooseIndex, classify, type ModelBackend } from "./local-llm.ts"; + +function mockBackend(reply: string): ModelBackend { + return { name: "mock", complete: async () => reply }; +} +function throwingBackend(): ModelBackend { + return { + name: "mock-throw", + complete: async () => { + throw new Error("model unavailable"); + }, + }; +} + +describe("chooseIndex — the CYOA / classifier choice primitive", () => { + test("parses a clean index", async () => { + const r = await chooseIndex(mockBackend("1"), { context: "x", options: ["a", "b", "c"] }); + expect(r).toEqual({ index: 1, raw: "1", fallback: false }); + }); + + test("extracts the first digit from noisy output", async () => { + const r = await chooseIndex(mockBackend("The best choice is 2 because…"), { + context: "x", + options: ["a", "b", "c"], + }); + expect(r.index).toBe(2); + expect(r.fallback).toBe(false); + }); + + test("falls back to 0 on an out-of-range index", async () => { + const r = await chooseIndex(mockBackend("9"), { context: "x", options: ["a", "b"] }); + expect(r.index).toBe(0); + 
expect(r.fallback).toBe(true); + }); + + test("falls back to 0 on non-numeric output", async () => { + const r = await chooseIndex(mockBackend("banana"), { context: "x", options: ["a", "b"] }); + expect(r.index).toBe(0); + expect(r.fallback).toBe(true); + }); + + test("falls back to 0 when the backend throws (loop never stalls)", async () => { + const r = await chooseIndex(throwingBackend(), { context: "x", options: ["a", "b"] }); + expect(r.index).toBe(0); + expect(r.fallback).toBe(true); + }); + + test("single option short-circuits with no model call", async () => { + // throwingBackend would throw if called — proves no call happened. + const r = await chooseIndex(throwingBackend(), { context: "x", options: ["only"] }); + expect(r).toEqual({ index: 0, raw: "", fallback: false }); + }); + + test("empty options throws (caller bug, not a model failure)", async () => { + await expect(chooseIndex(mockBackend("0"), { context: "x", options: [] })).rejects.toThrow(); + }); +}); + +describe("classify — observe.ts auto-classifier shape", () => { + test("maps the chosen index to its label", async () => { + const r = await classify(mockBackend("0"), { + input: "deploy rolled back after error spike", + labels: ["incident", "normal"], + }); + expect(r.label).toBe("incident"); + expect(r.index).toBe(0); + expect(r.fallback).toBe(false); + }); + + test("fallback picks the first label safely", async () => { + const r = await classify(throwingBackend(), { input: "x", labels: ["a", "b"] }); + expect(r.label).toBe("a"); + expect(r.fallback).toBe(true); + }); +}); diff --git a/tools/accelerator/local-llm.ts b/tools/accelerator/local-llm.ts new file mode 100644 index 0000000000..f16c69a98a --- /dev/null +++ b/tools/accelerator/local-llm.ts @@ -0,0 +1,169 @@ +// tools/accelerator/local-llm.ts +// +// A small, ACCOUNT-FREE local-LLM primitive for the accelerator. 
The whole +// point: validate the "LLM-in-the-loop" seam on a GitHub CPU runner at ZERO +// spend — no API key, no account — before attaching a real harness (Claude +// Code / Codex / …). Run a tiny instruct model (e.g. Qwen2.5-0.5B) locally on +// the runner; this module is the backend-agnostic core that talks to it. +// +// Reusable for TWO consumers (operator 2026-05-30): +// 1. move-next SELECTOR — "choose your own adventure": pick the next move +// from the menu (the SelectMove seam in move-next-harness.ts). +// 2. observe.ts AUTO-CLASSIFIER (future, Max's keystone) — "given an +// observation, pick one label." Same shape: constrained choice among N. +// +// Backend-swappable: ollamaBackend (localhost) today; node-llama-cpp (in-process, +// GBNF-grammar-constrained) or a real account-backed backend later. Selection is +// always validated + falls back safely, so a bad/slow/absent model never stalls +// the loop (exceptions-as-signals: the model is best-effort, the fallback is the +// safety rail). + +// ─── Backend interface ─────────────────────────────────────────────── +// DST note (operator 2026-05-30): a small local model at temperature 0 (greedy) + +// a fixed `seed` + a PINNED model/quantization is DETERMINISTIC — same input ⇒ +// same output, reproducibly — so it can be a real (not mocked) fixture in +// deterministic-simulation tests (e.g. observe.ts's auto-classifier), not just a +// runtime selector. Cross-hardware caveat: CPU float order can differ across +// runner architectures, so pin the runner image (or snapshot the output) when +// asserting exact classifications across machines; on one image it is stable. 
+export interface CompleteOptions { + readonly temperature?: number; // default 0 (greedy — reproducible, DST) + readonly seed?: number; // fix for deterministic-simulation reproducibility + readonly maxTokens?: number; // selection needs only a few tokens +} + +export interface ModelBackend { + readonly name: string; + /** Complete a prompt with a small local model. Returns raw text. */ + complete(prompt: string, opts?: CompleteOptions): Promise<string>; +} + +// ─── Ollama backend (account-free; model runs on the runner) ───────── +export interface OllamaOptions { + readonly model?: string; // tiny instruct model + readonly host?: string; + readonly timeoutMs?: number; + readonly seed?: number; // default deterministic seed (DST); override per-call +} + +/** + * Validate the ollama host is loopback. The local LLM only ever talks to an + * ON-MACHINE daemon — a host from the (file-sourced) manifest must never point at + * a remote, which would exfiltrate prompts (the CodeQL "file data → outbound + * request" SSRF taint, #6123). Returns the validated host (an explicit guard + * between the file-source and the fetch sink); throws on a non-loopback host. + */ +function loopbackHostOrThrow(raw: string): string { + const hostname = new URL(raw).hostname.replace(/^\[|\]$/g, ""); // strip IPv6 [ ] + if (hostname !== "127.0.0.1" && hostname !== "localhost" && hostname !== "::1") { + throw new Error( + `local-llm host must be loopback (got "${hostname}") — the local LLM only talks to an on-machine daemon`, + ); + } + return raw; +} + +/** A ModelBackend backed by a local Ollama server (no account/key). */ +export function ollamaBackend(opts: OllamaOptions = {}): ModelBackend { + const model = opts.model ?? "qwen2.5:0.5b"; + const host = loopbackHostOrThrow(opts.host ?? "http://127.0.0.1:11434"); + const timeoutMs = opts.timeoutMs ?? 60_000; + const defaultSeed = opts.seed ?? 
0; // fixed seed ⇒ reproducible (DST) + return { + name: `ollama:${model}`, + async complete(prompt, o) { + const ctrl = new AbortController(); + const timer = setTimeout(() => ctrl.abort(), timeoutMs); + try { + const res = await fetch(`${host}/api/generate`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model, + prompt, + stream: false, + options: { + temperature: o?.temperature ?? 0, + seed: o?.seed ?? defaultSeed, + num_predict: o?.maxTokens ?? 6, + }, + }), + signal: ctrl.signal, + }); + if (!res.ok) throw new Error(`ollama HTTP ${res.status}`); + const data = (await res.json()) as { response?: string }; + return data.response ?? ""; + } finally { + clearTimeout(timer); + } + }, + }; +} + +// ─── chooseIndex: the constrained-choice primitive ─────────────────── +export interface ChooseArgs { + readonly context: string; // describe the current state / observation + readonly options: readonly string[]; // human-readable option labels + readonly instruction?: string; +} + +export interface ChooseResult { + readonly index: number; // always a valid index into options + readonly raw: string; // the model's raw reply (for logging) + readonly fallback: boolean; // true ⇒ index 0 chosen because the model failed +} + +/** + * Ask the model to pick ONE option by index. Builds a numbered-options prompt, + * parses the first integer out of the reply, validates it is in range, and + * FALLS BACK to index 0 on any failure (empty menu is the only throw). A single + * option short-circuits with no model call. + */ +export async function chooseIndex(backend: ModelBackend, args: ChooseArgs): Promise<ChooseResult> { + const n = args.options.length; + if (n === 0) throw new Error("chooseIndex: options must be non-empty"); + if (n === 1) return { index: 0, raw: "", fallback: false }; + + const numbered = args.options.map((o, i) => `${i}: ${o}`).join("\n"); + const prompt = + `${args.instruction ?? "You are a selector. 
Choose the single best next action."}\n\n` + + `State:\n${args.context}\n\n` + + `Options:\n${numbered}\n\n` + + `Reply with ONLY the number of the chosen option (0-${n - 1}). Number:`; + + let raw = ""; + try { + raw = (await backend.complete(prompt, { temperature: 0, maxTokens: 6 })).trim(); + } catch { + return { index: 0, raw: "", fallback: true }; + } + const m = raw.match(/\d+/); + if (!m) return { index: 0, raw, fallback: true }; + const idx = Number.parseInt(m[0]!, 10); + if (!Number.isInteger(idx) || idx < 0 || idx >= n) return { index: 0, raw, fallback: true }; + return { index: idx, raw, fallback: false }; +} + +// ─── classify: observe.ts auto-classifier use case ─────────────────── +export interface ClassifyResult { + readonly label: string; + readonly index: number; + readonly fallback: boolean; +} + +/** + * Classify an input into exactly one of `labels` (the observe.ts auto-classifier + * shape). Thin wrapper over chooseIndex so the selector + classifier share one + * validated, fallback-safe code path. + */ +export async function classify( + backend: ModelBackend, + args: { input: string; labels: readonly string[]; instruction?: string }, +): Promise<ClassifyResult> { + const r = await chooseIndex(backend, { + context: args.input, + options: args.labels, + instruction: args.instruction ?? "Classify the input into exactly one label.", + }); + return { label: args.labels[r.index]!, index: r.index, fallback: r.fallback }; +} diff --git a/tools/accelerator/validate-local-llm.ts b/tools/accelerator/validate-local-llm.ts new file mode 100644 index 0000000000..c5f5698921 --- /dev/null +++ b/tools/accelerator/validate-local-llm.ts @@ -0,0 +1,65 @@ +// tools/accelerator/validate-local-llm.ts +// +// Proves the CORE local-LLM primitive actually works on THIS machine — the +// "entropy lever" end-to-end check (operator 2026-05-30): after install.sh has run, +// a bare machine should be working substrate. 
Reads the declarative pins +// (manifests/local-llm), talks to the locally-installed ollama, runs a REAL +// chooseIndex, and asserts a valid, non-fallback choice. Exits non-zero on +// failure (CI gate). Run AFTER install.sh. +// +// Note: asserts the model RESPONDED with a valid in-range index (not a specific +// answer) — that proves the real local-LLM is live. Exact-output DST assertions +// (snapshotting the deterministic temp0+seed output) belong in the test suite. + +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import { chooseIndex, ollamaBackend } from "./local-llm.ts"; + +function arg(flag: string, dflt: string): string { + const i = process.argv.indexOf(flag); + return i >= 0 && process.argv[i + 1] !== undefined ? process.argv[i + 1]! : dflt; +} + +const root = arg("--root", process.cwd()); +const manifestPath = join(root, "tools/setup/manifests/local-llm"); + +const txt = readFileSync(manifestPath, "utf8"); +const mget = (k: string): string | undefined => + txt + .split("\n") + .map((l) => l.trim()) + .filter((l) => l.length > 0 && !l.startsWith("#")) + .map((l) => l.split(/\s+/)) + .find(([key]) => key === k)?.[1]; + +const model = mget("model"); +const host = mget("host"); +const seed = Number.parseInt(mget("seed") ?? "0", 10); + +if (!model) { + console.error("validate-local-llm: no 'model' in manifest — cannot validate"); + process.exit(2); +} + +const backend = ollamaBackend({ model, seed, ...(host ? { host } : {}) }); + +const r = await chooseIndex(backend, { + context: "The agent is idle with no pending work this cycle.", + options: ["emit a heartbeat", "enter free time"], +}); + +console.log( + `validate-local-llm: backend=${backend.name} raw=${JSON.stringify(r.raw)} ` + + `index=${r.index} fallback=${r.fallback}`, +); + +if (r.fallback) { + console.error( + "validate-local-llm: FAILED — the model fell back (unreachable / unparseable). " + + "The real local-LLM did not produce a valid selection. 
Check that install.sh " + + "installed ollama + pulled the pinned model and the daemon is serving.", + ); + process.exit(1); +} + +console.log("validate-local-llm: OK — real local-LLM produced a valid in-range selection."); diff --git a/tools/ci/dockerfiles/nixos-install-sh-test/Dockerfile b/tools/ci/dockerfiles/nixos-install-sh-test/Dockerfile index 2c043fd14d..023474693e 100644 --- a/tools/ci/dockerfiles/nixos-install-sh-test/Dockerfile +++ b/tools/ci/dockerfiles/nixos-install-sh-test/Dockerfile @@ -116,6 +116,9 @@ COPY .mise.toml /workspace/.mise.toml # package.json + bun.lock pin TS-runtime deps if install.sh references # them (e.g., bun --version checks); copy to mirror dev environment COPY package.json bun.lock* /workspace/ +# tools/accelerator carries the local-LLM primitive's validator + tests +# (validate-local-llm.ts + local-llm.test.ts) for validation step 4 below. +COPY tools/accelerator /workspace/tools/accelerator # Run install.sh — this exercises: # 1. install.sh dispatch (detects Linux → linux.sh) @@ -156,6 +159,30 @@ RUN bash -lc 'set -o pipefail && eval "$(mise activate bash)" && \ # check). RUN nix-shell -p gh --run 'gh --version | head -1' +# Validation step 4 (B-0941): the local-LLM primitive ACTUALLY WORKS on NixOS — +# closes the false-green where the nixos test passed by SKIPPING. install.sh's +# local-llm.sh nix-branch installed ollama (FHS-safe via nix) + pulled the pinned +# model during step 130; the model persists on disk, the daemon does not across +# layers, so start it here and ASSERT (not skip): pinned model present + a REAL +# chooseIndex probe + the run-anywhere mock tests. A skip-to-green here would +# reintroduce the exact hole B-0941 names — so this RUN fails the build if the +# local-LLM is absent. (assert-don't-skip per the shield rule.) 
+RUN bash -lc 'set -eu; eval "$(mise activate bash)"; \ + export PATH="$HOME/.nix-profile/bin:/nix/var/nix/profiles/default/bin:$PATH"; \ + command -v ollama; \ + (ollama serve >/tmp/ollama.log 2>&1 &); \ + for _ in $(seq 1 30); do \ + curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1 && break; \ + sleep 1; \ + done; \ + curl -fsS http://127.0.0.1:11434/api/version || { echo "=== ollama serve log (/tmp/ollama.log) — daemon not reachable ==="; cat /tmp/ollama.log 2>/dev/null || echo "(no /tmp/ollama.log)"; echo "=== end serve log ==="; exit 7; }; \ + MODEL="$(grep -E "^model" tools/setup/manifests/local-llm | awk "{print \$2}")"; \ + echo "asserting model: $MODEL"; \ + ollama list; \ + ollama list | awk "NR>1 {print \$1}" | grep -qx "$MODEL"; \ + bun test tools/accelerator/local-llm.test.ts; \ + bun tools/accelerator/validate-local-llm.ts --root "$PWD"' + # Final marker — if all steps succeed, this echo lands in the build # output as the success signal for CI. RUN echo "B-0849 Phase 1 Docker harness validation COMPLETE — install.sh + mise + bun + claude-code all working on NixOS userspace" diff --git a/tools/ci/dockerfiles/ubuntu-install-sh-test/Dockerfile b/tools/ci/dockerfiles/ubuntu-install-sh-test/Dockerfile new file mode 100644 index 0000000000..656d7aef98 --- /dev/null +++ b/tools/ci/dockerfiles/ubuntu-install-sh-test/Dockerfile @@ -0,0 +1,61 @@ +# tools/ci/dockerfiles/ubuntu-install-sh-test/Dockerfile +# +# Docker-based install.sh test on Ubuntu userspace — sibling to +# nixos-install-sh-test (operator 2026-05-30: "center our docker tests around +# ubuntu and nixos and have tests for both with install.sh"). Proves the entropy +# lever on Ubuntu: a bare ubuntu image + install.sh => working substrate, +# INCLUDING the core local-LLM primitive (ollama + pinned model + real probe). +# +# NixOS is primary (declarative-by-construction; boots the real hardware via the +# USB/ISO). 
Ubuntu is made to ACT declarative via install.sh + the manifests +# (per B-0940) — this test guards that retrofit. +# +# The build IS the test: a failing install.sh / assert fails the build. +# install.sh runs as root (linux.sh handles root-vs-sudo via `id -u`). + +# Pinned by digest (per .claude/rules/dep-pin-search-first-authority.md; matches +# the nixos Dockerfile's digest-pin discipline). ubuntu:24.04 digest selected +# 2026-05-30 via the Docker registry API; bump: re-query +# registry-1.docker.io/v2/library/ubuntu/manifests/24.04 for the current digest. +FROM ubuntu:24.04@sha256:c4a8d5503dfb2a3eb8ab5f807da5bc69a85730fb49b5cfca2330194ebcc41c7b + +ENV DEBIAN_FRONTEND=noninteractive + +# Bootstrap prereqs install.sh needs before its own apt step runs: curl (mise + +# ollama downloads), ca-certificates (HTTPS), git, xz-utils. install.sh's apt +# step (manifests/apt) then installs the full set incl. zstd (ollama .tar.zst). +RUN apt-get update \ + && apt-get install -y --no-install-recommends ca-certificates curl git xz-utils \ + && rm -rf /var/lib/apt/lists/* + +# Pre-stage mise + bun + ollama PATH for ALL subsequent RUN layers — Docker does +# NOT persist install.sh's in-process PATH exports across layers (same fix as the +# nixos Dockerfile). install.sh installs mise to ~/.local/bin, shims to +# ~/.local/share/mise/shims, bun to ~/.bun/bin, ollama to ~/.local/bin. +ENV PATH=/root/.bun/bin:/root/.local/share/mise/shims:/root/.local/bin:/usr/local/bin:/usr/bin:/bin + +WORKDIR /zeta +COPY . /zeta + +# The entropy lever: bare ubuntu -> working substrate (incl. the local-LLM core). +RUN ./tools/setup/install.sh + +# Validate the local-LLM primitive end-to-end. The MODEL persists on disk +# (install.sh pulled it into a layer); the DAEMON does not persist across RUN +# layers, so start it here, assert the pinned model, then run the REAL chooseIndex +# probe + the run-anywhere mock tests. 
+RUN set -eu; \ + export PATH="/root/.local/bin:$PATH"; \ + command -v ollama; \ + (ollama serve >/tmp/ollama.log 2>&1 &); \ + for _ in $(seq 1 30); do \ + curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1 && break; \ + sleep 1; \ + done; \ + curl -fsS http://127.0.0.1:11434/api/version || { echo "=== ollama serve log (/tmp/ollama.log) — daemon not reachable ==="; cat /tmp/ollama.log 2>/dev/null || echo "(no /tmp/ollama.log)"; echo "=== end serve log ==="; exit 7; }; \ + MODEL="$(grep -E '^model' tools/setup/manifests/local-llm | awk '{print $2}')"; \ + echo "asserting model: $MODEL"; \ + ollama list; \ + ollama list | awk 'NR>1 {print $1}' | grep -qx "$MODEL"; \ + bun test tools/accelerator/local-llm.test.ts; \ + bun tools/accelerator/validate-local-llm.ts --root "$PWD" diff --git a/tools/hygiene/check-bash-retirement-inventory.test.ts b/tools/hygiene/check-bash-retirement-inventory.test.ts index f3a260089a..28591660df 100644 --- a/tools/hygiene/check-bash-retirement-inventory.test.ts +++ b/tools/hygiene/check-bash-retirement-inventory.test.ts @@ -273,7 +273,7 @@ describe("renderReport", () => { expect(renderReport(report)).toContain(`OK: retained non-Lean shell surface matches ${RETAINED_SHELL_SCOPE}.`); expect(renderReport(report)).toContain("## Retained shell categories"); - expect(renderReport(report)).toContain("- setup/bootstrap: 13"); + expect(renderReport(report)).toContain("- setup/bootstrap: 14"); expect(renderReport(report)).toContain("- host-service wrappers: 2"); }); diff --git a/tools/hygiene/check-bash-retirement-inventory.ts b/tools/hygiene/check-bash-retirement-inventory.ts index 744d63ce11..5adff9cc30 100644 --- a/tools/hygiene/check-bash-retirement-inventory.ts +++ b/tools/hygiene/check-bash-retirement-inventory.ts @@ -95,6 +95,7 @@ export const EXPECTED_RETAINED_SHELL: readonly string[] = [ "tools/setup/common/curl-fetch.sh", "tools/setup/common/dotnet-tools.sh", "tools/setup/common/elan.sh", + "tools/setup/common/local-llm.sh", "tools/setup/common/mise.sh", "tools/setup/common/profile-edit.sh", "tools/setup/common/python-tools.sh", @@ -131,6 +132,7 @@ export const RETAINED_SHELL_CATEGORY_BY_FILE: Readonly&2 +
exit 0 +fi + +# ── 1. ensure the ollama binary (Linux: latest release, or nix build on NixOS; macOS via brew) ── +if ! command -v ollama >/dev/null 2>&1; then + case "$(uname -s)" in + Linux) + # NixOS: the generic glibc release binary won't run (non-FHS). Install ollama + # via nix instead — FHS-safe, works in the nixos/nix container AND on real + # NixOS, and floats with the channel (consistent with the float-ollama + # decision). This is the install.sh-retrofit path that closes B-0941's test + # false-green; the declarative real-hardware self-heal layer is + # services.ollama in configuration.nix (complementary). linux.sh already + # routes NixOS via /etc/NIXOS; honor the same marker here. + if [ -f /etc/NIXOS ]; then + echo "↓ NixOS detected — installing ollama via nix (FHS-safe)..." + # Diagnosed across runs 26685829032 + 26685902159 (surfaced stderr): + # - nix-env -iA nixpkgs.ollama → 'bad meta.outputsToInstall' + # - nix profile install [--priority N] → coreutils-full FILE COLLISION + # (ollama's closure brings coreutils-full vs the profile's existing one; + # --priority did not resolve it — profile-install is structurally + # collision-prone here). + # Robust fix: DON'T mutate the profile. `nix build` the store path with a + # GC-rooted out-link, then wrap bin/ollama onto PATH — no profile entry, no + # collision, FHS-safe in the container AND on real NixOS. (The declarative + # real-hardware path is services.ollama in configuration.nix — complementary.) + # - --out-link (NOT --no-link): registers an indirect GC root so a later + # nix-collect-garbage cannot delete ollama out from under the wrapper + # (Copilot #6120 — a raw --print-out-paths store path is not GC-protected). + # - run the build INSIDE the `if` condition: a failure is then GRACEFUL under + # `set -euo pipefail` (a failing `var=$(...)` command-substitution would + # exit the script before the warn+exit-0 fallback — Copilot #6120).
+ # Surface stderr (2>&1); warn + exit 0 so install.sh never bricks on a probe. + ollama_gcroot="$HOME/.local/state/zeta/ollama-result" + mkdir -p "$(dirname "$ollama_gcroot")" "$HOME/.local/bin" + if nix --extra-experimental-features 'nix-command flakes' build --out-link "$ollama_gcroot" nixpkgs#ollama 2>&1 \ + && [ -x "$ollama_gcroot/bin/ollama" ]; then + # WRAPPER (not bare symlink): the nix-built ollama has the correct glibc in + # its RPATH, but a polluting LD_LIBRARY_PATH (e.g. the docker-nixos test's + # FHS-mise glibc hack) OVERRIDES the RPATH → 'symbol lookup error: libc.so.6 + # undefined symbol __nptl_change_stack_perm GLIBC_PRIVATE' (run 26686054042). + # The wrapper runs ollama clear of LD_LIBRARY_PATH so EVERY call (install-time + # serve+pull AND the test's assert) uses ollama's own glibc. Harmless on real + # NixOS / ubuntu / mac (LD_LIBRARY_PATH unset there → env -u is a no-op). + # Points at the GC-rooted out-link, not a raw store path. + printf '#!/usr/bin/env bash\nexec env -u LD_LIBRARY_PATH %s/bin/ollama "$@"\n' "$ollama_gcroot" > "$HOME/.local/bin/ollama" + chmod +x "$HOME/.local/bin/ollama" + echo " ✓ ollama via nix build (GC-rooted out-link $ollama_gcroot) + LD_LIBRARY_PATH-clean wrapper" + else + echo "warn: nix build ollama failed; skipping local-llm (tests fall back to mock)" >&2; exit 0 + fi + export PATH="$HOME/.local/bin:$PATH" + command -v ollama >/dev/null 2>&1 || { echo "warn: ollama not on PATH after nix build; skipping local-llm" >&2; exit 0; } + else + case "$(uname -m)" in + x86_64 | amd64) oarch=amd64 ;; + aarch64 | arm64) oarch=arm64 ;; + *) echo "warn: unsupported arch $(uname -m) for ollama; skipping local-llm" >&2; exit 0 ;; + esac + tmp="$(mktemp -d)" + # FLOATING latest (operator 2026-05-30): the ollama *runtime* version does not + # affect DST reproducibility — the pinned MODEL + temp0 + seed do — so we + # track latest (less maintenance). 
GitHub's /releases/latest/download/ + # auto-redirects to the newest release's asset (no API call, no pin). + # Asset is .tar.zst (zstd), NOT .tgz — verified against the release API + # 2026-05-30 (ollama-linux-amd64.tar.zst). The bare ollama-linux-${oarch}.tgz + # name 404s; this was caught by the validation workflow. + url="https://github.com/ollama/ollama/releases/latest/download/ollama-linux-${oarch}.tar.zst" + echo "↓ installing ollama (latest, linux-${oarch})..." + # Every exit from here on removes ${tmp}: the downloaded archive is dead weight + # once extracted (or once the install is abandoned) and would otherwise linger in + # the temp dir — and in the image layer when install.sh runs under docker build. + if ! curl_fetch --output "${tmp}/ollama.tar.zst" "$url"; then + rm -rf "${tmp}"; echo "warn: ollama download failed; skipping local-llm (tests fall back to mock)" >&2; exit 0 + fi + mkdir -p "$HOME/.local" + # ollama-linux-${oarch}.tar.zst extracts bin/ollama + lib/ollama under the + # prefix. zstd-compressed → tar --zstd (zstd is present on ubuntu runners; + # GNU tar + bsdtar both support --zstd). + if ! tar -C "$HOME/.local" --zstd -xf "${tmp}/ollama.tar.zst"; then + rm -rf "${tmp}"; echo "warn: ollama extract failed (zstd?); skipping local-llm (tests fall back to mock)" >&2; exit 0 + fi + rm -rf "${tmp}" + export PATH="$HOME/.local/bin:$PATH" + fi + ;; + Darwin) + echo "warn: ollama not found on macOS — expected via manifests/brew (brew install ollama)." >&2 + echo " Skipping model pull; re-run after the brew step installs it." >&2 + exit 0 + ;; + *) + echo "warn: unknown OS '$(uname -s)' for ollama install; skipping local-llm" >&2; exit 0 ;; + esac +fi + +# ── 2. ensure the daemon is reachable (start in background if needed) ── +if ! curl -fsS "${HOST}/api/version" >/dev/null 2>&1; then + echo "↓ starting ollama serve (background)..." + (ollama serve >/dev/null 2>&1 &) + for _ in $(seq 1 30); do + curl -fsS "${HOST}/api/version" >/dev/null 2>&1 && break + sleep 1 + done +fi +if ! curl -fsS "${HOST}/api/version" >/dev/null 2>&1; then + echo "warn: ollama daemon not reachable at ${HOST}; skipping model pull (tests fall back to mock)" >&2 + exit 0 +fi + +# ── 3.
pull the pinned model (idempotent) ── +if ollama list 2>/dev/null | awk 'NR>1 {print $1}' | grep -qx "$MODEL"; then + echo "✓ local-llm model ${MODEL} already present" +else + echo "↓ pulling ${MODEL} (~400MB, one-time)..." + if ! ollama pull "$MODEL"; then + echo "warn: 'ollama pull ${MODEL}' failed; skipping (tests fall back to mock)" >&2 + exit 0 + fi +fi +echo "✓ local-llm primitive ready: ${MODEL} via ollama $(ollama --version 2>/dev/null | head -1 || echo '(version unknown)')" diff --git a/tools/setup/linux.sh b/tools/setup/linux.sh index 4aea388313..b84bda80a4 100755 --- a/tools/setup/linux.sh +++ b/tools/setup/linux.sh @@ -176,5 +176,8 @@ export PATH="$HOME/.dotnet/tools:$PATH" "$SETUP_DIR/common/elan.sh" "$SETUP_DIR/common/dotnet-tools.sh" "$SETUP_DIR/common/verifiers.sh" +# Local-LLM core primitive — installs the ollama binary (floating latest) + pulls the pinned +# tiny model (manifests/local-llm). Graceful: warns + continues on failure. +"$SETUP_DIR/common/local-llm.sh" "$SETUP_DIR/common/shellenv.sh" "$SETUP_DIR/common/profile-edit.sh" diff --git a/tools/setup/macos.sh b/tools/setup/macos.sh index 7efaeb5224..5012a1cbe0 100755 --- a/tools/setup/macos.sh +++ b/tools/setup/macos.sh @@ -142,5 +142,8 @@ export PATH="$HOME/.dotnet/tools:$PATH" "$SETUP_DIR/common/elan.sh" "$SETUP_DIR/common/dotnet-tools.sh" "$SETUP_DIR/common/verifiers.sh" +# Local-LLM core primitive — macOS gets the ollama binary via manifests/brew +# (above); this pulls the pinned tiny model (manifests/local-llm). Graceful.
+"$SETUP_DIR/common/local-llm.sh" "$SETUP_DIR/common/shellenv.sh" "$SETUP_DIR/common/profile-edit.sh" diff --git a/tools/setup/manifests/apt b/tools/setup/manifests/apt index d1d0d1cdb3..bf78413959 100644 --- a/tools/setup/manifests/apt +++ b/tools/setup/manifests/apt @@ -15,3 +15,19 @@ git p7zip-full # cascade #4 ISO content audit (7z list); ubuntu-24.04 # default-installs but Linux maintainers running setup # locally need explicit declaration + +# Local-LLM core primitive (operator 2026-05-30): the ollama Linux release is a +# .tar.zst (zstd), so common/local-llm.sh needs `tar --zstd` ⇒ the zstd binary. +zstd # required to extract ollama-linux-{amd64,arm64}.tar.zst + +# .NET runtime native deps (mise installs the dotnet SDK; it needs these shared +# libs to RUN). Present on full ubuntu runners (implicit), MISSING on a minimal +# ubuntu:24.04 image — the docker-ubuntu-install-sh-test exposed this (dotnet +# exited with no status = missing libicu). Declaring them makes the entropy lever +# work on TRULY bare ubuntu. Per Microsoft Learn linux-scripted-manual deps; +# build-essential already pulls libstdc++6/libgcc-s1/zlib1g. Names are Ubuntu +# 24.04 (Noble: libicu74, libssl3t64 post-time_t-transition). +libicu74 # ICU — .NET globalization (the classic "dotnet exited" cause) +libssl3t64 # OpenSSL 3 runtime (Noble t64 name) +libgssapi-krb5-2 # Kerberos/GSSAPI — .NET networking +tzdata # timezone data — .NET DateTime diff --git a/tools/setup/manifests/brew b/tools/setup/manifests/brew index c8410b4495..f995cdbd27 100644 --- a/tools/setup/manifests/brew +++ b/tools/setup/manifests/brew @@ -23,3 +23,10 @@ hermes-agent # "Self-improving AI agent that creates skills from # resolved by brew (see `brew info hermes-agent` for # current list). Idempotent: brew install skips if # present. + +# Local-LLM core primitive (operator 2026-05-30 — "core, not optional"; small +# CPU model = baseline substrate).
macOS installs the ollama binary here; the +# pinned MODEL is pulled by common/local-llm.sh per manifests/local-llm (the +# model is the reproducible/pinned artifact; brew tracks latest ollama binary). +ollama # CPU-served tiny model for the move-next selector + observe.ts + # classifier + DST fixtures. Idempotent: brew install skips if present. diff --git a/tools/setup/manifests/local-llm b/tools/setup/manifests/local-llm new file mode 100644 index 0000000000..842f3fc8a2 --- /dev/null +++ b/tools/setup/manifests/local-llm @@ -0,0 +1,27 @@ +# tools/setup/manifests/local-llm — declarative pins for the CORE local-LLM +# primitive: a small CPU-only model served by Ollama, account-free. +# +# Why core (operator 2026-05-30): small CPU-capable local LLMs are a baseline +# substrate primitive (like a language runtime), not an optional extra. Consumers: +# - accelerator move-next selector ("choose your own adventure") +# - observe.ts auto-classifier (input -> one label) +# - DST test fixtures: temp 0 + fixed seed + pinned model = reproducible, so a +# real (not mocked) model can back deterministic-simulation tests. +# +# Installed by tools/setup/common/local-llm.sh (idempotent, graceful). Format: +# `key value` (one per line; comments start with `#`). +# +# Ollama runtime: FLOATING latest (operator 2026-05-30) — the runtime version does +# not affect DST reproducibility (the pinned MODEL + temp0 + seed do), so we track +# latest for less maintenance. Installed per-OS: macOS via manifests/brew; Linux +# via GitHub /releases/latest; Windows via install.ps1 (peer surface) — all read +# the model/seed/host below from THIS manifest (OS-agnostic shared contract). + +# Tiny instruct model — 398MB Q4_K_M, CPU-friendly +# (https://ollama.com/library/qwen2.5:0.5b). PINNED tag — the reproducible +# artifact for DST (temp0 + seed + this pin = deterministic). +model qwen2.5:0.5b + +# Deterministic defaults for DST reproducibility (greedy + fixed seed). 
+seed 0 +host http://127.0.0.1:11434