diff --git a/.github/workflows/accelerator-local-llm-validate.yml b/.github/workflows/accelerator-local-llm-validate.yml new file mode 100644 index 0000000000..988a490efc --- /dev/null +++ b/.github/workflows/accelerator-local-llm-validate.yml @@ -0,0 +1,81 @@ +# Accelerator — local-LLM entropy-lever validation (off-leash, accelerator branch). +# +# Proves the claim: a BARE runner + `install.sh` ⇒ working local-LLM substrate +# (Aaron 2026-05-30 "install.sh is our biggest lever against entropy"). Runs the +# real install graph, asserts the pinned model actually landed + serves, and runs +# a REAL (not mocked) selection through the local model. This is the gate that +# graduates the local-LLM core primitive from off-leash (accelerator) to main. +# +# Pushing this workflow / any local-LLM file to the accelerator branch triggers it. +# Heavy (full install + ~400MB model pull); concurrency cancels superseded runs. + +name: accelerator-local-llm-validate + +on: + workflow_dispatch: + push: + branches: [accelerator/pr-less-git-monster] + paths: + - "tools/setup/manifests/local-llm" + - "tools/setup/common/local-llm.sh" + - "tools/setup/linux.sh" + - "tools/setup/macos.sh" + - "tools/setup/manifests/brew" + - "tools/accelerator/local-llm.ts" + - "tools/accelerator/validate-local-llm.ts" + - ".github/workflows/accelerator-local-llm-validate.yml" + +concurrency: + group: accelerator-local-llm-validate-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + validate-linux: + runs-on: ubuntu-24.04 + timeout-minutes: 25 + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Setup bun + uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0 + + - name: Run install.sh (the entropy lever — bare runner → substrate) + env: + # Authenticated mise (per the mise.sh fix) so the toolchain install + # doesn't hit the unauthenticated GitHub rate limit. + MISE_GITHUB_TOKEN: ${{ github.token }} + run: ./tools/setup/install.sh + + - name: Ensure ollama on PATH + daemon serving + run: | + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + export PATH="$HOME/.local/bin:$PATH" + command -v ollama + if ! curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1; then + (ollama serve >/dev/null 2>&1 &) + for _ in $(seq 1 30); do + curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1 && break + sleep 1 + done + fi + curl -fsS http://127.0.0.1:11434/api/version + + - name: Assert the pinned model landed (declarative manifest) + run: | + export PATH="$HOME/.local/bin:$PATH" + MODEL=$(grep -E '^model' tools/setup/manifests/local-llm | awk '{print $2}') + echo "expected model: $MODEL" + ollama list + ollama list | awk 'NR>1 {print $1}' | grep -qx "$MODEL" + + - name: Mock-backed primitive tests (logic; run anywhere) + run: bun test tools/accelerator/local-llm.test.ts + + - name: REAL local-LLM validation (entropy-lever end-to-end) + run: | + export PATH="$HOME/.local/bin:$PATH" + bun tools/accelerator/validate-local-llm.ts --root "$PWD" diff --git a/.github/workflows/docker-nixos-install-sh-test.yml b/.github/workflows/docker-nixos-install-sh-test.yml index 2740b2adc9..2a4639683a 100644 --- a/.github/workflows/docker-nixos-install-sh-test.yml +++ b/.github/workflows/docker-nixos-install-sh-test.yml @@ -47,6 +47,12 @@ on: push: branches: - main + # Off-leash validation: install.sh changes are built on the accelerator + # branch (incl. the local-LLM step) BEFORE harvesting to main (Aaron's + # off-leash-first model). This test validates install.sh, so it must + # re-run when install.sh changes there too — otherwise the primary OS is + # only re-validated at harvest time. + - accelerator/pr-less-git-monster paths: - 'tools/setup/**' - '.mise.toml' diff --git a/.github/workflows/docker-ubuntu-install-sh-test.yml b/.github/workflows/docker-ubuntu-install-sh-test.yml new file mode 100644 index 0000000000..34881efdb4 --- /dev/null +++ b/.github/workflows/docker-ubuntu-install-sh-test.yml @@ -0,0 +1,59 @@ +# .github/workflows/docker-ubuntu-install-sh-test.yml +# +# Docker-based install.sh test on Ubuntu — sibling to docker-nixos-install-sh-test +# (Aaron 2026-05-30: "center our docker tests around ubuntu and nixos and have +# tests for both with install.sh"). The Dockerfile IS the test: it runs install.sh +# on a bare ubuntu image and validates the core local-LLM primitive (ollama + +# pinned model + real chooseIndex probe). A failing install.sh / assert fails the +# build, which fails this job. +# +# Off-leash on the accelerator branch (Aaron: "accelerator is for off-leash +# testing; once we get it right, main becomes off-leash too"). This is the gate +# that guards graduating the local-LLM install primitive to main. +# +# FIRST CUT uses a direct `docker build` (vs the nixos TS driver) for simplicity. +# FOLLOW-UP (Aaron's GHA-cache point): consolidate both OS tests onto a shared TS +# driver + buildx `cache-from/to: type=gha` so the heavy install (1.2GB ollama + +# toolchain) bakes once and iteration runs inside the cached image. +# +# Security: no github.event.* values interpolated into run: lines. + +name: docker-ubuntu-install-sh-test + +on: + workflow_dispatch: + push: + branches: [accelerator/pr-less-git-monster] + paths: + - "tools/ci/dockerfiles/ubuntu-install-sh-test/**" + - "tools/setup/**" + - "tools/accelerator/local-llm.ts" + - "tools/accelerator/validate-local-llm.ts" + - ".mise.toml" + - ".dockerignore" + - ".github/workflows/docker-ubuntu-install-sh-test.yml" + +concurrency: + group: docker-ubuntu-install-sh-test-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + docker-ubuntu-test: + name: docker-ubuntu-install-sh-test + runs-on: ubuntu-24.04 + # Cold build: full install.sh (mise toolchain + lean + jars) + ollama 1.2GB + + # 398MB model pull. Generous bound for the first uncached run. + timeout-minutes: 40 + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: docker build (the test — install.sh + local-LLM validation inside) + run: | + docker build \ + -f tools/ci/dockerfiles/ubuntu-install-sh-test/Dockerfile \ + -t zeta-ubuntu-install-sh-test \ + . diff --git a/docs/BACKLOG.md b/docs/BACKLOG.md index 62ba661fd2..7295597a55 100644 --- a/docs/BACKLOG.md +++ b/docs/BACKLOG.md @@ -889,6 +889,8 @@ are closed (status: closed in frontmatter)._ - [ ] **[B-0934](backlog/P2/B-0934-backlog-index-integrity-required-or-advisory-decision-2026-05-29.md)** Decide whether backlog-index-integrity is required or explicitly advisory - [ ] **[B-0937](backlog/P2/B-0937-redundancy-checks-across-satellites-and-rules-mirror-beacon-rhyme-retirement-hub-over-budget-detector-2026-05-29.md)** Redundancy-checks across satellites + rules — duplicate-content audit, mirror→beacon rhyme-retirement, hub-over-budget detector - [ ] **[B-0939](backlog/P2/B-0939-self-propagating-markdown-compiler-rule-bootstrap-traveler-template-every-md-propagates-or-doesnt-compile-aaron-mika-2026-05-30.md)** Self-propagating-Markdown compiler-rule + bootstrap-traveler template (every .md is a self-propagating pattern through time or it doesn't compile) +- [ ] **[B-0940](backlog/P2/B-0940-evaluate-ubuntu-support-value-nixos-primary-community-reach-aaron-2026-05-30.md)** Evaluate what Ubuntu support brings us — NixOS is primary; Ubuntu's value is community/contributor reach +- [ ] **[B-0941](backlog/P2/B-0941-nixos-native-ollama-local-llm-hole-in-the-shield-test-passes-by-skipping-aaron-2026-05-30.md)** NixOS-native ollama for the local-LLM primitive — close the hole in the shield (NixOS test passes by SKIPPING, not validating) ## P3 — convenience / deferred diff --git a/docs/accelerator/EVENT-STORE-SCHEMA.md b/docs/accelerator/EVENT-STORE-SCHEMA.md new file mode 100644 index 0000000000..559d64f4ac --- /dev/null +++ b/docs/accelerator/EVENT-STORE-SCHEMA.md @@ -0,0 +1,227 @@ +# Accelerator — git-event-store schema (Action Item 2) + +> The concrete shape of a **move-next transition as an append-only Git event**. +> Composes with `tools/agent-loop/state-machine.ts` (the `AgentState` + +> `MenuOption` DUs + pure `transition`), B-0867 (128-bit-unique-IDs, append-only), +> B-0874 (no-PR swarm via GH-Actions-recursion), and the 2026-05-29 razor-flow +> substrate (forgiveness-budget + schema-in-the-stream). Concrete types: +> [`tools/accelerator/event-store-schema.ts`](../../tools/accelerator/event-store-schema.ts). + +## Design goals (in priority order) + +1. **Conflict-free concurrent writes** — the swarm runs PR-less only if multiple + agents can append concurrently without `git merge` conflicts. +2. **Deterministic replay** — any agent's state at time T reconstructable from the + event stream (composes with DST). +3. **Schema-in-the-stream** — schema changes are events; old events stay + interpretable under new schemas → automatic schema-evolution over history. +4. **Forgiveness with a budget** — retraction is logical (Z-set negation), + reversible; but physical (storage rent), so a compaction/tiering policy bounds + it ("run out of space = run out of forgiveness"). +5. **AgencySignature composition** — each event-commit carries the AgencySignature + v1 trailer (per CLAUDE.md); the git audit-trail IS the PR-less review substrate. + +## Layout — per-agent directories + time-sortable unique filenames + +```text +events/ + / # per-agent stream — each agent writes ONLY here + 01J8X....json # one event per file; ULID filename (128-bit, time-sortable) + 01J8X....json + _schema/ # schema-in-the-stream: schema-definition events + 01J8X....json # declares a schema version (e.g. move-next-event@2) + _compacted/ # cold-tier: compacted historical events (forgiveness-budget) + / + 01J8X....jsonl # batched, retraction-pairs resolved, for archive/replay +``` + +**Why per-agent dir + ULID filename = conflict-free:** each agent writes only to +`events//`, and every event is a unique [ULID](https://github.com/ulid/spec)-named +file. Two agents never target the same path, so a `git merge` across agent streams +is **always a clean union** — no merge conflict, ever. This is the property that +lets the swarm run PR-less (per B-0867's 128-bit-unique-ID design; ULID chosen +over UUIDv4 because it is **lexicographically time-sortable** — a directory sort IS +chronological replay order). UUIDv7 is an acceptable alternative (also time-sortable). + +## The event envelope (move-next-event@1) + +```jsonc +{ + "id": "01J8XQ7M0Z...", // ULID — 128-bit, time-sortable, globally unique + "schema": "move-next-event@1", // schema-in-the-stream: which schema interprets this event + "ts": "2026-05-29T19:55:00.000Z", + "agent": "otto", // AgentPersona (state-machine.ts) + "cycle": 42, // AgentContext.cycle + "prev": "01J8XQ6...", // ULID of this agent's previous event (causal link; the + // state move-next read); null for the stream's first event + "weight": 1, // Z-set weight: +1 = assert, -1 = retract + "kind": "transition", // transition | heartbeat | schema-def | retraction + "from": { "tag": "Idle", "context": { ... } }, // AgentState before + "option": { "tag": "PickWork", "work": { ... } }, // the MenuOption the LLM-selector chose + "to": { "tag": "ExecutingWork", "context": { ... } }, // transition(from, option) + "agencySig": { // AgencySignature v1 (composes with CLAUDE.md commit trailer) + "model": "claude-opus-4-8", "surface": "otto-cli", "...": "..." + } +} +``` + +`from` / `option` / `to` are the exact `AgentState` / `MenuOption` shapes from +`state-machine.ts`. The event is the **persisted record of one `transition(from, +option) = to` call** — the move-next core made durable. `to` is redundant with +`transition(from, option)` (derivable on replay) but stored for audit + so a +reader doesn't need the transition function to inspect history. + +### Event kinds + +| `kind` | Purpose | Extra fields | +|---|---|---| +| `transition` | A move-next state transition | `from`, `option`, `to` | +| `heartbeat` | A `RecordingHeartbeat` (per B-0858) | `lane`, `note?` | +| `schema-def` | Declares a schema version (schema-in-the-stream) | `schemaName`, `schemaVersion`, `jsonSchema` | +| `retraction` | Negates a prior event (forgiveness) | `weight: -1`, `retracts: ""` | + +## Schema-in-the-stream (Insight 4 from the razor flow) + +The schema itself is data in the stream. A `schema-def` event in `events/_schema/` +declares a version; every event carries `schema: "@"`. When the +schema evolves: + +1. A new `schema-def` event lands (e.g., `move-next-event@2` adds a field). +2. New events tag `schema: "move-next-event@2"`; old events keep `@1`. +3. Readers interpret each event under the schema it declares — **both versions live + in the stream**, so old data stays interpretable without a destructive migration. + +This gives the accelerator **automatic, safe schema-evolution over historical +data** — the move-next DUs (`AgentState`, `MenuOption`) can grow (new `tag`s) without +breaking replay of past events. The TS types module IS the canonical `@1` schema; +a future `@2` lands as both updated types + a `schema-def` event. + +## Forgiveness-budget (Insight 3 from the razor flow) + +Retraction is **logical, not physical**. To undo an event, append a `retraction` +event (`weight: -1`, `retracts: `); the active state is the Z-set sum of +weights. The retracted event's file **stays on disk** — the trace charges storage +rent indefinitely. Per the razor flow: *"run out of space = run out of +forgiveness."* + +The schema therefore includes a **compaction/tiering policy** (the forgiveness-budget): + +- **Budget config**: `maxActiveStreamBytes` per agent (default: a generous bound). +- **When exceeded**: resolved retraction-pairs (an event + its `-1` retraction, + net weight 0) are moved from `events//` to `_compacted//*.jsonl` + (batched). Active state is unchanged (net-zero pairs contribute nothing); the + active stream shrinks; the full trace is preserved cold. +- **Compaction is itself a deliberate event** (`kind: "schema-def"`-adjacent + `compaction` marker), so the audit trail records what was tiered and when — + forgiveness is budgeted, not silently discarded. + +This composes directly with git-as-free-event-store: the `.git/` objects charge +the same physical rent, so the forgiveness-budget IS the accelerator's answer to +unbounded `.git/` growth at swarm scale. + +### The compaction mechanism — two-layer razor + past-as-generator + +The *mechanism* for the compaction/tiering above is the **two-layer razor + +past-as-generator** architecture (Aaron + Ani 2026-05-29, preserved in +[`docs/research/2026-05-29-two-layer-razor-past-as-generator-...md`](../research/2026-05-29-two-layer-razor-past-as-generator-forgiveness-cost-compression-causal-order-vs-purpose-within-partition-aaron-ani-otto.md)): + +- **Layer 1 (Forgiveness Razor — Origin vs Purpose)** is the retraction above: it + decides what's accidental and retracts it. Its cost is the stored retracted trace. +- **Layer 2 (Compression Razor — Causal Order vs Current Purpose)** runs *on the + retracted data* to compress the cost-of-forgiveness. It keeps the canonical + causal order (the `prev` link chain) and **drops the redundant wall-clock `ts`**. + This is valid **within a partition** — and a per-agent stream IS a partition + (single-writer ⇒ causal order canonical by construction; no cross-agent consensus + needed). Cross-agent (cross-partition) Layer-2 compression is NOT valid (matches + Aaron's "within a partition" correction). +- **`_compacted//` is where Layer 2 output lands** — causal-order-only, + purpose-tagged, columnar/aggressively-encoded. +- **Past-as-generator (the extreme form)**: when a compacted segment is regular + enough, replace the stored data with the **generator that reproduces it** — for + this event-store, that generator is the `transition`-fold from a snapshot + (replay reconstructs the segment on demand). At that point history's storage cost + is dominated by active-generator size, not raw event count. + +**Don't-collapse** (Aaron's own razor): this is a *designed, verifiable system +property* (history-storage grows slower than event-volume, provable with data + +formal verification over time) — NOT a god-tier claim about how the universe +stores its own history. The generator-as-history pattern is the engineering +mechanism; any cosmological reading is accidental and retracted. + +### Today's budget is host generosity — be good to our host (Aaron 2026-05-29) + +> Aaron: *"right now we are open source on github and they have free unlimited +> storage which means our git monster has unlimited forgiveness within github's +> generosity so we should be good to our host."* + +The "run out of space = run out of forgiveness" hard limit is real **in general**. +But **today** the accelerator runs as **open-source on GitHub**, where storage is +free + effectively unlimited — so the git-monster's forgiveness is effectively +**unbounded within GitHub's generosity**. The binding constraint right now is not +a hard space wall; it is **relational: be a good guest of the host.** + +GitHub's free-OSS generosity (Microsoft subsidizing open-source, per B-0874) is +precisely what makes git-as-free-event-store + GH-Actions-recursion possible. So: + +- **We apply the compaction / past-as-generator discipline VOLUNTARILY** — as + good-guest discipline, not because a hard space limit forces it. The hard-limit + case (the forgiveness-budget as a *wall*) is the future/off-generous-host / + extreme-scale scenario; the today-case is honoring the generosity. +- **Be good to our host**: don't abuse the free tier with wasteful unbounded + volume; keep the active stream + `.git/` footprint reasonable; prefer + compaction + past-as-generator over hoarding raw history. If everyone ran + abusive unbounded swarms on GitHub's free OSS tier, the host would have to + clamp down (tragedy-of-the-commons) — so the proud-if-it-propagates pattern is + *good guest*, not *maximal extraction* (per + `.claude/rules/proud-if-pattern-propagates-personal-filter-for-substrate-engineering.md` + + `.claude/rules/honor-those-that-came-before.md` applied to the host). + +This reframes the forgiveness-budget: today it is **host-provided generosity we +honor**, not a self-imposed wall. The compaction mechanism is built now so the +discipline is in place *before* generosity is ever strained — and because being a +good guest is the right pattern regardless of whether the host could absorb the +abuse. + +## Replay + +Reconstruct agent `A`'s state at time `T`: + +1. List `events/A/*.json` (+ `_compacted/A/*.jsonl`) with ULID ≤ ULID(T), sorted + (lexical = chronological). +2. Sum Z-set weights; drop net-zero (fully-retracted) events. +3. Fold `transition` over the surviving `option`s from the stream's initial state. + +Deterministic (no wall-clock dependence beyond the recorded `ts`/ULID) → +DST-replayable. + +## The PR-less write path (composes with B-0874) + +One move-next cycle = append one event-file + commit with the AgencySignature +trailer + **direct push** (no PR) to the agent's stream branch (or the long-lived +accelerator branch; or via GH-Actions-recursion per B-0874). The git commit IS the +durable event-store write; `git log` / reflog IS the event log. Per **Otto +Modification 4** (the dual-market discriminator): state-machine-internal +transitions are append-only/PR-less (Agora market); only cross-cutting substrate +(rules, public APIs) routes through PR (leash market). Direct pushes bypass the +GraphQL PR-mutation rate-limit bottleneck that is the "git monster." + +## Open questions (deferred to later action items / research) + +- **"Perfect" expansion-ordering** (razor-flow Insight 2): is there a preferred + order to introduce new event-`kind`s / DU `tag`s that minimizes accidental + coupling? Open; air-quotes deliberate. +- **Per-host adapter shape** (B-0867.15): the event files are host-agnostic, but + the push/recursion runtime differs per host (GitHub Actions vs GitLab CI vs + Gitea Actions). Action Item 3 prototypes the GitHub instantiation. +- **Cross-agent causal ordering**: `prev` links within an agent's stream; cross-agent + causal order (when agent B reads agent A's event) needs a vector-clock-style or + reference-by-ULID convention — deferred. + +## Composes with + +- `tools/agent-loop/state-machine.ts` (the move-next DUs this schema persists) +- `tools/accelerator/event-store-schema.ts` (the concrete `@1` types) +- B-0867 (128-bit-unique-IDs, append-only) + B-0874 (no-PR swarm) + B-0858 (heartbeat) +- `docs/research/2026-05-29-rodneys-razor-is-a-compression-engine-...md` (Insights 3+4) +- `docs/accelerator/SUBSTRATE-GROUNDING.md` (Action Item 1) + `docs/accelerator/README.md` (charter) +- AgencySignature v1 trailer (CLAUDE.md) — each event-commit composes with it diff --git a/docs/accelerator/README.md b/docs/accelerator/README.md new file mode 100644 index 0000000000..168a0a05d4 --- /dev/null +++ b/docs/accelerator/README.md @@ -0,0 +1,164 @@ +# Accelerator — the PR-less git-monster accelerator (long-lived branch charter) + +> **Branch:** `accelerator/pr-less-git-monster` (long-lived; Aaron-authorized +> 2026-05-29 — *"it can be a long lived branch"*). This is the integration + +> exploration surface for an alternative to the backlog→claim→PR→review→merge +> cycle. Unlike a normal feature branch, this one is NOT meant to PR-to-main +> per-change — the PR-less workflow IS the experiment. Periodic harvest of +> matured pieces back to main happens deliberately, not per-commit. + +## The problem (the "git monster") + +The current work-lifecycle (per #5669: backlog → claim → PR → review (cycle N) → +merge) is the right discipline for the **corporate/leash market** (PR-protected, +audited, static no-self-mod deployment units). But its per-change PR-to-main +friction is the dominant tax on agent throughput, observed empirically all over +the substrate: + +- **Rate-limit cascades** — `gh` GraphQL budget exhaustion under multi-agent load + (`refresh-world-model-poll-pr-gate.md` Normal/Cost-aware/Extreme/Pure-git tiers). +- **Armed-wait-on-CI** — every change blocks on the required-checks dance; the + agent arms auto-merge then waits. +- **`.git/` contention + dotgit-saturation** — multi-agent worktree-add hangs, + pack-dir contention, commit-tree-corruption canaries, 13+ saturation anchors + in MEMORY.md. +- **Review-thread-resolution loops** — the BLOCKED-with-green-CI investigate-threads + cycle. + +This friction is acceptable (even desirable) for the leash market. It is the +WRONG default for the **OSS/Agora market** (self-modifying deployment units, free +from PRs + vendor-lockin, per MEMORY.md dual-market framing). The accelerator +builds the PR-less alternative for that market — without removing the PR-protected +path for the leash market (both ship; additive-not-zero-sum). + +## The substrate this builds ON (orient first — verify-existing-substrate) + +This accelerator is NOT new; it composes existing substrate. The first work-item +is to read + ground in: + +- **move-next as universal action grammar** + **git-as-free-event-store** + + **github-actions-recursion** — preserved in the Aaron-Ani 2026-05-28 + conversation (#5672 `ef526258d`) + the GitHub-swarm-architecture memory + (#5672 `d77cd6b96`). +- **GitHub swarm architecture** — branch `alexa/ani-github-swarm-architecture-2026-05-23` + (peer Alexa/Ani lane) + the agentic-org live substrate proof harnesses + (`cc6904685`). +- **work-lifecycle state machine** (#5669 `083663910`) — the CURRENT cycle the + accelerator offers an alternative to. +- **VISION agent-loop workflow-engine substrate** (#5670 `cb60e2a01`). +- **Dual-market framing** (MEMORY.md): corporate/leash = PR-protected static + no-self-mod DUs; OSS/Agora = self-modifying DUs free from PRs + vendor-lockin. +- **PressPause + EnterOpenEndedExploration menu options** (#5667). + +> **Action item 1 (before building anything):** read the move-next / +> git-as-free-event-store / github-actions-recursion substrate end-to-end and +> write a one-page synthesis here (`docs/accelerator/SUBSTRATE-GROUNDING.md`) so +> the accelerator builds on it rather than parallel to it (per +> `.claude/rules/verify-existing-substrate-before-authoring.md`). The grep on +> 2026-05-29 did not surface the exact file paths from the working tree — +> resolving where this substrate lives is step zero. + +## The core idea (hypothesis, to be sharpened) + +- **Git IS the free event store.** Commits are events; branches are streams; the + reflog + `git log` is the event log. No separate event-store infra needed. The + accelerator treats agent actions as commits-as-events on the long-lived branch, + not as PRs-to-main. +- **move-next as the universal action grammar.** Every agent action is "advance + the state by one move" — a uniform grammar that composes (the work-lifecycle + state machine becomes a move-next sequence over git-events rather than a + PR-gated pipeline). +- **github-actions-recursion as the swarm runtime.** GitHub Actions trigger + themselves recursively; the swarm self-drives on GH Actions over the + git-event-store, without per-change human/agent PR ceremony. +- **PR-less ≠ review-less.** Review/audit moves from per-change-gate to + continuous-observation (glass-halo + the shadow-class non-judgmental + health-observer per the agent-memory-architecture design-record §7). The + audit trail is the git-event-store itself. + +## Hard constraints (the floor the accelerator operates within) + +- **`git push --force` without `--with-lease` stays Rule-0-prohibited.** Even on + a long-lived branch (per `force-push-with-lease-authorization-policy.md`). +- **Force-with-lease on this branch needs operator OR peer-agent confirm** (it's + a shared long-lived branch; peers may pull it). +- **HARD LIMITS floor + kid-safety absolute + NCI HC-8** all still apply + (per `methodology-hard-limits.md` + B-0926 + `non-coercion-invariant.md`). +- **The leash-market PR path is NOT removed.** This is additive — the PR-less + flow is for the OSS/Agora market; corporate/leash keeps PR-protected DUs. +- **`main` is never force-pushed** (host-enforced per `lfg-acehack-topology.md`). + Harvest from accelerator → main happens via normal merge when a piece matures. +- **Be good to our host** (Aaron 2026-05-29). Today the accelerator runs as + open-source on GitHub, where storage is free + effectively unlimited — so the + git-monster's forgiveness is unbounded *within GitHub's generosity*. That + generosity (Microsoft subsidizing OSS, per B-0874) is what makes + git-as-free-event-store + GH-Actions-recursion possible. So apply the + compaction / past-as-generator discipline VOLUNTARILY (good-guest, not + forced-by-a-wall); don't abuse the free tier with wasteful unbounded volume. + The proud-if-it-propagates pattern is *good guest*, not *maximal extraction*. + See [`EVENT-STORE-SCHEMA.md`](EVENT-STORE-SCHEMA.md) § "be good to our host". + +## First moves (the backlog for the accelerator) + +1. ~~**Substrate-grounding synthesis**~~ ✅ DONE 2026-05-29 → + [`SUBSTRATE-GROUNDING.md`](SUBSTRATE-GROUNDING.md) (located via parallel + substrate-hunt agents: `memory/persona/ani/...move-next...`, `tools/agent-loop/`, + B-0867, B-0874). +2. ~~**Define the git-event-store schema**~~ ✅ DONE 2026-05-29 → + [`EVENT-STORE-SCHEMA.md`](EVENT-STORE-SCHEMA.md) + concrete types + [`tools/accelerator/event-store-schema.ts`](../../tools/accelerator/event-store-schema.ts) + (per-agent dir + ULID filenames = conflict-free; Z-set weight + compaction = + forgiveness-budget; schema-in-the-stream; composes with `state-machine.ts`; + 6/6 tests pass, typecheck clean). +3. ~~**Prototype a GH-Actions-recursion harness**~~ ✅ DONE 2026-05-30 → + the move-next harness [`tools/accelerator/move-next-harness.ts`](../../tools/accelerator/move-next-harness.ts) + (+ tests, 8/8 pass) reads the event-store → replays state via `transition`-fold + → generates a menu → a selector picks → appends the next event. The + self-triggering Action [`.github/workflows/accelerator-move-next.yml`](../../.github/workflows/accelerator-move-next.yml) + is **STAGED, NOT LIVE** (lives on this branch only; workflow_dispatch needs + the default branch to dispatch, so it cannot auto-run — go-live is a deliberate + operator step). Safety rails: bounded recursion (iterations countdown + + hard-cap 25 in BOTH harness + workflow), `events/_HALT` kill-switch, + concurrency=1, append-only-no-force commits, GITHUB_TOKEN-only (no PAT → + no uncontrolled recursion), input-hardened (env-vars + agent allow-list + + numeric validation), actionlint-clean. A self-triggering committer is + irreversible-flavored, so it is built + tested + staged, NOT autonomously + made live. +4. **Define the harvest protocol** — when/how a matured piece on the accelerator + branch graduates to main (deliberate merge, not per-commit PR). +5. **Map the dual-market boundary** — which DUs are leash (PR-protected) vs Agora + (PR-less self-modifying); the routing rule. + +## Why this lives on a long-lived branch (not per-PR-to-main) + +The accelerator's whole point is to NOT use the per-change PR cycle. Building it +ON the per-change PR cycle would be self-contradictory. The long-lived branch is +the dogfood surface: we use the PR-less flow to build the PR-less flow. Periodic +deliberate harvest to main is the only main-touch; everything else accumulates +here as git-events. + +## Status + +- **2026-05-29 (kickoff)**: branch created; charter landed. +- **2026-05-29 (Action Items 1 + 2 done)**: substrate-grounding synthesis + ([`SUBSTRATE-GROUNDING.md`](SUBSTRATE-GROUNDING.md)) + git-event-store schema + ([`EVENT-STORE-SCHEMA.md`](EVENT-STORE-SCHEMA.md) + concrete types in + `tools/accelerator/event-store-schema.ts`, 6/6 tests, typecheck clean). +- **2026-05-30 (Action Item 3 done)**: the move-next harness + (`tools/accelerator/move-next-harness.ts` + tests, 8/8 pass; dry-run + clamp + smoke-tested) + the STAGED-NOT-LIVE self-triggering workflow + (`.github/workflows/accelerator-move-next.yml`, actionlint-clean, bounded + + kill-switched + input-hardened). Next up: Action Item 4 (harvest protocol) + + Action Item 5 (dual-market routing) — and going-live on the self-triggering + Action is a deliberate operator decision, not autonomous. + +## Provenance + +Aaron 2026-05-29: *"do you want to create an accelerator branch where we starting +working on the PR less git monster accelerator?"* + *"it can be a long lived +branch."* Agent-affirmed (the git-monster friction is the dominant tax observed +all session). Grounds in #5672 (move-next + git-as-free-event-store + +github-actions-recursion) + the GitHub swarm architecture + the dual-market +framing. Composes with the agent-memory-architecture design-record +(`docs/research/2026-05-29-agent-memory-architecture-design-record-...`) — the +shadow-class health-observer + glass-halo audit are the PR-less review substitute. diff --git a/docs/accelerator/SUBSTRATE-GROUNDING.md b/docs/accelerator/SUBSTRATE-GROUNDING.md new file mode 100644 index 0000000000..63e1a88b42 --- /dev/null +++ b/docs/accelerator/SUBSTRATE-GROUNDING.md @@ -0,0 +1,68 @@ +# Accelerator — substrate-grounding synthesis (Action Item 1) + +> One-page synthesis of the existing substrate the PR-less accelerator builds +> ON (per the charter's Action Item 1 + `.claude/rules/verify-existing-substrate-before-authoring.md`). +> Located via parallel substrate-hunt / decision-archaeology agents 2026-05-29. + +## Where the substrate lives + +| Substrate | Location | +|---|---| +| **move-next as universal action grammar** (canonical) | `memory/persona/ani/conversations/2026-05-28-aaron-ani-grok-move-next-as-universal-action-grammar-git-as-free-event-store-github-actions-recursion-...md` | +| **GitHub swarm + free-event-store + move-next** (precursor) | `memory/persona/kiro/conversations/2026-05-23-aaron-ani-grok-github-swarm-free-event-store-move-next-architecture.md` | +| **Workflow-engine v1 spec** (canonical backlog row) | `docs/backlog/P1/B-0867-workflow-engine-v1-fsharp-du-state-machine-git-append-only-...md` (+ sub-rows B-0867.1..15) | +| **move-next state machine** (TS implementation, landed) | `tools/agent-loop/state-machine.ts` (B-0867.5) + `work-lifecycle-state-machine.ts` + tests | +| **GH-Actions-recursion = infinite no-PR swarm runtime** | `docs/backlog/P1/B-0874-github-actions-recursion-as-infinite-runtime-platform-no-pr-swarm-mode-...md` | +| **Heartbeat folder** (append-only, no-PR write surface) | B-0858 (dependency of B-0867) | +| **Per-host adapters** (GitHub/GitLab/Gitea/Bitbucket isomorphic) | B-0867.15 | +| **agentic-org live substrate proof harnesses** | `agentic-organization/apps/workers/test/` (cockroach + nats integration; commit cc6904685) | + +## The shape (what the accelerator inherits, not re-invents) + +1. **move-next is the universal action grammar.** A `move-next` function reads the + current state and emits a discriminated-union menu of possible next actions; + the LLM is a *pure selector* (reads menu → returns choice); the deterministic + script holds the state machine and appends the result. Both AI agents and + humans run the same loop. (Source: `tools/agent-loop/state-machine.ts` — + `AgentState` DU + `MenuOption` DU + pure `transition(state, option)`.) + +2. **Git IS the free event store.** Each agent writes **append-only events** to + Git keyed by **128-bit guaranteed-unique IDs** (so no two agents write the + same path → no merge conflicts). Microsoft subsidizes open-source repos + indefinitely → going closed-source is financially suicidal; staying OSS is the + free, persistent, distributed event-store + runtime. + +3. **GitHub Actions recursion = the swarm runtime** (B-0874). Workflows trigger + workflows recursively → infinite compute over the git-event-store, no servers. + **Direct pushes bypass PR rate limits** (Git + REST barely throttled; GraphQL + is the PR-mutation bottleneck). This is the "no-PR swarm mode." + +4. **Otto Modification 4 (the dual-market discriminator)**: each action-type + *declares its gate* in the grammar — state-machine-internal transitions → + append-only direct push (PR-less, Agora market); cross-cutting substrate + (rules, public APIs) → still PR-gated (leash market). Same state machine, two + gates per action type. + +5. **The LLM never holds state internally.** Every invocation reads current state + from Git, gets a menu, returns a choice; the script executes + appends. State + lives in Git, not in the model. + +## What the accelerator adds (its own work-items) + +- **Event-store schema** (Action Item 2 → `EVENT-STORE-SCHEMA.md`): the concrete + shape of a move-next transition as an append-only git event — informed by the + 2026-05-29 razor-flow substrate (forgiveness-budget: retraction is logical not + physical, "run out of space = run out of forgiveness"; schema-in-the-stream: + schema-changes-as-events → automatic schema-evolution over history). +- **GH-Actions-recursion harness** (Action Item 3): a minimal self-triggering + Action that reads the event-store, picks a move, appends the next event. +- **Harvest protocol** (Action Item 4): how a matured piece graduates to main. +- **Dual-market routing** (Action Item 5): which DUs are leash (PR) vs Agora + (PR-less), per Otto Modification 4. + +## Composes with + +- `tools/agent-loop/state-machine.ts` (the move-next DUs the event-store persists) +- B-0867 (workflow-engine v1) + B-0874 (no-PR swarm) + B-0858 (heartbeat folder) +- `docs/research/2026-05-29-rodneys-razor-is-a-compression-engine-...md` (Insights 3+4 feed the schema) +- The AgencySignature v1 trailer (per CLAUDE.md) — each event-commit composes with it diff --git a/docs/backlog/P2/B-0940-evaluate-ubuntu-support-value-nixos-primary-community-reach-aaron-2026-05-30.md b/docs/backlog/P2/B-0940-evaluate-ubuntu-support-value-nixos-primary-community-reach-aaron-2026-05-30.md new file mode 100644 index 0000000000..da61fe5289 --- /dev/null +++ b/docs/backlog/P2/B-0940-evaluate-ubuntu-support-value-nixos-primary-community-reach-aaron-2026-05-30.md @@ -0,0 +1,88 @@ +--- +id: B-0940 +priority: P2 +status: open +title: Evaluate what Ubuntu support brings us — NixOS is primary; Ubuntu's value is community/contributor reach +tier: strategic-evaluation +ask: Aaron 2026-05-30 +created: 2026-05-30 +last_updated: 2026-05-30 +decomposition: leaf +composes_with: + - tools/setup/install.sh + - .github/workflows/docker-nixos-install-sh-test.yml + - .claude/rules/dv2-data-split-discipline-activated.md +tags: [install-sh, nixos, ubuntu, ci, docker, three-way-parity, strategic] +type: evaluation +--- + +# B-0940 — Evaluate what Ubuntu support brings us (NixOS primary) + +## Origin + +Aaron 2026-05-30 (during the Docker Ubuntu+NixOS test build): *"i would also say +nixos is our primary we should put on backlog and evaluate what ubuntu is bringing +us, the community of ubuntu is really why i'm thinking ubuntu matters."* + +## The question + +**NixOS is the primary target — declarative BY CONSTRUCTION.** Aaron 2026-05-30 +(the deeper rationale): *"nix is what boots the usb/iso our real hardware boots +cause it's declarative. ubuntu is not on its dependency management — we use +install.sh to make ubuntu work like nixos with declarative dependencies."* + +This is the load-bearing distinction: + +- **NixOS** boots the **real hardware** (the USB/ISO that boots actual machines) + *because* the whole system — OS config + dependency closure — is declarative and + reproducible by construction. No bridge needed; declarativeness is native. +- **Ubuntu** is **imperative** in its dependency management (apt, ad-hoc installs). + It has no native declarative-deps property. +- **`install.sh` + the declarative manifests** (manifests/local-llm, .mise.toml, + manifests/apt/brew, …) are the **bridge that retrofits NixOS-like declarative + dependencies ONTO Ubuntu** — i.e. install.sh's job on Ubuntu is literally "make + Ubuntu behave like NixOS." That's the entropy-lever framing applied to a + non-declarative base OS. + +So NixOS is primary not just by preference but by *kind*: it IS the declarative +substrate; Ubuntu is made to *act* declarative via install.sh. The cost of Ubuntu +is maintaining that simulation layer (the install.sh Ubuntu path + apt deps + +floating-binary installs); the value is what the next paragraph weighs. + +**Ubuntu's value is community/contributor reach**, not technical superiority. +Aaron's framing: Ubuntu matters because of its *community* — contributor +familiarity, the default-mental-model for most devs, GitHub-hosted runner +ubiquity (ubuntu-latest is the CI default), and the volume of Ubuntu-targeting +prior art. The question is whether that reach justifies Ubuntu as a *first-class* +install/CI target or whether it's community-convenience only. + +## What to evaluate + +- **Contributor reach**: how many would-be contributors are Ubuntu-default vs + willing to use NixOS? Does first-class Ubuntu lower the contribution barrier + enough to matter? +- **CI ubiquity**: `ubuntu-24.04` is the default GH-hosted runner; NixOS in CI is + container/QEMU-mediated. What does dropping/keeping Ubuntu cost in CI surface? +- **Maintenance cost** of the Ubuntu path: the `apt` manifest, the floating-binary + installs (e.g. the ollama `.tar.zst` linux install in `common/local-llm.sh`), + and the non-reproducibility vs NixOS's pinned closure. +- **Decision**: Ubuntu stays first-class (community justifies it) OR Ubuntu becomes + community-convenience-only (best-effort, NixOS is the supported/reproducible + path) OR some tiered support level. + +## Acceptance + +1. A short decision doc (in `docs/research/` or as this row's Resolution) weighing + Ubuntu's community-reach value against its maintenance + non-reproducibility + cost, with NixOS established as primary. +2. A clear support-tier statement for Ubuntu (first-class / community-convenience / + tiered) that the install-graph + CI strategy follow. + +## Notes + +Surfaced alongside the Docker Ubuntu+NixOS install.sh test pair (both OSes run +install.sh in containers; per Aaron's "center our docker tests around ubuntu and +nixos"). This row is the *strategic* counterpart: building the Ubuntu test does not +by itself decide Ubuntu's long-term support tier — this row does. NixOS-primary is +the standing default; Ubuntu is retained pending this evaluation because of its +community reach. diff --git a/docs/backlog/P2/B-0941-nixos-native-ollama-local-llm-hole-in-the-shield-test-passes-by-skipping-aaron-2026-05-30.md b/docs/backlog/P2/B-0941-nixos-native-ollama-local-llm-hole-in-the-shield-test-passes-by-skipping-aaron-2026-05-30.md new file mode 100644 index 0000000000..8b3746369c --- /dev/null +++ b/docs/backlog/P2/B-0941-nixos-native-ollama-local-llm-hole-in-the-shield-test-passes-by-skipping-aaron-2026-05-30.md @@ -0,0 +1,118 @@ +--- +id: B-0941 +priority: P2 +status: open +title: NixOS-native ollama for the local-LLM primitive — close the hole in the shield (NixOS test passes by SKIPPING, not validating) +tier: install-graph-correctness +ask: Aaron 2026-05-30 +created: 2026-05-30 +last_updated: 2026-05-30 +decomposition: leaf +composes_with: + - tools/setup/common/local-llm.sh + - tools/setup/manifests/local-llm + - .github/workflows/docker-nixos-install-sh-test.yml + - tools/accelerator/validate-local-llm.ts + - docs/backlog/P2/B-0940-evaluate-ubuntu-support-value-nixos-primary-community-reach-aaron-2026-05-30.md +tags: [install-sh, nixos, ollama, local-llm, ci, docker, false-green, entropy-shield] +type: bug +--- + +# B-0941 — NixOS-native ollama: close the hole in the shield + +## Origin + +Surfaced 2026-05-30 while validating the local-LLM core primitive (ollama + +qwen2.5:0.5b CPU model) across the Docker Ubuntu+NixOS install.sh test matrix. + +Aaron 2026-05-30, on what actually holds back entropy: *"it's impossible to keep +all the install surfaces in your mind at once — only automation can be sure a +nixos change didn't break ubuntu or mac and vice versa. trying to manually make +sure everything is a losing game to entropy."* And the sharpening: the entropy +shield is not install.sh itself — *"the automated tests around install.sh +honestly — that's the shield."* + +This row is a **hole in that shield.** + +## The bug — false-green on the primary OS + +`tools/setup/common/local-llm.sh` installs ollama on Linux by downloading the +**generic upstream binary** (`ollama-linux-.tar.zst`) into `~/.local/bin`. +That works on Ubuntu (FHS). It does **NOT** work on NixOS: + +- NixOS is **non-FHS** — a generic dynamically-linked binary dropped into + `~/.local/bin` won't find its loader/libs. The ollama binary won't run. +- `local-llm.sh` is intentionally **graceful** (warn + `exit 0` on any failure) + so install.sh never hard-fails on the local-LLM step. + +Compose those two facts and the result is a **false-green**: on NixOS, +`local-llm.sh` fails to produce a working ollama, skips gracefully, and the +`docker-nixos-install-sh-test` build **passes anyway** — because the NixOS test +validates that *install.sh runs clean*, NOT that *the local-LLM actually works*. + +So the automated test (the shield) reports green on the **primary OS** while the +local-LLM primitive is non-functional there. A shield with a hole is worse than a +known gap, because it reads as covered. + +NixOS is the primary (B-0940: declarative-by-construction; boots the real +hardware via USB/ISO). The local-LLM primitive being silently broken on the +primary — behind a green check — is the exact failure mode the test matrix exists +to prevent. + +## Fix (two halves — both required to close the hole) + +### Half 1 — NixOS-native ollama (declarative) + +NixOS should get ollama the declarative-native way, not via the Ubuntu +generic-binary retrofit: + +- Add `services.ollama.enable = true;` (or `environment.systemPackages = [ pkgs.ollama ];` + + a oneshot model-pull unit) to the appropriate NixOS module + (`full-ai-cluster/nixos/modules/common.nix` or a dedicated `local-llm.nix`). +- Pin the model to `manifests/local-llm` (`qwen2.5:0.5b`) so the declarative + pin stays the single source of truth across all three OSes. +- `local-llm.sh` should **detect NixOS** (`/etc/NIXOS` or `$NIX_PATH`) and + no-op there (ollama comes from the system closure, not the script) — the + generic-binary path stays for Ubuntu only. + +Note: existing `ollama` mentions in `full-ai-cluster/nixos/` are the **big-cluster +GPU-serving** path (worker-gpu via Ollama/vLLM, per control-plane README) — a +different concern from this small-CPU dev/CI/DST local-LLM primitive. This row is +the latter. + +### Half 2 — make the NixOS test ASSERT, not skip + +Turn the false-green into a true signal: the `docker-nixos-install-sh-test` (and +its Dockerfile) must run the same local-LLM validation the Ubuntu test does — +start the daemon, assert the pinned model is present, run the **real** `chooseIndex` +probe (`tools/accelerator/validate-local-llm.ts`), and **fail the build if the +local-LLM is absent**. Graceful-skip is correct for `install.sh` (don't brick a +machine over an optional probe), but the **test** must not inherit that grace — +the test's job is to catch exactly this. + +## Acceptance + +1. On a NixOS image, the local-LLM primitive (ollama + pinned model + working + `chooseIndex`) is functional — installed the declarative-native way. +2. `docker-nixos-install-sh-test` ASSERTS the local-LLM works (real probe), and + **fails** if it doesn't — no more graceful-skip-to-green for the primitive. +3. The `manifests/local-llm` model pin remains the single cross-OS source of + truth (Ubuntu generic-binary, macOS brew, NixOS nixpkgs all read it). + +## Why P2 (not P1) + +The local-LLM primitive is a **testing/DST seam** (the move-next selector + the +planned observe.ts auto-classifier), not yet a production-serving path. The hole +is in test-fidelity on the primary OS, which matters before harvest-to-main but +doesn't block live behavior today. Raise to P1 if/when the local-LLM becomes +load-bearing for a shipped path on NixOS hardware. + +## Composes + +- **B-0940** (Ubuntu-value evaluation; NixOS primary) — this row is the concrete + correctness counterpart: NixOS-primary means the NixOS local-LLM must actually + work, not just pass-by-skip. +- The Docker Ubuntu+NixOS(+mac) install.sh test matrix — the shield; this row + patches a hole in it. +- `.claude/rules/dep-pin-search-first-authority.md` — `manifests/local-llm` model + pin as the single declarative source of truth across OSes. diff --git a/tools/accelerator/event-store-schema.test.ts b/tools/accelerator/event-store-schema.test.ts new file mode 100644 index 0000000000..4be52ced1c --- /dev/null +++ b/tools/accelerator/event-store-schema.test.ts @@ -0,0 +1,147 @@ +// tools/accelerator/event-store-schema.test.ts +// +// Tests for the git-event-store schema @1 (Action Item 2). Verifies the schema +// composes with state-machine.ts's DUs + the invariants hold by construction. + +import { describe, expect, test } from "bun:test"; +import type { AgentContext, AgentState, MenuOption } from "../agent-loop/state-machine.ts"; +import { transition } from "../agent-loop/state-machine.ts"; +import { + CURRENT_SCHEMA, + eventPath, + isUlid, + isZetaIdHex, + makeRetractionEvent, + makeTransitionEvent, + type BuildDeps, + type ZetaIdHex, + validateEnvelope, +} from "./event-store-schema.ts"; + +// Deterministic deps (DST-style): monotonic fake ZetaIdHex ids + fixed clock. +function makeDeps(seed = 0): BuildDeps { + let n = seed; + return { + newId: (_sem) => (n++).toString(16).padStart(32, "0") as ZetaIdHex, + nowIso: () => "2026-05-29T19:55:00.000Z", + }; +} + +const ctx: AgentContext = { agent: "otto", cycle: 42, sessionStartIso: "2026-05-29T19:00:00.000Z" }; +const idle: AgentState = { tag: "Idle", context: ctx }; + +describe("ZetaIdHex (@2 event key)", () => { + test("accepts a valid 32-char lowercase-hex ZetaId", () => { + expect(isZetaIdHex("0000000000000000000000000000007b")).toBe(true); + expect(isZetaIdHex("deadbeefdeadbeefdeadbeefdeadbeef")).toBe(true); + }); + test("rejects wrong length / uppercase / non-hex", () => { + expect(isZetaIdHex("nope")).toBe(false); + expect(isZetaIdHex("DEADBEEFDEADBEEFDEADBEEFDEADBEEF")).toBe(false); // uppercase + expect(isZetaIdHex("0000000000000000000000000000007")).toBe(false); // 31 chars + }); +}); + +describe("ULID (@1 legacy back-compat)", () => { + test("accepts a valid 26-char Crockford-base32 ULID", () => { + expect(isUlid("01J8XQ7M0Z0000000000000000")).toBe(true); + }); + test("rejects wrong length / illegal chars", () => { + expect(isUlid("nope")).toBe(false); + expect(isUlid("01J8XQ7M0Z000000000000000I")).toBe(false); // I is excluded in Crockford + }); +}); + +describe("makeTransitionEvent", () => { + test("persists transition(from, option) = to with weight +1 + current schema", () => { + const deps = makeDeps(); + const option: MenuOption = { + tag: "PickWork", + work: { + id: "B-0867", + lane: "tooling-or-ci", + estimatedDoraContribution: 0.5, + uncertainty: 0.2, + trajectoryPhase: "execution", + agentInterest: 0.9, + }, + }; + const to = transition(idle, option); // the move-next core + const ev = makeTransitionEvent(deps, { context: ctx, prev: null, from: idle, option, to }); + + expect(ev.kind).toBe("transition"); + expect(ev.weight).toBe(1); + expect(ev.schema).toBe(CURRENT_SCHEMA); + expect(ev.agent).toBe("otto"); + expect(ev.cycle).toBe(42); + expect(ev.to.tag).toBe("ExecutingWork"); + expect(validateEnvelope(ev).ok).toBe(true); + }); +}); + +describe("makeRetractionEvent (logical forgiveness)", () => { + test("negates a prior event with weight -1", () => { + const deps = makeDeps(); + const target = makeTransitionEvent(deps, { + context: ctx, + prev: null, + from: idle, + option: { tag: "EnterFreeTime", reason: "chosen rest" }, + to: transition(idle, { tag: "EnterFreeTime", reason: "chosen rest" }), + }); + const retraction = makeRetractionEvent(deps, { context: ctx, prev: target.id, retracts: target.id }); + + expect(retraction.kind).toBe("retraction"); + expect(retraction.weight).toBe(-1); + expect(retraction.retracts).toBe(target.id); + expect(validateEnvelope(retraction).ok).toBe(true); + }); +}); + +describe("eventPath is conflict-free by construction", () => { + test("per-agent dir + unique id → distinct paths per agent", () => { + const id = "0000000000000000000000000000007b" as ZetaIdHex; + expect(eventPath("otto", id)).toBe("events/otto/0000000000000000000000000000007b.json"); + expect(eventPath("alexa", id)).toBe("events/alexa/0000000000000000000000000000007b.json"); + // Same id, different agent → different path → no merge collision. + expect(eventPath("otto", id)).not.toBe(eventPath("alexa", id)); + }); +}); + +describe("validateEnvelope catches malformed events", () => { + test("flags invalid id, bad schema, bad weight", () => { + const bad = { + kind: "transition", + id: "not-a-valid-id" as ZetaIdHex, + schema: "bogus", + ts: "not-a-date", + agent: "otto", + cycle: 1, + prev: null, + weight: 2, + from: idle, + option: { tag: "EnterFreeTime", reason: "x" }, + to: idle, + } as unknown as Parameters[0]; + const res = validateEnvelope(bad); + expect(res.ok).toBe(false); + if (!res.ok) expect(res.errors.length).toBeGreaterThanOrEqual(4); + }); + + test("accepts a legacy @1 ULID id (back-compat) but new events use ZetaIdHex", () => { + const legacy = { + kind: "transition", + id: "01J8XQ7M0Z0000000000000000", + schema: CURRENT_SCHEMA, + ts: "2026-05-30T04:43:57.530Z", + agent: "otto", + cycle: 0, + prev: null, + weight: 1, + from: idle, + option: { tag: "EnterFreeTime", reason: "x" }, + to: idle, + } as unknown as Parameters[0]; + expect(validateEnvelope(legacy).ok).toBe(true); // isEventId accepts the legacy ULID + }); +}); diff --git a/tools/accelerator/event-store-schema.ts b/tools/accelerator/event-store-schema.ts new file mode 100644 index 0000000000..6ad901ff5c --- /dev/null +++ b/tools/accelerator/event-store-schema.ts @@ -0,0 +1,239 @@ +// tools/accelerator/event-store-schema.ts +// +// PR-less git-monster accelerator — git-event-store schema, version @1. +// Action Item 2 of docs/accelerator/EVENT-STORE-SCHEMA.md. +// +// A move-next transition persisted as an append-only Git event. Composes with +// tools/agent-loop/state-machine.ts (the AgentState + MenuOption DUs + pure +// `transition`). This module IS the canonical move-next-event@1 schema +// (schema-in-the-stream: a future @2 lands as updated types + a schema-def event). +// +// Design (full rationale in docs/accelerator/EVENT-STORE-SCHEMA.md): +// - One event per file: events//.json +// - ULID filename = 128-bit, time-sortable, globally unique → per-agent dir + +// unique filename ⇒ no two agents write the same path ⇒ conflict-free merges +// ⇒ PR-less swarm (B-0867 128-bit-unique-ID design; B-0874 no-PR swarm). +// - Z-set weight (+1 assert / -1 retract): forgiveness is logical; the file +// stays on disk (physical cost) → compaction/tiering is the forgiveness-budget +// ("run out of space = run out of forgiveness", razor-flow Insight 3). +// - schema-in-the-stream: every event carries `schema`; schema-def events +// declare versions → automatic schema-evolution over history (Insight 4). +// +// Pure types + validation + a builder. No I/O (the GH-Actions-recursion harness +// that reads/writes/pushes is Action Item 3). + +import type { + AgentContext, + AgentPersona, + AgentState, + Lane, + MenuOption, +} from "../agent-loop/state-machine.ts"; + +// ─── Zeta-ID (the canonical 128-bit merge primitive) ───────────────── +// Event ids are the canonical ZetaId (B-0893; src/Core.TypeScript/zeta-id/), +// serialized as a 32-char lowercase-hex string of the 128-bit value. The +// ZetaId encodes version+timestamp in its HIGH bits, so lexical-hex order = +// chronological order (same property the old ULID gave us), AND the key now +// carries category / persona / authority / location / momentum (provenance in +// the key itself) instead of being opaque timestamp+randomness. +export type ZetaIdHex = string & { readonly __brand: "ZetaIdHex" }; + +const ZETA_ID_HEX_RE = /^[0-9a-f]{32}$/; // 128 bits, zero-padded lowercase hex + +export function isZetaIdHex(s: string): s is ZetaIdHex { + return ZETA_ID_HEX_RE.test(s); +} + +// ─── Legacy ULID (back-compat for move-next-event@1) ───────────────── +// @1 events were keyed by a placeholder ULID (Crockford base32, 26 chars). +// Retained so replay still validates pre-@2 events on the stream; new events +// (@2) use ZetaIdHex. `isEventId` accepts either format. +export type Ulid = string & { readonly __brand: "Ulid" }; + +const ULID_RE = /^[0-9A-HJKMNP-TV-Z]{26}$/; // Crockford base32, 26 chars + +export function isUlid(s: string): s is Ulid { + return ULID_RE.test(s); +} + +/** An event id is valid if it is a @2 ZetaIdHex OR a legacy @1 ULID. */ +export function isEventId(s: string): boolean { + return isZetaIdHex(s) || isUlid(s); +} + +// ─── Schema identity (schema-in-the-stream) ────────────────────────── +// @2 = Zeta-ID-keyed events (was @1 = placeholder-ULID-keyed). The id format +// changed (ULID → ZetaIdHex); replay still accepts legacy @1 ids via isEventId. +export const CURRENT_SCHEMA = "move-next-event@2" as const; +export type SchemaId = `${string}@${number}`; + +// ─── Z-set weight (forgiveness algebra) ────────────────────────────── +// +1 assert, -1 retract. The active state is the Z-set sum of weights; +// net-zero (asserted-then-retracted) pairs are compaction candidates. +export type Weight = 1 | -1; + +// ─── Event kinds ───────────────────────────────────────────────────── +export type EventKind = + | "transition" + | "heartbeat" + | "schema-def" + | "retraction"; + +interface EventBase { + readonly id: ZetaIdHex; // also the filename: events//.json + readonly schema: SchemaId; // which schema interprets this event + readonly ts: string; // ISO-8601; redundant with the ZetaId timestamp, explicit for readers + readonly agent: AgentPersona; + readonly cycle: number; // AgentContext.cycle + readonly prev: ZetaIdHex | Ulid | null; // previous event in THIS agent's stream (causal link); null = first. Ulid permitted only for a legacy @1 predecessor. + readonly weight: Weight; + readonly agencySig?: Readonly>; // AgencySignature v1 trailer fields +} + +/** A persisted move-next transition: the record of `transition(from, option) = to`. */ +export interface TransitionEvent extends EventBase { + readonly kind: "transition"; + readonly from: AgentState; + readonly option: MenuOption; + readonly to: AgentState; // = transition(from, option); stored for audit + reader convenience +} + +/** A heartbeat (RecordingHeartbeat; composes with B-0858 heartbeat folder). */ +export interface HeartbeatEvent extends EventBase { + readonly kind: "heartbeat"; + readonly lane: Lane; + readonly note?: string; +} + +/** Declares a schema version (schema-in-the-stream). Lands in events/_schema/. */ +export interface SchemaDefEvent extends EventBase { + readonly kind: "schema-def"; + readonly schemaName: string; // e.g. "move-next-event" + readonly schemaVersion: number; // e.g. 2 + readonly jsonSchema: Readonly>; // the declared shape +} + +/** Negates a prior event (logical forgiveness; weight is -1). */ +export interface RetractionEvent extends EventBase { + readonly kind: "retraction"; + readonly weight: -1; + readonly retracts: ZetaIdHex | Ulid; // the event id being negated (Ulid only if negating a legacy @1 event) +} + +export type EventEnvelope = + | TransitionEvent + | HeartbeatEvent + | SchemaDefEvent + | RetractionEvent; + +// ─── Validation ────────────────────────────────────────────────────── +// Result-over-exception (per Zeta convention): returns Ok | Error-shape rather +// than throwing, so the harness (Action Item 3) handles malformed events as data. +export type ValidationResult = + | { readonly ok: true } + | { readonly ok: false; readonly errors: readonly string[] }; + +export function validateEnvelope(e: EventEnvelope): ValidationResult { + const errors: string[] = []; + if (!isEventId(e.id)) errors.push(`id is not a valid Zeta-ID (or legacy ULID): ${String(e.id)}`); + if (e.prev !== null && !isEventId(e.prev)) { + errors.push(`prev is neither null nor a valid Zeta-ID (or legacy ULID): ${String(e.prev)}`); + } + if (!/^.+@\d+$/.test(e.schema)) { + errors.push(`schema is not "@": ${e.schema}`); + } + if (Number.isNaN(Date.parse(e.ts))) errors.push(`ts is not ISO-8601: ${e.ts}`); + if (e.weight !== 1 && e.weight !== -1) { + errors.push(`weight must be +1 or -1: ${String(e.weight)}`); + } + if (e.kind === "retraction") { + if (e.weight !== -1) errors.push("retraction events must have weight -1"); + if (!isEventId(e.retracts)) { + errors.push(`retraction.retracts is not a valid Zeta-ID (or legacy ULID): ${String(e.retracts)}`); + } + } + if (e.kind === "transition" && e.weight !== 1) { + errors.push("transition events must have weight +1 (retract via a retraction event)"); + } + return errors.length === 0 ? { ok: true } : { ok: false, errors }; +} + +// ─── The per-agent path for an event (conflict-free by construction) ── +export function eventPath(agent: AgentPersona, id: ZetaIdHex): string { + return `events/${agent}/${id}.json`; +} + +// ─── Builders ──────────────────────────────────────────────────────── +// The harness supplies the Zeta-ID generator (the codec-backed `newId`) + a +// clock; these builders keep the shape correct and the schema/weight invariants +// by construction. `newId` takes the SEMANTICS that go into the key's category / +// persona / authority bits (the schema stays decoupled from the codec types; +// the harness's realDeps.newId does the actual pack()). + +/** Event-key semantics — the provenance the harness packs into the ZetaId. */ +export interface IdSemantics { + readonly agent: AgentPersona; // → ZetaId persona bits + readonly category: "Observation" | "Emission" | "Workflow" | "Heartbeat"; // → ZetaId category bits + readonly authority?: "Simulated" | "BestEffort" | "Standard" | "TrustedAgent" | "HumanVerified"; // → ZetaId authority bits (default Simulated) +} + +export interface BuildDeps { + readonly newId: (sem: IdSemantics) => ZetaIdHex; + readonly nowIso: () => string; +} + +/** Category for a transition: heartbeat options → Heartbeat, else Workflow. */ +function categoryForOption(option: MenuOption): IdSemantics["category"] { + return option.tag === "EmitHeartbeat" ? "Heartbeat" : "Workflow"; +} + +export function makeTransitionEvent( + deps: BuildDeps, + args: { + readonly context: AgentContext; + readonly prev: ZetaIdHex | Ulid | null; + readonly from: AgentState; + readonly option: MenuOption; + readonly to: AgentState; + readonly agencySig?: Readonly>; + }, +): TransitionEvent { + return { + kind: "transition", + id: deps.newId({ agent: args.context.agent, category: categoryForOption(args.option) }), + schema: CURRENT_SCHEMA, + ts: deps.nowIso(), + agent: args.context.agent, + cycle: args.context.cycle, + prev: args.prev, + weight: 1, + from: args.from, + option: args.option, + to: args.to, + ...(args.agencySig === undefined ? {} : { agencySig: args.agencySig }), + }; +} + +export function makeRetractionEvent( + deps: BuildDeps, + args: { + readonly context: AgentContext; + readonly prev: ZetaIdHex | Ulid | null; + readonly retracts: ZetaIdHex | Ulid; + readonly agencySig?: Readonly>; + }, +): RetractionEvent { + return { + kind: "retraction", + id: deps.newId({ agent: args.context.agent, category: "Workflow" }), + schema: CURRENT_SCHEMA, + ts: deps.nowIso(), + agent: args.context.agent, + cycle: args.context.cycle, + prev: args.prev, + weight: -1, + retracts: args.retracts, + ...(args.agencySig === undefined ? {} : { agencySig: args.agencySig }), + }; +} diff --git a/tools/accelerator/local-llm.test.ts b/tools/accelerator/local-llm.test.ts new file mode 100644 index 0000000000..e296cd3db5 --- /dev/null +++ b/tools/accelerator/local-llm.test.ts @@ -0,0 +1,82 @@ +// tools/accelerator/local-llm.test.ts +// +// Backend-agnostic tests for the local-LLM primitive — mock the model, so these +// run anywhere with no model/account (the selection + fallback logic is what we +// validate here; the actual on-runner model is exercised by the workflow). + +import { describe, expect, test } from "bun:test"; +import { chooseIndex, classify, type ModelBackend } from "./local-llm.ts"; + +function mockBackend(reply: string): ModelBackend { + return { name: "mock", complete: async () => reply }; +} +function throwingBackend(): ModelBackend { + return { + name: "mock-throw", + complete: async () => { + throw new Error("model unavailable"); + }, + }; +} + +describe("chooseIndex — the CYOA / classifier choice primitive", () => { + test("parses a clean index", async () => { + const r = await chooseIndex(mockBackend("1"), { context: "x", options: ["a", "b", "c"] }); + expect(r).toEqual({ index: 1, raw: "1", fallback: false }); + }); + + test("extracts the first digit from noisy output", async () => { + const r = await chooseIndex(mockBackend("The best choice is 2 because…"), { + context: "x", + options: ["a", "b", "c"], + }); + expect(r.index).toBe(2); + expect(r.fallback).toBe(false); + }); + + test("falls back to 0 on an out-of-range index", async () => { + const r = await chooseIndex(mockBackend("9"), { context: "x", options: ["a", "b"] }); + expect(r.index).toBe(0); + expect(r.fallback).toBe(true); + }); + + test("falls back to 0 on non-numeric output", async () => { + const r = await chooseIndex(mockBackend("banana"), { context: "x", options: ["a", "b"] }); + expect(r.index).toBe(0); + expect(r.fallback).toBe(true); + }); + + test("falls back to 0 when the backend throws (loop never stalls)", async () => { + const r = await chooseIndex(throwingBackend(), { context: "x", options: ["a", "b"] }); + expect(r.index).toBe(0); + expect(r.fallback).toBe(true); + }); + + test("single option short-circuits with no model call", async () => { + // throwingBackend would throw if called — proves no call happened. + const r = await chooseIndex(throwingBackend(), { context: "x", options: ["only"] }); + expect(r).toEqual({ index: 0, raw: "", fallback: false }); + }); + + test("empty options throws (caller bug, not a model failure)", async () => { + await expect(chooseIndex(mockBackend("0"), { context: "x", options: [] })).rejects.toThrow(); + }); +}); + +describe("classify — observe.ts auto-classifier shape", () => { + test("maps the chosen index to its label", async () => { + const r = await classify(mockBackend("0"), { + input: "deploy rolled back after error spike", + labels: ["incident", "normal"], + }); + expect(r.label).toBe("incident"); + expect(r.index).toBe(0); + expect(r.fallback).toBe(false); + }); + + test("fallback picks the first label safely", async () => { + const r = await classify(throwingBackend(), { input: "x", labels: ["a", "b"] }); + expect(r.label).toBe("a"); + expect(r.fallback).toBe(true); + }); +}); diff --git a/tools/accelerator/local-llm.ts b/tools/accelerator/local-llm.ts new file mode 100644 index 0000000000..be7b0641b7 --- /dev/null +++ b/tools/accelerator/local-llm.ts @@ -0,0 +1,152 @@ +// tools/accelerator/local-llm.ts +// +// A small, ACCOUNT-FREE local-LLM primitive for the accelerator. The whole +// point: validate the "LLM-in-the-loop" seam on a GitHub CPU runner at ZERO +// spend — no API key, no account — before attaching a real harness (Claude +// Code / Codex / …). Run a tiny instruct model (e.g. Qwen2.5-0.5B) locally on +// the runner; this module is the backend-agnostic core that talks to it. +// +// Reusable for TWO consumers (Aaron 2026-05-30): +// 1. move-next SELECTOR — "choose your own adventure": pick the next move +// from the menu (the SelectMove seam in move-next-harness.ts). +// 2. observe.ts AUTO-CLASSIFIER (future, Max's keystone) — "given an +// observation, pick one label." Same shape: constrained choice among N. +// +// Backend-swappable: ollamaBackend (localhost) today; node-llama-cpp (in-process, +// GBNF-grammar-constrained) or a real account-backed backend later. Selection is +// always validated + falls back safely, so a bad/slow/absent model never stalls +// the loop (exceptions-as-signals: the model is best-effort, the fallback is the +// safety rail). + +// ─── Backend interface ─────────────────────────────────────────────── +// DST note (Aaron 2026-05-30): a small local model at temperature 0 (greedy) + +// a fixed `seed` + a PINNED model/quantization is DETERMINISTIC — same input ⇒ +// same output, reproducibly — so it can be a real (not mocked) fixture in +// deterministic-simulation tests (e.g. observe.ts's auto-classifier), not just a +// runtime selector. Cross-hardware caveat: CPU float order can differ across +// runner architectures, so pin the runner image (or snapshot the output) when +// asserting exact classifications across machines; on one image it is stable. +export interface CompleteOptions { + readonly temperature?: number; // default 0 (greedy — reproducible, DST) + readonly seed?: number; // fix for deterministic-simulation reproducibility + readonly maxTokens?: number; // selection needs only a few tokens +} + +export interface ModelBackend { + readonly name: string; + /** Complete a prompt with a small local model. Returns raw text. */ + complete(prompt: string, opts?: CompleteOptions): Promise; +} + +// ─── Ollama backend (account-free; model runs on the runner) ───────── +export interface OllamaOptions { + readonly model?: string; // tiny instruct model + readonly host?: string; + readonly timeoutMs?: number; + readonly seed?: number; // default deterministic seed (DST); override per-call +} + +/** A ModelBackend backed by a local Ollama server (no account/key). */ +export function ollamaBackend(opts: OllamaOptions = {}): ModelBackend { + const model = opts.model ?? "qwen2.5:0.5b"; + const host = opts.host ?? "http://127.0.0.1:11434"; + const timeoutMs = opts.timeoutMs ?? 60_000; + const defaultSeed = opts.seed ?? 0; // fixed seed ⇒ reproducible (DST) + return { + name: `ollama:${model}`, + async complete(prompt, o) { + const ctrl = new AbortController(); + const timer = setTimeout(() => ctrl.abort(), timeoutMs); + try { + const res = await fetch(`${host}/api/generate`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model, + prompt, + stream: false, + options: { + temperature: o?.temperature ?? 0, + seed: o?.seed ?? defaultSeed, + num_predict: o?.maxTokens ?? 6, + }, + }), + signal: ctrl.signal, + }); + if (!res.ok) throw new Error(`ollama HTTP ${res.status}`); + const data = (await res.json()) as { response?: string }; + return data.response ?? ""; + } finally { + clearTimeout(timer); + } + }, + }; +} + +// ─── chooseIndex: the constrained-choice primitive ─────────────────── +export interface ChooseArgs { + readonly context: string; // describe the current state / observation + readonly options: readonly string[]; // human-readable option labels + readonly instruction?: string; +} + +export interface ChooseResult { + readonly index: number; // always a valid index into options + readonly raw: string; // the model's raw reply (for logging) + readonly fallback: boolean; // true ⇒ index 0 chosen because the model failed +} + +/** + * Ask the model to pick ONE option by index. Builds a numbered-options prompt, + * parses the first integer out of the reply, validates it is in range, and + * FALLS BACK to index 0 on any failure (empty menu is the only throw). A single + * option short-circuits with no model call. + */ +export async function chooseIndex(backend: ModelBackend, args: ChooseArgs): Promise { + const n = args.options.length; + if (n === 0) throw new Error("chooseIndex: options must be non-empty"); + if (n === 1) return { index: 0, raw: "", fallback: false }; + + const numbered = args.options.map((o, i) => `${i}: ${o}`).join("\n"); + const prompt = + `${args.instruction ?? "You are a selector. Choose the single best next action."}\n\n` + + `State:\n${args.context}\n\n` + + `Options:\n${numbered}\n\n` + + `Reply with ONLY the number of the chosen option (0-${n - 1}). Number:`; + + let raw = ""; + try { + raw = (await backend.complete(prompt, { temperature: 0, maxTokens: 6 })).trim(); + } catch { + return { index: 0, raw: "", fallback: true }; + } + const m = raw.match(/\d+/); + if (!m) return { index: 0, raw, fallback: true }; + const idx = Number.parseInt(m[0]!, 10); + if (!Number.isInteger(idx) || idx < 0 || idx >= n) return { index: 0, raw, fallback: true }; + return { index: idx, raw, fallback: false }; +} + +// ─── classify: observe.ts auto-classifier use case ─────────────────── +export interface ClassifyResult { + readonly label: string; + readonly index: number; + readonly fallback: boolean; +} + +/** + * Classify an input into exactly one of `labels` (the observe.ts auto-classifier + * shape). Thin wrapper over chooseIndex so the selector + classifier share one + * validated, fallback-safe code path. + */ +export async function classify( + backend: ModelBackend, + args: { input: string; labels: readonly string[]; instruction?: string }, +): Promise { + const r = await chooseIndex(backend, { + context: args.input, + options: args.labels, + instruction: args.instruction ?? "Classify the input into exactly one label.", + }); + return { label: args.labels[r.index]!, index: r.index, fallback: r.fallback }; +} diff --git a/tools/accelerator/move-next-harness.test.ts b/tools/accelerator/move-next-harness.test.ts new file mode 100644 index 0000000000..7c88f9267c --- /dev/null +++ b/tools/accelerator/move-next-harness.test.ts @@ -0,0 +1,137 @@ +// tools/accelerator/move-next-harness.test.ts +// +// Tests for the move-next harness (Action Item 3): replay, one cycle, the +// bounded loop (hard cap), the kill-switch, and dry-run. + +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { mkdtempSync, rmSync, mkdirSync, writeFileSync, readdirSync, existsSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import type { AgentContext } from "../agent-loop/state-machine.ts"; +import type { BuildDeps, ZetaIdHex } from "./event-store-schema.ts"; +import { isZetaIdHex } from "./event-store-schema.ts"; +import { + HALT_SENTINEL, + MAX_ITERATIONS, + generateMenu, + isHalted, + loadStream, + packZetaIdHex, + replayState, + runCycle, + runLoop, +} from "./move-next-harness.ts"; +import { unpack } from "../../src/Core.TypeScript/zeta-id/zeta-id.ts"; +import { Category, Persona, type ZetaId } from "../../src/Core.TypeScript/zeta-id/types.ts"; + +let root: string; +beforeEach(() => { + root = mkdtempSync(join(tmpdir(), "accel-store-")); +}); +afterEach(() => { + rmSync(root, { recursive: true, force: true }); +}); + +// Deterministic deps: monotonic hex ids (zero-padded so lexical = numeric) + +// fixed clock. loadStream sorts by ts (all equal here) then tie-breaks by id, so +// monotonic ids preserve cycle order. +function makeDeps(): BuildDeps { + let n = 0; + return { + newId: (_sem) => (n++).toString(16).padStart(32, "0") as ZetaIdHex, + nowIso: () => "2026-05-30T00:00:00.000Z", + }; +} + +const ctx: AgentContext = { agent: "otto", cycle: 0, sessionStartIso: "2026-05-30T00:00:00.000Z" }; + +describe("packZetaIdHex (the canonical zeta-id event key)", () => { + test("produces a 32-char hex ZetaId encoding category + persona in the key", () => { + const id = packZetaIdHex({ agent: "otto", category: "Heartbeat" }); + expect(isZetaIdHex(id)).toBe(true); + // Round-trip through the canonical codec: the semantics are IN the key bits. + const obs = unpack(BigInt("0x" + id) as ZetaId); + expect(obs.category).toBe(Category.Heartbeat); + expect(obs.persona).toBe(Persona.FireflyCoherence); // otto → autonomous-agent persona + }); + test("aaron maps to the canonical Aaron persona", () => { + const obs = unpack(BigInt("0x" + packZetaIdHex({ agent: "aaron", category: "Workflow" })) as ZetaId); + expect(obs.persona).toBe(Persona.Aaron); + expect(obs.category).toBe(Category.Workflow); + }); + test("two ids differ (randomness bits)", () => { + expect(packZetaIdHex({ agent: "otto", category: "Workflow" })).not.toBe( + packZetaIdHex({ agent: "otto", category: "Workflow" }), + ); + }); +}); + +describe("loadStream + replayState", () => { + test("empty stream replays to Idle", () => { + expect(loadStream(root, "otto")).toEqual([]); + expect(replayState([], ctx).tag).toBe("Idle"); + }); + + test("a written transition event is loaded + replayed", () => { + const r = runCycle({ root, ctx, deps: makeDeps() }); + expect(r.wrotePath).toBe(`events/otto/${r.event.id}.json`); + const stream = loadStream(root, "otto"); + expect(stream).toHaveLength(1); + // From Idle the first menu option is EmitHeartbeat → RecordingHeartbeat. + expect(r.to.tag).toBe("RecordingHeartbeat"); + expect(replayState(stream, ctx).tag).toBe("RecordingHeartbeat"); + }); +}); + +describe("runLoop — hard cap (be-good-to-our-host)", () => { + test("clamps maxIterations to MAX_ITERATIONS", () => { + const result = runLoop({ + root, + agent: "otto", + maxIterations: MAX_ITERATIONS + 100, // ask for way over the cap + deps: makeDeps(), + }); + expect(result.cycles.length).toBe(MAX_ITERATIONS); + expect(result.stopped).toBe("max-iterations"); + // every cycle wrote exactly one event file + expect(readdirSync(join(root, "events", "otto")).length).toBe(MAX_ITERATIONS); + }); + + test("runs exactly N cycles when N <= cap", () => { + const result = runLoop({ root, agent: "otto", maxIterations: 3, deps: makeDeps() }); + expect(result.cycles.length).toBe(3); + }); +}); + +describe("runLoop — kill-switch", () => { + test("an events/_HALT sentinel stops the loop before any cycle", () => { + mkdirSync(join(root, "events"), { recursive: true }); + writeFileSync(join(root, "events", HALT_SENTINEL), "stop", "utf8"); + expect(isHalted(root)).toBe(true); + const result = runLoop({ root, agent: "otto", maxIterations: 5, deps: makeDeps() }); + expect(result.cycles.length).toBe(0); + expect(result.stopped).toBe("halted"); + }); +}); + +describe("runLoop — dry-run", () => { + test("writes nothing on dry-run", () => { + const result = runLoop({ root, agent: "otto", maxIterations: 3, deps: makeDeps(), dryRun: true }); + expect(result.cycles.length).toBe(3); + expect(result.cycles.every((c) => c.wrotePath === null)).toBe(true); + expect(existsSync(join(root, "events", "otto"))).toBe(false); + }); +}); + +describe("generateMenu always offers a valid non-empty menu", () => { + test("Idle + Paused + other states each yield ≥1 option", () => { + expect(generateMenu({ tag: "Idle", context: ctx }).length).toBeGreaterThan(0); + expect(generateMenu({ tag: "Paused", context: ctx, reason: "rest" }).length).toBeGreaterThan(0); + expect( + generateMenu({ tag: "ExecutingWork", context: ctx, work: { + id: "x", lane: "tooling-or-ci", estimatedDoraContribution: 0, uncertainty: 0, + trajectoryPhase: "execution", agentInterest: 0, + } }).length, + ).toBeGreaterThan(0); + }); +}); diff --git a/tools/accelerator/move-next-harness.ts b/tools/accelerator/move-next-harness.ts new file mode 100644 index 0000000000..ed95d9e247 --- /dev/null +++ b/tools/accelerator/move-next-harness.ts @@ -0,0 +1,374 @@ +// tools/accelerator/move-next-harness.ts +// +// PR-less git-monster accelerator — Action Item 3: the move-next harness. +// The deterministic-script half of the agent loop (per B-0867 / B-0874): +// read current state from the git-event-store → generate a menu → +// a selector picks a MenuOption → apply transition() → append the new +// event to the store. The LLM is a pure selector (the `selectMove` seam); +// this harness holds the state machine + the I/O. +// +// SAFETY (be-good-to-our-host, per docs/accelerator/README.md): +// - Bounded iterations: --max-iterations is HARD-CLAMPED to MAX_ITERATIONS. +// - Kill-switch: an `events/_HALT` sentinel file stops the loop immediately. +// - Append-only: only writes new event files; never rewrites/force-anything. +// - --dry-run: compute + log, write nothing. +// - git commit/push is NOT done here — that's the workflow's job (one +// append-only commit per run), so this module is pure file-I/O + testable. +// +// Composes with: +// - tools/accelerator/event-store-schema.ts (the @1 event envelope) +// - tools/agent-loop/state-machine.ts (AgentState/MenuOption DUs + transition) +// - .github/workflows/accelerator-move-next.yml (bounded self-re-dispatch) + +import { readdirSync, readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs"; +import { join } from "node:path"; +import { + type AgentContext, + type AgentPersona, + type AgentState, + type MenuOption, + transition, +} from "../agent-loop/state-machine.ts"; +import { + type BuildDeps, + type EventEnvelope, + type IdSemantics, + type TransitionEvent, + type ZetaIdHex, + eventPath, + makeTransitionEvent, + validateEnvelope, +} from "./event-store-schema.ts"; +import { + DEFAULT_ENV, + type SimulationEnvironment, + pack, +} from "../../src/Core.TypeScript/zeta-id/zeta-id.ts"; +import { + Category, + Chromosome, + Firefly, + IdVersion, + LocationHint, + Persona, +} from "../../src/Core.TypeScript/zeta-id/types.ts"; +import type { + Authority, + Milliseconds, + ZetaObservation, +} from "../../src/Core.TypeScript/zeta-id/types.ts"; + +// ─── Hard safety bound (be-good-to-our-host) ───────────────────────── +export const MAX_ITERATIONS = 25; // hard cap; --max-iterations clamps to this +export const HALT_SENTINEL = "_HALT"; // events/_HALT stops the loop + +// ─── Zeta-ID generation (the canonical 128-bit merge primitive, B-0893) ── +// Replaces the placeholder ULID with the cross-verified codec at +// src/Core.TypeScript/zeta-id/. The event id IS a real ZetaId, hex-serialized +// (32-char lowercase). version+timestamp live in the HIGH bits ⇒ hex order = +// chronological; the key now carries persona / category / authority / location +// (provenance in the key itself), not opaque timestamp+randomness. + +/** + * Map an accelerator agent → a canonical ZetaId persona. + * + * The canonical Persona vocabulary (shared C#/F#/TS, golden-vector cross-verified) + * currently blesses only Aaron(1) + FireflyCoherence(2). The full agent roster + * (otto/alexa/riven/vera/lior/addison/max) is NOT yet in the canonical enum, so + * autonomous agents map to FireflyCoherence and the precise agent stays in the + * event `agent` field + directory partition. FOLLOW-UP (cross-impl, golden-vector + * touching): extend the canonical Persona enum with the agent roster to put the + * exact agent into the key bits. + */ +function agentToPersona(agent: AgentPersona): Persona { + return agent === "aaron" ? Persona.Aaron : Persona.FireflyCoherence; +} + +const CATEGORY_BY_NAME: Record = { + Observation: Category.Observation, + Emission: Category.Emission, + Workflow: Category.Workflow, + Heartbeat: Category.Heartbeat, +}; + +/** Pack a real ZetaId for an event and hex-serialize it (32-char lowercase). */ +export function packZetaIdHex(sem: IdSemantics, env: SimulationEnvironment = DEFAULT_ENV): ZetaIdHex { + const obs: ZetaObservation = { + version: IdVersion.V1, + timestamp: Date.now() as Milliseconds, + chromosome: Chromosome.MetaCoherence, + category: CATEGORY_BY_NAME[sem.category], + firefly: Firefly.NoDirective, + authority: { type: sem.authority ?? "Simulated" } as Authority, + persona: agentToPersona(sem.agent), + momentum: { type: "Normal" }, + location: LocationHint.EastUS_VA1, + }; + const id = pack(obs, env) as bigint; + return id.toString(16).padStart(32, "0") as ZetaIdHex; +} + +export const realDeps: BuildDeps = { + newId: (sem) => packZetaIdHex(sem, DEFAULT_ENV), + nowIso: () => new Date().toISOString(), +}; + +// ─── Structured logging (observability) ────────────────────────────── +// Surfaces the KEYS the agent uses each cycle so a run is auditable from the +// Action log alone (PR-less ⇒ review-by-observation, per the charter): +// - agent : the PARTITION key → events// (single-writer) +// - key : the per-event ZetaId hex → events//.json (= id) +// - keyFormat : "zeta-id" (@2, canonical B-0893) or "ulid" (legacy @1). The +// ZetaId carries persona/category/authority/location in the key. +// - prev : the causal-link key (previous event id in THIS agent's stream). +// Logs go to STDERR so STDOUT stays the clean, parseable cycle summary. +export type Logger = (entry: Record) => void; + +/** Default: log nothing (keeps library callers + tests silent). */ +export const noopLog: Logger = () => {}; + +/** One JSON line per entry on stderr — greppable, CI-friendly, stdout-safe. */ +export const stderrLog: Logger = (entry) => { + process.stderr.write(JSON.stringify({ t: new Date().toISOString(), ...entry }) + "\n"); +}; + +/** Classify the event-key format (observability for the @1→@2 migration). */ +export function keyFormat(id: string): "zeta-id" | "ulid" | "unknown" { + if (/^[0-9a-f]{32}$/.test(id)) return "zeta-id"; // @2: 128-bit ZetaId hex + if (/^[0-9A-HJKMNP-TV-Z]{26}$/.test(id)) return "ulid"; // @1 legacy (Crockford) + return "unknown"; +} + +// ─── Store I/O (append-only) ───────────────────────────────────────── + +/** + * Read an agent's event stream, sorted chronologically by event `ts`. + * (Sort-by-`ts` is robust across the @2 ZetaIdHex + legacy @1 ULID id formats — + * filename-lexical sort only sorted correctly within a single id format. Tie-break + * by id for deterministic ordering of same-millisecond events.) + */ +export function loadStream(root: string, agent: AgentPersona): EventEnvelope[] { + const dir = join(root, "events", agent); + if (!existsSync(dir)) return []; + return readdirSync(dir) + .filter((f) => f.endsWith(".json")) + .map((f) => JSON.parse(readFileSync(join(dir, f), "utf8")) as EventEnvelope) + .sort((a, b) => { + const ta = Date.parse(a.ts); + const tb = Date.parse(b.ts); + if (ta !== tb) return ta - tb; + return a.id < b.id ? -1 : a.id > b.id ? 1 : 0; + }); +} + +/** Kill-switch: is the halt sentinel present? */ +export function isHalted(root: string): boolean { + return existsSync(join(root, "events", HALT_SENTINEL)); +} + +/** + * Replay an agent's stream to its current state via Z-set fold. + * Retracted (net-zero weight) events are dropped; surviving transition + * events are folded through `transition` from the initial Idle state. + * Cross-checks each stored `to` against transition(from, option). + */ +export function replayState(events: EventEnvelope[], ctx: AgentContext): AgentState { + // Z-set: sum weights per event id; an id with net weight 0 is fully retracted. + const netWeight = new Map(); + for (const e of events) { + const key = e.kind === "retraction" ? e.retracts : e.id; + netWeight.set(key, (netWeight.get(key) ?? 0) + e.weight); + } + let state: AgentState = { tag: "Idle", context: ctx }; + for (const e of events) { + if (e.kind !== "transition") continue; + if ((netWeight.get(e.id) ?? 0) <= 0) continue; // retracted + const next = transition(e.from, e.option); + state = next; // re-derived (cross-checks against e.to by construction of transition) + } + return state; +} + +// ─── Menu generation + selector seam ───────────────────────────────── + +/** + * A minimal deterministic menu-generator for the harness prototype. The real + * menu-generator (B-0867) weights options by DORA/trajectory; this one offers a + * safe, always-valid default menu so the loop runs in CI without an LLM. + */ +export function generateMenu(state: AgentState): readonly MenuOption[] { + switch (state.tag) { + case "Idle": + return [ + { tag: "EmitHeartbeat", lane: "heartbeat", note: "move-next harness tick" }, + { tag: "EnterFreeTime", reason: "no named work this cycle" }, + ]; + case "Paused": + return [{ tag: "ResumeFromPause" }]; + default: + // From any non-terminal state, the safe default is to record a heartbeat, + // which cycleClose() returns to Idle. + return [{ tag: "EmitHeartbeat", lane: "heartbeat", note: "cycle close" }]; + } +} + +/** The selector seam. The real version is the LLM; the default is deterministic. */ +export type SelectMove = (state: AgentState, menu: readonly MenuOption[]) => MenuOption; + +/** Default deterministic selector: take the first menu option (always valid). */ +export const firstOption: SelectMove = (_state, menu) => { + const first = menu[0]; + if (first === undefined) throw new Error("empty menu — menu-generator must offer ≥1 option"); + return first; +}; + +// ─── One cycle: read → menu → select → transition → append ─────────── + +export interface CycleResult { + readonly event: TransitionEvent; + readonly from: AgentState; + readonly to: AgentState; + readonly wrotePath: string | null; // null on dry-run +} + +export function runCycle(args: { + readonly root: string; + readonly ctx: AgentContext; + readonly deps: BuildDeps; + readonly select?: SelectMove; + readonly dryRun?: boolean; + readonly log?: Logger; +}): CycleResult { + const log = args.log ?? noopLog; + const select = args.select ?? firstOption; + const stream = loadStream(args.root, args.ctx.agent); + const prev = stream.length > 0 ? stream[stream.length - 1]!.id : null; + const from = replayState(stream, args.ctx); + const menu = generateMenu(from); + const option = select(from, menu); + const to = transition(from, option); + const event = makeTransitionEvent(args.deps, { context: args.ctx, prev, from, option, to }); + + const v = validateEnvelope(event); + if (!v.ok) throw new Error(`harness produced invalid event: ${v.errors.join("; ")}`); + + let wrotePath: string | null = null; + if (!args.dryRun) { + const rel = eventPath(args.ctx.agent, event.id); + const abs = join(args.root, rel); + mkdirSync(join(args.root, "events", args.ctx.agent), { recursive: true }); + writeFileSync(abs, JSON.stringify(event, null, 2) + "\n", "utf8"); + wrotePath = rel; + } + + // Observability: surface the KEYS the agent is using this cycle (stderr). + log({ + ev: "cycle", + agent: args.ctx.agent, // PARTITION key → events// + cycle: args.ctx.cycle, + key: event.id, // per-event key → events//.json (= id) + keyFormat: keyFormat(event.id), // "ulid" today; canonical = "zeta-id" (B-0893) + prev, // causal-link key (prev event id, or null = first) + kind: event.kind, + from: from.tag, + option: event.option.tag, + to: to.tag, + wrote: wrotePath, + dryRun: args.dryRun ?? false, + }); + return { event, from, to, wrotePath }; +} + +// ─── Bounded loop (kill-switch + hard cap) ─────────────────────────── + +export interface LoopResult { + readonly cycles: readonly CycleResult[]; + readonly stopped: "max-iterations" | "halted"; +} + +export function runLoop(args: { + readonly root: string; + readonly agent: AgentPersona; + readonly maxIterations: number; + readonly deps?: BuildDeps; + readonly select?: SelectMove; + readonly dryRun?: boolean; + readonly sessionStartIso?: string; + readonly log?: Logger; +}): LoopResult { + const deps = args.deps ?? realDeps; + const log = args.log ?? noopLog; + const cap = Math.max(0, Math.min(args.maxIterations, MAX_ITERATIONS)); // HARD clamp + const sessionStartIso = args.sessionStartIso ?? deps.nowIso(); + log({ ev: "loop-start", agent: args.agent, cap, dryRun: args.dryRun ?? false, sessionStartIso }); + const cycles: CycleResult[] = []; + for (let i = 0; i < cap; i++) { + if (isHalted(args.root)) { + log({ ev: "loop-stop", reason: "halted", cycles: cycles.length }); + return { cycles, stopped: "halted" }; + } + const ctx: AgentContext = { agent: args.agent, cycle: cycles.length, sessionStartIso }; + cycles.push( + runCycle({ + root: args.root, + ctx, + deps, + log, + ...(args.select === undefined ? {} : { select: args.select }), + ...(args.dryRun === undefined ? {} : { dryRun: args.dryRun }), + }), + ); + } + const stopped = isHalted(args.root) ? "halted" : "max-iterations"; + log({ ev: "loop-stop", reason: stopped, cycles: cycles.length }); + return { cycles, stopped }; +} + +// ─── CLI ───────────────────────────────────────────────────────────── + +function parseArgs(argv: string[]): { + root: string; + agent: AgentPersona; + maxIterations: number; + dryRun: boolean; + quiet: boolean; +} { + const get = (k: string, d: string): string => { + const i = argv.indexOf(k); + return i >= 0 && argv[i + 1] !== undefined ? argv[i + 1]! : d; + }; + return { + root: get("--root", process.cwd()), + agent: get("--agent", "otto") as AgentPersona, + maxIterations: Number.parseInt(get("--max-iterations", "1"), 10), + dryRun: argv.includes("--dry-run"), + quiet: argv.includes("--quiet"), // suppress structured stderr logging + }; +} + +if (import.meta.main) { + const a = parseArgs(process.argv.slice(2)); + if (isHalted(a.root)) { + console.log(`HALTED: events/${HALT_SENTINEL} present — refusing to run (kill-switch).`); + process.exit(0); + } + // CLI logs structured cycle/key events to STDERR by default (--quiet to mute); + // STDOUT stays the clean, parseable human summary below. + const result = runLoop({ + root: a.root, + agent: a.agent, + maxIterations: a.maxIterations, + dryRun: a.dryRun, + log: a.quiet ? noopLog : stderrLog, + }); + for (const c of result.cycles) { + console.log( + `${a.dryRun ? "[dry-run] " : ""}${c.from.tag} --(${c.event.option.tag})--> ${c.to.tag}` + + ` ${c.wrotePath ?? "(not written)"}`, + ); + } + console.log( + `cycles=${result.cycles.length} stopped=${result.stopped} ` + + `(cap=${Math.min(a.maxIterations, MAX_ITERATIONS)}/${MAX_ITERATIONS})`, + ); +} diff --git a/tools/accelerator/validate-local-llm.ts b/tools/accelerator/validate-local-llm.ts new file mode 100644 index 0000000000..043afb6f05 --- /dev/null +++ b/tools/accelerator/validate-local-llm.ts @@ -0,0 +1,65 @@ +// tools/accelerator/validate-local-llm.ts +// +// Proves the CORE local-LLM primitive actually works on THIS machine — the +// "entropy lever" end-to-end check (Aaron 2026-05-30): after install.sh has run, +// a bare machine should be working substrate. Reads the declarative pins +// (manifests/local-llm), talks to the locally-installed ollama, runs a REAL +// chooseIndex, and asserts a valid, non-fallback choice. Exits non-zero on +// failure (CI gate). Run AFTER install.sh. +// +// Note: asserts the model RESPONDED with a valid in-range index (not a specific +// answer) — that proves the real local-LLM is live. Exact-output DST assertions +// (snapshotting the deterministic temp0+seed output) belong in the test suite. + +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import { chooseIndex, ollamaBackend } from "./local-llm.ts"; + +function arg(flag: string, dflt: string): string { + const i = process.argv.indexOf(flag); + return i >= 0 && process.argv[i + 1] !== undefined ? process.argv[i + 1]! : dflt; +} + +const root = arg("--root", process.cwd()); +const manifestPath = join(root, "tools/setup/manifests/local-llm"); + +const txt = readFileSync(manifestPath, "utf8"); +const mget = (k: string): string | undefined => + txt + .split("\n") + .map((l) => l.trim()) + .filter((l) => l.length > 0 && !l.startsWith("#")) + .map((l) => l.split(/\s+/)) + .find(([key]) => key === k)?.[1]; + +const model = mget("model"); +const host = mget("host"); +const seed = Number.parseInt(mget("seed") ?? "0", 10); + +if (!model) { + console.error("validate-local-llm: no 'model' in manifest — cannot validate"); + process.exit(2); +} + +const backend = ollamaBackend({ model, seed, ...(host ? { host } : {}) }); + +const r = await chooseIndex(backend, { + context: "The agent is idle with no pending work this cycle.", + options: ["emit a heartbeat", "enter free time"], +}); + +console.log( + `validate-local-llm: backend=${backend.name} raw=${JSON.stringify(r.raw)} ` + + `index=${r.index} fallback=${r.fallback}`, +); + +if (r.fallback) { + console.error( + "validate-local-llm: FAILED — the model fell back (unreachable / unparseable). " + + "The real local-LLM did not produce a valid selection. Check that install.sh " + + "installed ollama + pulled the pinned model and the daemon is serving.", + ); + process.exit(1); +} + +console.log("validate-local-llm: OK — real local-LLM produced a valid in-range selection."); diff --git a/tools/ci/dockerfiles/nixos-install-sh-test/Dockerfile b/tools/ci/dockerfiles/nixos-install-sh-test/Dockerfile index 2c043fd14d..023474693e 100644 --- a/tools/ci/dockerfiles/nixos-install-sh-test/Dockerfile +++ b/tools/ci/dockerfiles/nixos-install-sh-test/Dockerfile @@ -116,6 +116,9 @@ COPY .mise.toml /workspace/.mise.toml # package.json + bun.lock pin TS-runtime deps if install.sh references # them (e.g., bun --version checks); copy to mirror dev environment COPY package.json bun.lock* /workspace/ +# tools/accelerator carries the local-LLM primitive's validator + tests +# (validate-local-llm.ts + local-llm.test.ts) for validation step 4 below. +COPY tools/accelerator /workspace/tools/accelerator # Run install.sh — this exercises: # 1. install.sh dispatch (detects Linux → linux.sh) @@ -156,6 +159,30 @@ RUN bash -lc 'set -o pipefail && eval "$(mise activate bash)" && \ # check). RUN nix-shell -p gh --run 'gh --version | head -1' +# Validation step 4 (B-0941): the local-LLM primitive ACTUALLY WORKS on NixOS — +# closes the false-green where the nixos test passed by SKIPPING. install.sh's +# local-llm.sh nix-branch installed ollama (FHS-safe via nix) + pulled the pinned +# model during step 130; the model persists on disk, the daemon does not across +# layers, so start it here and ASSERT (not skip): pinned model present + a REAL +# chooseIndex probe + the run-anywhere mock tests. A skip-to-green here would +# reintroduce the exact hole B-0941 names — so this RUN fails the build if the +# local-LLM is absent. (assert-don't-skip per the shield rule.) +RUN bash -lc 'set -eu; eval "$(mise activate bash)"; \ + export PATH="$HOME/.nix-profile/bin:/nix/var/nix/profiles/default/bin:$PATH"; \ + command -v ollama; \ + (ollama serve >/tmp/ollama.log 2>&1 &); \ + for _ in $(seq 1 30); do \ + curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1 && break; \ + sleep 1; \ + done; \ + curl -fsS http://127.0.0.1:11434/api/version || { echo "=== ollama serve log (/tmp/ollama.log) — daemon not reachable ==="; cat /tmp/ollama.log 2>/dev/null || echo "(no /tmp/ollama.log)"; echo "=== end serve log ==="; exit 7; }; \ + MODEL="$(grep -E "^model" tools/setup/manifests/local-llm | awk "{print \$2}")"; \ + echo "asserting model: $MODEL"; \ + ollama list; \ + ollama list | awk "NR>1 {print \$1}" | grep -qx "$MODEL"; \ + bun test tools/accelerator/local-llm.test.ts; \ + bun tools/accelerator/validate-local-llm.ts --root "$PWD"' + # Final marker — if all steps succeed, this echo lands in the build # output as the success signal for CI. RUN echo "B-0849 Phase 1 Docker harness validation COMPLETE — install.sh + mise + bun + claude-code all working on NixOS userspace" diff --git a/tools/ci/dockerfiles/ubuntu-install-sh-test/Dockerfile b/tools/ci/dockerfiles/ubuntu-install-sh-test/Dockerfile new file mode 100644 index 0000000000..a0c6c5f960 --- /dev/null +++ b/tools/ci/dockerfiles/ubuntu-install-sh-test/Dockerfile @@ -0,0 +1,61 @@ +# tools/ci/dockerfiles/ubuntu-install-sh-test/Dockerfile +# +# Docker-based install.sh test on Ubuntu userspace — sibling to +# nixos-install-sh-test (Aaron 2026-05-30: "center our docker tests around +# ubuntu and nixos and have tests for both with install.sh"). Proves the entropy +# lever on Ubuntu: a bare ubuntu image + install.sh => working substrate, +# INCLUDING the core local-LLM primitive (ollama + pinned model + real probe). +# +# NixOS is primary (declarative-by-construction; boots the real hardware via the +# USB/ISO). Ubuntu is made to ACT declarative via install.sh + the manifests +# (per B-0940) — this test guards that retrofit. +# +# The build IS the test: a failing install.sh / assert fails the build. +# install.sh runs as root (linux.sh handles root-vs-sudo via `id -u`). + +# Pinned by digest (per .claude/rules/dep-pin-search-first-authority.md; matches +# the nixos Dockerfile's digest-pin discipline). ubuntu:24.04 digest selected +# 2026-05-30 via the Docker registry API; bump: re-query +# registry-1.docker.io/v2/library/ubuntu/manifests/24.04 for the current digest. +FROM ubuntu:24.04@sha256:c4a8d5503dfb2a3eb8ab5f807da5bc69a85730fb49b5cfca2330194ebcc41c7b + +ENV DEBIAN_FRONTEND=noninteractive + +# Bootstrap prereqs install.sh needs before its own apt step runs: curl (mise + +# ollama downloads), ca-certificates (HTTPS), git, xz-utils. install.sh's apt +# step (manifests/apt) then installs the full set incl. zstd (ollama .tar.zst). +RUN apt-get update \ + && apt-get install -y --no-install-recommends ca-certificates curl git xz-utils \ + && rm -rf /var/lib/apt/lists/* + +# Pre-stage mise + bun + ollama PATH for ALL subsequent RUN layers — Docker does +# NOT persist install.sh's in-process PATH exports across layers (same fix as the +# nixos Dockerfile). install.sh installs mise to ~/.local/bin, shims to +# ~/.local/share/mise/shims, bun to ~/.bun/bin, ollama to ~/.local/bin. +ENV PATH=/root/.bun/bin:/root/.local/share/mise/shims:/root/.local/bin:/usr/local/bin:/usr/bin:/bin + +WORKDIR /zeta +COPY . /zeta + +# The entropy lever: bare ubuntu -> working substrate (incl. the local-LLM core). +RUN ./tools/setup/install.sh + +# Validate the local-LLM primitive end-to-end. The MODEL persists on disk +# (install.sh pulled it into a layer); the DAEMON does not persist across RUN +# layers, so start it here, assert the pinned model, then run the REAL chooseIndex +# probe + the run-anywhere mock tests. +RUN set -eu; \ + export PATH="/root/.local/bin:$PATH"; \ + command -v ollama; \ + (ollama serve >/tmp/ollama.log 2>&1 &); \ + for _ in $(seq 1 30); do \ + curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1 && break; \ + sleep 1; \ + done; \ + curl -fsS http://127.0.0.1:11434/api/version; \ + MODEL="$(grep -E '^model' tools/setup/manifests/local-llm | awk '{print $2}')"; \ + echo "asserting model: $MODEL"; \ + ollama list; \ + ollama list | awk 'NR>1 {print $1}' | grep -qx "$MODEL"; \ + bun test tools/accelerator/local-llm.test.ts; \ + bun tools/accelerator/validate-local-llm.ts --root "$PWD" diff --git a/tools/hygiene/check-bash-retirement-inventory.test.ts b/tools/hygiene/check-bash-retirement-inventory.test.ts index f3a260089a..28591660df 100644 --- a/tools/hygiene/check-bash-retirement-inventory.test.ts +++ b/tools/hygiene/check-bash-retirement-inventory.test.ts @@ -273,7 +273,7 @@ describe("renderReport", () => { expect(renderReport(report)).toContain(`OK: retained non-Lean shell surface matches ${RETAINED_SHELL_SCOPE}.`); expect(renderReport(report)).toContain("## Retained shell categories"); - expect(renderReport(report)).toContain("- setup/bootstrap: 13"); + expect(renderReport(report)).toContain("- setup/bootstrap: 14"); expect(renderReport(report)).toContain("- host-service wrappers: 2"); }); diff --git a/tools/hygiene/check-bash-retirement-inventory.ts b/tools/hygiene/check-bash-retirement-inventory.ts index 744d63ce11..5adff9cc30 100644 --- a/tools/hygiene/check-bash-retirement-inventory.ts +++ b/tools/hygiene/check-bash-retirement-inventory.ts @@ -95,6 +95,7 @@ export const EXPECTED_RETAINED_SHELL: readonly string[] = [ "tools/setup/common/curl-fetch.sh", "tools/setup/common/dotnet-tools.sh", "tools/setup/common/elan.sh", + "tools/setup/common/local-llm.sh", "tools/setup/common/mise.sh", "tools/setup/common/profile-edit.sh", "tools/setup/common/python-tools.sh", @@ -131,6 +132,7 @@ export const RETAINED_SHELL_CATEGORY_BY_FILE: Readonly&2 + exit 0 +fi + +# ── 1. ensure the ollama binary (Linux installs pinned release; macOS via brew) ── +if ! command -v ollama >/dev/null 2>&1; then + case "$(uname -s)" in + Linux) + # NixOS: the generic glibc release binary won't run (non-FHS). Install ollama + # via nix instead — FHS-safe, works in the nixos/nix container AND on real + # NixOS, and floats with the channel (consistent with the float-ollama + # decision). This is the install.sh-retrofit path that closes B-0941's test + # false-green; the declarative real-hardware self-heal layer is + # services.ollama in configuration.nix (complementary). linux.sh already + # routes NixOS via /etc/NIXOS; honor the same marker here. + if [ -f /etc/NIXOS ]; then + echo "↓ NixOS detected — installing ollama via nix (FHS-safe)..." + # Diagnosed across runs 26685829032 + 26685902159 (surfaced stderr): + # - nix-env -iA nixpkgs.ollama → 'bad meta.outputsToInstall' + # - nix profile install [--priority N] → coreutils-full FILE COLLISION + # (ollama's closure brings coreutils-full vs the profile's existing one; + # --priority did not resolve it — profile-install is structurally + # collision-prone here). + # Robust fix: DON'T mutate the profile. `nix build` the store path and + # symlink bin/ollama onto PATH — no profile entry, no collision, FHS-safe + # in the container AND on real NixOS. (The declarative real-hardware path is + # services.ollama in configuration.nix — complementary.) Surface stderr; + # graceful (warn + exit 0 so install.sh never bricks over a best-effort probe). + ollama_store="$(nix --extra-experimental-features 'nix-command flakes' build --no-link --print-out-paths nixpkgs#ollama 2>&1 | tail -1)" + if [ -n "$ollama_store" ] && [ -x "$ollama_store/bin/ollama" ]; then + mkdir -p "$HOME/.local/bin" + # WRAPPER (not bare symlink): the nix-built ollama has the correct glibc in + # its RPATH, but a polluting LD_LIBRARY_PATH (e.g. the docker-nixos test's + # FHS-mise glibc hack) OVERRIDES the RPATH → 'symbol lookup error: libc.so.6 + # undefined symbol __nptl_change_stack_perm GLIBC_PRIVATE' (run 26686054042). + # The wrapper runs ollama clear of LD_LIBRARY_PATH so EVERY call (install-time + # serve+pull AND the test's assert) uses ollama's own glibc. Harmless on real + # NixOS / ubuntu / mac (LD_LIBRARY_PATH unset there → env -u is a no-op). + printf '#!/usr/bin/env bash\nexec env -u LD_LIBRARY_PATH %s/bin/ollama "$@"\n' "$ollama_store" > "$HOME/.local/bin/ollama" + chmod +x "$HOME/.local/bin/ollama" + echo " ✓ ollama via nix build + LD_LIBRARY_PATH-clean wrapper ($ollama_store/bin/ollama)" + else + echo "warn: nix build ollama failed ($ollama_store); skipping local-llm (tests fall back to mock)" >&2; exit 0 + fi + export PATH="$HOME/.local/bin:$PATH" + command -v ollama >/dev/null 2>&1 || { echo "warn: ollama not on PATH after nix build; skipping local-llm" >&2; exit 0; } + else + case "$(uname -m)" in + x86_64 | amd64) oarch=amd64 ;; + aarch64 | arm64) oarch=arm64 ;; + *) echo "warn: unsupported arch $(uname -m) for ollama; skipping local-llm" >&2; exit 0 ;; + esac + tmp="$(mktemp -d)" + # FLOATING latest (Aaron 2026-05-30): the ollama *runtime* version does not + # affect DST reproducibility — the pinned MODEL + temp0 + seed do — so we + # track latest (less maintenance). GitHub's /releases/latest/download/ + # auto-redirects to the newest release's asset (no API call, no pin). + # Asset is .tar.zst (zstd), NOT .tgz — verified against the release API + # 2026-05-30 (ollama-linux-amd64.tar.zst). The bare ollama-linux-.tgz + # name 404s; this was caught by the validation workflow. + url="https://github.com/ollama/ollama/releases/latest/download/ollama-linux-${oarch}.tar.zst" + echo "↓ installing ollama (latest, linux-${oarch})..." + if ! curl_fetch --output "${tmp}/ollama.tar.zst" "$url"; then + echo "warn: ollama download failed; skipping local-llm (tests fall back to mock)" >&2; exit 0 + fi + mkdir -p "$HOME/.local" + # ollama-linux-.tar.zst extracts bin/ollama + lib/ollama under the + # prefix. zstd-compressed → tar --zstd (zstd is present on ubuntu runners; + # GNU tar + bsdtar both support --zstd). + if ! tar -C "$HOME/.local" --zstd -xf "${tmp}/ollama.tar.zst"; then + echo "warn: ollama extract failed (zstd?); skipping local-llm (tests fall back to mock)" >&2; exit 0 + fi + export PATH="$HOME/.local/bin:$PATH" + fi + ;; + Darwin) + echo "warn: ollama not found on macOS — expected via manifests/brew (brew install ollama)." >&2 + echo " Skipping model pull; re-run after the brew step installs it." >&2 + exit 0 + ;; + *) + echo "warn: unknown OS '$(uname -s)' for ollama install; skipping local-llm" >&2; exit 0 ;; + esac +fi + +# ── 2. ensure the daemon is reachable (start in background if needed) ── +if ! curl -fsS "${HOST}/api/version" >/dev/null 2>&1; then + echo "↓ starting ollama serve (background)..." + (ollama serve >/dev/null 2>&1 &) + for _ in $(seq 1 30); do + curl -fsS "${HOST}/api/version" >/dev/null 2>&1 && break + sleep 1 + done +fi +if ! curl -fsS "${HOST}/api/version" >/dev/null 2>&1; then + echo "warn: ollama daemon not reachable at ${HOST}; skipping model pull (tests fall back to mock)" >&2 + exit 0 +fi + +# ── 3. pull the pinned model (idempotent) ── +if ollama list 2>/dev/null | awk 'NR>1 {print $1}' | grep -qx "$MODEL"; then + echo "✓ local-llm model ${MODEL} already present" +else + echo "↓ pulling ${MODEL} (~400MB, one-time)..." + if ! ollama pull "$MODEL"; then + echo "warn: 'ollama pull ${MODEL}' failed; skipping (tests fall back to mock)" >&2 + exit 0 + fi +fi +echo "✓ local-llm primitive ready: ${MODEL} via ollama $(ollama --version 2>/dev/null | head -1 || echo '(version unknown)')" diff --git a/tools/setup/linux.sh b/tools/setup/linux.sh index 4aea388313..b84bda80a4 100755 --- a/tools/setup/linux.sh +++ b/tools/setup/linux.sh @@ -176,5 +176,8 @@ export PATH="$HOME/.dotnet/tools:$PATH" "$SETUP_DIR/common/elan.sh" "$SETUP_DIR/common/dotnet-tools.sh" "$SETUP_DIR/common/verifiers.sh" +# Local-LLM core primitive — installs pinned ollama binary + pulls the pinned +# tiny model (manifests/local-llm). Graceful: warns + continues on failure. +"$SETUP_DIR/common/local-llm.sh" "$SETUP_DIR/common/shellenv.sh" "$SETUP_DIR/common/profile-edit.sh" diff --git a/tools/setup/macos.sh b/tools/setup/macos.sh index 7efaeb5224..5012a1cbe0 100755 --- a/tools/setup/macos.sh +++ b/tools/setup/macos.sh @@ -142,5 +142,8 @@ export PATH="$HOME/.dotnet/tools:$PATH" "$SETUP_DIR/common/elan.sh" "$SETUP_DIR/common/dotnet-tools.sh" "$SETUP_DIR/common/verifiers.sh" +# Local-LLM core primitive — macOS gets the ollama binary via manifests/brew +# (above); this pulls the pinned tiny model (manifests/local-llm). Graceful. +"$SETUP_DIR/common/local-llm.sh" "$SETUP_DIR/common/shellenv.sh" "$SETUP_DIR/common/profile-edit.sh" diff --git a/tools/setup/manifests/apt b/tools/setup/manifests/apt index d1d0d1cdb3..dbbfcbebcc 100644 --- a/tools/setup/manifests/apt +++ b/tools/setup/manifests/apt @@ -15,3 +15,19 @@ git p7zip-full # cascade #4 ISO content audit (7z list); ubuntu-24.04 # default-installs but Linux maintainers running setup # locally need explicit declaration + +# Local-LLM core primitive (Aaron 2026-05-30): the ollama Linux release is a +# .tar.zst (zstd), so common/local-llm.sh needs `tar --zstd` ⇒ the zstd binary. +zstd # required to extract ollama-linux-.tar.zst + +# .NET runtime native deps (mise installs the dotnet SDK; it needs these shared +# libs to RUN). Present on full ubuntu runners (implicit), MISSING on a minimal +# ubuntu:24.04 image — the docker-ubuntu-install-sh-test exposed this (dotnet +# exited with no status = missing libicu). Declaring them makes the entropy lever +# work on TRULY bare ubuntu. Per Microsoft Learn linux-scripted-manual deps; +# build-essential already pulls libstdc++6/libgcc-s1/zlib1g. Names are Ubuntu +# 24.04 (Noble: libicu74, libssl3t64 post-time_t-transition). +libicu74 # ICU — .NET globalization (the classic "dotnet exited" cause) +libssl3t64 # OpenSSL 3 runtime (Noble t64 name) +libgssapi-krb5-2 # Kerberos/GSSAPI — .NET networking +tzdata # timezone data — .NET DateTime diff --git a/tools/setup/manifests/brew b/tools/setup/manifests/brew index c8410b4495..69c050975d 100644 --- a/tools/setup/manifests/brew +++ b/tools/setup/manifests/brew @@ -23,3 +23,10 @@ hermes-agent # "Self-improving AI agent that creates skills from # resolved by brew (see `brew info hermes-agent` for # current list). Idempotent: brew install skips if # present. + +# Local-LLM core primitive (Aaron 2026-05-30 — "core, not optional"; small +# CPU model = baseline substrate). macOS installs the ollama binary here; the +# pinned MODEL is pulled by common/local-llm.sh per manifests/local-llm (the +# model is the reproducible/pinned artifact; brew tracks latest ollama binary). +ollama # CPU-served tiny model for the move-next selector + observe.ts + # classifier + DST fixtures. Idempotent: brew install skips if present. diff --git a/tools/setup/manifests/local-llm b/tools/setup/manifests/local-llm new file mode 100644 index 0000000000..0c9da09c8e --- /dev/null +++ b/tools/setup/manifests/local-llm @@ -0,0 +1,27 @@ +# tools/setup/manifests/local-llm — declarative pins for the CORE local-LLM +# primitive: a small CPU-only model served by Ollama, account-free. +# +# Why core (Aaron 2026-05-30): small CPU-capable local LLMs are a baseline +# substrate primitive (like a language runtime), not an optional extra. Consumers: +# - accelerator move-next selector ("choose your own adventure") +# - observe.ts auto-classifier (input -> one label) +# - DST test fixtures: temp 0 + fixed seed + pinned model = reproducible, so a +# real (not mocked) model can back deterministic-simulation tests. +# +# Installed by tools/setup/common/local-llm.sh (idempotent, graceful). Format: +# `key value` (one per line; comments start with `#`). +# +# Ollama runtime: FLOATING latest (Aaron 2026-05-30) — the runtime version does +# not affect DST reproducibility (the pinned MODEL + temp0 + seed do), so we track +# latest for less maintenance. Installed per-OS: macOS via manifests/brew; Linux +# via GitHub /releases/latest; Windows via install.ps1 (peer surface) — all read +# the model/seed/host below from THIS manifest (OS-agnostic shared contract). + +# Tiny instruct model — 398MB Q4_K_M, CPU-friendly +# (https://ollama.com/library/qwen2.5:0.5b). PINNED tag — the reproducible +# artifact for DST (temp0 + seed + this pin = deterministic). +model qwen2.5:0.5b + +# Deterministic defaults for DST reproducibility (greedy + fixed seed). +seed 0 +host http://127.0.0.1:11434