From a03a2e92dd3dd79466b2f5c7bb952440755ff5f5 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 4 Jun 2026 02:29:02 -0400 Subject: [PATCH 1/8] docs(verify): plan + decisions log for Epic #208 --- planning/issues/208/decisions.md | 60 +++++++++++++++++++++++++++ planning/issues/208/plan.md | 70 ++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 planning/issues/208/decisions.md create mode 100644 planning/issues/208/plan.md diff --git a/planning/issues/208/decisions.md b/planning/issues/208/decisions.md new file mode 100644 index 0000000..47f33d6 --- /dev/null +++ b/planning/issues/208/decisions.md @@ -0,0 +1,60 @@ +# Decisions — Issue #208 (Phase-12 live-smoke verification harness) + +## One shared `runFileModeSmoke()` runner, two consumers +**File(s):** `packages/dispatcher/src/epic-store/file-mode-smoke.ts` +**Date:** 2026-06-04 + +**Decision:** Put the smoke drive in a single runner in the dispatcher package, +returning structured per-section results, and have both #212's `bun test` and +#213's `mm verify-file-mode` call it. +**Why:** #213's AC explicitly allows "delegating to the integration fixture from +the sibling sub-issue". A second hand-rolled drive would be a parity hazard — the +exact failure the Epic exists to prevent. The runner lives in the dispatcher (not +the CLI) because it depends on dispatcher internals (`Engine`, +`createImplementationWorkflow`, the gateways, `runFileWatcherTick`); the CLI +already imports dispatcher internals. +**Evidence:** Precedent `file-dispatch-integration.test.ts` / `parity.test.ts` +both drive the real workflow; this consolidates that drive into a reusable seam. 
+ +## Resume via the real file-watcher, not a direct `engine.signal` +**File(s):** `packages/dispatcher/src/epic-store/file-mode-smoke.ts` +**Date:** 2026-06-04 + +**Decision:** The "answer" step writes a non-empty answer +block to the Epic file (via `writeEpicFile`), then drives `runFileWatcherTick` +(the real watcher) to detect it and fire the resume — rather than calling +`engine.signal(RESUME_EVENT)` directly like `parity.test.ts` does. +**Why:** #212's framing is "resume-via-edit"; exercising the real watcher proves +the file-mode resume path end to end (mtime poll → open-question-with-answer +detection → `fireSignal` → flip to `resolved`), which is exactly the seam the +live gap left unproven. `engine.signal` would skip the watcher entirely. +**Evidence:** `watcher.ts` `runFileWatcherTick`; `main.ts` wires +`fireSignal: (id, p) => engine.signal(id, RESUME_EVENT, p)` — the runner mirrors it. + +## Assert the worktree checkbox from a capture, because finalize destroys it +**File(s):** `packages/dispatcher/src/epic-store/file-mode-smoke.ts` +**Date:** 2026-06-04 + +**Decision:** The stub adapter flips the sub-issue 1 checkbox to `[x]` in the worktree +on the resume drive and the runner captures the worktree Epic file content at +that moment; the assertion reads the capture. +**Why:** `finalize` calls `destroyWorktree` on a `completed` terminal, removing +the worktree directory — so a post-completion read of the worktree file would +find nothing. The capture is the faithful state the agent left behind. The Epic +file must be **committed** in the tmpdir repo first, or `git worktree add` (which +checks out HEAD) yields a worktree without the file. +**Evidence:** `implementation.ts` `finalize` (`if (finalState !== "waiting-human") +await destroyWorktree`); `worktree.ts` `destroyWorktree` rmSyncs the dir. 
+ +## `--live` evidence run is the operator step; headless ships code + deterministic tests +**File(s):** `packages/cli/src/commands/verify-file-mode.ts` +**Date:** 2026-06-04 + +**Decision:** Build the `--live` command and a deterministic plumbing test +(stubbed gh/daemon boundary), but treat the actual real-GitHub evidence run as a +human-operated step. +**Why:** The Epic context states a headless run "could not create a throwaway +GitHub repo or spawn a real agent" — the live run fundamentally needs a real +coding agent to open a real PR. Faking the evidence would re-create the very +trust gap this Epic closes. +**Evidence:** Epic #208 "Context"; the deferred-smoke notes in PR #198/#207. diff --git a/planning/issues/208/plan.md b/planning/issues/208/plan.md new file mode 100644 index 0000000..27259a9 --- /dev/null +++ b/planning/issues/208/plan.md @@ -0,0 +1,70 @@ +# Issue #208: feat(verify): Phase-12 live-smoke verification harness + +**Link:** https://github.com/thejustinwalsh/middle/issues/208 +**Branch:** middle-issue-208 + +## Goal +Close the file-mode trust gap with a verification harness: a deterministic +integration test that drives the **real** file-mode workflow on every commit, an +operator command (`mm verify-file-mode`) that runs it with a structured report, +a real-GitHub smoke (`--live`), and docs that tell operators what it proves. + +## Approach +- **One shared runner, two consumers.** Extract `runFileModeSmoke()` into the + dispatcher: it stands up a tmpdir git repo configured `epic_store="file"`, + authors an Epic file, and drives the **real** `createImplementationWorkflow` + (real engine, `createWorktree`, `parseEpicFile`/`renderEpicFile`, the real + `makeDefaultPostQuestion` + `runFileWatcherTick`) through + dispatch → park-on-question → answer-via-file-edit → resume → complete, + returning structured per-section results (`{name, ok, ms, detail}[]`). The gh + boundary is stubbed at `EpicGateway`'s PR/comment methods only. 
Both #212's CI + test and #213's CLI command call this one runner — no re-implemented drive. +- **#212** is the `bun test` that calls the runner and asserts the deep + invariants (row `completed`, worktree sub-issue checkbox `[x]`, conversation + has exactly one question + one answer, tmpdir cleaned up). +- **#213** is `mm verify-file-mode`, which runs the runner over its own throwaway + fixture and prints a `mm doctor`-style report (one line per section, PASS/FAIL + + wall-time, summary line; exit 0/1 with the failing section named last). Its + integration test spawns the real CLI via `Bun.spawn` and asserts the report. +- **#214** is `mm verify-file-mode --live --repo <owner/name>`: the same loop against + real GitHub via the daemon control plane — write Epic file, dispatch, await a + draft PR, edit the answer block, await completion, assert the PR exists with + the sub-issue checkbox flipped, then clean up (close PR + delete branch) on + success / leave artifacts + print URLs on failure. Deterministic plumbing test + stubs the gh + daemon boundary. The actual live-GitHub *evidence run* is the + operator step the Epic itself acknowledges a headless run can't perform + ("could not create a throwaway GitHub repo or spawn a real agent"). +- **#215** documents the harness in `docs/dogfooding.md`, cross-links from + `docs/operator.md` and `README.md`, and adds `docs-cross-link.test.ts` that + boots the CLI (`mm verify-file-mode --help` exits 0) and greps every `mm <command>` + in the docs back to a registered command. + +## Phases (one per sub-issue) +1. **#212** — `live-smoke.test.ts` + the shared `runFileModeSmoke()` runner. +2. **#213** — `mm verify-file-mode` command + structured report + spawn test. +3. **#214** — `mm verify-file-mode --live` + deterministic plumbing test. +4. **#215** — docs + cross-link test. + +## Files likely to change +- `packages/dispatcher/src/epic-store/file-mode-smoke.ts` (new) — the runner. +- `packages/dispatcher/src/index.ts` — export the runner. 
+- `packages/dispatcher/test/epic-store/live-smoke.test.ts` (new) — #212. +- `packages/cli/src/commands/verify-file-mode.ts` (new) — #213 + #214 command. +- `packages/cli/src/index.ts` — register `verify-file-mode`. +- `packages/cli/test/verify-file-mode.test.ts` (new) — #213 spawn test. +- `packages/cli/test/verify-file-mode-live.test.ts` (new) — #214 plumbing test. +- `docs/dogfooding.md`, `docs/operator.md`, `README.md` — #215. +- `packages/cli/test/docs-cross-link.test.ts` (new) — #215. + +## Out of scope +- Putting `--live` in CI (operator-cadence by design — racy + token-costly). +- A scheduled weekly live run (separate hosting decision). + +## Open questions +- **The live-GitHub evidence run (Epic AC #3 + #214's live criterion) needs a + human operator.** A headless dispatch cannot stand up a throwaway GitHub repo + and spawn a real coding agent that opens a real PR — the Epic context says as + much. All *code* (incl. the `--live` command) and all *deterministic* tests + will land green; the one-shot live evidence is the operator's post-merge step, + documented in #215. If the PR-ready gate blocks on that criterion, it will be + surfaced for operator action rather than faked. From 2650d9012966a6bb59ee8ebb292297baaa7f1de9 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 4 Jun 2026 02:36:32 -0400 Subject: [PATCH 2/8] test(epic-store): file-mode live-smoke integration + shared runFileModeSmoke runner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #212. Drives the real createImplementationWorkflow through the full file-mode loop (dispatch → park-on-question → answer-via-file-edit → resume → complete) against an in-tmpdir epic_store=file repo, stubbing only EpicGateway's PR/comment boundary. 
The drive lives in runFileModeSmoke so mm verify-file-mode (sibling) exercises the identical path; live-smoke.test.ts asserts the deep invariants (completed, worktree sub-issue checkbox [x], one question + one answer, tmpdir cleaned up). --- .../src/epic-store/file-mode-smoke.ts | 397 ++++++++++++++++++ .../test/epic-store/live-smoke.test.ts | 73 ++++ 2 files changed, 470 insertions(+) create mode 100644 packages/dispatcher/src/epic-store/file-mode-smoke.ts create mode 100644 packages/dispatcher/test/epic-store/live-smoke.test.ts diff --git a/packages/dispatcher/src/epic-store/file-mode-smoke.ts b/packages/dispatcher/src/epic-store/file-mode-smoke.ts new file mode 100644 index 0000000..cfd7e0b --- /dev/null +++ b/packages/dispatcher/src/epic-store/file-mode-smoke.ts @@ -0,0 +1,397 @@ +/** + * The file-mode end-to-end smoke: drive the **real** implementation workflow + * through the full file-mode loop — dispatch → park-on-question → + * answer-via-file-edit → resume → complete — against a throwaway tmpdir git repo + * configured `epic_store="file"`, with the gh boundary stubbed at `EpicGateway`'s + * PR/comment methods only. Everything else is the production path: the real + * `Engine`, `createImplementationWorkflow`, `createWorktree`, the real + * `makeDefaultPostQuestion` (file branch → `appendQuestion`), and the real + * `runFileWatcherTick` that turns a human's answer-block edit into the resume + * signal exactly as the daemon's poller cron does. + * + * This is the deterministic foundation the live-smoke harness rests on. It is + * consumed by two callers — `packages/dispatcher/test/epic-store/live-smoke.test.ts` + * (the CI integration test, which asserts the deep invariants) and `mm + * verify-file-mode` (the operator command, which formats {@link SmokeResult} into + * a structured report). One drive, two consumers — so the command can never drift + * from what CI proves. 
+ */ + +import { mkdirSync, mkdtempSync, readFileSync, realpathSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { Engine } from "bunqueue/workflow"; +import type { AgentAdapter, HookPayload, StopClassification } from "@middle/core"; +import { makeDefaultPostQuestion } from "../build-deps.ts"; +import { openAndMigrate } from "../db.ts"; +import type { EpicGateway } from "../github.ts"; +import type { SessionGate } from "../hook-server.ts"; +import { registerManagedRepo, setEpicStoreConfig } from "../repo-config.ts"; +import { getWaitForSignal, getWorkflow } from "../workflow-record.ts"; +import { + createImplementationWorkflow, + RESUME_EVENT, + type ImplementationDeps, +} from "../workflows/implementation.ts"; +import { createWorktree, destroyWorktree } from "../worktree.ts"; +import { epicFilePath, readEpicFile, writeEpicFile } from "./epic-file-io.ts"; +import type { ConversationEntry, EpicFile } from "./epic-file/types.ts"; +import { renderEpicFile } from "./epic-file/renderer.ts"; +import { runFileWatcherTick } from "./watcher.ts"; + +/** The seven phases of the smoke, in drive order — also the report section names. */ +export const SMOKE_SECTIONS = [ + "init", + "author", + "dispatch", + "park", + "answer", + "resume", + "complete", +] as const; +export type SmokeSectionName = (typeof SMOKE_SECTIONS)[number]; + +/** One phase's result: did it pass, how long it took, and a one-line detail. */ +export type SmokeSection = { + name: SmokeSectionName; + ok: boolean; + ms: number; + detail: string; +}; + +/** The structured outcome of one smoke run — the report source and the test's assertion surface. */ +export type SmokeResult = { + /** True iff every section passed. */ + ok: boolean; + sections: SmokeSection[]; + /** Name of the first failed section (the report's last line), or null on success. 
*/ + failedSection: SmokeSectionName | null; + /** The repo Epic file's conversation after the full loop (one question, answered + resolved). */ + conversation: ConversationEntry[]; + /** Raw markdown of the repo Epic file after the loop — for marker-count assertions. */ + rawEpicText: string; + /** The worktree's Epic file as the agent left it (checkbox flipped), captured before teardown. */ + worktreeEpic: EpicFile | null; + /** The worktree path the resume drive ran in (proves the agent worked in the worktree). */ + worktreePath: string | null; + /** gh comment/post calls the run made — must be empty in file mode (gh stub untouched). */ + ghCalls: Array<{ method: string; repo: string; ref: string }>; + /** The throwaway scratch dir — removed by the runner; the caller asserts it's gone. */ + scratchDir: string; + /** True once the scratch dir was removed (cleanup ran regardless of outcome). */ + cleanedUp: boolean; +}; + +/** Tunables (the test/command can shorten or lengthen the in-drive waits). */ +export type SmokeOptions = { + launchTimeoutMs?: number; + stopTimeoutMs?: number; + livenessPollMs?: number; + /** How long to wait for each workflow-state transition before failing the section. 
*/ + stateTimeoutMs?: number; +}; + +const SLUG = "verify-file-mode-smoke"; +const REPO = "middle-smoke/file-repo"; +const QUESTION = "Approach A or B?"; +const ANSWER = "Go with A."; +const GIT_ENV = { + ...process.env, + GIT_AUTHOR_NAME: "middle-smoke", + GIT_AUTHOR_EMAIL: "middle-smoke@example.invalid", + GIT_COMMITTER_NAME: "middle-smoke", + GIT_COMMITTER_EMAIL: "middle-smoke@example.invalid", +}; + +async function git(cwd: string, args: string[]): Promise { + const proc = Bun.spawn(["git", "-C", cwd, ...args], { + stdout: "ignore", + stderr: "pipe", + env: GIT_ENV, + }); + if ((await proc.exited) !== 0) { + throw new Error(`git ${args.join(" ")}: ${(await new Response(proc.stderr).text()).trim()}`); + } +} + +/** A SessionGate whose Stop wait never resolves — the smoke's outcome is decided + * by the stub adapter's classification + the always-present blocked.json sentinel + * (the same shape `parity.test.ts` uses), not by a real Stop hook. */ +const hangingGate: SessionGate = { + awaitSessionStart: async () => + ({ session_id: "smoke", transcript_path: "/tmp/smoke.jsonl" }) as HookPayload, + awaitStop: () => new Promise(() => {}), +}; + +/** Poll `getWorkflow(...).state` until it equals `state` or the deadline passes. */ +async function awaitState( + db: ReturnType, + id: string, + state: string, + timeoutMs: number, +): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + if (getWorkflow(db, id)?.state === state) return; + await Bun.sleep(20); + } + throw new Error(`workflow did not reach '${state}' (was '${getWorkflow(db, id)?.state}')`); +} + +/** + * Run the full file-mode smoke once. Never throws for a *workflow* failure — it + * captures the failing section in {@link SmokeResult} and always tears the + * scratch dir down. It only throws if cleanup itself fails (a real disk fault). + */ +export async function runFileModeSmoke(opts: SmokeOptions = {}): Promise { + const launchTimeoutMs = opts.launchTimeoutMs ?? 
2000; + const stopTimeoutMs = opts.stopTimeoutMs ?? 2000; + const livenessPollMs = opts.livenessPollMs ?? 20; + const stateTimeoutMs = opts.stateTimeoutMs ?? 8000; + + const sections: SmokeSection[] = []; + let failedSection: SmokeSectionName | null = null; + + // Shared mutable state across sections (assigned as the drive progresses). + let scratch = ""; + let repoPath = ""; + let epicsDir = ""; + let worktreeRoot = ""; + let db: ReturnType | null = null; + let engine: Engine | null = null; + let workflowId = ""; + const ghCalls: Array<{ method: string; repo: string; ref: string }> = []; + let worktreeEpic: EpicFile | null = null; + let worktreePath: string | null = null; + let conversation: ConversationEntry[] = []; + let rawEpicText = ""; + + // The gh stub: file mode never calls it, so any call is a contract violation + // the caller asserts against. Shaped as the slice `makeDefaultPostQuestion` reads. + const ghStub = { + async listIssueComments(repo: string, ref: string) { + ghCalls.push({ method: "listIssueComments", repo, ref }); + return []; + }, + async postComment(repo: string, ref: string) { + ghCalls.push({ method: "postComment", repo, ref }); + }, + } as unknown as EpicGateway; + + // The stub adapter: always writes a blocked.json (so a hung session parks on + // the sentinel rather than throwing), classifies the first drive as a question + // and the resume drive as a bare-stop, and on the resume drive does the agent's + // "work" — flip the `` checkbox in the worktree. The worktree + // Epic file is captured then, because `finalize` destroys the worktree on the + // completed terminal (a post-completion read would find nothing). 
+ let installCount = 0; + let classifyIdx = 0; + const classifications: StopClassification[] = [ + { + kind: "asked-question", + sentinelPath: "/x/.middle/blocked.json", + sentinel: { question: QUESTION }, + }, + { kind: "bare-stop" }, + ]; + const adapter: AgentAdapter = { + name: "stub", + readyEvent: "session.started", + async installHooks(o) { + installCount += 1; + mkdirSync(join(o.worktree, ".middle"), { recursive: true }); + writeFileSync( + join(o.worktree, ".middle", "blocked.json"), + JSON.stringify({ question: QUESTION }), + ); + if (installCount >= 2) { + const wtEpicsDir = join(o.worktree, "planning", "epics"); + const epic = readEpicFile(wtEpicsDir, SLUG); + if (epic) { + writeEpicFile(wtEpicsDir, SLUG, { + ...epic, + subIssues: epic.subIssues.map((s) => (s.id === 1 ? { ...s, checked: true } : s)), + }); + worktreeEpic = readEpicFile(wtEpicsDir, SLUG); + worktreePath = o.worktree; + } + } + }, + buildLaunchCommand: () => ({ argv: ["true"], env: {} }), + buildPromptText: () => "@.middle/prompt.md", + async enterAutoMode() {}, + resolveTranscriptPath: (p) => p.transcript_path as string, + readTranscriptState: () => ({ + lastActivity: "", + contextTokens: 0, + turnCount: 0, + lastToolUse: null, + }), + classifyStop: () => classifications[Math.min(classifyIdx++, classifications.length - 1)]!, + }; + + /** Run one section, time it, record the result; skip if a prior section failed. 
*/ + async function section(name: SmokeSectionName, body: () => Promise): Promise { + if (failedSection !== null) { + sections.push({ name, ok: false, ms: 0, detail: `skipped after '${failedSection}' failed` }); + return; + } + const start = Date.now(); + try { + const detail = await body(); + sections.push({ name, ok: true, ms: Date.now() - start, detail }); + } catch (error) { + failedSection = name; + sections.push({ name, ok: false, ms: Date.now() - start, detail: (error as Error).message }); + } + } + + try { + await section("init", async () => { + scratch = realpathSync(mkdtempSync(join(tmpdir(), "middle-smoke-"))); + repoPath = join(scratch, "repo"); + worktreeRoot = join(scratch, "worktrees"); + epicsDir = join(repoPath, "planning", "epics"); + await git(scratch, ["init", "repo"]); + await git(repoPath, ["commit", "--allow-empty", "-m", "init"]); + return `tmpdir repo at ${repoPath}`; + }); + + await section("author", async () => { + mkdirSync(epicsDir, { recursive: true }); + writeFileSync( + epicFilePath(epicsDir, SLUG), + renderEpicFile({ + title: "feat: file-mode smoke", + meta: { slug: SLUG, adapter: "stub" }, + context: "Verify the file-mode workflow end to end.", + acceptanceCriteria: [{ checked: false, text: "ship" }], + subIssues: [{ id: 1, checked: false, title: "1 — gateways", body: "" }], + conversation: [], + }), + ); + // Commit the Epic file so the worktree checkout (HEAD) contains it — without + // this, `git worktree add` yields a worktree missing planning/epics/. 
+ await git(repoPath, ["add", "planning/epics"]); + await git(repoPath, ["commit", "-m", "author epic"]); + + db = openAndMigrate(join(scratch, "db.sqlite3")); + registerManagedRepo(db, REPO, repoPath); + setEpicStoreConfig(db, REPO, { + mode: "file", + epicsDir: "planning/epics", + stateFile: ".middle/state.md", + }); + engine = new Engine({ embedded: true }); + return `authored ${SLUG}.md (epic_store=file)`; + }); + + await section("dispatch", async () => { + const deps: ImplementationDeps = { + db: db!, + getAdapter: () => adapter, + sessionGate: hangingGate, + tmux: { + async newSession() {}, + async sendText() {}, + async sendEnter() {}, + async killSession() {}, + status: async () => ({ alive: false }), + }, + worktree: { createWorktree, destroyWorktree }, + resolveRepoPath: () => repoPath, + worktreeRoot, + dispatcherUrl: "http://127.0.0.1:8822", + launchTimeoutMs, + stopTimeoutMs, + livenessPollMs, + resolveEpicStoreMode: () => "file", + enqueueContinuation: async (input) => { + await engine!.start("implementation", input); + }, + // The real file-mode poster: appends a block. + postQuestion: makeDefaultPostQuestion({ + db: db!, + resolveRepoPath: () => repoPath, + github: ghStub, + }), + }; + engine!.register(createImplementationWorkflow(deps)); + const handle = await engine!.start("implementation", { + repo: REPO, + epicRef: SLUG, + adapter: "stub", + }); + workflowId = handle.id; + return `workflow ${workflowId} started`; + }); + + await section("park", async () => { + await awaitState(db!, workflowId, "waiting-human", stateTimeoutMs); + if (getWaitForSignal(db!, workflowId) === null) { + throw new Error("parked but no resume signal armed"); + } + const epic = readEpicFile(epicsDir, SLUG); + const open = + epic?.conversation.filter((e) => e.kind === "question" && e.status === "open") ?? 
[]; + if (open.length !== 1) throw new Error(`expected one open question, found ${open.length}`); + return "parked waiting-human; question block written to the Epic file"; + }); + + await section("answer", async () => { + // The human's edit: fill in the open question's answer block. + const epic = readEpicFile(epicsDir, SLUG); + if (!epic) throw new Error("Epic file vanished before the answer edit"); + writeEpicFile(epicsDir, SLUG, { + ...epic, + conversation: epic.conversation.map((e) => + e.kind === "question" && e.status === "open" ? { ...e, answer: { body: ANSWER } } : e, + ), + }); + return "answer block filled in on disk"; + }); + + await section("resume", async () => { + // Drive the REAL file-watcher — mtime poll detects the now-non-empty answer + // and fires the resume signal, exactly as the daemon's poller cron does. + const fired = await runFileWatcherTick( + { + db: db!, + fileModeRepos: () => [{ repo: REPO, epicsDir }], + fireSignal: (id, payload) => engine!.signal(id, RESUME_EVENT, payload), + }, + 0, + ); + if (fired !== 1) throw new Error(`file-watcher fired ${fired} signals, expected 1`); + return "file-watcher detected the answer edit and fired the resume"; + }); + + await section("complete", async () => { + await awaitState(db!, workflowId, "completed", stateTimeoutMs); + const epic = readEpicFile(epicsDir, SLUG); + conversation = epic?.conversation ?? []; + rawEpicText = readFileSync(epicFilePath(epicsDir, SLUG), "utf8"); + return "workflow reached completed"; + }); + } finally { + // Cleanup runs regardless of outcome — no leaked .middle/ dirs in /tmp. The + // casts re-assert the declared type: TS narrows these closure-assigned `let`s + // to their `null` initializer (it doesn't track the in-closure assignments). 
+ await (engine as Engine | null)?.close(true); + (db as ReturnType | null)?.close(); + if (scratch) rmSync(scratch, { recursive: true, force: true }); + } + + return { + ok: failedSection === null, + sections, + failedSection, + conversation, + rawEpicText, + worktreeEpic, + worktreePath, + ghCalls, + scratchDir: scratch, + cleanedUp: true, + }; +} diff --git a/packages/dispatcher/test/epic-store/live-smoke.test.ts b/packages/dispatcher/test/epic-store/live-smoke.test.ts new file mode 100644 index 0000000..e05f817 --- /dev/null +++ b/packages/dispatcher/test/epic-store/live-smoke.test.ts @@ -0,0 +1,73 @@ +/** + * Integration (#212): the deterministic foundation of the live-smoke harness. + * Drives the **real** `createImplementationWorkflow` (real engine, + * `createWorktree`, `parseEpicFile`/`renderEpicFile`, the real + * `makeDefaultPostQuestion`, the real `runFileWatcherTick`) against an in-tmpdir + * `epic_store="file"` repo through the full loop — dispatch → park-on-question → + * answer-via-file-edit → resume → complete. The only stub is the gh boundary at + * `EpicGateway`'s PR/comment methods, which file mode must never touch. + * + * This runs on every commit to `main`; the live-GitHub smoke (`--live`, sibling + * sub-issue) is the opt-in operator counterpart that drives the same loop against + * real GitHub. The drive itself lives in `runFileModeSmoke` so `mm + * verify-file-mode` exercises the identical path — this test asserts the deep + * invariants the command's report can't. 
+ */ + +import { existsSync } from "node:fs"; +import { describe, expect, test } from "bun:test"; +import { parseEpicFile } from "../../src/epic-store/epic-file/parser.ts"; +import { runFileModeSmoke } from "../../src/epic-store/file-mode-smoke.ts"; + +describe("file-mode live-smoke — real workflow end-to-end (no real GitHub)", () => { + test("dispatch → park → answer-via-edit → resume → complete, all invariants hold", async () => { + const result = await runFileModeSmoke(); + + // Every section passed and the run is green. + expect(result.failedSection).toBeNull(); + expect(result.ok).toBe(true); + expect(result.sections.map((s) => s.name)).toEqual([ + "init", + "author", + "dispatch", + "park", + "answer", + "resume", + "complete", + ]); + expect(result.sections.every((s) => s.ok)).toBe(true); + + // The gh boundary was never touched — file mode is fully file-backed. + expect(result.ghCalls).toEqual([]); + + // The worktree's `` checkbox flipped to `[x]` (the agent's work), + // captured before `finalize` tore the worktree down on completion. + expect(result.worktreePath).not.toBeNull(); + expect(result.worktreeEpic).not.toBeNull(); + const sub1 = result.worktreeEpic!.subIssues.find((s) => s.id === 1); + expect(sub1?.checked).toBe(true); + + // The repo Epic file's conversation carries exactly one question — answered + // and resolved — i.e. exactly one `` + one + // ``, not a duplicated or extra entry. 
+ const questions = result.conversation.filter((e) => e.kind === "question"); + expect(questions).toHaveLength(1); + const q = questions[0]!; + expect(q.kind).toBe("question"); + if (q.kind === "question") { + expect(q.status).toBe("resolved"); // the watcher flipped it after firing + expect(q.answer?.body).toBe("Go with A."); + } + expect(result.conversation).toHaveLength(1); + const countOf = (needle: string): number => result.rawEpicText.split(needle).length - 1; + expect(countOf("", + "# feat: live-smoke verification probe", + "", + "## meta", + `slug: ${slug}`, + "adapter: claude", + "", + "## context", + "Throwaway Epic authored by `mm verify-file-mode --live` to prove the", + "file-mode dispatch loop opens a real PR end to end. Safe to delete.", + "", + "## acceptance criteria", + "- [ ] a draft PR opens for this Epic", + "", + "## sub-issues", + "", + "- [ ] **1 — touch a probe file** Create `verify-live-probe.txt` with any content, open the draft PR, and ask the operator to confirm before finishing.", + "", + "", + "## conversation", + "", + ].join("\n"); + +const ANSWER_TEXT = "Confirmed — finish the sub-issue and leave the PR as a draft."; + +async function gh(args: string[]): Promise<{ ok: boolean; stdout: string; stderr: string }> { + const proc = Bun.spawn(["gh", ...args], { stdout: "pipe", stderr: "pipe", stdin: "ignore" }); + const [stdout, stderr] = await Promise.all([ + new Response(proc.stdout).text(), + new Response(proc.stderr).text(), + ]); + return { ok: (await proc.exited) === 0, stdout, stderr }; +} + +async function git(cwd: string, args: string[]): Promise { + const proc = Bun.spawn(["git", "-C", cwd, ...args], { stdout: "ignore", stderr: "pipe" }); + if ((await proc.exited) !== 0) { + throw new Error(`git ${args.join(" ")}: ${(await new Response(proc.stderr).text()).trim()}`); + } +} + +/** + * The real GitHub/daemon/git IO. Operator-run — this boundary is what `bun test` + * cannot exercise (real repo, real agent, real tokens). 
The recorded one-shot run + * against the designated test repo is the evidence; the orchestration above is + * what CI proves. + */ +export function makeLiveSmokeIO(cfg: { repo: string; repoPath: string }): LiveSmokeIO { + const { repo, repoPath } = cfg; + const stamp = Date.now(); + const slug = `verify-smoke-${stamp}`; + const branch = `middle-smoke-${stamp}`; + const epicRelPath = `planning/epics/${slug}.md`; + const log = (line: string): void => console.log(`mm verify-file-mode --live: ${line}`); + const prUrl = (n: number): string => `https://github.com/${repo}/pull/${n}`; + + return { + log, + async authorEpic() { + const { writeFileSync, mkdirSync } = await import("node:fs"); + const { join, dirname } = await import("node:path"); + const abs = join(repoPath, epicRelPath); + mkdirSync(dirname(abs), { recursive: true }); + writeFileSync(abs, EPIC_BODY(slug)); + await git(repoPath, ["checkout", "-b", branch]); + await git(repoPath, ["add", epicRelPath]); + await git(repoPath, ["commit", "-m", `chore: live-smoke Epic ${slug}`]); + await git(repoPath, ["push", "-u", "origin", branch]); + return { slug, branch, branchUrl: `https://github.com/${repo}/tree/${branch}` }; + }, + async dispatch(s) { + // runDispatch returns 0 when the workflow completes or parks; infer which by + // re-reading the Epic file for an open question (the file-mode park trace). + const code = await runDispatch(repoPath, s, {}); + if (code !== 0) return "failed"; + return (await hasOpenQuestion(repoPath, s)) ? "waiting-human" : "completed"; + }, + async answerQuestion(s) { + await fillAnswerBlock(repoPath, s, ANSWER_TEXT); + await git(repoPath, ["add", epicRelPath]); + await git(repoPath, ["commit", "-m", `chore: answer live-smoke question ${s}`]); + await git(repoPath, ["push"]); + }, + async awaitResume(s) { + // The daemon's file-watcher polls on its cron; poll the PR until the + // sub-issue checkbox flips (or a generous deadline passes). 
+ const deadline = Date.now() + 15 * 60_000; + while (Date.now() < deadline) { + const pr = await this.findEpicPr(s); + if (pr && (await this.isSubIssueChecked(s, pr, 1))) return; + await Bun.sleep(10_000); + } + }, + async findEpicPr(s) { + const { ghGitHub } = await import("@middle/dispatcher/src/github.ts"); + const pr = await ghGitHub.findEpicPr(repo, s); + return pr ? { number: pr.number, isDraft: pr.isDraft, url: prUrl(pr.number) } : null; + }, + async isSubIssueChecked(s, pr, id) { + // Read the Epic file at the PR head and parse the sub-issue's checkbox. + const headRes = await gh([ + "pr", + "view", + String(pr.number), + "--repo", + repo, + "--json", + "headRefName", + "--jq", + ".headRefName", + ]); + const ref = headRes.stdout.trim(); + const fileRes = await gh([ + "api", + `repos/${repo}/contents/${epicRelPath}?ref=${ref}`, + "--jq", + ".content", + ]); + if (!fileRes.ok) return false; + const text = Buffer.from(fileRes.stdout.trim(), "base64").toString("utf8"); + const { parseEpicFile } = + await import("@middle/dispatcher/src/epic-store/epic-file/parser.ts"); + const epic = parseEpicFile(text); + return epic.subIssues.find((sub) => sub.id === id)?.checked === true; + }, + async cleanup(_s, b, pr) { + if (pr) await gh(["pr", "close", String(pr.number), "--repo", repo, "--delete-branch"]); + // Drop the local probe branch + the authored branch (best-effort). + await git(repoPath, ["checkout", "-"]).catch(() => {}); + await git(repoPath, ["branch", "-D", b]).catch(() => {}); + }, + }; +} + +/** Does the Epic file carry an open question? (the file-mode park trace). */ +async function hasOpenQuestion(repoPath: string, slug: string): Promise { + const { readEpicFile } = await import("@middle/dispatcher/src/epic-store/epic-file-io.ts"); + const { join } = await import("node:path"); + const epic = readEpicFile(join(repoPath, "planning", "epics"), slug); + return (epic?.conversation ?? 
[]).some((e) => e.kind === "question" && e.status === "open"); +} + +/** Fill the open question's answer block on disk (the human-edit the watcher detects). */ +async function fillAnswerBlock(repoPath: string, slug: string, answer: string): Promise { + const { readEpicFile, writeEpicFile } = + await import("@middle/dispatcher/src/epic-store/epic-file-io.ts"); + const { join } = await import("node:path"); + const epicsDir = join(repoPath, "planning", "epics"); + const epic = readEpicFile(epicsDir, slug); + if (!epic) throw new Error(`no Epic file for ${slug} to answer`); + writeEpicFile(epicsDir, slug, { + ...epic, + conversation: epic.conversation.map((e) => + e.kind === "question" && e.status === "open" ? { ...e, answer: { body: answer } } : e, + ), + }); +} diff --git a/packages/cli/src/commands/verify-file-mode.ts b/packages/cli/src/commands/verify-file-mode.ts index 0b2231b..f0c79d6 100644 --- a/packages/cli/src/commands/verify-file-mode.ts +++ b/packages/cli/src/commands/verify-file-mode.ts @@ -44,12 +44,26 @@ export function printSmokeReport( return 1; } +/** Options for {@link runVerifyFileMode}. */ +export type VerifyFileModeOptions = { + /** Run the real-GitHub smoke instead of the in-tmpdir integration fixture. */ + live?: boolean; + /** `owner/name` of the designated test repo (required with `--live`). */ + repo?: string; + /** Local checkout of the test repo for `--live` (defaults to cwd). */ + repoPath?: string; +}; + /** - * Entry point for `mm verify-file-mode`. Runs the in-tmpdir integration fixture - * and prints the structured report. Returns a process exit code (0 green / 1 - * failed). The `--live` real-GitHub path is added by the sibling sub-issue. + * Entry point for `mm verify-file-mode`. The default path runs the in-tmpdir + * integration fixture and prints the structured report; `--live` delegates to the + * real-GitHub smoke. Returns a process exit code (0 green / 1 failed). 
*/ -export async function runVerifyFileMode(): Promise { +export async function runVerifyFileMode(opts: VerifyFileModeOptions = {}): Promise { + if (opts.live) { + const { runVerifyFileModeLive } = await import("./verify-file-mode-live.ts"); + return runVerifyFileModeLive({ repo: opts.repo, repoPath: opts.repoPath }); + } const result = await runFileModeSmoke(); return printSmokeReport(result); } diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index 60577a0..514b379 100755 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -261,7 +261,18 @@ program .description( "Verify file mode end-to-end: drive the real file-mode workflow over a throwaway fixture and print a structured report", ) - .action(async () => process.exit(await runVerifyFileMode())); + .option("--live", "run the real-GitHub smoke against a designated test repo (needs --repo)") + .option("--repo ", "the throwaway test repo for --live") + .option("--repo-path ", "local checkout of the --live test repo (defaults to cwd)") + .action(async (options: { live?: boolean; repo?: string; repoPath?: string }) => + process.exit( + await runVerifyFileMode({ + live: options.live, + repo: options.repo, + repoPath: options.repoPath, + }), + ), + ); program .command("version") diff --git a/packages/cli/test/verify-file-mode-live.test.ts b/packages/cli/test/verify-file-mode-live.test.ts new file mode 100644 index 0000000..187bff1 --- /dev/null +++ b/packages/cli/test/verify-file-mode-live.test.ts @@ -0,0 +1,150 @@ +/** + * Plumbing (#214): the `mm verify-file-mode --live` orchestration, driven against + * an injected fake {@link LiveSmokeIO}. The real-GitHub *evidence run* is the + * operator step the Epic acknowledges a headless run can't perform (it needs a + * real repo + a real agent); this proves the control flow deterministically — + * arg validation, the park → answer → resume detour, the draft-PR + checkbox + * assertions, cleanup-on-success, and leave-artifacts-on-failure. 
+ */ + +import { describe, expect, test } from "bun:test"; +import { + runLiveSmoke, + runVerifyFileModeLive, + type LivePr, + type LiveSmokeIO, + type SettledState, +} from "../src/commands/verify-file-mode-live.ts"; + +type Trace = { calls: string[]; lines: string[] }; + +/** Scripted return values; every base method still records its call into the trace. */ +type Script = { + settled?: SettledState; + pr?: LivePr | null; + checked?: boolean; +}; + +/** Build a fake IO + a trace from scripted return values (so recording is never lost). */ +function fakeIO(script: Script = {}): { io: LiveSmokeIO; trace: Trace } { + const trace: Trace = { calls: [], lines: [] }; + const defaultPr: LivePr = { number: 42, isDraft: true, url: "https://github.com/o/r/pull/42" }; + const pr = script.pr === undefined ? defaultPr : script.pr; + const io: LiveSmokeIO = { + log: (l) => trace.lines.push(l), + authorEpic: async () => { + trace.calls.push("authorEpic"); + return { + slug: "verify-smoke-1", + branch: "middle-smoke-1", + branchUrl: "https://github.com/o/r/tree/middle-smoke-1", + }; + }, + dispatch: async () => { + trace.calls.push("dispatch"); + return script.settled ?? "completed"; + }, + answerQuestion: async () => { + trace.calls.push("answerQuestion"); + }, + awaitResume: async () => { + trace.calls.push("awaitResume"); + }, + findEpicPr: async () => { + trace.calls.push("findEpicPr"); + return pr; + }, + isSubIssueChecked: async () => { + trace.calls.push("isSubIssueChecked"); + return script.checked ?? true; + }, + cleanup: async () => { + trace.calls.push("cleanup"); + }, + }; + return { io, trace }; +} + +describe("runLiveSmoke orchestration", () => { + test("happy path with no park → asserts PR + checkbox, cleans up, exit 0", async () => { + const { io, trace } = fakeIO(); + const code = await runLiveSmoke(io); + expect(code).toBe(0); + // No park → no answer/resume detour. 
+ expect(trace.calls).toEqual([ + "authorEpic", + "dispatch", + "findEpicPr", + "isSubIssueChecked", + "cleanup", + ]); + expect(trace.lines.at(-1)).toBe("cleaned up the test branch + PR."); + }); + + test("park path → answers, awaits resume, then asserts + cleans up, exit 0", async () => { + const { io, trace } = fakeIO({ settled: "waiting-human" }); + const code = await runLiveSmoke(io); + expect(code).toBe(0); + expect(trace.calls).toEqual([ + "authorEpic", + "dispatch", + "answerQuestion", + "awaitResume", + "findEpicPr", + "isSubIssueChecked", + "cleanup", + ]); + }); + + test("dispatch failed → leaves the branch, exit 1, no PR checks", async () => { + const { io, trace } = fakeIO({ settled: "failed" }); + const code = await runLiveSmoke(io); + expect(code).toBe(1); + expect(trace.calls).toEqual(["authorEpic", "dispatch"]); + expect(trace.calls).not.toContain("cleanup"); + expect(trace.lines.at(-1)).toContain("tree/middle-smoke-1"); + }); + + test("no draft PR → leaves the branch URL, exit 1, no cleanup", async () => { + const { io, trace } = fakeIO({ pr: null }); + const code = await runLiveSmoke(io); + expect(code).toBe(1); + expect(trace.calls).not.toContain("cleanup"); + expect(trace.lines.at(-1)).toContain("no draft PR"); + }); + + test("PR exists but not a draft → leaves the PR URL, exit 1, no cleanup", async () => { + const { io, trace } = fakeIO({ + pr: { number: 7, isDraft: false, url: "https://github.com/o/r/pull/7" }, + }); + const code = await runLiveSmoke(io); + expect(code).toBe(1); + expect(trace.calls).not.toContain("cleanup"); + expect(trace.lines.at(-1)).toContain("pull/7"); + }); + + test("checkbox not flipped → leaves the PR for inspection, exit 1, no cleanup", async () => { + const { io, trace } = fakeIO({ checked: false }); + const code = await runLiveSmoke(io); + expect(code).toBe(1); + expect(trace.calls).not.toContain("cleanup"); + expect(trace.lines.at(-1)).toContain("checkbox not flipped"); + }); +}); + 
+describe("runVerifyFileModeLive arg validation", () => { + test("rejects a missing --repo", async () => { + expect(await runVerifyFileModeLive({})).toBe(1); + }); + + test("rejects a non-owner/name --repo", async () => { + expect(await runVerifyFileModeLive({ repo: "not-a-slug" })).toBe(1); + }); + + test("accepts owner/name and runs the injected IO", async () => { + const { io, trace } = fakeIO(); + const code = await runVerifyFileModeLive({ repo: "o/r", io }); + expect(code).toBe(0); + expect(trace.calls[0]).toBe("authorEpic"); + }); +}); From 0428d6099f637ba4319cfc88d2ab6295d997d8e8 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 4 Jun 2026 02:49:09 -0400 Subject: [PATCH 5/8] =?UTF-8?q?docs(verify):=20live-smoke=20verification?= =?UTF-8?q?=20=E2=80=94=20what=20it=20proves,=20when=20to=20run,=20failure?= =?UTF-8?q?-reading?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #215. Adds a 'Live-smoke verification' how-to to docs/dogfooding.md (what mm verify-file-mode covers, what --live adds, when to run each, reading the per-section structured output, and the one-time test-repo setup); cross-links it from docs/operator.md (health-check section + command table) and README.md's setup steps. docs-cross-link.test.ts boots mm verify-file-mode --help (exit 0) and asserts every 'mm ' in dogfooding.md resolves to a registered command in the CLI entry. --- README.md | 5 ++ docs/dogfooding.md | 54 ++++++++++++++++++++++ docs/operator.md | 3 ++ packages/cli/test/docs-cross-link.test.ts | 56 +++++++++++++++++++++++ 4 files changed, 118 insertions(+) create mode 100644 packages/cli/test/docs-cross-link.test.ts diff --git a/README.md b/README.md index 3e4aa53..7ab278a 100644 --- a/README.md +++ b/README.md @@ -61,8 +61,13 @@ cd packages/cli && bun link && cd ../.. # "did you get that memo?" 
— verify bun / tmux / claude / git / gh + gh auth mm doctor + +# verify everything works — drive the file-mode dispatch loop end to end +mm verify-file-mode ``` +`mm doctor` checks your toolchain; `mm verify-file-mode` proves the dispatch loop itself runs end to end. See [Live-smoke verification](docs/dogfooding.md#live-smoke-verification) for what it covers and the opt-in `--live` real-GitHub smoke. + Configuration is optional — middle ships with working defaults. To override, drop a `~/.middle/config.toml` (defaults shown): ```toml diff --git a/docs/dogfooding.md b/docs/dogfooding.md index 8073f32..8290536 100644 --- a/docs/dogfooding.md +++ b/docs/dogfooding.md @@ -86,3 +86,57 @@ dispatch needs: `mm init` is idempotent: a re-run with a matching `bootstrap.version` refreshes skills/hooks but keeps the config and the existing state issue. + +## Live-smoke verification + +`mm verify-file-mode` proves the file-mode dispatch loop works end to end on your +machine. Run it after install and after any merge that touches the dispatcher, +the file gateways, the worktree machinery, or the Epic-file parser/renderer. + +`mm verify-file-mode` (no flags) drives the **real** workflow over a throwaway +tmpdir repo: it authors an `epic_store="file"` Epic, dispatches it, parks it on a +question, answers via a file edit, resumes through the real file-watcher, and +checks the run reaches `completed` with the sub-issue checkbox flipped. It stubs +only the GitHub PR/comment boundary, so it needs no daemon, no `gh`, and no +network. This is the same drive CI runs on every commit to `main`. + +`mm verify-file-mode --live --repo ` runs that loop against **real +GitHub**: it authors an Epic on a fresh branch, dispatches a real agent, answers +any park, and asserts a draft PR opened with the sub-issue checkbox flipped. It +spends real tokens and minutes of wall-clock, so it is opt-in — run it after a +major merge, not on every commit. It is not in CI by design. 
+ +```bash +mm verify-file-mode # the local integration smoke (post-install) +mm verify-file-mode --live --repo you/middle-smoketest # the real-GitHub smoke (post-major-merge) +``` + +### Read a failure + +Both modes print one line per phase — `init` → `author` → `dispatch` → `park` → +`answer` → `resume` → `complete` — each marked `PASS` or `FAIL` with its +wall-time, then a verdict line. On success the last line is `all sections pass.`; +on failure it is `FAIL:
`, so the failing phase is the last +thing printed. The section that flips to `FAIL` tells you which seam broke: a +`dispatch` failure is the engine or worktree, `resume` is the file-watcher, +`complete` is the terminal finalize. + +`--live` exits 0 only after it cleans up the test branch and PR. On failure it +**leaves** the branch and PR intact and prints their URLs — inspect those +artifacts, then delete them by hand once you have diagnosed the break. + +### Set up a designated test repo for `--live` + +`--live` needs a throwaway GitHub repo you can let an agent open PRs against. Set +one up once: + +1. Create an empty repo, e.g. `you/middle-smoketest`, and clone it locally. +2. Bootstrap it in file mode: `mm init --epic-store=file`. This stamps the + skills and hooks and registers the repo with the daemon in file mode (Epics + live in `planning/epics/`, not GitHub issues). +3. Confirm the install: `mm doctor` from the checkout reports the file-mode Epic + directory. + +Then run `mm verify-file-mode --live --repo you/middle-smoketest --repo-path ` +(`--repo-path` defaults to the current directory). The command authors, +dispatches, and cleans up its own throwaway Epic each run. diff --git a/docs/operator.md b/docs/operator.md index eb2bae6..ed6dcf0 100644 --- a/docs/operator.md +++ b/docs/operator.md @@ -75,6 +75,8 @@ mm doctor --fix # also append the bun PATH export to ~/.zshrc / ~/.bashrc Each check is pass (`✓`), warn (`!`), or fail (`✗`). Warnings mean degraded-but-functional; the command exits non-zero only on a failure. +`mm doctor` checks your *toolchain*; `mm verify-file-mode` checks the *file-mode dispatch loop* end to end. Run it after install and after a major merge — see [Live-smoke verification](dogfooding.md#live-smoke-verification) for what it covers, when to run `--live`, and how to read a failure. + ## Back up and restore state middle's SQLite database holds operational bookkeeping — workflow rows, the event log, rate-limit state. 
GitHub holds the work itself (issues, sub-issues, PRs), so a backup captures middle's state, never GitHub's. @@ -115,6 +117,7 @@ Retention touches only middle's SQLite. `mm doctor`'s `database` line reports th | `mm stop` | Stop the dispatcher | | `mm status` | One-screen summary of repos and workflow states | | `mm doctor [--fix]` | Full health check | +| `mm verify-file-mode [--live --repo ]` | Verify the file-mode dispatch loop end to end (`--live` runs against real GitHub) | | `mm dispatch ` | Force-dispatch an Epic (or standalone issue) | | `mm run-recommender ` | Rank the backlog now (rewrites the state issue) | | `mm pause ` / `mm resume ` | Pause / resume auto-dispatch for a repo | diff --git a/packages/cli/test/docs-cross-link.test.ts b/packages/cli/test/docs-cross-link.test.ts new file mode 100644 index 0000000..e6591bb --- /dev/null +++ b/packages/cli/test/docs-cross-link.test.ts @@ -0,0 +1,56 @@ +/** + * Docs cross-link guard (#215): keeps `docs/dogfooding.md` honest against the + * real CLI. Every `mm ` the dogfooding guide names must resolve to a + * command registered in `packages/cli/src/index.ts`, and `mm verify-file-mode + * --help` (the command the new "Live-smoke verification" section documents) must + * boot and exit 0 — so a renamed or dropped command can't leave the guide citing + * a command that no longer exists. + */ + +import { describe, expect, test } from "bun:test"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const REPO_ROOT = join(import.meta.dir, "..", "..", ".."); +const CLI = join(import.meta.dir, "..", "src", "index.ts"); +const DOGFOODING = join(REPO_ROOT, "docs", "dogfooding.md"); + +/** Commands registered via `.command("name")` in the CLI entry. 
*/ +function registeredCommands(): Set { + const src = readFileSync(CLI, "utf8"); + const out = new Set(); + for (const m of src.matchAll(/\.command\("([a-z][a-z-]*)"\)/g)) out.add(m[1]!); + return out; +} + +/** `mm ` tokens mentioned in a doc (the first word after `mm`). */ +function mentionedCommands(docPath: string): string[] { + const text = readFileSync(docPath, "utf8"); + const out = new Set(); + for (const m of text.matchAll(/\bmm ([a-z][a-z-]*[a-z])\b/g)) out.add(m[1]!); + return [...out]; +} + +describe("docs/dogfooding.md cross-links", () => { + test("every `mm ` mentioned resolves to a registered command", () => { + const registered = registeredCommands(); + expect(registered.has("verify-file-mode")).toBe(true); // guards the parser itself + const mentioned = mentionedCommands(DOGFOODING); + expect(mentioned.length).toBeGreaterThan(0); + const unknown = mentioned.filter((c) => !registered.has(c)); + expect(unknown).toEqual([]); + // The section the guide adds names verify-file-mode by exact string. 
+ expect(readFileSync(DOGFOODING, "utf8")).toContain("mm verify-file-mode"); + }); + + test("mm verify-file-mode --help boots and exits 0", async () => { + const proc = Bun.spawn(["bun", CLI, "verify-file-mode", "--help"], { + stdout: "pipe", + stderr: "pipe", + stdin: "ignore", + }); + const stdout = await new Response(proc.stdout).text(); + expect(await proc.exited).toBe(0); + expect(stdout).toContain("verify-file-mode"); + }); +}); From 4a0065ef963dbb7c60bd233f4b91c385fbe7dd3b Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 4 Jun 2026 02:55:42 -0400 Subject: [PATCH 6/8] fix(cli): self-review hardening of the --live smoke + docs guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-review (be-my-own-CodeRabbit) over the branch diff: - verify-file-mode-live: the production findEpicPr matched the Epic's PR via ghGitHub.findEpicPr, which throws on a file-mode slug (it requires a numeric ref) and matches by 'closes #N' a file-mode Epic lacks — so the operator path could never reach the PR/checkbox checks. Find the PR by the dispatch's head branch (middle-issue-) instead, and read the sub-issue checkbox at that ref directly. Drop the needless push of the local seed branch (the daemon dispatches against the local checkout) and log the awaitResume timeout. - docs-cross-link: anchor the 'mm ' matcher to line-start/inline-code so prose ('mm then …') can't trip it, and tolerate inline-arg .command() forms. 
--- .../adapters/copilot/test/adapter.test.ts | 4 +- .../cli/src/commands/verify-file-mode-live.ts | 62 +++++++++++-------- packages/cli/test/docs-cross-link.test.ts | 16 +++-- 3 files changed, 52 insertions(+), 30 deletions(-) diff --git a/packages/adapters/copilot/test/adapter.test.ts b/packages/adapters/copilot/test/adapter.test.ts index 45d2abe..43de950 100644 --- a/packages/adapters/copilot/test/adapter.test.ts +++ b/packages/adapters/copilot/test/adapter.test.ts @@ -368,7 +368,9 @@ describe("classifyStop", () => { writeFileSync(join(middle, "done.json"), JSON.stringify({ pr: 207 })); writeFileSync( transcript, - ev("assistant.message", "2026-06-04T12:30:00.000Z", { content: "Error 429: Too Many Requests" }), + ev("assistant.message", "2026-06-04T12:30:00.000Z", { + content: "Error 429: Too Many Requests", + }), ); const result = copilotAdapter.classifyStop({ payload: {}, diff --git a/packages/cli/src/commands/verify-file-mode-live.ts b/packages/cli/src/commands/verify-file-mode-live.ts index 37cdadb..48089d5 100644 --- a/packages/cli/src/commands/verify-file-mode-live.ts +++ b/packages/cli/src/commands/verify-file-mode-live.ts @@ -38,7 +38,7 @@ export type LiveSmokeIO = { authorEpic: () => Promise<{ slug: string; branch: string; branchUrl: string }>; /** Dispatch the Epic through the daemon and resolve once the row settles. */ dispatch: (slug: string) => Promise; - /** Fill in the open question's answer block on disk + push (the file-mode resume trigger). */ + /** Fill in the open question's answer block on disk (the file-mode resume trigger). */ answerQuestion: (slug: string) => Promise; /** Wait for the daemon's file-watcher resume to drive the sub-issue checkbox to `[x]`. 
*/ awaitResume: (slug: string) => Promise; @@ -179,10 +179,18 @@ export function makeLiveSmokeIO(cfg: { repo: string; repoPath: string }): LiveSm const { repo, repoPath } = cfg; const stamp = Date.now(); const slug = `verify-smoke-${stamp}`; - const branch = `middle-smoke-${stamp}`; + // The branch the daemon's worktree opens its PR from: `middle-`, where + // unit is `issue-` (see worktree.ts `unitName`/`createWorktree`). The + // smoke finds + cleans the PR by this head branch — file-mode Epics have no + // issue number, so the gh `closes #N` finder (ghGitHub.findEpicPr) can't match. + const agentBranch = `middle-issue-${slug}`; + // The local seed branch the Epic file is authored on; never pushed (the daemon + // dispatches against the local checkout, so the Epic only needs to be on disk). + const seedBranch = `middle-smoke-${stamp}`; const epicRelPath = `planning/epics/${slug}.md`; const log = (line: string): void => console.log(`mm verify-file-mode --live: ${line}`); const prUrl = (n: number): string => `https://github.com/${repo}/pull/${n}`; + const branchUrl = `https://github.com/${repo}/tree/${agentBranch}`; return { log, @@ -192,11 +200,12 @@ export function makeLiveSmokeIO(cfg: { repo: string; repoPath: string }): LiveSm const abs = join(repoPath, epicRelPath); mkdirSync(dirname(abs), { recursive: true }); writeFileSync(abs, EPIC_BODY(slug)); - await git(repoPath, ["checkout", "-b", branch]); + // Seed the Epic on a fresh local branch; the daemon's worktree branches off + // this HEAD, so its checkout carries the Epic file. No push needed. 
+ await git(repoPath, ["checkout", "-b", seedBranch]); await git(repoPath, ["add", epicRelPath]); await git(repoPath, ["commit", "-m", `chore: live-smoke Epic ${slug}`]); - await git(repoPath, ["push", "-u", "origin", branch]); - return { slug, branch, branchUrl: `https://github.com/${repo}/tree/${branch}` }; + return { slug, branch: seedBranch, branchUrl }; }, async dispatch(s) { // runDispatch returns 0 when the workflow completes or parks; infer which by @@ -206,10 +215,9 @@ export function makeLiveSmokeIO(cfg: { repo: string; repoPath: string }): LiveSm return (await hasOpenQuestion(repoPath, s)) ? "waiting-human" : "completed"; }, async answerQuestion(s) { + // The human-edit the file-watcher detects: fill the answer block on disk. + // The daemon reads the local checkout, so no push is needed. await fillAnswerBlock(repoPath, s, ANSWER_TEXT); - await git(repoPath, ["add", epicRelPath]); - await git(repoPath, ["commit", "-m", `chore: answer live-smoke question ${s}`]); - await git(repoPath, ["push"]); }, async awaitResume(s) { // The daemon's file-watcher polls on its cron; poll the PR until the @@ -220,29 +228,33 @@ export function makeLiveSmokeIO(cfg: { repo: string; repoPath: string }): LiveSm if (pr && (await this.isSubIssueChecked(s, pr, 1))) return; await Bun.sleep(10_000); } + log(`timed out after 15m waiting for the resume to flip the sub-issue checkbox`); }, - async findEpicPr(s) { - const { ghGitHub } = await import("@middle/dispatcher/src/github.ts"); - const pr = await ghGitHub.findEpicPr(repo, s); - return pr ? { number: pr.number, isDraft: pr.isDraft, url: prUrl(pr.number) } : null; - }, - async isSubIssueChecked(s, pr, id) { - // Read the Epic file at the PR head and parse the sub-issue's checkbox. - const headRes = await gh([ + async findEpicPr() { + // Match by the agent's head branch (file-mode Epics have no issue number). 
+ const res = await gh([ "pr", - "view", - String(pr.number), + "list", "--repo", repo, + "--head", + agentBranch, + "--state", + "open", "--json", - "headRefName", + "number,isDraft", "--jq", - ".headRefName", + ".[0] // empty", ]); - const ref = headRes.stdout.trim(); + if (!res.ok || res.stdout.trim() === "") return null; + const pr = JSON.parse(res.stdout.trim()) as { number: number; isDraft: boolean }; + return { number: pr.number, isDraft: pr.isDraft, url: prUrl(pr.number) }; + }, + async isSubIssueChecked(_s, _pr, id) { + // Read the Epic file at the agent branch head and parse the sub-issue's box. const fileRes = await gh([ "api", - `repos/${repo}/contents/${epicRelPath}?ref=${ref}`, + `repos/${repo}/contents/${epicRelPath}?ref=${agentBranch}`, "--jq", ".content", ]); @@ -253,11 +265,11 @@ export function makeLiveSmokeIO(cfg: { repo: string; repoPath: string }): LiveSm const epic = parseEpicFile(text); return epic.subIssues.find((sub) => sub.id === id)?.checked === true; }, - async cleanup(_s, b, pr) { + async cleanup(_s, _b, pr) { + // Close the agent PR and delete its remote branch; drop the local seed branch. if (pr) await gh(["pr", "close", String(pr.number), "--repo", repo, "--delete-branch"]); - // Drop the local probe branch + the authored branch (best-effort). await git(repoPath, ["checkout", "-"]).catch(() => {}); - await git(repoPath, ["branch", "-D", b]).catch(() => {}); + await git(repoPath, ["branch", "-D", seedBranch]).catch(() => {}); }, }; } diff --git a/packages/cli/test/docs-cross-link.test.ts b/packages/cli/test/docs-cross-link.test.ts index e6591bb..376e30b 100644 --- a/packages/cli/test/docs-cross-link.test.ts +++ b/packages/cli/test/docs-cross-link.test.ts @@ -15,19 +15,27 @@ const REPO_ROOT = join(import.meta.dir, "..", "..", ".."); const CLI = join(import.meta.dir, "..", "src", "index.ts"); const DOGFOODING = join(REPO_ROOT, "docs", "dogfooding.md"); -/** Commands registered via `.command("name")` in the CLI entry. 
*/ +/** + * Commands registered via `.command("name")` in the CLI entry. Captures up to the + * first space or quote so an inline-arg form (`.command("name ")`) still + * resolves to `name`. + */ function registeredCommands(): Set { const src = readFileSync(CLI, "utf8"); const out = new Set(); - for (const m of src.matchAll(/\.command\("([a-z][a-z-]*)"\)/g)) out.add(m[1]!); + for (const m of src.matchAll(/\.command\("([a-z][a-z-]*)/g)) out.add(m[1]!); return out; } -/** `mm ` tokens mentioned in a doc (the first word after `mm`). */ +/** + * `mm ` tokens a doc *runs* — only where `mm ` starts a line (a fenced + * command) or follows an inline-code backtick. Prose like "mm then dispatches" + * is deliberately not matched, so an English sentence can't trip the guard. + */ function mentionedCommands(docPath: string): string[] { const text = readFileSync(docPath, "utf8"); const out = new Set(); - for (const m of text.matchAll(/\bmm ([a-z][a-z-]*[a-z])\b/g)) out.add(m[1]!); + for (const m of text.matchAll(/(?:^|`)\s*mm ([a-z][a-z-]*[a-z])\b/gm)) out.add(m[1]!); return [...out]; } From 82c974654a7ab04bd77e034a2f8fa0297ee9e539 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Fri, 5 Jun 2026 02:38:54 -0400 Subject: [PATCH 7/8] docs(verify): TSDoc SmokeSectionName + normalize issue refs in decisions log Address CodeRabbit review on PR #230: - Add TSDoc for exported SmokeSectionName (public-export doc guideline). - Backtick-wrap bare #212/#213 refs in decisions.md so the wrapped line no longer parses as an ATX heading (markdownlint MD018), matching the file's existing backtick-ref convention. 
--- packages/dispatcher/src/epic-store/file-mode-smoke.ts | 1 + planning/issues/208/decisions.md | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/dispatcher/src/epic-store/file-mode-smoke.ts b/packages/dispatcher/src/epic-store/file-mode-smoke.ts index cfd7e0b..fe3e4e0 100644 --- a/packages/dispatcher/src/epic-store/file-mode-smoke.ts +++ b/packages/dispatcher/src/epic-store/file-mode-smoke.ts @@ -49,6 +49,7 @@ export const SMOKE_SECTIONS = [ "resume", "complete", ] as const; +/** A smoke section identifier — one of {@link SMOKE_SECTIONS}; orders the drive and names a report line. */ export type SmokeSectionName = (typeof SMOKE_SECTIONS)[number]; /** One phase's result: did it pass, how long it took, and a one-line detail. */ diff --git a/planning/issues/208/decisions.md b/planning/issues/208/decisions.md index 47f33d6..d8e6d33 100644 --- a/planning/issues/208/decisions.md +++ b/planning/issues/208/decisions.md @@ -5,9 +5,9 @@ **Date:** 2026-06-04 **Decision:** Put the smoke drive in a single runner in the dispatcher package, -returning structured per-section results, and have both #212's `bun test` and -#213's `mm verify-file-mode` call it. -**Why:** #213's AC explicitly allows "delegating to the integration fixture from +returning structured per-section results, and have both `#212`'s `bun test` and +`#213`'s `mm verify-file-mode` call it. +**Why:** `#213`'s AC explicitly allows "delegating to the integration fixture from the sibling sub-issue". A second hand-rolled drive would be a parity hazard — the exact failure the Epic exists to prevent. The runner lives in the dispatcher (not the CLI) because it depends on dispatcher internals (`Engine`, @@ -24,7 +24,7 @@ both drive the real workflow; this consolidates that drive into a reusable seam. 
block to the Epic file (via `writeEpicFile`), then drives `runFileWatcherTick` (the real watcher) to detect it and fire the resume — rather than calling `engine.signal(RESUME_EVENT)` directly like `parity.test.ts` does. -**Why:** #212's framing is "resume-via-edit"; exercising the real watcher proves +**Why:** `#212`'s framing is "resume-via-edit"; exercising the real watcher proves the file-mode resume path end to end (mtime poll → open-question-with-answer detection → `fireSignal` → flip to `resolved`), which is exactly the seam the live gap left unproven. `engine.signal` would skip the watcher entirely. From 48a479428dc83821e76d31454cb0661940d1e82d Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Fri, 5 Jun 2026 02:41:06 -0400 Subject: [PATCH 8/8] docs(verify): docstring the smoke harness's git/gh/EPIC_BODY helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit's docstring-coverage pre-merge check read 76.47% (< 80% threshold). Document the remaining undocumented helpers in the PR's new source — the git runners, the gh runner, and the live Epic-body builder — to clear the class. One-line local closures (log/prUrl) left as-is (YAGNI). --- packages/cli/src/commands/verify-file-mode-live.ts | 3 +++ packages/dispatcher/src/epic-store/file-mode-smoke.ts | 1 + 2 files changed, 4 insertions(+) diff --git a/packages/cli/src/commands/verify-file-mode-live.ts b/packages/cli/src/commands/verify-file-mode-live.ts index 48089d5..4a833f0 100644 --- a/packages/cli/src/commands/verify-file-mode-live.ts +++ b/packages/cli/src/commands/verify-file-mode-live.ts @@ -126,6 +126,7 @@ export async function runVerifyFileModeLive(opts: LiveOptions = {}): Promise [ "", @@ -153,6 +154,7 @@ const EPIC_BODY = (slug: string): string => const ANSWER_TEXT = "Confirmed — finish the sub-issue and leave the PR as a draft."; +/** Run a `gh` subcommand, capturing stdout/stderr; returns `ok` instead of throwing so callers can branch on failure. 
*/ async function gh(args: string[]): Promise<{ ok: boolean; stdout: string; stderr: string }> { const proc = Bun.spawn(["gh", ...args], { stdout: "pipe", stderr: "pipe", stdin: "ignore" }); const [stdout, stderr] = await Promise.all([ @@ -162,6 +164,7 @@ async function gh(args: string[]): Promise<{ ok: boolean; stdout: string; stderr return { ok: (await proc.exited) === 0, stdout, stderr }; } +/** Run a git subcommand in `cwd`; throws with stderr on non-zero exit. */ async function git(cwd: string, args: string[]): Promise { const proc = Bun.spawn(["git", "-C", cwd, ...args], { stdout: "ignore", stderr: "pipe" }); if ((await proc.exited) !== 0) { diff --git a/packages/dispatcher/src/epic-store/file-mode-smoke.ts b/packages/dispatcher/src/epic-store/file-mode-smoke.ts index fe3e4e0..e041743 100644 --- a/packages/dispatcher/src/epic-store/file-mode-smoke.ts +++ b/packages/dispatcher/src/epic-store/file-mode-smoke.ts @@ -104,6 +104,7 @@ const GIT_ENV = { GIT_COMMITTER_EMAIL: "middle-smoke@example.invalid", }; +/** Run a git subcommand in `cwd` with the smoke's fixed identity env; throws with stderr on non-zero exit. */ async function git(cwd: string, args: string[]): Promise { const proc = Bun.spawn(["git", "-C", cwd, ...args], { stdout: "ignore",