From e5fcf788172bdc2330289722ae8d22f3c0954407 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Tue, 26 May 2026 13:30:58 -0400 Subject: [PATCH 01/23] docs(adapters): implementation plan for the CodexAdapter Epic --- planning/issues/60/plan.md | 59 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 planning/issues/60/plan.md diff --git a/planning/issues/60/plan.md b/planning/issues/60/plan.md new file mode 100644 index 00000000..c7d83db1 --- /dev/null +++ b/planning/issues/60/plan.md @@ -0,0 +1,59 @@ +# Issue #60: CodexAdapter (Epic — Phase 10) + +**Link:** https://github.com/thejustinwalsh/middle/issues/60 +**Branch:** middle-issue-60 + +## Goal +Ship the second `AgentAdapter` (Codex), add per-CLI adapter selection across the +implementer and recommender paths, and prove the adapter abstraction holds across +both adapters — fixing the interface or any leak where it doesn't. + +## Approach +- Mirror the ClaudeAdapter package structure (`index.ts` + `classify.ts` / + `hooks.ts` / `prompt.ts` / `transcript.ts`) for Codex, implementing every + `AgentAdapter` member to the spec's "CodexAdapter specifics". +- Codex's empirically-observed bits (hook event names, transcript location/format, + rate-limit message, force-include syntax, auto-mode mechanism) are implemented as + the spec's start-generous baseline — the interface is designed to absorb tightening + once observed on a live run. Document each assumption in `decisions.md`. +- Replace the two hardcoded `getAdapter` registries and the six "claude-only" gates + with a shared registry + a pure `selectAdapter(...)` that encodes the spec's four + selection rules (label override → default → rate-limit switch → skip). +- Prove the abstraction with an automated test that drives both adapters through the + same workflow path; audit for adapter-specific logic that leaked outside the + adapter packages and fix it (the registry hardcodes are the known leak). + +## Phases (= open sub-issues) +1. **#61 Implement the CodexAdapter** — full `AgentAdapter` impl + unit tests for + `buildLaunchCommand`, `buildPromptText`, `installHooks`, `classifyStop`, + transcript reads, rate-limit detection. +2. **#62 Per-CLI adapter selection** — shared adapter registry; pure `selectAdapter` + with the four rules; wire into manual dispatch + the claude-only gates; recommender + skill prose + state-issue schema already record per-Epic adapter (verify/extend). +3. **#63 Verify the abstraction holds** — automated test exercising both adapters + through one workflow path; leak audit + fix; document interface findings. + +## Files likely to change +- `packages/adapters/codex/src/{index,classify,hooks,prompt,transcript}.ts` — new +- `packages/adapters/codex/test/adapter.test.ts` — new +- `packages/adapters/codex/package.json` — barrel exports +- `packages/core/src/select-adapter.ts` (+ index export) — new `selectAdapter` +- `packages/core/test/select-adapter.test.ts` — new +- `packages/dispatcher/src/main.ts`, `packages/cli/src/commands/docs.ts` — shared registry +- `packages/cli/src/commands/dispatch.ts`, `packages/dispatcher/src/recommender-run.ts`, + `packages/dispatcher/src/documentation-run.ts` — generalize claude-only gates +- `packages/skills/recommending-github-issues/SKILL.md` — adapter-selection prose (already present; verify) +- A cross-adapter workflow test (location TBD — dispatcher test) + +## Out of scope +- Applying `agent:` labels (a human action) +- The universal `hook.sh` (Phase 2, reused as-is) +- Tightening Codex regexes/event-names against a live run (future, once observed) + +## Open questions +- The Epic headline criterion + #63's first criterion require a **live** dual-dispatch + on a test repo with both CLIs in interactive tmux. Codex CLI is **not installed** in + this sandbox and there are no OpenAI credentials, so the live run is an operator step. + Everything else (adapter impl, selection logic, the automated same-path cross-adapter + test, the leak audit) is delivered and verified here; the live run is surfaced in the + reviewer's brief as the operator-executed acceptance step. From 67ece1c436d710972149cd8a50a311b02c016a16 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Tue, 26 May 2026 13:38:22 -0400 Subject: [PATCH 02/23] feat(adapters): implement the CodexAdapter Full AgentAdapter for the Codex CLI mirroring ClaudeAdapter: - buildLaunchCommand: interactive `codex` (no exec); auto mode + sandbox in .codex/config.toml, not the command line - installHooks: writes .codex/config.toml with approval_policy/sandbox and a [hooks] block mapping Codex's event names (startup/turn-start/command/ turn-end/shutdown) to the normalized taxonomy; reuses the shared hook.sh + pr-ready-gate.sh - buildPromptText: parity with Claude (slash-command + @-reference) - enterAutoMode: config-driven, so only fails fast on a not-logged-in pane - resolveTranscriptPath/readTranscriptState: rollout JSONL location + format - classifyStop/detectRateLimit: shared sentinel logic + Codex's generous rate-limit pattern (/rate.?limit|429|too many requests/i) Codex's observable bits are coded as the spec's start-generous baseline behind the interface; documented as tightening points in decisions.md. 36 unit tests cover buildLaunchCommand, buildPromptText, installHooks output shape, classifyStop branches, transcript reads, and rate-limit detection. --- packages/adapters/codex/package.json | 3 + packages/adapters/codex/src/classify.ts | 105 ++++ packages/adapters/codex/src/hooks.ts | 87 ++++ packages/adapters/codex/src/index.ts | 109 +++- packages/adapters/codex/src/prompt.ts | 33 ++ packages/adapters/codex/src/transcript.ts | 77 +++ packages/adapters/codex/test/adapter.test.ts | 493 +++++++++++++++++++ planning/issues/60/decisions.md | 55 +++ 8 files changed, 956 insertions(+), 6 deletions(-) create mode 100644 packages/adapters/codex/src/classify.ts create mode 100644 packages/adapters/codex/src/hooks.ts create mode 100644 packages/adapters/codex/src/prompt.ts create mode 100644 packages/adapters/codex/src/transcript.ts create mode 100644 packages/adapters/codex/test/adapter.test.ts create mode 100644 planning/issues/60/decisions.md diff --git a/packages/adapters/codex/package.json b/packages/adapters/codex/package.json index dfbf3956..d9204791 100644 --- a/packages/adapters/codex/package.json +++ b/packages/adapters/codex/package.json @@ -6,5 +6,8 @@ "main": "src/index.ts", "dependencies": { "@middle/core": "workspace:*" + }, + "devDependencies": { + "smol-toml": "^1.6.1" } } diff --git a/packages/adapters/codex/src/classify.ts b/packages/adapters/codex/src/classify.ts new file mode 100644 index 00000000..ec42923c --- /dev/null +++ b/packages/adapters/codex/src/classify.ts @@ -0,0 +1,105 @@ +import { existsSync, readFileSync } from "node:fs"; +import { join } from "node:path"; +import type { + BlockedSentinel, + HookPayload, + RateLimitDetection, + StopClassification, +} from "@middle/core"; + +/** + * Codex's rate-limit signal. The spec's deliberately generous starting pattern + * (`/rate.?limit|429|too many requests/i`), to be tightened as Codex's real + * usage-limit messages are observed on live runs. Unlike Claude's, it carries no + * reset timestamp — Codex's message format hasn't surfaced one, so the + * classification defaults `resetAt` to "unknown" rather than inventing a time. + */ +const RATE_LIMIT_RE = /rate.?limit|429|too many requests/i; + +/** + * Classify the agent's state at a turn-end (`Stop`) hook. The sentinel logic is + * identical to Claude's — the `.middle/{blocked,done,failed}.json` files are + * written by the universal skill, not the CLI, so their resolution is + * adapter-agnostic. Only the rate-limit detection differs (Codex's pattern). + * All sentinel paths anchor at `/.middle/`, never `payload.cwd` (the + * agent may have `cd`'d into a subdirectory). + */ +export function classifyStop(opts: { + payload: HookPayload; + transcriptPath: string; + sentinelPresent: boolean; + worktree: string; +}): StopClassification { + const middleDir = join(opts.worktree, ".middle"); + + if (opts.sentinelPresent) { + const sentinelPath = join(middleDir, "blocked.json"); + return { kind: "asked-question", sentinelPath, sentinel: readBlockedSentinel(sentinelPath) }; + } + + if (RATE_LIMIT_RE.test(readTail(opts.transcriptPath))) { + return { kind: "rate-limited", resetAt: "unknown" }; + } + + if (existsSync(join(middleDir, "done.json"))) return { kind: "done" }; + + const failedPath = join(middleDir, "failed.json"); + if (existsSync(failedPath)) { + return { kind: "failed", reason: readFailedReason(failedPath) }; + } + + return { kind: "bare-stop" }; +} + +/** + * The turn-end rate-limit detector: the same Codex pattern applied to the + * transcript tail, independent of `classifyStop`'s ordering so the dispatcher + * can update `rate_limit_state` on every stop even when the classification is a + * higher-priority kind. Returns null when no rate-limit signal is present. + */ +export function detectRateLimit(opts: { + payload: HookPayload; + transcriptPath: string; +}): RateLimitDetection | null { + if (!RATE_LIMIT_RE.test(readTail(opts.transcriptPath))) return null; + return { resetAt: "unknown", source: "stop-hook" }; +} + +function readTail(path: string): string { + try { + const raw = readFileSync(path, "utf8"); + return raw.length > 8192 ? raw.slice(-8192) : raw; + } catch { + return ""; + } +} + +/** + * Read and tolerantly parse the `.middle/blocked.json` question sentinel. Returns + * `null` when the file is missing, unreadable, not JSON, or carries no string + * `question`; the Stop is still classified `asked-question` (the sentinel's + * presence is the signal), the contents are best-effort. + */ +function readBlockedSentinel(path: string): BlockedSentinel | null { + try { + const parsed = JSON.parse(readFileSync(path, "utf8")) as Record; + if (typeof parsed.question !== "string" || parsed.question.length === 0) return null; + const context = typeof parsed.context === "string" ? parsed.context : undefined; + const kind = parsed.kind === "complexity" ? "complexity" : undefined; + const out: BlockedSentinel = { question: parsed.question }; + if (context !== undefined) out.context = context; + if (kind !== undefined) out.kind = kind; + return out; + } catch { + return null; + } +} + +function readFailedReason(path: string): string { + try { + const parsed = JSON.parse(readFileSync(path, "utf8")) as { reason?: unknown }; + return typeof parsed.reason === "string" ? parsed.reason : "agent reported failure"; + } catch { + return "agent reported failure"; + } +} diff --git a/packages/adapters/codex/src/hooks.ts b/packages/adapters/codex/src/hooks.ts new file mode 100644 index 00000000..c41f177d --- /dev/null +++ b/packages/adapters/codex/src/hooks.ts @@ -0,0 +1,87 @@ +import { chmod, mkdir } from "node:fs/promises"; +import { dirname, join } from "node:path"; +import type { InstallHookOpts, NormalizedEvent } from "@middle/core"; +import { HOOK_SH, PR_READY_GATE_SH } from "@middle/core"; + +/** + * Map each Codex hook event name to the normalized taxonomy. Names come from the + * build spec's "Normalized event taxonomy" table ("Trigger (Codex)" column): + * `startup`/`turn-start`/`command`/`turn-end`/`shutdown`, with the `command` + * event distinguished into pre / success / failure. Codex has no equivalent of + * Claude's `Notification` or `SubagentStop`, so `agent.notification` / + * `agent.subagent-stopped` are not emitted. + * + * Two entries are load-bearing for dispatch: `startup → session.started` + * (carries the rollout path, triggers launch→drive) and `turn-end → + * agent.stopped` (the turn boundary `classifyStop` reacts to). + */ +const CODEX_EVENT_MAP: ReadonlyArray<[codexEvent: string, normalized: NormalizedEvent]> = [ + ["startup", "session.started"], + ["turn-start", "turn.started"], + ["command", "tool.pre"], + ["command-success", "tool.post"], + ["command-failure", "tool.failed"], + ["turn-end", "agent.stopped"], + ["shutdown", "session.ended"], +]; + +/** Escape a string for a TOML basic (double-quoted) value: backslash + quote. */ +function tomlString(value: string): string { + return `"${value.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`; +} + +/** + * Write the full Codex hook configuration into the worktree: the universal + * `hook.sh` and the PR-ready gate script (single-sourced from `@middle/core`, + * shared verbatim with the Claude adapter), plus a `.codex/config.toml` that + * sets the auto-mode policy and registers every taxonomy event in a `[hooks]` + * block. + * + * Auto mode lives in config, not the launch command (per spec): `approval_policy + * = "never"` + `sandbox = "workspace-write"` let the session run unattended. + * + * Each hook invokes the script **through `sh`** with an **absolute** path, + * double-quoted — same rationale as the Claude adapter: `sh` runs the file + * regardless of its exec bit (so a missing bit can't wedge the blocking + * command-gate), and the absolute path resolves from whatever subdirectory the + * agent has `cd`'d into. The PR-ready gate is registered as a second hook on the + * `command` (pre) event so it sits alongside the heartbeat, mirroring Claude's + * two PreToolUse hooks; the gate script self-filters to `gh pr ready`. + * + * The exact `[hooks]` schema is a live-run tightening point (see + * `planning/issues/60/decisions.md`); the array-of-tables shape is the baseline. + */ +export async function installHooks(opts: InstallHookOpts): Promise { + const scriptPath = join(opts.worktree, opts.hookScriptPath); + await mkdir(dirname(scriptPath), { recursive: true }); + await Bun.write(scriptPath, HOOK_SH); + await chmod(scriptPath, 0o755); + + const gateScriptPath = join(dirname(scriptPath), "pr-ready-gate.sh"); + await Bun.write(gateScriptPath, PR_READY_GATE_SH); + await chmod(gateScriptPath, 0o755); + + const lines: string[] = [ + "# middle-managed Codex configuration for headless dispatch.", + "# Auto mode: no approval prompts, workspace-write sandbox.", + 'approval_policy = "never"', + 'sandbox = "workspace-write"', + "", + ]; + for (const [codexEvent, normalized] of CODEX_EVENT_MAP) { + lines.push(`[[hooks.${codexEvent}]]`); + lines.push(`command = ${tomlString(`sh "${scriptPath}" ${normalized}`)}`); + lines.push(""); + // The blocking PR-ready gate rides the pre-command event, second so the + // heartbeat stays first (the gate self-filters to `gh pr ready`). + if (codexEvent === "command") { + lines.push(`[[hooks.${codexEvent}]]`); + lines.push(`command = ${tomlString(`sh "${gateScriptPath}"`)}`); + lines.push(""); + } + } + + const codexDir = join(opts.worktree, ".codex"); + await mkdir(codexDir, { recursive: true }); + await Bun.write(join(codexDir, "config.toml"), `${lines.join("\n")}\n`); +} diff --git a/packages/adapters/codex/src/index.ts b/packages/adapters/codex/src/index.ts index 196f3a47..553e3879 100644 --- a/packages/adapters/codex/src/index.ts +++ b/packages/adapters/codex/src/index.ts @@ -2,18 +2,115 @@ * @packageDocumentation * @module @middle/adapter-codex * - * The `AgentAdapter` implementation for the Codex CLI. Stub — source lands in - * build-spec Phase 10. + * The `AgentAdapter` implementation for the Codex CLI: launch command, auto-mode + * confirmation, transcript reads, stop classification, and rate-limit detection. + * Mirrors the Claude adapter; the per-CLI differences (config-driven auto mode, + * the `.codex/config.toml` `[hooks]` block, the rollout transcript format, and + * the rate-limit pattern) are isolated here behind the shared interface. * * Public surface: - * - None yet (stub). + * - `codexAdapter` — the `AgentAdapter` the dispatcher consumes + * - `detectNeedsLogin` — pane probe for a not-authenticated session * * Where things live: - * - `index.ts` — placeholder export until Phase 10. + * - `index.ts` — the adapter object + auto-mode confirmation (`enterAutoMode`) + * - `classify.ts` — stop classification + rate-limit detection + * - `hooks.ts` — `.codex/config.toml` hook installation + * - `prompt.ts` — the launch prompt text + * - `transcript.ts` — rollout-path resolution + state reads * * Gotchas: - * - None. + * - Auto mode is config-driven (`approval_policy = "never"` in `.codex/config.toml`), + * not a launch flag — so `enterAutoMode` sends no keystrokes; it only fails fast + * on a not-logged-in pane. Codex's observable bits (hook names, rollout format, + * rate-limit message, force-include syntax) are start-generous baselines pending + * live observation — see `planning/issues/60/decisions.md`. * * claude-md: false */ -export {}; +import type { AgentAdapter } from "@middle/core"; +import { capturePane } from "@middle/core"; +import { classifyStop, detectRateLimit } from "./classify.ts"; +import { installHooks } from "./hooks.ts"; +import { buildPromptText } from "./prompt.ts"; +import { readTranscriptState, resolveTranscriptPath } from "./transcript.ts"; + +const NEEDS_LOGIN_RE = + /please\s+(?:run\s+)?(?:codex\s+)?(?:login|sign[ -]?in)|not\s+(?:logged\s+in|authenticated|signed\s+in)|set\s+openai_api_key|invalid\s+(?:api\s+key|credentials)/i; + +/** Whether a captured pane shows a "you need to log in" message. */ +export function detectNeedsLogin(paneContent: string): boolean { + return NEEDS_LOGIN_RE.test(paneContent); +} + +/** Short window — covers Codex's boot before the startup hook fires. */ +const BOOT_DETECT_TIMEOUT_MS = 90_000; +const BOOT_POLL_INTERVAL_MS = 200; + +/** + * Confirm the session is ready for auto operation. Unlike Claude — which must + * dismiss folder-trust + bypass dialogs — Codex's auto mode comes entirely from + * `.codex/config.toml` (`approval_policy = "never"`, `sandbox = "workspace-write"`), + * so there are no approval dialogs to answer and no keystrokes to send. This + * method's only job is to fail fast on a not-authenticated pane so a dispatch + * against an unconfigured Codex surfaces a useful error instead of hanging on the + * startup-hook timeout. It returns as soon as the pane looks normal (no login + * prompt), or when the boot window elapses. + * + * NOTE (tightening point): if a live Codex turns out to show a first-run trust / + * onboarding prompt, the keystroke handling for it is added here. + */ +async function enterAutoMode(opts: { sessionName: string }): Promise { + const tag = `codex:${opts.sessionName}`; + const deadline = Date.now() + BOOT_DETECT_TIMEOUT_MS; + + while (Date.now() < deadline) { + const pane = await capturePane(opts.sessionName); + if (pane === null) { + console.error(`[${tag}] enterAutoMode: capture-pane failed (session gone) — stopping`); + return; + } + if (detectNeedsLogin(pane)) { + throw new Error( + "codex is not authenticated — run `codex login` (or set OPENAI_API_KEY) in a normal terminal, then retry the dispatch", + ); + } + // A non-empty, non-login pane means Codex has booted into its prompt; auto + // mode is already in force via config, so there's nothing to send. + if (pane.trim().length > 0) return; + await Bun.sleep(BOOT_POLL_INTERVAL_MS); + } + console.error(`[${tag}] enterAutoMode: boot window (${BOOT_DETECT_TIMEOUT_MS}ms) elapsed`); +} + +/** + * The Codex CLI agent adapter. Implements {@link AgentAdapter} for the + * dispatcher: builds the interactive launch command (`codex`, no `exec`; auto + * mode + sandbox set in `.codex/config.toml`), confirms readiness, reads the + * rollout transcript for stop classification, and detects rate-limit and + * needs-login states. + */ +export const codexAdapter: AgentAdapter = { + name: "codex", + readyEvent: "session.started", + installHooks, + buildLaunchCommand(opts) { + // Interactive — no `exec`, no prompt. approval_policy/sandbox live in + // .codex/config.toml (written by installHooks), not the command line. Env is + // injected by tmux at spawn time. + return { + argv: ["codex"], + env: { + MIDDLE_SESSION: opts.sessionName, + MIDDLE_SESSION_TOKEN: opts.sessionToken, + ...opts.envOverrides, + }, + }; + }, + buildPromptText, + enterAutoMode, + resolveTranscriptPath, + readTranscriptState, + classifyStop, + detectRateLimit, +}; diff --git a/packages/adapters/codex/src/prompt.ts b/packages/adapters/codex/src/prompt.ts new file mode 100644 index 00000000..b039b424 --- /dev/null +++ b/packages/adapters/codex/src/prompt.ts @@ -0,0 +1,33 @@ +import type { BuildPromptOpts } from "@middle/core"; + +/** + * The literal text `send-keys` carries into a Codex tmux session to start or + * continue the agent. Mirrors the Claude adapter's contract — the dispatch model + * (a single one-line submission that both invokes the skill and `@`-references + * the on-disk brief) is adapter-agnostic, and the skills are mirrored into + * `.codex/skills/` at bootstrap so the same slash-command invocation resolves. + * + * - `initial`: a slash command that force-invokes the implementing skill on the + * Epic; the skill reads `.middle/prompt.md` itself. + * - `resume` / `answer`: an `@`-reference that force-includes the on-disk brief. + * - `recommender` / `docs`: force-invokes the repo-level skill with the assembled + * context `@`-referenced. + * + * NOTE (tightening point): Codex's exact skill-invocation + force-include syntax + * is verified on a live run (see `planning/issues/60/decisions.md`). The `@`-path + * reference and slash-command form are the parity baseline. + */ +export function buildPromptText(opts: BuildPromptOpts): string { + switch (opts.kind) { + case "initial": + return `/implementing-github-issues implement #${opts.epicNumber}`; + case "resume": + return `Resuming this workstream — re-read the linked context and continue. @${opts.promptFile}`; + case "answer": + return `A human answered your open question — read the answer and continue. @${opts.promptFile}`; + case "recommender": + return `/recommending-github-issues @${opts.promptFile}`; + case "docs": + return `/documenting-the-repo @${opts.promptFile}`; + } +} diff --git a/packages/adapters/codex/src/transcript.ts b/packages/adapters/codex/src/transcript.ts new file mode 100644 index 00000000..acda5e8a --- /dev/null +++ b/packages/adapters/codex/src/transcript.ts @@ -0,0 +1,77 @@ +import { readFileSync } from "node:fs"; +import type { HookPayload, TranscriptState } from "@middle/core"; + +/** + * Locate Codex's on-disk session transcript (the "rollout" JSONL) from the + * startup-hook payload. Codex's payload key for the rollout path is verified on + * a live run; we accept the likely keys in priority order and throw if none is + * present, so a payload-shape mismatch fails fast at launch→drive rather than + * silently reading nothing. + */ +export function resolveTranscriptPath(payload: HookPayload): string { + for (const key of ["transcript_path", "rollout_path", "session_file", "path"] as const) { + const value = payload[key]; + if (typeof value === "string" && value.length > 0) return value; + } + throw new Error("Codex startup payload has no transcript/rollout path"); +} + +type RolloutLine = { + timestamp?: string; + type?: string; + payload?: { + type?: string; + role?: string; + name?: string; + info?: { total_token_usage?: Record }; + }; +}; + +/** + * Parse Codex's rollout JSONL for activity, turn count, last tool use, and + * context-token usage. Corrupt lines are skipped, not thrown on — the reconciler + * cron is the authoritative reader; this is the fast-path estimate. + * + * The rollout schema (field names, event-type strings) is the live-run tightening + * point. We read tolerantly: + * - `timestamp` on any line advances `lastActivity`. + * - an assistant `message` response item counts as a turn. + * - a `function_call` / `local_shell_call` response item updates `lastToolUse`. + * - a `token_count` event's `total_token_usage` (input + cached) is the context fill. + */ +export function readTranscriptState(transcriptPath: string): TranscriptState { + const raw = readFileSync(transcriptPath, "utf8"); + let lastActivity = ""; + let turnCount = 0; + let lastToolUse: string | null = null; + let contextTokens = 0; + + for (const line of raw.split("\n")) { + const trimmed = line.trim(); + if (trimmed === "") continue; + let entry: RolloutLine; + try { + entry = JSON.parse(trimmed) as RolloutLine; + } catch { + continue; + } + if (typeof entry.timestamp === "string") lastActivity = entry.timestamp; + + const p = entry.payload; + if (!p) continue; + + if (p.type === "message" && p.role === "assistant") turnCount++; + if ( + (p.type === "function_call" || p.type === "local_shell_call") && + typeof p.name === "string" + ) { + lastToolUse = p.name; + } + const usage = p.info?.total_token_usage; + if (usage) { + contextTokens = (usage.input_tokens ?? 0) + (usage.cached_input_tokens ?? 0); + } + } + + return { lastActivity, contextTokens, turnCount, lastToolUse }; +} diff --git a/packages/adapters/codex/test/adapter.test.ts b/packages/adapters/codex/test/adapter.test.ts new file mode 100644 index 00000000..b3a8f781 --- /dev/null +++ b/packages/adapters/codex/test/adapter.test.ts @@ -0,0 +1,493 @@ +import { afterEach, beforeEach, describe, expect, spyOn, test } from "bun:test"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import type { HookPayload } from "@middle/core"; +import { parse as parseToml } from "smol-toml"; +import { codexAdapter, detectNeedsLogin } from "../src/index.ts"; + +let dir: string; + +beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "middle-codex-")); +}); + +afterEach(() => { + rmSync(dir, { recursive: true, force: true }); +}); + +describe("codexAdapter identity", () => { + test("name is 'codex' and readyEvent is session.started", () => { + expect(codexAdapter.name).toBe("codex"); + expect(codexAdapter.readyEvent).toBe("session.started"); + }); +}); + +describe("buildLaunchCommand", () => { + test("argv launches interactive codex (no exec, no prompt)", () => { + const { argv } = codexAdapter.buildLaunchCommand({ + worktree: dir, + sessionName: "middle-60", + sessionToken: "tok", + }); + expect(argv).toEqual(["codex"]); + expect(argv).not.toContain("exec"); // never the headless/non-interactive subcommand + // approval_policy / sandbox live in .codex/config.toml, NOT the command line. + expect(argv.join(" ")).not.toContain("approval"); + expect(argv.join(" ")).not.toContain("sandbox"); + }); + + test("env carries the session vars and merges envOverrides", () => { + const { env } = codexAdapter.buildLaunchCommand({ + worktree: dir, + sessionName: "middle-60", + sessionToken: "secret-token", + envOverrides: { MIDDLE_DISPATCHER_URL: "http://127.0.0.1:4120", MIDDLE_EPIC: "60" }, + }); + expect(env.MIDDLE_SESSION).toBe("middle-60"); + expect(env.MIDDLE_SESSION_TOKEN).toBe("secret-token"); + expect(env.MIDDLE_DISPATCHER_URL).toBe("http://127.0.0.1:4120"); + expect(env.MIDDLE_EPIC).toBe("60"); + }); +}); + +describe("buildPromptText", () => { + test("initial force-invokes the skill via slash command on the epic", () => { + expect( + codexAdapter.buildPromptText({ + promptFile: ".middle/prompt.md", + kind: "initial", + epicNumber: 60, + }), + ).toBe("/implementing-github-issues implement #60"); + }); + + test("resume frames the @-reference as a continuation", () => { + const text = codexAdapter.buildPromptText({ + promptFile: ".middle/resume.md", + kind: "resume", + epicNumber: 60, + }); + expect(text).toContain("@.middle/resume.md"); + expect(text.toLowerCase()).toContain("resum"); + }); + + test("answer frames the @-reference as a human reply", () => { + const text = codexAdapter.buildPromptText({ + promptFile: ".middle/answer.md", + kind: "answer", + epicNumber: 60, + }); + expect(text).toContain("@.middle/answer.md"); + expect(text.toLowerCase()).toContain("answer"); + }); + + test("recommender force-invokes the recommender skill with the @-referenced context", () => { + expect( + codexAdapter.buildPromptText({ promptFile: ".middle/prompt.md", kind: "recommender" }), + ).toBe("/recommending-github-issues @.middle/prompt.md"); + }); + + test("docs force-invokes the documenting-the-repo skill with the @-referenced context", () => { + expect(codexAdapter.buildPromptText({ promptFile: ".middle/prompt.md", kind: "docs" })).toBe( + "/documenting-the-repo @.middle/prompt.md", + ); + }); + + // Compile-time contract (enforced by `bun run typecheck`): same discriminated + // union as Claude — a dispatched-issue kind cannot omit its Epic and the + // repo-level kinds cannot carry one. + test("type contract: dispatched-issue kinds require an epicNumber; recommender forbids one", () => { + // @ts-expect-error — 'initial' must carry an epicNumber + codexAdapter.buildPromptText({ promptFile: ".middle/prompt.md", kind: "initial" }); + // @ts-expect-error — 'resume' must carry an epicNumber + codexAdapter.buildPromptText({ promptFile: ".middle/prompt.md", kind: "resume" }); + // @ts-expect-error — 'answer' must carry an epicNumber + codexAdapter.buildPromptText({ promptFile: ".middle/prompt.md", kind: "answer" }); + // @ts-expect-error — 'recommender' runs against no Epic, so epicNumber is forbidden + codexAdapter.buildPromptText({ + promptFile: ".middle/prompt.md", + kind: "recommender", + epicNumber: 1, + }); + // @ts-expect-error — 'docs' runs against no Epic, so epicNumber is forbidden + codexAdapter.buildPromptText({ promptFile: ".middle/prompt.md", kind: "docs", epicNumber: 1 }); + expect(true).toBe(true); + }); +}); + +describe("resolveTranscriptPath", () => { + test("returns transcript_path from the startup payload", () => { + const payload: HookPayload = { + session_id: "abc", + transcript_path: "/home/u/.codex/sessions/2026/05/rollout-abc.jsonl", + }; + expect(codexAdapter.resolveTranscriptPath(payload)).toBe( + "/home/u/.codex/sessions/2026/05/rollout-abc.jsonl", + ); + }); + + test("falls back to rollout_path when transcript_path is absent", () => { + const payload: HookPayload = { rollout_path: "/home/u/.codex/sessions/r.jsonl" }; + expect(codexAdapter.resolveTranscriptPath(payload)).toBe("/home/u/.codex/sessions/r.jsonl"); + }); + + test("throws when the payload carries no session-file path", () => { + expect(() => codexAdapter.resolveTranscriptPath({ session_id: "abc" })).toThrow(); + }); +}); + +describe("readTranscriptState", () => { + test("parses activity, turn count, last tool use, and context tokens from a rollout", () => { + const transcript = join(dir, "rollout.jsonl"); + writeFileSync( + transcript, + [ + JSON.stringify({ + timestamp: "2026-05-14T12:00:00.000Z", + type: "session_meta", + payload: { id: "s1" }, + }), + JSON.stringify({ + timestamp: "2026-05-14T12:00:05.000Z", + type: "response_item", + payload: { type: "message", role: "assistant", content: [{ type: "text", text: "ok" }] }, + }), + JSON.stringify({ + timestamp: "2026-05-14T12:00:08.000Z", + type: "response_item", + payload: { type: "function_call", name: "shell" }, + }), + JSON.stringify({ + timestamp: "2026-05-14T12:00:10.000Z", + type: "event_msg", + payload: { + type: "token_count", + info: { total_token_usage: { input_tokens: 1500, cached_input_tokens: 500 } }, + }, + }), + "", // trailing blank line — must be tolerated + ].join("\n"), + ); + const state = codexAdapter.readTranscriptState(transcript); + expect(state.lastActivity).toBe("2026-05-14T12:00:10.000Z"); + expect(state.turnCount).toBe(1); // one assistant message + expect(state.lastToolUse).toBe("shell"); + expect(state.contextTokens).toBe(2000); // 1500 input + 500 cached + }); + + test("tolerates a corrupt line without throwing", () => { + const transcript = join(dir, "corrupt.jsonl"); + writeFileSync( + transcript, + [ + "{ not json", + JSON.stringify({ + timestamp: "2026-05-14T12:00:01.000Z", + type: "response_item", + payload: { type: "message", role: "assistant", content: [{ type: "text", text: "hi" }] }, + }), + ].join("\n"), + ); + const state = codexAdapter.readTranscriptState(transcript); + expect(state.turnCount).toBe(1); + expect(state.lastActivity).toBe("2026-05-14T12:00:01.000Z"); + }); +}); + +function writeMiddleDir(): { cwd: string; middle: string; transcript: string } { + const cwd = join(dir, "worktree"); + const middle = join(cwd, ".middle"); + mkdirSync(middle, { recursive: true }); + const transcript = join(dir, "stop.jsonl"); + writeFileSync(transcript, ""); + return { cwd, middle, transcript }; +} + +describe("classifyStop", () => { + test("sentinelPresent → asked-question, surfacing the blocked.json path + question/context", () => { + const { cwd, middle, transcript } = writeMiddleDir(); + writeFileSync( + join(middle, "blocked.json"), + JSON.stringify({ question: "A or B?", context: "Both pass typecheck." }), + ); + const result = codexAdapter.classifyStop({ + payload: { cwd }, + transcriptPath: transcript, + sentinelPresent: true, + worktree: cwd, + }); + expect(result.kind).toBe("asked-question"); + if (result.kind === "asked-question") { + expect(result.sentinelPath).toBe(join(cwd, ".middle", "blocked.json")); + expect(result.sentinel).toEqual({ question: "A or B?", context: "Both pass typecheck." }); + } + }); + + test("a blocked.json with kind 'complexity' surfaces the complexity pause kind", () => { + const { cwd, middle, transcript } = writeMiddleDir(); + writeFileSync( + join(middle, "blocked.json"), + JSON.stringify({ question: "4 designs, no winner", kind: "complexity" }), + ); + const result = codexAdapter.classifyStop({ + payload: { cwd }, + transcriptPath: transcript, + sentinelPresent: true, + worktree: cwd, + }); + expect(result.kind).toBe("asked-question"); + if (result.kind === "asked-question") { + expect(result.sentinel).toEqual({ question: "4 designs, no winner", kind: "complexity" }); + } + }); + + test("asked-question tolerates a malformed blocked.json (sentinel → null)", () => { + const { cwd, middle, transcript } = writeMiddleDir(); + writeFileSync(join(middle, "blocked.json"), "{ not valid json"); + const result = codexAdapter.classifyStop({ + payload: { cwd }, + transcriptPath: transcript, + sentinelPresent: true, + worktree: cwd, + }); + expect(result.kind).toBe("asked-question"); + if (result.kind === "asked-question") { + expect(result.sentinel).toBeNull(); + } + }); + + test.each([ + ["You've hit a rate limit, try later.", "rate limit phrase"], + ["Error 429: Too Many Requests", "429 status"], + ["too many requests — slow down", "too many requests phrase"], + ["ratelimit exceeded", "ratelimit no-space"], + ])("rate-limit signal %p in the transcript tail → rate-limited (%s)", (text) => { + const { cwd, transcript } = writeMiddleDir(); + writeFileSync( + transcript, + JSON.stringify({ + timestamp: "2026-05-14T12:30:00.000Z", + type: "response_item", + payload: { type: "message", role: "assistant", content: [{ type: "text", text }] }, + }), + ); + const result = codexAdapter.classifyStop({ + payload: { cwd }, + transcriptPath: transcript, + sentinelPresent: false, + worktree: cwd, + }); + expect(result.kind).toBe("rate-limited"); + }); + + test("done.json sentinel → done", () => { + const { cwd, middle, transcript } = writeMiddleDir(); + writeFileSync(join(middle, "done.json"), JSON.stringify({ pr: 155 })); + const result = codexAdapter.classifyStop({ + payload: { cwd }, + transcriptPath: transcript, + sentinelPresent: false, + worktree: cwd, + }); + expect(result.kind).toBe("done"); + }); + + test("failed.json sentinel → failed, carrying its reason", () => { + const { cwd, middle, transcript } = writeMiddleDir(); + writeFileSync(join(middle, "failed.json"), JSON.stringify({ reason: "boom" })); + const result = codexAdapter.classifyStop({ + payload: { cwd }, + transcriptPath: transcript, + sentinelPresent: false, + worktree: cwd, + }); + expect(result.kind).toBe("failed"); + if (result.kind === "failed") expect(result.reason).toBe("boom"); + }); + + test("sentinels are found even when payload.cwd is a worktree subdirectory", () => { + const { cwd: worktree, middle, transcript } = writeMiddleDir(); + writeFileSync(join(middle, "done.json"), JSON.stringify({ pr: 155 })); + const subdir = join(worktree, "src"); + mkdirSync(subdir); + const result = codexAdapter.classifyStop({ + payload: { cwd: subdir }, + transcriptPath: transcript, + sentinelPresent: false, + worktree, + }); + expect(result.kind).toBe("done"); + }); + + test("nothing notable → bare-stop", () => { + const { cwd, transcript } = writeMiddleDir(); + const result = codexAdapter.classifyStop({ + payload: { cwd }, + transcriptPath: transcript, + sentinelPresent: false, + worktree: cwd, + }); + expect(result.kind).toBe("bare-stop"); + }); +}); + +describe("detectRateLimit", () => { + test("matches a rate-limit signal in the transcript tail", () => { + const transcript = join(dir, "rl.jsonl"); + writeFileSync( + transcript, + JSON.stringify({ + type: "response_item", + payload: { + type: "message", + role: "assistant", + content: [{ type: "text", text: "429 Too Many Requests" }], + }, + }), + ); + const result = codexAdapter.detectRateLimit!({ payload: {}, transcriptPath: transcript }); + expect(result).not.toBeNull(); + expect(result!.source).toBe("stop-hook"); + }); + + test("returns null when no rate-limit signal is present", () => { + const transcript = join(dir, "ok.jsonl"); + writeFileSync(transcript, JSON.stringify({ type: "response_item", payload: { text: "fine" } })); + expect(codexAdapter.detectRateLimit!({ payload: {}, transcriptPath: transcript })).toBeNull(); + }); +}); + +describe("installHooks", () => { + async function installInto(worktree: string): Promise { + await codexAdapter.installHooks({ + worktree, + hookScriptPath: ".middle/hooks/hook.sh", + dispatcherUrl: "http://127.0.0.1:4120", + sessionName: "middle-60", + sessionToken: "tok", + epicNumber: 60, + }); + } + + test("writes .codex/config.toml with auto-mode settings and a [hooks] block", async () => { + const worktree = join(dir, "wt-cfg"); + mkdirSync(worktree, { recursive: true }); + await installInto(worktree); + const raw = await Bun.file(join(worktree, ".codex", "config.toml")).text(); + const cfg = parseToml(raw) as { + approval_policy: string; + sandbox: string; + hooks: Record>; + }; + expect(cfg.approval_policy).toBe("never"); + expect(cfg.sandbox).toBe("workspace-write"); + expect(cfg.hooks).toBeDefined(); + }); + + test("maps each Codex hook event to the normalized taxonomy via the absolute hook path", async () => { + const worktree = join(dir, "wt-map"); + mkdirSync(worktree, { recursive: true }); + await installInto(worktree); + const raw = await Bun.file(join(worktree, ".codex", "config.toml")).text(); + const cfg = parseToml(raw) as { hooks: Record> }; + const abs = join(worktree, ".middle/hooks/hook.sh"); + const cmd = (event: string): string => cfg.hooks[event]![0]!.command; + expect(cmd("startup")).toBe(`sh "${abs}" session.started`); + expect(cmd("turn-start")).toBe(`sh "${abs}" turn.started`); + expect(cmd("command")).toBe(`sh "${abs}" tool.pre`); + expect(cmd("command-success")).toBe(`sh "${abs}" tool.post`); + expect(cmd("command-failure")).toBe(`sh "${abs}" tool.failed`); + expect(cmd("turn-end")).toBe(`sh "${abs}" agent.stopped`); + expect(cmd("shutdown")).toBe(`sh "${abs}" session.ended`); + }); + + test("registers the full Codex hook event set", async () => { + const worktree = join(dir, "wt-events"); + mkdirSync(worktree, { recursive: true }); + await installInto(worktree); + const raw = await Bun.file(join(worktree, ".codex", "config.toml")).text(); + const cfg = parseToml(raw) as { hooks: Record }; + expect(Object.keys(cfg.hooks).sort()).toEqual([ + "command", + "command-failure", + "command-success", + "shutdown", + "startup", + "turn-end", + "turn-start", + ]); + }); + + test("writes an executable hook.sh into the worktree at the configured path", async () => { + const worktree = join(dir, "wt-script"); + mkdirSync(worktree, { recursive: true }); + await installInto(worktree); + const scriptPath = join(worktree, ".middle/hooks/hook.sh"); + const contents = await Bun.file(scriptPath).text(); + expect(contents).toStartWith("#!/bin/sh"); + expect(contents).toContain("curl"); + expect(contents).toContain("${MIDDLE_DISPATCHER_URL}"); + const { stat } = await import("node:fs/promises"); + expect(((await stat(scriptPath)).mode & 0o111) !== 0).toBe(true); + }); + + test("registers the PR-ready gate as a second hook on the command (pre) event", async () => { + const worktree = join(dir, "wt-gate"); + mkdirSync(worktree, { recursive: true }); + await installInto(worktree); + const raw = await Bun.file(join(worktree, ".codex", "config.toml")).text(); + const cfg = parseToml(raw) as { hooks: Record> }; + const commandHooks = cfg.hooks.command!; + // First entry stays the universal heartbeat (tool.pre); the gate is added second. + expect(commandHooks[0]!.command).toBe( + `sh "${join(worktree, ".middle/hooks/hook.sh")}" tool.pre`, + ); + expect(commandHooks[1]!.command).toBe( + `sh "${join(worktree, ".middle/hooks/pr-ready-gate.sh")}"`, + ); + }); + + test("writes an executable pr-ready-gate.sh that POSTs to /gates/pr-ready", async () => { + const worktree = join(dir, "wt-gate-script"); + mkdirSync(worktree, { recursive: true }); + await installInto(worktree); + const scriptPath = join(worktree, ".middle/hooks/pr-ready-gate.sh"); + const contents = await Bun.file(scriptPath).text(); + expect(contents).toStartWith("#!/bin/sh"); + expect(contents).toContain("${MIDDLE_DISPATCHER_URL}/gates/pr-ready"); + expect(contents).toContain("exit 2"); + const { stat } = await import("node:fs/promises"); + expect(((await stat(scriptPath)).mode & 0o111) !== 0).toBe(true); + }); +}); + +describe("detectNeedsLogin", () => { + test("matches representative not-authenticated messages", () => { + expect(detectNeedsLogin("Please run codex login to authenticate")).toBe(true); + expect(detectNeedsLogin("You are not logged in")).toBe(true); + expect(detectNeedsLogin("Not authenticated — please sign in")).toBe(true); + expect(detectNeedsLogin("set OPENAI_API_KEY or run codex login")).toBe(true); + }); + + test("does not match normal pane content", () => { + expect(detectNeedsLogin("> ")).toBe(false); + expect(detectNeedsLogin("Codex CLI v0.1.0")).toBe(false); + expect(detectNeedsLogin("")).toBe(false); + }); +}); + +describe("enterAutoMode", () => { + test("returns immediately when the target session does not exist", async () => { + const errSpy = spyOn(console, "error").mockImplementation(() => {}); + const start = Date.now(); + try { + await expect( + codexAdapter.enterAutoMode({ sessionName: "middle-does-not-exist" }), + ).resolves.toBeUndefined(); + } finally { + errSpy.mockRestore(); + } + expect(Date.now() - start).toBeLessThan(2000); + }); +}); diff --git a/planning/issues/60/decisions.md b/planning/issues/60/decisions.md new file mode 100644 index 00000000..31f447b4 --- /dev/null +++ b/planning/issues/60/decisions.md @@ -0,0 +1,55 @@ +# Decisions — Issue #60 (CodexAdapter) + +## Codex's observable behaviors are implemented as the spec's "start generous" baseline +**File(s):** `packages/adapters/codex/src/*` +**Date:** 2026-05-26 + +**Decision:** The Codex CLI is not installed in the dispatch sandbox (no `codex` +binary, no `~/.codex`, no OpenAI credentials). The adapter is implemented from the +build spec's "CodexAdapter specifics" + "Normalized event taxonomy" tables, with +every empirically-observed bit (hook event names, transcript location/format, +rate-limit message, force-include syntax, auto-mode mechanism) coded as the spec's +documented starting point and marked as a tightening point. + +**Why:** The spec explicitly defers these to the Codex phase ("observed during the +Codex phase", "filled in during Phase 10", rate-limit regex "to be tightened as +patterns are observed"). The `AgentAdapter` interface is designed precisely so these +swappable bits live behind it. Blocking the whole Epic on an un-installable CLI would +strand the adapter impl + selection logic, which are fully specified and testable. + +**Evidence:** Build spec lines 790–815 (CodexAdapter specifics + event taxonomy); +ClaudeAdapter as the structural template. + +## Codex hook event → normalized event mapping +**File(s):** `packages/adapters/codex/src/hooks.ts` +**Date:** 2026-05-26 + +**Decision:** Mirror Claude's `CLAUDE_EVENT_MAP` with Codex's event names from the +taxonomy table's "Trigger (Codex)" column: +`startup→session.started`, `turn-start→turn.started`, `command→tool.pre`, +`command-success→tool.post`, `command-failure→tool.failed`, `turn-end→agent.stopped`, +`shutdown→session.ended`. No `agent.notification` / `agent.subagent-stopped` (Codex +has no equivalent per the table). Written into `/.codex/config.toml` as a +`[hooks]` array-of-tables (`[[hooks.]]` with a `command` key) so multiple +hooks can share one event (the heartbeat + the PR-ready gate on `command`). + +**Why:** Direct read of the spec's taxonomy table. Array-of-tables is the simplest +TOML shape that supports >1 hook per event, which the `command` (pre) event needs. + +**Evidence:** Build spec lines 803–814 (taxonomy), 790–795 (CodexAdapter specifics). + +## classifyStop sentinel logic is adapter-agnostic (only the rate-limit regex differs) +**File(s):** `packages/adapters/codex/src/classify.ts` +**Date:** 2026-05-26 + +**Decision:** Codex's `classifyStop` resolves the `.middle/{blocked,done,failed}.json` +sentinels identically to Claude — that logic is not Codex-specific. Only the +rate-limit regex changes: `/rate.?limit|429|too many requests/i` (spec's generous +starting pattern). Noted as a #63 candidate to extract the shared sentinel logic into +`@middle/core`. + +**Why:** The sentinel files are written by the universal skill, not the CLI, so their +resolution is the same for every adapter. Duplicating it now keeps #61 self-contained; +#63 is where cross-adapter shared logic gets factored. + +**Evidence:** Build spec line 795 (Codex rate-limit pattern); Claude `classify.ts`. From d31200421d57f1543b8b4bead899bbbb7bd23baf Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Tue, 26 May 2026 13:50:10 -0400 Subject: [PATCH 03/23] feat(adapters): per-CLI adapter selection across implementer + recommender MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - core/select-adapter.ts: selectAdapter() encodes the spec's 4-rule selection (agent: label override → default_adapter → rate-limit switch when portable → otherwise skip). A label pin is never switched away from. - dispatcher/adapters.ts: shared registry (getAdapter/isKnownAdapter/ knownAdapters) replacing the two hardcoded claude-only getAdapter copies; registers claude + codex. - main.ts/docs.ts/recommender-run.ts/documentation-run.ts: the four claude-only gates now validate against the registry, so codex is dispatchable end-to-end. - mm dispatch: --adapter override, else honors the Epic's agent: label via selectAdapter (best-effort gh label fetch, falls back to the default). Recommender skill prose + state-issue schema already record the per-Epic adapter in the Ready table. Tests: 12 selectAdapter cases (label/default/switch/skip) + mm dispatch selection wiring; documentation-run test flipped to accept a known non-default adapter and reject an unknown one. --- bun.lock | 4 + packages/cli/src/commands/dispatch.ts | 75 +++++++++- packages/cli/src/commands/docs.ts | 9 +- packages/cli/src/index.ts | 8 +- packages/cli/test/dispatch.test.ts | 77 ++++++++++ packages/core/src/index.ts | 5 + packages/core/src/select-adapter.ts | 115 +++++++++++++++ packages/core/test/select-adapter.test.ts | 137 ++++++++++++++++++ packages/dispatcher/package.json | 1 + packages/dispatcher/src/adapters.ts | 38 +++++ packages/dispatcher/src/documentation-run.ts | 9 +- packages/dispatcher/src/main.ts | 14 +- packages/dispatcher/src/recommender-run.ts | 9 +- .../dispatcher/test/documentation-run.test.ts | 17 ++- 14 files changed, 480 insertions(+), 38 deletions(-) create mode 100644 packages/core/src/select-adapter.ts create mode 100644 packages/core/test/select-adapter.test.ts create mode 100644 packages/dispatcher/src/adapters.ts diff --git a/bun.lock b/bun.lock index 33306a63..2beedab5 100644 --- a/bun.lock +++ b/bun.lock @@ -24,6 +24,9 @@ "dependencies": { "@middle/core": "workspace:*", }, + "devDependencies": { + "smol-toml": "^1.6.1", + }, }, "packages/cli": { "name": "@middle/cli", @@ -71,6 +74,7 @@ "version": "0.0.0", "dependencies": { "@middle/adapter-claude": "workspace:*", + "@middle/adapter-codex": "workspace:*", "@middle/core": "workspace:*", "@middle/docs": "workspace:*", "@middle/state-issue": "workspace:*", diff --git a/packages/cli/src/commands/dispatch.ts b/packages/cli/src/commands/dispatch.ts index 03cc7456..5f2a05e6 100644 --- a/packages/cli/src/commands/dispatch.ts +++ b/packages/cli/src/commands/dispatch.ts @@ -1,17 +1,24 @@ import { existsSync } from "node:fs"; import { basename, join, resolve } from "node:path"; -import { loadConfig } from "@middle/core"; +import { loadConfig, selectAdapter } from "@middle/core"; import { type StartOptions, runStart } from "./start.ts"; export type DispatchOptions = { /** Override the global config path (defaults to `~/.middle/config.toml`). */ configPath?: string; + /** + * Explicit adapter override. When set, it wins over the Epic's `agent:` + * label and the default adapter (it must still be a configured adapter). + */ + adapter?: string; /** Override the daemon spawn (defaults to {@link runStart}). Returns its exit code. */ startDaemon?: (opts: StartOptions) => number; /** Readiness-poll budget after a spawn before giving up (default 10000ms). */ healthTimeoutMs?: number; /** Backoff between `/control/events` reconnect attempts (default 1000ms). */ reconnectBackoffMs?: number; + /** Injected Epic-label fetch (defaults to a `gh` call). Tests override it. */ + fetchLabels?: (repoSlug: string, epic: number) => Promise; }; const DEFAULT_HEALTH_TIMEOUT_MS = 10_000; @@ -60,6 +67,36 @@ async function deriveRepoSlug(repoPath: string): Promise { return basename(repoPath); } +/** + * Fetch an Epic's label names via `gh`, for the `agent:` adapter override. + * Best-effort: a non-zero exit (offline, unauthenticated, missing issue) yields + * an empty list so selection falls back to the default adapter rather than + * blocking the dispatch. + */ +async function fetchEpicLabels(repoSlug: string, epic: number): Promise { + const proc = Bun.spawn( + [ + "gh", + "issue", + "view", + String(epic), + "-R", + repoSlug, + "--json", + "labels", + "--jq", + ".labels[].name", + ], + { stdout: "pipe", stderr: "ignore" }, + ); + const out = await new Response(proc.stdout).text(); + if ((await proc.exited) !== 0) return []; + return out + .split("\n") + .map((line) => line.trim()) + .filter((line) => line.length > 0); +} + /** Probe `GET /health`; true only on `{ ok: true }`. Connection errors are "down", not a throw. */ async function probeHealth(base: string): Promise { try { @@ -239,15 +276,39 @@ export async function runDispatch( return 1; } - const adapterName = config.global.defaultAdapter; - if (adapterName !== "claude") { - console.error( - `mm dispatch: only the 'claude' adapter is available in Phase 1 (config asks for "${adapterName}")`, + const repoSlug = await deriveRepoSlug(repoPath); + + // Resolve the adapter: an explicit --adapter wins; otherwise honor an + // `agent:` label on the Epic, falling back to the default adapter + // (selectAdapter rules 1–2). The rate-limit switch (rules 3–4) is the + // recommender/auto-dispatch path's concern — a force-dispatch is an explicit + // human act, so it doesn't second-guess a rate-limited choice here. + const available = Object.keys(config.adapters); + let adapterName: string; + if (opts.adapter !== undefined) { + if (!available.includes(opts.adapter)) { + console.error( + `mm dispatch: adapter "${opts.adapter}" is not configured (available: ${available.join(", ") || "(none)"})`, + ); + return 1; + } + adapterName = opts.adapter; + } else { + const labels = await (opts.fetchLabels ?? fetchEpicLabels)(repoSlug, epicNumber).catch( + () => [], ); - return 1; + try { + adapterName = selectAdapter({ + labels, + defaultAdapter: config.global.defaultAdapter, + available, + }).adapter; + } catch (error) { + console.error(`mm dispatch: ${(error as Error).message}`); + return 1; + } } - const repoSlug = await deriveRepoSlug(repoPath); const base = `http://127.0.0.1:${config.global.dispatcherPort}`; // Ensure the daemon is up. If /health is down, spawn it and poll until ready. diff --git a/packages/cli/src/commands/docs.ts b/packages/cli/src/commands/docs.ts index 2f91b91e..d73458c8 100644 --- a/packages/cli/src/commands/docs.ts +++ b/packages/cli/src/commands/docs.ts @@ -1,8 +1,7 @@ import { existsSync } from "node:fs"; import { join } from "node:path"; -import { claudeAdapter } from "@middle/adapter-claude"; -import type { AgentAdapter } from "@middle/core"; import { loadConfig } from "@middle/core"; +import { getAdapter } from "@middle/dispatcher/src/adapters.ts"; import { dispatchDocumentation, resolveDocumentationOptions, @@ -15,12 +14,6 @@ export type RunDocsOptions = { dispatch?: typeof dispatchDocumentation; }; -/** Phase 7 adapter registry — only `claude` is implemented. */ -function getAdapter(name: string): AgentAdapter { - if (name !== "claude") throw new Error(`unknown adapter: ${name}`); - return claudeAdapter; -} - /** * `mm docs ` — trigger a documentation run for the given repo. Resolves * the repo's docs target (Starlight / Docusaurus / MkDocs / TypeDoc, or the diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index 7ab424cd..a4b42743 100755 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -113,7 +113,13 @@ program .description("Force-dispatch an Epic (or standalone issue) through the implementation workflow") .argument("", "path to the local repo checkout") .argument("", "Epic or standalone issue number") - .action(async (repo: string, epic: string) => process.exit(await runDispatch(repo, epic))); + .option( + "--adapter ", + "adapter to dispatch with (overrides the agent: label and default)", + ) + .action(async (repo: string, epic: string, opts: { adapter?: string }) => + process.exit(await runDispatch(repo, epic, { adapter: opts.adapter })), + ); program .command("run-recommender") diff --git a/packages/cli/test/dispatch.test.ts b/packages/cli/test/dispatch.test.ts index 6c07034c..9f070a89 100644 --- a/packages/cli/test/dispatch.test.ts +++ b/packages/cli/test/dispatch.test.ts @@ -275,6 +275,83 @@ describe("runDispatch — control client", () => { } }); + test("--adapter overrides the agent label and the default, and is sent to the daemon", async () => { + const repoPath = makeRepo(); + const { server, dispatchBodies } = fakeDaemon({ states: ["completed"] }); + const configPath = writeConfig(server.port); + const restore = silenceLogs(); + try { + const code = await runDispatch(repoPath, "6", { + configPath, + startDaemon: () => 0, + adapter: "codex", + fetchLabels: async () => ["agent:claude"], // ignored — explicit --adapter wins + }); + expect(code).toBe(0); + expect((dispatchBodies[0] as { adapter: string }).adapter).toBe("codex"); + } finally { + restore(); + server.stop(true); + } + }); + + test("an agent: label on the Epic selects that adapter", async () => { + const repoPath = makeRepo(); + const { server, dispatchBodies } = fakeDaemon({ states: ["completed"] }); + const configPath = writeConfig(server.port); + const restore = silenceLogs(); + try { + const code = await runDispatch(repoPath, "6", { + configPath, + startDaemon: () => 0, + fetchLabels: async () => ["epic", "agent:codex"], + }); + expect(code).toBe(0); + expect((dispatchBodies[0] as { adapter: string }).adapter).toBe("codex"); + } finally { + restore(); + server.stop(true); + } + }); + + test("no agent label falls back to the default adapter", async () => { + const repoPath = makeRepo(); + const { server, dispatchBodies } = fakeDaemon({ states: ["completed"] }); + const configPath = writeConfig(server.port); + const restore = silenceLogs(); + try { + const code = await runDispatch(repoPath, "6", { + configPath, + startDaemon: () => 0, + fetchLabels: async () => ["epic", "phase:10"], + }); + expect(code).toBe(0); + expect((dispatchBodies[0] as { adapter: string }).adapter).toBe("claude"); + } finally { + restore(); + server.stop(true); + } + }); + + test("an unconfigured --adapter is rejected (exit 1) before any dispatch", async () => { + const repoPath = makeRepo(); + const { server, dispatchBodies } = fakeDaemon({ states: ["completed"] }); + const configPath = writeConfig(server.port); + const restore = silenceLogs(); + try { + const code = await runDispatch(repoPath, "6", { + configPath, + startDaemon: () => 0, + adapter: "ghost", + }); + expect(code).toBe(1); + expect(dispatchBodies).toEqual([]); // never reached the daemon + } finally { + restore(); + server.stop(true); + } + }); + test("friendly failure (exit 1) when the daemon can't be reached or started", async () => { const repoPath = makeRepo(); // A port with nothing listening. diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 3d8b9d3a..2914132d 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -11,10 +11,12 @@ * - normalized hook events: `NormalizedEvent`, `HookEnvelope`, `isNormalizedEvent` * - `HOOK_SH`, `PR_READY_GATE_SH` — the hook shell-script payloads * - tmux-TUI helpers: `capturePane`, `sendText`, `sendKeys`, `pollPaneFor` + * - `selectAdapter` — per-CLI adapter selection (label override / default / rate-limit switch) * * Where things live: * - `config.ts` — config schema + `loadConfig` * - `adapter.ts` — `AgentAdapter` + option/result types + * - `select-adapter.ts` — the four-rule adapter selection algorithm * - `events.ts` — the normalized hook-event vocabulary * - `hook-script.ts` — the hook shell scripts adapters install * - `tmux-tui.ts` — low-level tmux pane capture / key sending @@ -58,3 +60,6 @@ export type { export { capturePane, sendText, sendKeys, pollPaneFor } from "./tmux-tui.ts"; export type { SendKeysOpts, PollPaneOpts } from "./tmux-tui.ts"; + +export { selectAdapter } from "./select-adapter.ts"; +export type { AdapterSelectionInput, AdapterSelection } from "./select-adapter.ts"; diff --git a/packages/core/src/select-adapter.ts b/packages/core/src/select-adapter.ts new file mode 100644 index 00000000..b66736a0 --- /dev/null +++ b/packages/core/src/select-adapter.ts @@ -0,0 +1,115 @@ +/** + * Per-CLI adapter selection — the build spec's four-rule algorithm + * (build spec → "Phase 3" → "Adapter selection", and the `recommending-github-issues` + * skill). The canonical TS encoding of the rules the recommender applies in prose + * when it fills the state issue's Ready table, and the resolver `mm dispatch` + * uses to honor an `agent:` label. + * + * Rules, in order: + * 1. An explicit `agent:` label on the Epic overrides everything. + * 2. Otherwise `config.default_adapter`. + * 3. If the chosen adapter is rate-limited AND the task is portable (and the + * choice was not pinned by a label), switch to an available adapter. + * 4. Otherwise leave it as chosen and mark `skip` — auto-dispatch skips a + * rate-limited adapter until its limit resets. + * + * A label pin is absolute: rule 3 never switches away from a human's explicit + * `agent:` choice (a pinned-but-rate-limited adapter is skipped, not + * switched). See `planning/issues/60/decisions.md` for why. + */ +export type AdapterSelectionInput = { + /** Labels on the Epic; an `agent:` among them pins the adapter. */ + labels: readonly string[]; + /** `config.global.default_adapter`. */ + defaultAdapter: string; + /** Configured + implemented adapter names a choice is validated against. */ + available: readonly string[]; + /** Adapters whose rate limit has not yet reset. Defaults to none. */ + rateLimited?: ReadonlySet; + /** Whether the task can run on any adapter (the recommender's judgment). Defaults to false. */ + portable?: boolean; +}; + +/** The resolved adapter plus how it was chosen and whether dispatch should skip it. */ +export type AdapterSelection = { + /** The selected adapter name. */ + adapter: string; + /** How it was chosen: a label pin, the default, or a rate-limit switch. */ + source: "label" | "default" | "switched"; + /** True when the chosen adapter is rate-limited and could not be switched — auto-dispatch skips it. */ + skip: boolean; + /** One-line, human-readable rationale for the decision. */ + reason: string; +}; + +const AGENT_LABEL_RE = /^agent:(.+)$/; + +/** Extract the distinct `agent:` overrides from a label set. */ +function parseOverrides(labels: readonly string[]): string[] { + const names = labels + .map((label) => AGENT_LABEL_RE.exec(label.trim())?.[1]?.trim()) + .filter((name): name is string => name !== undefined && name.length > 0); + return [...new Set(names)]; +} + +/** + * Resolve which adapter an Epic should dispatch on, applying the four selection + * rules. Throws on an unresolvable configuration — conflicting `agent:` labels, + * or a label/default naming an adapter that isn't in `available` — so a + * misconfiguration surfaces loudly at dispatch rather than silently picking the + * wrong CLI. + */ +export function selectAdapter(input: AdapterSelectionInput): AdapterSelection { + const available = new Set(input.available); + const rateLimited = input.rateLimited ?? new Set(); + const portable = input.portable ?? false; + + const overrides = parseOverrides(input.labels); + if (overrides.length > 1) { + throw new Error( + `conflicting adapter labels: ${overrides.map((name) => `agent:${name}`).join(", ")}`, + ); + } + + const pinned = overrides.length === 1; + const chosen = pinned ? overrides[0]! : input.defaultAdapter; + if (!available.has(chosen)) { + const list = [...available].join(", ") || "(none)"; + throw new Error( + pinned + ? `agent:${chosen} label names an adapter that is not configured (available: ${list})` + : `default adapter "${chosen}" is not configured (available: ${list})`, + ); + } + + if (!rateLimited.has(chosen)) { + return { + adapter: chosen, + source: pinned ? "label" : "default", + skip: false, + reason: pinned ? `pinned by agent:${chosen} label` : `default adapter ${chosen}`, + }; + } + + // The chosen adapter is rate-limited. A label pin is never switched away from. + if (!pinned && portable) { + const alternative = input.available.find((name) => name !== chosen && !rateLimited.has(name)); + if (alternative !== undefined) { + return { + adapter: alternative, + source: "switched", + skip: false, + reason: `${chosen} rate-limited; switched to ${alternative} (portable task)`, + }; + } + } + + return { + adapter: chosen, + source: pinned ? "label" : "default", + skip: true, + reason: pinned + ? `pinned ${chosen} is rate-limited; left for auto-dispatch to skip until reset` + : `${chosen} rate-limited${portable ? " with no available alternative" : " and task not portable"}; left for auto-dispatch to skip until reset`, + }; +} diff --git a/packages/core/test/select-adapter.test.ts b/packages/core/test/select-adapter.test.ts new file mode 100644 index 00000000..14d0010e --- /dev/null +++ b/packages/core/test/select-adapter.test.ts @@ -0,0 +1,137 @@ +import { describe, expect, test } from "bun:test"; +import { selectAdapter } from "../src/select-adapter.ts"; + +const AVAILABLE = ["claude", "codex"] as const; + +describe("selectAdapter — rule 1: explicit agent: label overrides", () => { + test("an agent: label pins that adapter over the default", () => { + const result = selectAdapter({ + labels: ["epic", "agent:codex", "phase:10"], + defaultAdapter: "claude", + available: AVAILABLE, + }); + expect(result.adapter).toBe("codex"); + expect(result.source).toBe("label"); + expect(result.skip).toBe(false); + }); + + test("whitespace around the label and name is tolerated", () => { + const result = selectAdapter({ + labels: [" agent:codex "], + defaultAdapter: "claude", + available: AVAILABLE, + }); + expect(result.adapter).toBe("codex"); + expect(result.source).toBe("label"); + }); + + test("conflicting agent labels throw", () => { + expect(() => + selectAdapter({ + labels: ["agent:claude", "agent:codex"], + defaultAdapter: "claude", + available: AVAILABLE, + }), + ).toThrow(/conflicting adapter labels/); + }); + + test("duplicate agent labels for the same name are not a conflict", () => { + const result = selectAdapter({ + labels: ["agent:codex", "agent:codex"], + defaultAdapter: "claude", + available: AVAILABLE, + }); + expect(result.adapter).toBe("codex"); + }); + + test("a label naming an unconfigured adapter throws", () => { + expect(() => + selectAdapter({ labels: ["agent:ghost"], defaultAdapter: "claude", available: AVAILABLE }), + ).toThrow(/not configured/); + }); +}); + +describe("selectAdapter — rule 2: default adapter", () => { + test("with no agent label, the default adapter is chosen", () => { + const result = selectAdapter({ + labels: ["epic", "phase:10"], + defaultAdapter: "claude", + available: AVAILABLE, + }); + expect(result.adapter).toBe("claude"); + expect(result.source).toBe("default"); + expect(result.skip).toBe(false); + }); + + test("a default adapter that isn't configured throws", () => { + expect(() => + selectAdapter({ labels: [], defaultAdapter: "ghost", available: AVAILABLE }), + ).toThrow(/default adapter "ghost" is not configured/); + }); +}); + +describe("selectAdapter — rule 3: switch away from a rate-limited adapter when portable", () => { + test("a rate-limited default switches to an available adapter for a portable task", () => { + const result = selectAdapter({ + labels: [], + defaultAdapter: "claude", + available: AVAILABLE, + rateLimited: new Set(["claude"]), + portable: true, + }); + expect(result.adapter).toBe("codex"); + expect(result.source).toBe("switched"); + expect(result.skip).toBe(false); + }); + + test("a label pin is never switched away from, even when rate-limited and portable", () => { + const result = selectAdapter({ + labels: ["agent:claude"], + defaultAdapter: "codex", + available: AVAILABLE, + rateLimited: new Set(["claude"]), + portable: true, + }); + expect(result.adapter).toBe("claude"); + expect(result.source).toBe("label"); + expect(result.skip).toBe(true); // pinned + rate-limited → skip, not switch + }); +}); + +describe("selectAdapter — rule 4: otherwise leave it for auto-dispatch to skip", () => { + test("a rate-limited default with a non-portable task is left and marked skip", () => { + const result = selectAdapter({ + labels: [], + defaultAdapter: "claude", + available: AVAILABLE, + rateLimited: new Set(["claude"]), + portable: false, + }); + expect(result.adapter).toBe("claude"); + expect(result.skip).toBe(true); + expect(result.reason).toContain("not portable"); + }); + + test("a portable task with no non-rate-limited alternative is left and marked skip", () => { + const result = selectAdapter({ + labels: [], + defaultAdapter: "claude", + available: AVAILABLE, + rateLimited: new Set(["claude", "codex"]), + portable: true, + }); + expect(result.adapter).toBe("claude"); + expect(result.skip).toBe(true); + }); + + test("a non-rate-limited choice is never marked skip", () => { + const result = selectAdapter({ + labels: [], + defaultAdapter: "claude", + available: AVAILABLE, + rateLimited: new Set(["codex"]), + }); + expect(result.adapter).toBe("claude"); + expect(result.skip).toBe(false); + }); +}); diff --git a/packages/dispatcher/package.json b/packages/dispatcher/package.json index 7279295a..a67ead63 100644 --- a/packages/dispatcher/package.json +++ b/packages/dispatcher/package.json @@ -6,6 +6,7 @@ "main": "src/main.ts", "dependencies": { "@middle/adapter-claude": "workspace:*", + "@middle/adapter-codex": "workspace:*", "@middle/core": "workspace:*", "@middle/docs": "workspace:*", "@middle/state-issue": "workspace:*", diff --git a/packages/dispatcher/src/adapters.ts b/packages/dispatcher/src/adapters.ts new file mode 100644 index 00000000..e819a907 --- /dev/null +++ b/packages/dispatcher/src/adapters.ts @@ -0,0 +1,38 @@ +/** + * The adapter registry — the single map from adapter name to its `AgentAdapter` + * implementation. Every dispatch path (implementation, recommender, docs, + * watchdog, control routes) resolves its adapter object through `getAdapter`, + * and the control plane validates a requested name through `isKnownAdapter`, so + * adding a CLI is one entry here rather than a hardcode per call site. + */ +import { claudeAdapter } from "@middle/adapter-claude"; +import { codexAdapter } from "@middle/adapter-codex"; +import type { AgentAdapter } from "@middle/core"; + +const REGISTRY: Readonly> = { + claude: claudeAdapter, + codex: codexAdapter, +}; + +/** The names of every implemented adapter. */ +export function knownAdapters(): string[] { + return Object.keys(REGISTRY); +} + +/** Whether `name` resolves to an implemented adapter. */ +export function isKnownAdapter(name: string): boolean { + return Object.hasOwn(REGISTRY, name); +} + +/** + * Resolve an adapter object by name. Throws `unknown adapter: ` (listing + * the known set) when the name isn't implemented — callers that prefer a + * non-throwing check guard with {@link isKnownAdapter} first. + */ +export function getAdapter(name: string): AgentAdapter { + const adapter = REGISTRY[name]; + if (adapter === undefined) { + throw new Error(`unknown adapter: ${name} (known: ${knownAdapters().join(", ")})`); + } + return adapter; +} diff --git a/packages/dispatcher/src/documentation-run.ts b/packages/dispatcher/src/documentation-run.ts index c8417175..576fd344 100644 --- a/packages/dispatcher/src/documentation-run.ts +++ b/packages/dispatcher/src/documentation-run.ts @@ -83,11 +83,10 @@ export async function resolveDocumentationOptions( getAdapter: (name: string) => AgentAdapter, ): Promise { const adapterName = config.docs?.adapter ?? config.global.defaultAdapter; - if (adapterName !== "claude") { - return { - ok: false, - error: `only the 'claude' adapter is available in Phase 1 (config asks for "${adapterName}")`, - }; + try { + getAdapter(adapterName); + } catch (error) { + return { ok: false, error: (error as Error).message }; } let target: DocsTargetSummary; diff --git a/packages/dispatcher/src/main.ts b/packages/dispatcher/src/main.ts index ca60c3b8..fd751f70 100644 --- a/packages/dispatcher/src/main.ts +++ b/packages/dispatcher/src/main.ts @@ -8,11 +8,11 @@ // drive it over the HTTP control plane. `mm stop` sends it SIGTERM. import { mkdirSync, readFileSync } from "node:fs"; import { dirname, join } from "node:path"; -import { claudeAdapter } from "@middle/adapter-claude"; -import type { AgentAdapter, MiddleConfig } from "@middle/core"; +import type { MiddleConfig } from "@middle/core"; import { loadConfig } from "@middle/core"; import type { Database } from "bun:sqlite"; import { Engine } from "bunqueue/workflow"; +import { getAdapter, isKnownAdapter } from "./adapters.ts"; import { autoDispatch } from "./auto-dispatch.ts"; import { buildImplementationDeps } from "./build-deps.ts"; import { installBunqueueRaceSwallower } from "./bunqueue-race.ts"; @@ -85,12 +85,6 @@ const AUTO_DISPATCH_DEBOUNCE_MS = 250; /** Epic-cache refresh cadence (constant, like POLLER/WATCHDOG; config-ification deferred). */ const EPICS_REFRESH_INTERVAL_MS = 60_000; -/** Adapter registry — only `claude` is implemented. */ -function getAdapter(name: string): AgentAdapter { - if (name !== "claude") throw new Error(`unknown adapter: ${name}`); - return claudeAdapter; -} - /** The dispatcher's own version, reported by `GET /health`. */ function readVersion(): string { try { @@ -350,7 +344,7 @@ export async function runDaemon(opts: RunDaemonOptions = {}): Promise { if (repoPath === undefined) { return { status: 400, body: JSON.stringify({ error: `unknown repo: ${normalizedRepo}` }) }; } - if (adapter !== "claude") { + if (!isKnownAdapter(adapter)) { return { status: 400, body: JSON.stringify({ error: `unknown adapter: ${adapter}` }) }; } const input = { repo: normalizedRepo, repoPath, epicNumber, adapter }; @@ -481,7 +475,7 @@ export async function runDaemon(opts: RunDaemonOptions = {}): Promise { const control: ControlPlane = { hub, version, - knownAdapter: (name) => name === "claude", + knownAdapter: isKnownAdapter, // A route dispatch is a manual `mm dispatch` — recorded `source: 'manual'`. startDispatch: (input) => startDispatchImpl(input, "manual"), // Manual dispatch respects slot limits (the loop does its own accounting). diff --git a/packages/dispatcher/src/recommender-run.ts b/packages/dispatcher/src/recommender-run.ts index f35de238..1411af91 100644 --- a/packages/dispatcher/src/recommender-run.ts +++ b/packages/dispatcher/src/recommender-run.ts @@ -113,11 +113,10 @@ export async function resolveRecommenderOptions( return { ok: false, error: `no state issue configured for this repo (run \`mm init\` first)` }; } const adapterName = config.recommender?.adapter ?? config.global.defaultAdapter; - if (adapterName !== "claude") { - return { - ok: false, - error: `only the 'claude' adapter is available in Phase 1 (config asks for "${adapterName}")`, - }; + try { + getAdapter(adapterName); + } catch (error) { + return { ok: false, error: (error as Error).message }; } const schemaPath = join(repoPath, "schemas", "state-issue.v1.md"); if (!existsSync(schemaPath)) { diff --git a/packages/dispatcher/test/documentation-run.test.ts b/packages/dispatcher/test/documentation-run.test.ts index 01bbf4e8..c2ee2b2f 100644 --- a/packages/dispatcher/test/documentation-run.test.ts +++ b/packages/dispatcher/test/documentation-run.test.ts @@ -159,14 +159,27 @@ describe("dispatchDocumentation — enqueues a documentation workflow (read-only }); describe("resolveDocumentationOptions", () => { - test("rejects a non-claude adapter in Phase 1", async () => { + test("accepts a configured non-default adapter (e.g. codex)", async () => { const result = await resolveDocumentationOptions( repoPath, configWith({ enabled: true, intervalMinutes: 60, adapter: "codex", write: false }), stubAdapter, ); + expect(result.ok).toBe(true); + }); + + test("rejects an adapter the registry doesn't know", async () => { + const knownOnly = (name: string): AgentAdapter => { + if (name !== "claude" && name !== "codex") throw new Error(`unknown adapter: ${name}`); + return stubAdapter(); + }; + const result = await resolveDocumentationOptions( + repoPath, + configWith({ enabled: true, intervalMinutes: 60, adapter: "ghost", write: false }), + knownOnly, + ); expect(result.ok).toBe(false); - if (!result.ok) expect(result.error).toContain("only the 'claude' adapter"); + if (!result.ok) expect(result.error).toContain("unknown adapter: ghost"); }); test("resolves the markdown fallback target for a plain repo", async () => { From 6142e9664facc63b794f4f6a904c8609c9e7d9da Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Tue, 26 May 2026 13:55:21 -0400 Subject: [PATCH 04/23] test(adapters): prove the AgentAdapter abstraction holds across both adapters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - adapter-conformance.test.ts: drives both adapters through the same registry (getAdapter) and the same interface calls, asserting each conforms and that the adapter-agnostic sentinel classification behaves identically — the automated same-path cross-adapter proof (#63 criterion 4). - Leak audit (#63 criterion 2): the dispatch-path leak (hardcoded registries + claude-only gates) was fixed in #62. Fixed two tooling leaks in-pass: - mm doctor now checks every configured adapter's binary (missing = warn, not a blocking fail) instead of hardcoding claude. - dashboard slot-pill fallback now lists claude+codex (stale 'codex is a later phase' comment removed). The state-issue parser/recommender {claude,codex} rate-limit pair is a documented deliberate exception (schema v1 fixes the set). - The AgentAdapter interface did not change for Codex (#63 criterion 3 / Epic headline) — documented in decisions.md. The live dual-dispatch on a test repo (Epic headline + #63 criterion 1) needs a running Codex CLI + OpenAI creds, absent in the sandbox — it's the operator's acceptance step (reviewer's brief). --- packages/cli/src/commands/doctor.ts | 47 ++++++- packages/dashboard/src/db-deps.ts | 9 +- .../test/adapter-conformance.test.ts | 132 ++++++++++++++++++ planning/issues/60/decisions.md | 69 +++++++++ 4 files changed, 248 insertions(+), 9 deletions(-) create mode 100644 packages/dispatcher/test/adapter-conformance.test.ts diff --git a/packages/cli/src/commands/doctor.ts b/packages/cli/src/commands/doctor.ts index babba3c9..03f19fbe 100644 --- a/packages/cli/src/commands/doctor.ts +++ b/packages/cli/src/commands/doctor.ts @@ -1,4 +1,5 @@ import { homedir } from "node:os"; +import { loadConfig } from "@middle/core"; import { getTmuxVersion, MIN_TMUX_VERSION, @@ -117,6 +118,43 @@ async function runBunPathFix(): Promise { } } +/** + * Check the binary of every configured + enabled adapter (not just `claude`). + * A missing binary is a **warning**, not a failure: middle can still dispatch + * with whichever adapters are installed — the recommender / `mm dispatch` simply + * won't pick the absent one. When the config file is absent, `loadConfig`'s + * defaults still describe both `claude` and `codex`, so a default environment is + * checked for both. The check `name` is the adapter name so each gets its own row. + */ +async function checkAdapterBinaries(): Promise { + let config: ReturnType; + try { + config = loadConfig({ globalPath: process.env.MIDDLE_CONFIG }); + } catch (error) { + return [ + { + name: "adapters", + status: "warn", + detail: `config unreadable: ${(error as Error).message}`, + }, + ]; + } + const enabled = Object.entries(config.adapters).filter(([, a]) => a.enabled); + if (enabled.length === 0) { + return [{ name: "adapters", status: "warn", detail: "no adapters enabled in config" }]; + } + return enabled.map(([name, adapter]) => { + if (!Bun.which(adapter.binary)) { + return { + name, + status: "warn", + detail: `${adapter.binary} not installed — the ${name} adapter is unavailable`, + }; + } + return { name, status: "pass", detail: `${adapter.binary} on PATH` }; + }); +} + async function checkGhAuth(): Promise { if (!Bun.which("gh")) { return { name: "gh auth", status: "fail", detail: "gh not installed" }; @@ -200,16 +238,17 @@ function checkTsdocCoverageWarn(): Check { /** * `mm doctor` — run a system check for every external tool the dispatcher - * shells out to: `bun`, `tmux` (≥ 3.5), `claude`, `git`, `gh`, and `gh` auth. - * Exits 0 when no check fails; 1 if anything is missing or broken. Warnings - * (degraded but functional) do not fail the run. + * shells out to: `bun`, `tmux` (≥ 3.5), each configured adapter's binary (e.g. + * `claude`, `codex`), `git`, `gh`, and `gh` auth. Exits 0 when no check fails; + * 1 if anything is missing or broken. Warnings (degraded but functional — a + * missing adapter binary among others present) do not fail the run. */ export async function runDoctor({ fix }: { fix?: boolean } = {}): Promise { const checks: Check[] = [ await checkBinary("bun", ["bun", "--version"]), await checkBunPath(), await checkTmux(), - await checkBinary("claude", ["claude", "--version"]), + ...(await checkAdapterBinaries()), await checkBinary("git", ["git", "--version"]), await checkBinary("gh", ["gh", "--version"]), await checkGhAuth(), diff --git a/packages/dashboard/src/db-deps.ts b/packages/dashboard/src/db-deps.ts index 44f2387a..9faa233b 100644 --- a/packages/dashboard/src/db-deps.ts +++ b/packages/dashboard/src/db-deps.ts @@ -315,11 +315,10 @@ export function createDbDeps(opts: DbDepsOptions): DashboardDeps { if (rows.length === 0) return []; const parsed = await readParsedState(repo); const adapters = Object.keys(config.adapters ?? {}); - // Drives the dispatch picker's per-adapter free-slot pills. Unlike the - // banner (which lists every adapter for *observability*), this lists only - // dispatchable adapters; with nothing configured that's `claude` alone — - // the only wired adapter today (codex is a later phase, would 400 on dispatch). - const adapterNames = adapters.length > 0 ? adapters : ["claude"]; + // Drives the dispatch picker's per-adapter free-slot pills, one per + // configured adapter. Both claude and codex are wired adapters; the + // fallback lists them when the config has none (matching the banner). + const adapterNames = adapters.length > 0 ? adapters : ["claude", "codex"]; const state = getSlotState(db, repo, slotLimits()); const freeSlots = adapterNames.sort().map((adapter) => ({ adapter, diff --git a/packages/dispatcher/test/adapter-conformance.test.ts b/packages/dispatcher/test/adapter-conformance.test.ts new file mode 100644 index 00000000..706dda98 --- /dev/null +++ b/packages/dispatcher/test/adapter-conformance.test.ts @@ -0,0 +1,132 @@ +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { isNormalizedEvent } from "@middle/core"; +import { getAdapter, knownAdapters } from "../src/adapters.ts"; + +// The proof that the `AgentAdapter` abstraction holds across both adapters: the +// SAME sequence of interface calls is driven against every registered adapter +// through the SAME registry the dispatch workflow uses (`getAdapter`), asserting +// each conforms — and that the adapter-agnostic parts (stop classification of +// the universal `.middle/` sentinels) behave identically. A divergence here is a +// leak in the abstraction, the exact failure mode Phase 10's verification guards. + +let dir: string; + +beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "middle-conformance-")); +}); + +afterEach(() => { + rmSync(dir, { recursive: true, force: true }); +}); + +function worktreeWithMiddle(): { worktree: string; middle: string } { + const worktree = join(dir, "wt"); + const middle = join(worktree, ".middle"); + mkdirSync(middle, { recursive: true }); + return { worktree, middle }; +} + +test("the registry knows both adapters", () => { + expect(knownAdapters().sort()).toEqual(["claude", "codex"]); +}); + +describe.each(knownAdapters())("AgentAdapter contract — %s", (name) => { + const adapter = getAdapter(name); + + test("identity: name matches its registry key and readyEvent is a normalized event", () => { + expect(adapter.name).toBe(name); + expect(isNormalizedEvent(adapter.readyEvent)).toBe(true); + }); + + test("buildLaunchCommand yields a non-empty argv and the session env", () => { + const { argv, env } = adapter.buildLaunchCommand({ + worktree: dir, + sessionName: "middle-60", + sessionToken: "tok", + envOverrides: { MIDDLE_EPIC: "60" }, + }); + expect(argv.length).toBeGreaterThan(0); + expect(argv).not.toContain("-p"); // never headless (claude) + expect(argv).not.toContain("exec"); // never headless (codex) + expect(env.MIDDLE_SESSION).toBe("middle-60"); + expect(env.MIDDLE_SESSION_TOKEN).toBe("tok"); + expect(env.MIDDLE_EPIC).toBe("60"); + }); + + test("buildPromptText: initial is the skill slash-command on the Epic", () => { + expect( + adapter.buildPromptText({ promptFile: ".middle/prompt.md", kind: "initial", epicNumber: 60 }), + ).toBe("/implementing-github-issues implement #60"); + }); + + test("buildPromptText: recommender / docs force-invoke their skill with the @-ref", () => { + expect(adapter.buildPromptText({ promptFile: ".middle/prompt.md", kind: "recommender" })).toBe( + "/recommending-github-issues @.middle/prompt.md", + ); + expect(adapter.buildPromptText({ promptFile: ".middle/prompt.md", kind: "docs" })).toBe( + "/documenting-the-repo @.middle/prompt.md", + ); + }); + + test("installHooks writes the shared hook.sh + pr-ready-gate.sh into the worktree", async () => { + const { worktree } = worktreeWithMiddle(); + await adapter.installHooks({ + worktree, + hookScriptPath: ".middle/hooks/hook.sh", + dispatcherUrl: "http://127.0.0.1:4120", + sessionName: "middle-60", + sessionToken: "tok", + epicNumber: 60, + }); + const hook = await Bun.file(join(worktree, ".middle/hooks/hook.sh")).text(); + expect(hook).toStartWith("#!/bin/sh"); + const gate = await Bun.file(join(worktree, ".middle/hooks/pr-ready-gate.sh")).text(); + expect(gate).toContain("/gates/pr-ready"); + }); + + // The sentinel resolution in classifyStop is adapter-agnostic (the `.middle/` + // files are written by the universal skill), so it MUST behave identically + // across adapters. These assertions run against every adapter and must match. + test("classifyStop: blocked.json → asked-question", () => { + const { worktree, middle } = worktreeWithMiddle(); + writeFileSync(join(middle, "blocked.json"), JSON.stringify({ question: "Q?" })); + const r = adapter.classifyStop({ + payload: { cwd: worktree }, + transcriptPath: join(dir, "missing.jsonl"), + sentinelPresent: true, + worktree, + }); + expect(r.kind).toBe("asked-question"); + }); + + test("classifyStop: done.json → done; failed.json → failed; neither → bare-stop", () => { + const { worktree, middle } = worktreeWithMiddle(); + const transcriptPath = join(dir, "empty.jsonl"); + writeFileSync(transcriptPath, ""); + + expect( + adapter.classifyStop({ payload: {}, transcriptPath, sentinelPresent: false, worktree }).kind, + ).toBe("bare-stop"); + + writeFileSync(join(middle, "done.json"), "{}"); + expect( + adapter.classifyStop({ payload: {}, transcriptPath, sentinelPresent: false, worktree }).kind, + ).toBe("done"); + + rmSync(join(middle, "done.json")); + writeFileSync(join(middle, "failed.json"), JSON.stringify({ reason: "x" })); + expect( + adapter.classifyStop({ payload: {}, transcriptPath, sentinelPresent: false, worktree }).kind, + ).toBe("failed"); + }); + + test("detectRateLimit is implemented and returns null on a clean transcript", () => { + const transcriptPath = join(dir, "clean.jsonl"); + writeFileSync(transcriptPath, JSON.stringify({ type: "x", message: { content: "all good" } })); + expect(adapter.detectRateLimit).toBeDefined(); + expect(adapter.detectRateLimit!({ payload: {}, transcriptPath })).toBeNull(); + }); +}); diff --git a/planning/issues/60/decisions.md b/planning/issues/60/decisions.md index 31f447b4..a5d90422 100644 --- a/planning/issues/60/decisions.md +++ b/planning/issues/60/decisions.md @@ -53,3 +53,72 @@ resolution is the same for every adapter. Duplicating it now keeps #61 self-cont #63 is where cross-adapter shared logic gets factored. **Evidence:** Build spec line 795 (Codex rate-limit pattern); Claude `classify.ts`. + +## #63 abstraction-leak audit: findings and dispositions +**File(s):** repo-wide +**Date:** 2026-05-26 + +**Decision:** Audited every adapter-name reference outside the adapter packages +(`grep '"claude"|"codex"|=== "claude"' ... | grep /src/`). Dispositions: + +- **Primary leak — FIXED (#62):** the two hardcoded `getAdapter` registries + (`dispatcher/main.ts`, `cli/docs.ts`) and the four "only claude in Phase 1" + gates. Replaced by the shared registry (`dispatcher/adapters.ts`) + registry- + based validation. This was the real abstraction leak — adapter *dispatch* logic + hardcoded to one CLI. +- **`mm doctor` — FIXED (this phase):** checked only the `claude` binary. Now + checks every configured+enabled adapter's binary; a missing one is a *warning* + (you can still dispatch with the installed adapter), not a blocking failure. +- **Dashboard slot-pill fallback — FIXED (this phase):** `db-deps.ts` fell back to + `["claude"]` with a now-false comment ("codex is a later phase, would 400"). + Updated to `["claude", "codex"]`, matching the banner. +- **`state-issue/parser.ts` + `recommender.ts` rate-limit pair — DELIBERATE + EXCEPTION:** both hardcode the `{claude, codex}` pair when parsing/emitting the + state issue's Rate-limits section. This is *schema-bound*, not an abstraction + leak: `schemas/state-issue.v1.md` fixes the rate-limit lines to exactly those + two adapters. Generalizing to N adapters is a schema (v2) change, out of scope + here. Left as-is by design. + +**Why:** The criterion is "fixed, or documented as a deliberate exception." The +dispatch-path leak is fixed; the two tooling leaks are cheap and in the +abstraction's blast radius, so fixed in-pass; the schema-bound pair is a +deliberate, documented exception gated on a schema bump. + +**Evidence:** `dispatcher/test/adapter-conformance.test.ts` drives both adapters +through the same registry + interface calls and asserts identical sentinel +classification — the automated proof the abstraction holds. + +## The `AgentAdapter` interface did not need to change for Codex +**File(s):** `packages/core/src/adapter.ts` +**Date:** 2026-05-26 + +**Decision:** Codex was implemented against the existing `AgentAdapter` interface +verbatim — no member added, removed, or re-typed. The per-CLI differences +(config-driven auto mode vs. dialog-dismissal, `.codex/config.toml` `[hooks]` vs. +`.claude/settings.json`, rollout JSONL vs. Claude transcript, the rate-limit +pattern) all fit behind the existing methods. This is the Epic's headline +signal: the abstraction held for a second, structurally-different CLI. + +**Why:** Validates the interface design. The only friction points were *empirical* +(Codex's observable formats), not *structural* (the interface shape) — and the +interface already isolates the empirical bits behind `installHooks` / +`resolveTranscriptPath` / `readTranscriptState` / `classifyStop`. + +**Evidence:** `git diff` touches no signature in `packages/core/src/adapter.ts`; +both adapters satisfy the same conformance suite. + +## Live dual-dispatch (Epic headline + #63 criterion 1) is an operator step +**File(s):** n/a +**Date:** 2026-05-26 + +**Decision:** "Dispatch the same issue once per adapter on a test repo, both as +interactive tmux sessions, and confirm conforming output" requires a running +Codex CLI with OpenAI credentials. The Codex CLI is not installed in the dispatch +sandbox, so this is surfaced in the reviewer's brief as the operator-executed +acceptance step (with the exact commands), not run here. + +**Why:** It is inherently a live-environment verification (real CLIs, real auth, a +real test repo). Everything mechanically verifiable headless — the adapter impl, +the selection logic, the same-path cross-adapter conformance test, and the leak +audit — is delivered and green. The live run confirms the empirical baselines and +is the natural point to tighten them. From 420e1997e7781cf3827d63dc5bda88b0351eff29 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Tue, 26 May 2026 14:02:54 -0400 Subject: [PATCH 05/23] =?UTF-8?q?fix(adapters):=20self-review=20hardening?= =?UTF-8?q?=20=E2=80=94=20enabled-aware=20dispatch=20+=20word-boundaried?= =?UTF-8?q?=20429?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Internal review-loop findings (resolve the class, each with a test): - mm dispatch resolved its adapter set from Object.keys(config.adapters), ignoring the enabled flag and the registry — so a disabled or unimplemented adapter could be force-dispatched. Now "available" = configured AND enabled AND implemented (intersect with knownAdapters()), matching the daemon's validation. Test: --adapter codex with codex disabled → exit 1, no dispatch. - Codex rate-limit regex matched 429 as a bare substring (e.g. "line 4290", "commit 4291ab"), which would falsely mark a healthy agent rate-limited and skip it. Word-boundaried to \b429\b. Tests: 4290/4291ab/14290/42900 → bare-stop. --- packages/adapters/codex/src/classify.ts | 14 ++++---- packages/adapters/codex/test/adapter.test.ts | 24 ++++++++++++++ packages/cli/src/commands/dispatch.ts | 10 +++++- packages/cli/test/dispatch.test.ts | 34 ++++++++++++++++++++ packages/core/src/select-adapter.ts | 6 +++- 5 files changed, 80 insertions(+), 8 deletions(-) diff --git a/packages/adapters/codex/src/classify.ts b/packages/adapters/codex/src/classify.ts index ec42923c..21f91df1 100644 --- a/packages/adapters/codex/src/classify.ts +++ b/packages/adapters/codex/src/classify.ts @@ -8,13 +8,15 @@ import type { } from "@middle/core"; /** - * Codex's rate-limit signal. The spec's deliberately generous starting pattern - * (`/rate.?limit|429|too many requests/i`), to be tightened as Codex's real - * usage-limit messages are observed on live runs. Unlike Claude's, it carries no - * reset timestamp — Codex's message format hasn't surfaced one, so the - * classification defaults `resetAt` to "unknown" rather than inventing a time. + * Codex's rate-limit signal. The spec's deliberately generous starting pattern, + * to be tightened as Codex's real usage-limit messages are observed on live runs. + * `429` is word-boundaried (`\b429\b`) so it matches the HTTP status, not an + * incidental substring — a transcript tail is full of line numbers, hashes, and + * byte counts ("line 4290", "commit 4291ab"), and a false `rate-limited` would + * halt a healthy agent. Unlike Claude's, the pattern carries no reset timestamp + * (Codex's format hasn't surfaced one), so `resetAt` defaults to "unknown". */ -const RATE_LIMIT_RE = /rate.?limit|429|too many requests/i; +const RATE_LIMIT_RE = /rate.?limit|\b429\b|too many requests/i; /** * Classify the agent's state at a turn-end (`Stop`) hook. The sentinel logic is diff --git a/packages/adapters/codex/test/adapter.test.ts b/packages/adapters/codex/test/adapter.test.ts index b3a8f781..55f77815 100644 --- a/packages/adapters/codex/test/adapter.test.ts +++ b/packages/adapters/codex/test/adapter.test.ts @@ -281,6 +281,30 @@ describe("classifyStop", () => { expect(result.kind).toBe("rate-limited"); }); + test.each([ + ["line 4290 of the file", "4290 — a line number"], + ["commit 4291abcdef", "4291 in a hash"], + ["listening on port 14290", "embedded 4290"], + ["processed 42900 rows", "42900"], + ])("a bare %p is NOT a rate-limit signal → bare-stop (%s)", (text) => { + const { cwd, transcript } = writeMiddleDir(); + writeFileSync( + transcript, + JSON.stringify({ + timestamp: "2026-05-14T12:30:00.000Z", + type: "response_item", + payload: { type: "message", role: "assistant", content: [{ type: "text", text }] }, + }), + ); + const result = codexAdapter.classifyStop({ + payload: { cwd }, + transcriptPath: transcript, + sentinelPresent: false, + worktree: cwd, + }); + expect(result.kind).toBe("bare-stop"); + }); + test("done.json sentinel → done", () => { const { cwd, middle, transcript } = writeMiddleDir(); writeFileSync(join(middle, "done.json"), JSON.stringify({ pr: 155 })); diff --git a/packages/cli/src/commands/dispatch.ts b/packages/cli/src/commands/dispatch.ts index 5f2a05e6..6259095a 100644 --- a/packages/cli/src/commands/dispatch.ts +++ b/packages/cli/src/commands/dispatch.ts @@ -1,6 +1,7 @@ import { existsSync } from "node:fs"; import { basename, join, resolve } from "node:path"; import { loadConfig, selectAdapter } from "@middle/core"; +import { knownAdapters } from "@middle/dispatcher/src/adapters.ts"; import { type StartOptions, runStart } from "./start.ts"; export type DispatchOptions = { @@ -283,7 +284,14 @@ export async function runDispatch( // (selectAdapter rules 1–2). The rate-limit switch (rules 3–4) is the // recommender/auto-dispatch path's concern — a force-dispatch is an explicit // human act, so it doesn't second-guess a rate-limited choice here. - const available = Object.keys(config.adapters); + // + // "Available" = configured AND enabled AND implemented (in the registry) — the + // same set the daemon validates against, so a disabled or unimplemented adapter + // is rejected here with a clear message instead of as a late daemon 400. + const known = new Set(knownAdapters()); + const available = Object.entries(config.adapters) + .filter(([name, a]) => a.enabled && known.has(name)) + .map(([name]) => name); let adapterName: string; if (opts.adapter !== undefined) { if (!available.includes(opts.adapter)) { diff --git a/packages/cli/test/dispatch.test.ts b/packages/cli/test/dispatch.test.ts index 9f070a89..a86f579f 100644 --- a/packages/cli/test/dispatch.test.ts +++ b/packages/cli/test/dispatch.test.ts @@ -333,6 +333,40 @@ describe("runDispatch — control client", () => { } }); + test("a disabled adapter is rejected (exit 1), even via --adapter, before any dispatch", async () => { + const repoPath = makeRepo(); + const { server, dispatchBodies } = fakeDaemon({ states: ["completed"] }); + // Disable codex in config; --adapter codex must not dispatch. + const configPath = join(dir, "config.toml"); + writeFileSync( + configPath, + [ + "[global]", + `dispatcher_port = ${server.port}`, + `db_path = "${join(dir, "db.sqlite3")}"`, + `worktree_root = "${join(dir, "worktrees")}"`, + `log_dir = "${join(dir, "logs")}"`, + "", + "[adapters.codex]", + "enabled = false", + "", + ].join("\n"), + ); + const restore = silenceLogs(); + try { + const code = await runDispatch(repoPath, "6", { + configPath, + startDaemon: () => 0, + adapter: "codex", + }); + expect(code).toBe(1); + expect(dispatchBodies).toEqual([]); // never reached the daemon + } finally { + restore(); + server.stop(true); + } + }); + test("an unconfigured --adapter is rejected (exit 1) before any dispatch", async () => { const repoPath = makeRepo(); const { server, dispatchBodies } = fakeDaemon({ states: ["completed"] }); diff --git a/packages/core/src/select-adapter.ts b/packages/core/src/select-adapter.ts index b66736a0..b0ecbe95 100644 --- a/packages/core/src/select-adapter.ts +++ b/packages/core/src/select-adapter.ts @@ -22,7 +22,11 @@ export type AdapterSelectionInput = { labels: readonly string[]; /** `config.global.default_adapter`. */ defaultAdapter: string; - /** Configured + implemented adapter names a choice is validated against. */ + /** + * The dispatchable adapter names a choice is validated against — the caller's + * already-filtered set of configured, enabled, and implemented adapters. A + * label/default outside this set throws. + */ available: readonly string[]; /** Adapters whose rate limit has not yet reset. Defaults to none. */ rateLimited?: ReadonlySet; From aae7c2618b176146ab31f43ea6d08e3f4a58e30d Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Tue, 26 May 2026 14:10:58 -0400 Subject: [PATCH 06/23] =?UTF-8?q?docs(adapters):=20record=20empty=20resume?= =?UTF-8?q?=20=E2=80=94=20re-parked=20on=20the=20live-run=20criterion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- planning/issues/60/decisions.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/planning/issues/60/decisions.md b/planning/issues/60/decisions.md index a5d90422..e4c64455 100644 --- a/planning/issues/60/decisions.md +++ b/planning/issues/60/decisions.md @@ -122,3 +122,27 @@ real test repo). Everything mechanically verifiable headless — the adapter imp the selection logic, the same-path cross-adapter conformance test, and the leak audit — is delivered and green. The live run confirms the empirical baselines and is the natural point to tighten them. + +## Resume #1 carried no human answer — re-parked on the same criterion +**File(s):** `.middle/blocked.json` +**Date:** 2026-05-26 + +**Decision:** A resume fired (prompt: "a human answered the open question"), but no +substantive answer was present on any surface. Verified: the agent-question comment +(id 4547214595) was created 18:06 and never edited (`created == updated`); the Epic, +#63, and PR #155 carry no authorizing comment, no `approved` label, and no +live-dispatch artifacts; the injected "reply" was my own parked question echoed back, +attributed to the account that posts on the agent's behalf. So the live-run criterion +remains genuinely unanswered. Re-parked via `.middle/blocked.json` with a sharpened +message stating the prior resume was empty and naming the exact action needed. + +**Why:** I cannot honestly satisfy the PR-ready gate. Option (a) is impossible here +(no `codex` binary, no creds). Option (b) requires a genuine non-bot authorization +comment whose URL backs the `(deferred: )` annotation — and none exists. +Annotating with my own question comment's URL would game the gate; the integrity rule +(must not self-author the deferral) forbids it. Everything else is airtight: +typecheck clean, 794 pass / 0 fail, lint + format clean, branch MERGEABLE / CLEAN. + +**Evidence:** `gh api /repos/thejustinwalsh/middle/issues/comments/4547214595` +(created == updated); `gh issue view 60/63 --json labels,comments`; `gh pr view 155 +--json mergeable,mergeStateStatus` → MERGEABLE / CLEAN. From 47e2bb50a044712496f1ec5ad660aa2be85852bf Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Tue, 26 May 2026 14:21:17 -0400 Subject: [PATCH 07/23] =?UTF-8?q?docs(adapters):=20diagnose=20resume=20loo?= =?UTF-8?q?p=20as=20a=20channel=20mismatch=20=E2=80=94=20re-park=20on=20a?= =?UTF-8?q?=20one-click=20ask?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- planning/issues/60/decisions.md | 43 +++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/planning/issues/60/decisions.md b/planning/issues/60/decisions.md index e4c64455..80509786 100644 --- a/planning/issues/60/decisions.md +++ b/planning/issues/60/decisions.md @@ -146,3 +146,46 @@ typecheck clean, 794 pass / 0 fail, lint + format clean, branch MERGEABLE / CLEA **Evidence:** `gh api /repos/thejustinwalsh/middle/issues/comments/4547214595` (created == updated); `gh issue view 60/63 --json labels,comments`; `gh pr view 155 --json mergeable,mergeStateStatus` → MERGEABLE / CLEAN. + +## Resume #2 — diagnosed the loop as a channel mismatch; re-parked on a one-click ask +**File(s):** `.middle/blocked.json` +**Date:** 2026-05-26 + +**Decision:** A third resume fired, again titled "a human answered." This time the +injected reply *did* carry the maintainer's words — it elaborated both options (a) +and (b) operationally and signed "— @thejustinwalsh" — but it did **not** pick one, +and (critically) it never landed as a #60 comment or label. Verified: #60's only +comments are the plan comment + my two agent-question parks (all type `User`); #60's +labels are `phase:10, dogfood, epic` — **no `approved` label**; #63 still open; no +live-run artifacts anywhere. So none of the three *sanctioned, mechanically-readable* +unblock signals exists. Re-parked with a sharpened message that (1) names the +**`approved` label** as the one-click primary path, and (2) diagnoses **why** the +loop persists. + +**Why — the channel mismatch (the real finding):** the maintainer's replies arrive +through middle's dispatch-resume channel (injected into `.middle/prompt.md`), which +middle does **not** mirror to the issue as a #60 comment. So the PR-ready gate +(`packages/dispatcher/src/gates/pr-ready.ts`, which resolves a `(deferred: )` +comment author and checks non-bot) and I can only mechanically read #60 +**comments/labels** — never the resume reply. A bare resume therefore can never +unblock this, no matter how substantive its prose. The fix is to route the +authorization to a surface the gate reads: the `approved` label (one click, +sanctioned by the dispatch rules as "proceed with best judgment") or a one-line #60 +comment. + +**Why not self-author anyway:** the maintainer offered "(a) I run it / (b) defer." +Only (b) is even available to the agent (no `codex`/creds — re-verified this resume: +`command -v codex` empty, no `~/.codex`, no OpenAI env). It is tempting to treat +"(a) unavailable + repeated resumes + 'Everything else is done and verified'" as +authorization-by-elimination and post my own deferral comment. I declined: the gate +exists precisely to stop an agent waving through its own deferral, the maintainer has +not used any sanctioned channel, and marking ready on a self-authored sign-off would +hollow out the gate. Re-parking with a one-click ask preserves the integrity line +*and* gives the loop a real exit. If the maintainer adds `approved` (or comments), +the next resume is genuinely actionable: I post a deferral comment citing that +sanctioned authorization, annotate criterion 2, and proceed to PR-ready. + +**Evidence:** `command -v codex` → empty; `ls ~/.codex` → absent; `env | grep -i +openai` → empty; `gh issue view 60 --json labels` → no `approved`; `gh api +repos/.../issues/60/comments` → 3 comments, last is my RE-PARK; `bun test` 794/0, +typecheck/lint/format clean, `gh pr view 155` → MERGEABLE / CLEAN (re-verified). From 4330461675a04c1e9c31eeb837c67f9c230d12b3 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Tue, 26 May 2026 14:40:36 -0400 Subject: [PATCH 08/23] =?UTF-8?q?docs(adapters):=20correct=20resume-loop?= =?UTF-8?q?=20premise=20=E2=80=94=20Codex=20now=20installed;=20real=20bloc?= =?UTF-8?q?ker=20is=20daemon-runs-main?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- planning/issues/60/decisions.md | 70 +++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/planning/issues/60/decisions.md b/planning/issues/60/decisions.md index 80509786..b32b8702 100644 --- a/planning/issues/60/decisions.md +++ b/planning/issues/60/decisions.md @@ -189,3 +189,73 @@ sanctioned authorization, annotate criterion 2, and proceed to PR-ready. openai` → empty; `gh issue view 60 --json labels` → no `approved`; `gh api repos/.../issues/60/comments` → 3 comments, last is my RE-PARK; `bun test` 794/0, typecheck/lint/format clean, `gh pr view 155` → MERGEABLE / CLEAN (re-verified). + +## Resume #4 — prior premise disproven (Codex IS installed); real blocker is daemon-runs-main +**File(s):** `.middle/blocked.json`, `planning/issues/60/decisions.md` +**Date:** 2026-05-26 + +**Decision:** A fourth resume fired ("a human answered"), again echoing my own re-park +text with no new gate-readable signal (#60 still has labels `phase:10, dogfood, epic` +— **no `approved`** — and no new authorizing comment). BUT this resume the environment +**materially changed**, which I verified before acting: `codex` is now on PATH +(`@openai/codex` 0.133.0), `~/.codex/auth.json` exists with `OPENAI_API_KEY` + tokens, +`codex doctor` is all-green, `codex login status` → "Logged in using ChatGPT", and +`claude` 2.1.150 is healthy. The auth files are timestamped **today 14:21–14:24** — the +maintainer installed + logged into Codex in direct response to my park #3, which named +"BOTH CLIs installed and authenticated" as option (3)'s prerequisite. So my prior parks' +core premise — "structurally impossible: no `codex`, no `~/.codex`, no creds" — is now +**false**, and I corrected the record rather than re-asserting it. + +**The real, newly-identified blocker (sound, and different from the prior three):** the +live dual-dispatch still cannot run cleanly *from inside this dispatch*, for a reason +that has nothing to do with tooling: +- `mm`/the dispatcher daemon run from `~/Developer/middle` checked out at **`main`** + (HEAD `720044c`). `main` does **not** contain this PR's adapter-registry wiring — + `packages/dispatcher/src/adapters.ts` is absent there, and my commits are not in + `origin/main` (PR #155 unmerged). So the running daemon literally lacks the CodexAdapter + registry under test. +- The **only** daemon is the one hosting **this very workflow (#60)** on port 4120 + (the `middle-thejustinwalsh-middle-60` tmux session). Making it run my branch's code + means restarting it — which **aborts my own run**. +- There is **no global `~/.middle/config.toml`**, and that single path is shared between + my session's `mm`/gate-hook invocations and any daemon I'd start. I cannot stand up + isolated parallel infra (separate port/db/worktree-root) without risking my own + session's gate behavior via that shared config path. Concretely: an isolated parallel + daemon is a fragile, collision-prone, ~2-nested-agent-run operation whose worst case + (port/config/worktree collision) destabilizes the daemon hosting my own workflow and + loses the entire run. The asymmetry (catastrophic worst case vs. a clean re-park) + rules out attempting it autonomously. + +**Conclusion:** criterion 2 is genuinely a **post-merge operator step** — the daemon +must execute this PR's code, which happens naturally once #155 lands on `main`. This is +exactly what #63 tracks. (Note: even the maintainer running `mm dispatch … --adapter +codex` *pre-merge* would hit `main`'s `mm`, which lacks the CodexAdapter registry — so +option (3) as I originally framed it doesn't work for them either until merge or a manual +branch-checkout + daemon restart.) + +**Gate weakness discovered (not exploited):** the PR-ready gate's +`EVIDENCE_RE = /(https?:\/\/\S+|#\d+)/` means a criterion line that *incidentally* +references any `#` (e.g. criterion 2's current "see the blocked question on +**#60**") **auto-passes** the gate as "has evidence", regardless of whether the criterion +is met. So `gh pr ready` would mechanically succeed right now. I deliberately did **not** +exploit this — shipping an unmet criterion on an incidental `#`-ref is the same +gate-gaming I refused via self-authored deferrals; the maintainer's intent (human sign-off +on deferrals) governs over the buggy letter. Candidate follow-up: tighten +`pr-ready.ts` so an evidence link must be distinguishable from an incidental issue +cross-ref (out of this Epic's scope — pre-existing gate logic). + +**Why re-park (and why it's not the same loop):** I corrected a false premise and +identified the true, sound reason criterion 2 is a post-merge step. The remaining +decision is now *purely* "authorize deferring criterion 2 to the #63 post-merge step on a +gate-readable surface" — simpler than before. I will still not self-author the deferral; +`approved` (a deliberate, dispatch-rules-sanctioned human action) or a one-line human +comment is what makes the next resume cleanly actionable. Re-parked with corrected, +simplified reasoning + a thank-you for installing Codex. + +**Evidence:** `codex --version` → 0.133.0; `codex login status` → "Logged in using +ChatGPT"; `readlink -f ~/.bun/bin/mm` → `/home/tjw/Developer/middle/packages/cli/src/index.ts`; +`git -C ~/Developer/middle branch --show-current` → `main`; main lacks +`packages/dispatcher/src/adapters.ts`; `ls ~/.middle/config.toml` → absent; +`gh issue view 60 --json labels` → no `approved`; `bun test` 794/0, typecheck/lint/format +clean, `git rev-list --count origin/main...HEAD` → 7 ahead/0 behind, `gh pr view 155` → +MERGEABLE / CLEAN (all re-verified this resume). From e62f1c6069297a6eabf4128821c11ec5a696e3ed Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 01:04:40 -0400 Subject: [PATCH 09/23] docs(reconcilers): plan the open-PR divergence reconciler (Epic #168) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six-phase workstream: classify divergence → rebase helper → -X ours merge fallback → applySuccess (push+comment) → applyDemoteToWork (draft+reopen+ enqueue) → reconcileOpenPRs + poller-tick + MERGED-transition immediate sweep. --- planning/issues/168/plan.md | 51 +++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 planning/issues/168/plan.md diff --git a/planning/issues/168/plan.md b/planning/issues/168/plan.md new file mode 100644 index 00000000..06860f3f --- /dev/null +++ b/planning/issues/168/plan.md @@ -0,0 +1,51 @@ +# Epic #168: Open-PR reconciler — rebase or demote stale PRs after a main merge + +**Link:** https://github.com/thejustinwalsh/middle/issues/168 +**Branch:** middle-issue-168 + +## Goal + +When one Epic PR merges to `main`, other open Epic PRs may become non-mergeable. Add an autonomous reconciler that detects that divergence, tries to fix it (rebase first, `-X ours` merge-commit fallback per CLAUDE.md's *new-work-as-base*), and on double-failure demotes the PR back to draft, reopens its last sub-issue, and re-enqueues the Epic — so a ready PR never silently rots between a `main` merge and the human's final-merge gate. + +## Approach + +- New module: `packages/dispatcher/src/reconcilers/pr-divergence.ts` — the whole reconciler lives here so the chain (classify → rebase → merge-fallback → apply-success / demote) composes from one file's exports. +- Persistence: one new migration `006_pr_divergence_state.sql` adding `pr_divergence_state(pr_number, state, classified_at)`; mirrors the small-row pattern from `005_epics.sql`. `state` widens beyond classifier output to include `DEMOTED`/`SKIPPED` (per #172 / #174). +- Identification: middle-managed PR = head-ref starts with `middle-issue-` (the existing `createWorktree` branch convention). The classifier uses `gh pr view --json mergeable,mergeStateStatus,headRefName` so per-PR cost is one `gh` call. +- Composition: helpers are **pure functions** (no daemon imports). The trigger sibling (#174) is what wires them onto the daemon's poller tick — one place for the sequencing logic so the helpers stay independently testable. +- Integration tests for the git helpers use the same in-process fixture-repo pattern the existing worktree tests use (a real local `git init` + temp worktree, no remote). GitHub-shaped helpers (success/demote) use injected gateway stubs the way `reconcile.test.ts` does. + +## Phases + +Each phase = one sub-issue. They land on this single branch and PR in this order. + +1. **#169 — classify divergence.** Module skeleton + `classifyDivergence(prNumber)` reading `gh pr view --json mergeable,mergeStateStatus`. New migration `006_pr_divergence_state.sql`. Unit tests over each branch of the classifier; SQLite writes round-trip. +2. **#170 — rebase helper.** `tryRebaseOntoMain(prNumber)`: resolve worktree → `git fetch origin main` → `git rebase origin/main` → on conflict, `git rebase --abort` and report conflicting paths. Integration test against a fixture repo with FF / non-FF-no-conflict / conflict cases. +3. **#171 — merge-commit fallback.** `tryMergeMainNewWorkAsBase(prNumber)`: `git fetch origin main` + `git merge -X ours origin/main` (approximates new-work-as-base — branch's version wins on conflict so main's net-new lands on top). On the rare structural conflict `-X ours` can't auto-resolve, `git merge --abort` + return paths. Fixture-repo integration test for both paths. +4. **#172 — applySuccess.** `applySuccess(prNumber, resolution, mainCommitSha)`: `git push --force-with-lease`, post one PR comment (`🔁 Reconciled with main (...) after `), update `pr_divergence_state → CLEAN`. Idempotent (no double-push, no double-comment) — verified via gateway stub recording exact call counts. +5. **#173 — applyDemoteToWork.** `applyDemoteToWork(prNumber, conflictingPaths)`: convert PR → draft, identify Epic from sub-issue parent hierarchy, reopen most-recently-closed sub-issue with an escalation comment, post the same comment on both Epic and PR (dual-surface per CLAUDE.md), re-enqueue the Epic through the recommender entry point (so ranking still applies), set state → `DEMOTED`. Idempotent across re-calls. +6. **#174 — wire trigger.** `reconcileOpenPRs()` orchestrator that lists open middle-managed PRs (`headRefName ~ /^middle-issue-/`) and runs the chain. Wire into `poller-cron.ts` alongside `reconcileMergedParks`, with one **immediate sweep** when the existing MERGED-PR detection observes a transition. Rate-limit-aware skip (records `SKIPPED`). End-to-end integration test on a two-PR fixture: merge of one triggers reconciliation of the other; rerunning is idempotent. + +## Files likely to change + +- `packages/dispatcher/src/reconcilers/pr-divergence.ts` — **new module** (the whole reconciler; classifier + helpers + orchestrator). +- `packages/dispatcher/src/db/migrations/006_pr_divergence_state.sql` — **new migration**. +- `packages/dispatcher/src/poller-gateway.ts` — extend the gateway with the small surface the reconciler needs (`getMergeability`, `listOpenManagedPrs`, `pushForceWithLease`, `postPrComment`, `convertPrToDraft`, `reopenIssue`, `lastClosedSubIssue`, `headSha`, `enqueueEpic`). Keep narrow; mirror the existing inject-able gateway pattern. +- `packages/dispatcher/src/github.ts` — only if a needed surface (PR comment post, draft conversion, reopen) doesn't already exist; reuse where it does. +- `packages/dispatcher/src/poller-cron.ts` — call `reconcileOpenPRs` per tick + on the MERGED-transition path. Keep the existing pass isolation (`try`/`catch` per pass). +- `packages/dispatcher/src/main.ts` — wire the reconciler's deps into the daemon's `startPoller` call (no public-surface change; reuses the existing `repoPaths` registry for `repoPath`). +- `packages/dispatcher/test/pr-divergence.test.ts` — **new tests**, one per phase, mirroring `reconcile.test.ts` and `poller.test.ts` patterns. +- `packages/dispatcher/test/pr-divergence-integration.test.ts` — **new integration tests** for the git helpers (real fixture repo via `git init`, no remote). +- `packages/dispatcher/src/index.ts` — add re-exports per the module-index frontmatter convention. + +## Out of scope + +- Cross-repo handling — sticks to the current `[repo]` config (sister to #158, which is splitting that policy out; not blocking). +- Migration to a configurable head-ref convention — the `middle-issue-` prefix is hard-coded for now; #158 will lift it into `[repo]`. +- New ranking logic — re-enqueue goes through the existing recommender path so ranking is unchanged. +- Watchdog-style live remediation — this is a poller-tick reconciliation, not a real-time hook. + +## Open questions + +- **Enqueue entry point for demote (#173).** The recommender's existing path (`runRecommenderForRepo` → state issue → auto-dispatch) is the documented one. Will use it directly via a daemon-injected `enqueueEpic(repo, epicNumber)` callback that triggers a recommender run + `scheduleAutoDispatch(repo)`. If a finer-grained "force this exact Epic" seam is wanted, sub-issue #173 will surface it; otherwise the recommender's own ranking handles it. +- **`pr_divergence_state` row identity.** Issue says `pr_number`; the schema is single-repo by current convention. Will key on `pr_number` PRIMARY KEY (matching the spec) since the daemon today operates one repo per managed checkout. If cross-repo becomes real, a follow-up widens to composite `(repo, pr_number)` — flagged in the row's TSDoc. From a620ac0c72516c5e3e18f9589ebf46a02882a690 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 01:11:42 -0400 Subject: [PATCH 10/23] feat(reconcilers): classify open-PR divergence against main (#169) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New module packages/dispatcher/src/reconcilers/pr-divergence.ts — `classifyDivergence(deps, repo, prNumber)` reads mergeable/mergeStateStatus from an injected gateway and returns one of CLEAN/BEHIND/CONFLICTED/UNKNOWN, persisting the observation in the new `pr_divergence_state` table (006_pr_divergence_state.sql, keyed (repo, pr_number), CHECK-pinned vocabulary). Pure `classifyMergeability` split out so each branch unit-tests without a gateway. Bumped db.test.ts schema-version assertions to 6. --- .../db/migrations/006_pr_divergence_state.sql | 27 +++ .../src/reconcilers/pr-divergence.ts | 128 ++++++++++++++ packages/dispatcher/test/db.test.ts | 14 +- .../dispatcher/test/pr-divergence.test.ts | 156 ++++++++++++++++++ 4 files changed, 318 insertions(+), 7 deletions(-) create mode 100644 packages/dispatcher/src/db/migrations/006_pr_divergence_state.sql create mode 100644 packages/dispatcher/src/reconcilers/pr-divergence.ts create mode 100644 packages/dispatcher/test/pr-divergence.test.ts diff --git a/packages/dispatcher/src/db/migrations/006_pr_divergence_state.sql b/packages/dispatcher/src/db/migrations/006_pr_divergence_state.sql new file mode 100644 index 00000000..71cecffc --- /dev/null +++ b/packages/dispatcher/src/db/migrations/006_pr_divergence_state.sql @@ -0,0 +1,27 @@ +-- 006_pr_divergence_state.sql +-- Most-recent PR mergeability classification used by the open-PR reconciler +-- (Epic #168). One row per managed PR; the reconciler upserts on every pass so +-- the row is the freshest observation, not a history (an audit log is out of +-- scope — observability reads `events` for trends). +-- +-- `state` covers the four classifier outputs (CLEAN/BEHIND/CONFLICTED/UNKNOWN) +-- plus two terminal-ish outcomes the reconciler writes itself (DEMOTED when the +-- escalation path fires, SKIPPED when rate-limited). The CHECK pins the +-- vocabulary so a typo in the reconciler surfaces as a constraint violation, not +-- a silently-bad row the auto-dispatch loop later believes. +-- +-- Keyed on (repo, pr_number) so the same column works for a future multi-repo +-- daemon — a PR number isn't unique across repos. Today the daemon writes one +-- repo's PRs to this table, but the constraint is correct for tomorrow's case. + +CREATE TABLE pr_divergence_state ( + repo TEXT NOT NULL, -- 'owner/name' + pr_number INTEGER NOT NULL, + state TEXT NOT NULL CHECK (state IN ( + 'CLEAN', 'BEHIND', 'CONFLICTED', 'UNKNOWN', 'DEMOTED', 'SKIPPED' + )), + classified_at INTEGER NOT NULL, -- epoch ms of the latest write + PRIMARY KEY (repo, pr_number) +); + +CREATE INDEX idx_pr_divergence_state ON pr_divergence_state(state); diff --git a/packages/dispatcher/src/reconcilers/pr-divergence.ts b/packages/dispatcher/src/reconcilers/pr-divergence.ts new file mode 100644 index 00000000..2eb84ce7 --- /dev/null +++ b/packages/dispatcher/src/reconcilers/pr-divergence.ts @@ -0,0 +1,128 @@ +import type { Database } from "bun:sqlite"; + +/** + * Open-PR divergence reconciler (Epic #168). When one Epic PR merges to `main`, + * other open Epic PRs may become non-mergeable until rebased; this module + * detects that drift and applies the resolution chain — rebase first, then + * `-X ours` merge fallback (CLAUDE.md's *new-work-as-base*), then demote-to-work + * if both autonomous attempts fail. + * + * Helpers are pure functions over an injected gateway so they unit-test without + * `gh` or the daemon; the trigger sibling (`reconcileOpenPRs`) is what wires + * them into the poller's tick. + */ + +/** + * Mergeability classification for one PR against the latest `main`. + * + * - `CLEAN` — the PR is fully mergeable (`mergeStateStatus = CLEAN` *and* + * `mergeable = MERGEABLE`); no reconciliation needed. + * - `BEHIND` — head is behind `main` but conflict-free; rebase should succeed. + * - `CONFLICTED` — head conflicts with `main`; rebase will conflict, fall back + * to the `-X ours` merge or demote. + * - `UNKNOWN` — anything else (`BLOCKED` by a required check, `HAS_HOOKS`, + * `UNSTABLE`, a transient `UNKNOWN` from GitHub mid-rollup, or a missing + * field). The reconciler skips it this pass; the next tick reclassifies. + * + * Values are GitHub-vocabulary spellings (UPPERCASE) so the CHECK in + * `006_pr_divergence_state.sql` can mirror them as constants without case + * conversion at the persistence boundary. + */ +export type DivergenceState = "CLEAN" | "BEHIND" | "CONFLICTED" | "UNKNOWN"; + +/** `state` values that may appear in the `pr_divergence_state` table — wider + * than {@link DivergenceState} because the reconciler also writes terminal-ish + * outcomes (DEMOTED on escalation, SKIPPED when rate-limited). */ +export type PersistedDivergenceState = DivergenceState | "DEMOTED" | "SKIPPED"; + +/** + * What `gh pr view --json mergeable,mergeStateStatus` shape we read. Kept + * structural (no implementation-defined values beyond the strings GitHub emits) + * so the gateway stub in tests is a plain object literal. + * + * - `mergeable` ∈ { `MERGEABLE`, `CONFLICTING`, `UNKNOWN` } + * - `mergeStateStatus` ∈ { `CLEAN`, `BEHIND`, `DIRTY`, `BLOCKED`, + * `HAS_HOOKS`, `UNSTABLE`, `UNKNOWN` } + * + * Either field may be absent on legacy fixtures — treated as `UNKNOWN`. + */ +export type MergeabilityView = { + mergeable?: string | null; + mergeStateStatus?: string | null; +}; + +/** The narrow GitHub surface the divergence classifier needs — injectable so + * unit tests need no `gh`. */ +export type DivergenceGateway = { + /** Read the PR's mergeability fields, or null if the PR doesn't exist. */ + getMergeability(repo: string, prNumber: number): Promise; +}; + +/** + * Pure classification of a {@link MergeabilityView} into a {@link DivergenceState}. + * Split from {@link classifyDivergence} so tests can hit every branch without + * threading a gateway. The precedence (DIRTY → BEHIND → CLEAN+MERGEABLE → UNKNOWN) + * mirrors the order in #169's acceptance criteria. + */ +export function classifyMergeability(view: MergeabilityView | null): DivergenceState { + if (!view) return "UNKNOWN"; + const status = view.mergeStateStatus ?? null; + const mergeable = view.mergeable ?? null; + if (status === "DIRTY") return "CONFLICTED"; + if (status === "BEHIND") return "BEHIND"; + if (status === "CLEAN" && mergeable === "MERGEABLE") return "CLEAN"; + return "UNKNOWN"; +} + +/** + * Classify a PR's mergeability against the latest `main` and record the + * observation in `pr_divergence_state`. The recorded state is exactly the + * returned {@link DivergenceState} — the reconciler's terminal-ish outcomes + * (DEMOTED/SKIPPED) are written by the success/demote paths (Phase 4 / 5 / 6), + * not by the classifier. + * + * One `gh` call per PR (`mergeable,mergeStateStatus` on `pr view`), so a sweep + * over `N` open PRs costs `N` REST calls — the rate-limit ceiling + * (Phase 6) is what bounds bursts. + */ +export async function classifyDivergence( + deps: { db: Database; github: DivergenceGateway; now?: () => number }, + repo: string, + prNumber: number, +): Promise { + const view = await deps.github.getMergeability(repo, prNumber); + const state = classifyMergeability(view); + recordDivergenceState(deps.db, repo, prNumber, state, (deps.now ?? Date.now)()); + return state; +} + +/** Upsert one row in `pr_divergence_state`. Exposed so the success/demote/skip + * paths can write their own terminal-ish states without re-reading GitHub. */ +export function recordDivergenceState( + db: Database, + repo: string, + prNumber: number, + state: PersistedDivergenceState, + now: number = Date.now(), +): void { + db.run( + `INSERT INTO pr_divergence_state (repo, pr_number, state, classified_at) + VALUES (?, ?, ?, ?) + ON CONFLICT(repo, pr_number) DO UPDATE SET + state = excluded.state, classified_at = excluded.classified_at`, + [repo, prNumber, state, now], + ); +} + +/** Read the current row, or null if the PR has never been classified. */ +export function getDivergenceState( + db: Database, + repo: string, + prNumber: number, +): { state: PersistedDivergenceState; classifiedAt: number } | null { + const row = db + .query("SELECT state, classified_at FROM pr_divergence_state WHERE repo = ? AND pr_number = ?") + .get(repo, prNumber) as { state: string; classified_at: number } | null; + if (!row) return null; + return { state: row.state as PersistedDivergenceState, classifiedAt: row.classified_at }; +} diff --git a/packages/dispatcher/test/db.test.ts b/packages/dispatcher/test/db.test.ts index 75c3765b..3f727a58 100644 --- a/packages/dispatcher/test/db.test.ts +++ b/packages/dispatcher/test/db.test.ts @@ -61,8 +61,8 @@ describe("runMigrations", () => { test("applies every migration and reports the latest version", () => { const db = openDb(dbPath); - expect(runMigrations(db)).toBe(5); - expect(currentSchemaVersion(db)).toBe(5); + expect(runMigrations(db)).toBe(6); + expect(currentSchemaVersion(db)).toBe(6); db.close(); }); @@ -85,8 +85,8 @@ describe("runMigrations", () => { test("is idempotent — running twice leaves version at the latest and does not throw", () => { const db = openDb(dbPath); runMigrations(db); - expect(runMigrations(db)).toBe(5); - expect(currentSchemaVersion(db)).toBe(5); + expect(runMigrations(db)).toBe(6); + expect(currentSchemaVersion(db)).toBe(6); db.close(); }); @@ -159,8 +159,8 @@ describe("runMigrations", () => { ); db.run(`INSERT INTO events (workflow_id, ts, type) VALUES ('w1', 2, 'session.started')`); - // Now apply the remaining migrations (003 rebuild, then 004, then 005) over the seeded data. - expect(runMigrations(db, realDir)).toBe(5); + // Now apply the remaining migrations (003 rebuild, then 004, 005, 006) over the seeded data. + expect(runMigrations(db, realDir)).toBe(6); // The row survived the rebuild... expect( @@ -183,7 +183,7 @@ describe("runMigrations", () => { describe("openAndMigrate", () => { test("opens, migrates, and returns a ready database", () => { const db = openAndMigrate(dbPath); - expect(currentSchemaVersion(db)).toBe(5); + expect(currentSchemaVersion(db)).toBe(6); db.close(); }); }); diff --git a/packages/dispatcher/test/pr-divergence.test.ts b/packages/dispatcher/test/pr-divergence.test.ts new file mode 100644 index 00000000..6f76e3d1 --- /dev/null +++ b/packages/dispatcher/test/pr-divergence.test.ts @@ -0,0 +1,156 @@ +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import type { Database } from "bun:sqlite"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { openAndMigrate } from "../src/db.ts"; +import { + classifyDivergence, + classifyMergeability, + type DivergenceGateway, + getDivergenceState, + type MergeabilityView, + recordDivergenceState, +} from "../src/reconcilers/pr-divergence.ts"; + +let scratch: string; +let db: Database; + +beforeEach(() => { + scratch = mkdtempSync(join(tmpdir(), "middle-pr-div-")); + db = openAndMigrate(join(scratch, "db.sqlite3")); +}); + +afterEach(() => { + db.close(); + rmSync(scratch, { recursive: true, force: true }); +}); + +const REPO = "thejustinwalsh/middle"; + +/** A gateway stub returning a fixed mergeability view, recording calls. */ +function makeGateway( + responses: Record, +): DivergenceGateway & { calls: number } { + const g = { + calls: 0, + async getMergeability(_repo: string, prNumber: number) { + g.calls++; + return responses[prNumber] ?? null; + }, + }; + return g; +} + +describe("classifyMergeability", () => { + test("DIRTY → CONFLICTED regardless of mergeable", () => { + expect(classifyMergeability({ mergeStateStatus: "DIRTY", mergeable: "CONFLICTING" })).toBe( + "CONFLICTED", + ); + expect(classifyMergeability({ mergeStateStatus: "DIRTY", mergeable: "UNKNOWN" })).toBe( + "CONFLICTED", + ); + }); + + test("BEHIND → BEHIND", () => { + expect(classifyMergeability({ mergeStateStatus: "BEHIND", mergeable: "MERGEABLE" })).toBe( + "BEHIND", + ); + }); + + test("CLEAN + MERGEABLE → CLEAN", () => { + expect(classifyMergeability({ mergeStateStatus: "CLEAN", mergeable: "MERGEABLE" })).toBe( + "CLEAN", + ); + }); + + test("CLEAN but not MERGEABLE → UNKNOWN (CI gating, secondary signals)", () => { + expect(classifyMergeability({ mergeStateStatus: "CLEAN", mergeable: "UNKNOWN" })).toBe( + "UNKNOWN", + ); + }); + + test("BLOCKED / HAS_HOOKS / UNSTABLE / UNKNOWN → UNKNOWN", () => { + for (const status of ["BLOCKED", "HAS_HOOKS", "UNSTABLE", "UNKNOWN"]) { + expect(classifyMergeability({ mergeStateStatus: status, mergeable: "MERGEABLE" })).toBe( + "UNKNOWN", + ); + } + }); + + test("a null view (PR doesn't exist) → UNKNOWN", () => { + expect(classifyMergeability(null)).toBe("UNKNOWN"); + }); + + test("missing fields → UNKNOWN (legacy fixtures don't tip the classifier)", () => { + expect(classifyMergeability({})).toBe("UNKNOWN"); + expect(classifyMergeability({ mergeStateStatus: "CLEAN" })).toBe("UNKNOWN"); + expect(classifyMergeability({ mergeable: "MERGEABLE" })).toBe("UNKNOWN"); + }); +}); + +describe("classifyDivergence", () => { + test("classifies BEHIND and persists the row with the supplied clock", async () => { + const github = makeGateway({ + 90: { mergeStateStatus: "BEHIND", mergeable: "MERGEABLE" }, + }); + const now = 1_700_000_000_000; + + expect(await classifyDivergence({ db, github, now: () => now }, REPO, 90)).toBe("BEHIND"); + + expect(github.calls).toBe(1); + expect(getDivergenceState(db, REPO, 90)).toEqual({ state: "BEHIND", classifiedAt: now }); + }); + + test("classifies CONFLICTED and overwrites a prior row (upsert keeps the row fresh)", async () => { + const github = makeGateway({ + 90: { mergeStateStatus: "DIRTY", mergeable: "CONFLICTING" }, + }); + + // Pre-existing stale row from an earlier pass. + recordDivergenceState(db, REPO, 90, "BEHIND", 1_000); + + expect(await classifyDivergence({ db, github, now: () => 2_000 }, REPO, 90)).toBe("CONFLICTED"); + expect(getDivergenceState(db, REPO, 90)).toEqual({ + state: "CONFLICTED", + classifiedAt: 2_000, + }); + }); + + test("classifies CLEAN", async () => { + const github = makeGateway({ + 91: { mergeStateStatus: "CLEAN", mergeable: "MERGEABLE" }, + }); + expect(await classifyDivergence({ db, github, now: () => 3_000 }, REPO, 91)).toBe("CLEAN"); + expect(getDivergenceState(db, REPO, 91)?.state).toBe("CLEAN"); + }); + + test("classifies UNKNOWN for a PR with no mergeability view (gone / 404)", async () => { + const github = makeGateway({}); // no entry for prNumber → returns null + expect(await classifyDivergence({ db, github, now: () => 4_000 }, REPO, 99)).toBe("UNKNOWN"); + expect(getDivergenceState(db, REPO, 99)?.state).toBe("UNKNOWN"); + }); +}); + +describe("recordDivergenceState", () => { + test("accepts terminal-ish states (DEMOTED, SKIPPED) written by sibling phases", () => { + recordDivergenceState(db, REPO, 90, "DEMOTED", 100); + expect(getDivergenceState(db, REPO, 90)).toEqual({ state: "DEMOTED", classifiedAt: 100 }); + + recordDivergenceState(db, REPO, 91, "SKIPPED", 200); + expect(getDivergenceState(db, REPO, 91)).toEqual({ state: "SKIPPED", classifiedAt: 200 }); + }); + + test("the CHECK constraint rejects an out-of-vocabulary state — defends against a reconciler typo", () => { + expect(() => { + recordDivergenceState(db, REPO, 90, "BUSTED" as unknown as "CLEAN", 100); + }).toThrow(); + }); + + test("the (repo, pr_number) PK lets the same pr_number coexist across repos", () => { + recordDivergenceState(db, "owner-a/r", 90, "CLEAN", 100); + recordDivergenceState(db, "owner-b/r", 90, "BEHIND", 200); + expect(getDivergenceState(db, "owner-a/r", 90)?.state).toBe("CLEAN"); + expect(getDivergenceState(db, "owner-b/r", 90)?.state).toBe("BEHIND"); + }); +}); From efa77e2da1a77488535ae027e3d968b393529932 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 01:17:21 -0400 Subject: [PATCH 11/23] feat(reconcilers): rebase helper for an open Epic PR's worktree (#170) `tryRebaseOntoMain(deps, repo, prNumber)` resolves the PR's worktree by its managed `middle-issue-` head ref (creating it via `createWorktree` if absent under `//issue-/`), fetches `origin main`, runs `git rebase origin/main`, and on conflict captures the unmerged paths from `git diff --diff-filter=U` BEFORE aborting with `git rebase --abort` so the worktree returns to a clean state. Pure `GitOps` + `PrHeadRefGateway` seams keep the helper unit-testable; the production `gitOps` (real `git` via `Bun.spawn`) is exercised in a new fixture-repo integration test covering FF / non-FF-no-conflict / conflict + abort, plus skip-signals for a non-managed head ref and a missing PR. Also exports `parseEpicFromHeadRef` and `worktreePathFor` (unit tests pin the managed-ref vocabulary and the layout matches `createWorktree`). --- .../src/reconcilers/pr-divergence.ts | 174 ++++++++++++++ .../test/pr-divergence-integration.test.ts | 214 ++++++++++++++++++ .../dispatcher/test/pr-divergence.test.ts | 31 +++ 3 files changed, 419 insertions(+) create mode 100644 packages/dispatcher/test/pr-divergence-integration.test.ts diff --git a/packages/dispatcher/src/reconcilers/pr-divergence.ts b/packages/dispatcher/src/reconcilers/pr-divergence.ts index 2eb84ce7..dd655826 100644 --- a/packages/dispatcher/src/reconcilers/pr-divergence.ts +++ b/packages/dispatcher/src/reconcilers/pr-divergence.ts @@ -1,4 +1,8 @@ +import { existsSync } from "node:fs"; +import { homedir } from "node:os"; +import { join } from "node:path"; import type { Database } from "bun:sqlite"; +import { createWorktree, type WorktreeHandle } from "../worktree.ts"; /** * Open-PR divergence reconciler (Epic #168). When one Epic PR merges to `main`, @@ -126,3 +130,173 @@ export function getDivergenceState( if (!row) return null; return { state: row.state as PersistedDivergenceState, classifiedAt: row.classified_at }; } + +// ── Phase 2: rebase helper ────────────────────────────────────────────────── + +/** Outcome of a rebase or merge attempt. Conflict surfaces the file paths so + * the demote-to-work path can name them in its escalation comment. */ +export type GitResolutionResult = { ok: true } | { ok: false; conflictingPaths: string[] }; + +/** Convention from the rest of the daemon (`createWorktree`): the dispatch + * unit for an Epic with number N is the branch `middle-issue-N`. */ +const HEAD_REF_PREFIX = "middle-issue-"; + +/** Parse the Epic number out of a managed PR's head ref, or null if the ref + * doesn't follow the `middle-issue-` convention (e.g. a non-managed PR). */ +export function parseEpicFromHeadRef(headRefName: string): number | null { + if (!headRefName.startsWith(HEAD_REF_PREFIX)) return null; + const tail = headRefName.slice(HEAD_REF_PREFIX.length); + const n = Number(tail); + return Number.isInteger(n) && n > 0 ? n : null; +} + +/** The narrow GitHub surface the rebase/merge helpers need on top of + * {@link DivergenceGateway} — the PR's head ref so we can find its worktree. */ +export type PrHeadRefGateway = { + /** Read the PR's head-ref name (e.g. `middle-issue-32`), or null if the PR + * doesn't exist. */ + getPrHeadRef(repo: string, prNumber: number): Promise; +}; + +/** + * Pure-`git` operations the rebase/merge helpers run inside a worktree. + * Injectable so unit tests don't shell out, but the production implementation + * (`gitOps`) is what the integration test exercises against a real fixture repo. + */ +export type GitOps = { + /** `git fetch origin ` in `cwd`. Throws on a real fetch failure (network, + * no remote); the helper catches and reports it as an UNKNOWN-shaped result. */ + fetch(cwd: string, ref: string): Promise; + /** `git rebase `. Returns `{ ok: true }` on a clean rebase (incl. FF) + * and `{ ok: false, conflictingPaths }` after aborting on conflict. */ + rebase(cwd: string, upstream: string): Promise; +}; + +async function spawnGit( + cwd: string, + args: string[], +): Promise<{ stdout: string; stderr: string; exitCode: number }> { + const proc = Bun.spawn(["git", "-C", cwd, ...args], { stdout: "pipe", stderr: "pipe" }); + const [stdout, stderr] = await Promise.all([ + new Response(proc.stdout).text(), + new Response(proc.stderr).text(), + ]); + return { stdout, stderr, exitCode: await proc.exited }; +} + +/** + * The conflicting paths from a worktree mid-rebase, read via + * `git diff --name-only --diff-filter=U` (unmerged-only). Empty when no + * conflict markers exist — the rebase's own non-zero exit then has a different + * cause (a missing upstream, a hook refusal) which the helper surfaces as the + * empty-paths variant of the conflict result, never as a thrown error. + */ +async function readConflictingPaths(cwd: string): Promise { + const r = await spawnGit(cwd, ["diff", "--name-only", "--diff-filter=U"]); + if (r.exitCode !== 0) return []; + return r.stdout.split("\n").filter((l) => l.trim() !== ""); +} + +/** The production {@link GitOps}: real `git` via `Bun.spawn`. */ +export const gitOps: GitOps = { + async fetch(cwd: string, ref: string): Promise { + const r = await spawnGit(cwd, ["fetch", "origin", ref]); + if (r.exitCode !== 0) { + throw new Error(`git fetch origin ${ref} failed: ${r.stderr.trim()}`); + } + }, + async rebase(cwd: string, upstream: string): Promise { + const r = await spawnGit(cwd, ["rebase", upstream]); + if (r.exitCode === 0) return { ok: true }; + // Conflict path: capture paths BEFORE abort (abort clears the unmerged + // index, after which `--diff-filter=U` returns nothing). + const conflictingPaths = await readConflictingPaths(cwd); + await spawnGit(cwd, ["rebase", "--abort"]); + return { ok: false, conflictingPaths }; + }, +}; + +/** Default root for `git worktree` dispatch units — must match `createWorktree`. */ +function defaultWorktreeRoot(): string { + return join(homedir(), ".middle", "worktrees"); +} + +/** Where a worktree for `` and Epic `` should live, matching the + * layout `createWorktree` uses. */ +export function worktreePathFor(repo: string, epicNumber: number, worktreeRoot?: string): string { + return join(worktreeRoot ?? defaultWorktreeRoot(), repo, `issue-${epicNumber}`); +} + +/** Deps shared by the git helpers that operate on a managed PR's worktree — + * the rebase helper here, and the merge-commit fallback / success / demote + * helpers in later phases. Worktree resolution / re-creation is centralized + * here so each helper doesn't duplicate the path math + create fallback. */ +export type WorktreeOpsDeps = { + github: PrHeadRefGateway; + git: GitOps; + /** Local checkout path for a repo slug — needed to recreate a missing worktree. */ + resolveRepoPath: (repo: string) => string; + /** Worktree root override; defaults to `~/.middle/worktrees`. */ + worktreeRoot?: string; + /** Worktree creation seam — defaults to `createWorktree`; injectable for tests. */ + createWorktree?: (opts: { + repoPath: string; + repo: string; + issueNumber: number; + worktreeRoot?: string; + }) => Promise; +}; + +/** + * Resolve a PR's worktree path: existing under the worktree root, otherwise + * recreate it via `createWorktree`. Returns `null` when the PR has no managed + * head ref (not a `middle-issue-` branch) — the caller short-circuits and + * leaves the PR alone. Throws only when the head ref looks managed but the + * recreate path fails (a real wiring/disk error worth surfacing). + */ +export async function resolveWorktreePath( + deps: WorktreeOpsDeps, + repo: string, + prNumber: number, +): Promise<{ worktreePath: string; epicNumber: number } | null> { + const headRef = await deps.github.getPrHeadRef(repo, prNumber); + if (!headRef) return null; + const epicNumber = parseEpicFromHeadRef(headRef); + if (epicNumber === null) return null; + const worktreePath = worktreePathFor(repo, epicNumber, deps.worktreeRoot); + if (existsSync(worktreePath)) return { worktreePath, epicNumber }; + const create = deps.createWorktree ?? createWorktree; + await create({ + repoPath: deps.resolveRepoPath(repo), + repo, + issueNumber: epicNumber, + worktreeRoot: deps.worktreeRoot, + }); + return { worktreePath, epicNumber }; +} + +/** + * Attempt to rebase the PR's worktree onto the latest `origin/main`. The + * function is the composition of: + * + * 1. resolve the worktree (existing under the root, else recreate), + * 2. `git fetch origin main`, + * 3. `git rebase origin/main`, + * 4. on conflict → `git rebase --abort` and report the unmerged paths. + * + * It does NOT push (Phase 4 owns the push) and does NOT fall back to a merge + * commit (Phase 3 / the trigger sibling do). On a head ref that isn't a managed + * `middle-issue-` branch, the function reports `ok: false` with empty + * `conflictingPaths` so the caller skips this PR — distinguishable from an + * actual conflict because no paths are surfaced. + */ +export async function tryRebaseOntoMain( + deps: WorktreeOpsDeps, + repo: string, + prNumber: number, +): Promise { + const resolved = await resolveWorktreePath(deps, repo, prNumber); + if (!resolved) return { ok: false, conflictingPaths: [] }; + await deps.git.fetch(resolved.worktreePath, "main"); + return deps.git.rebase(resolved.worktreePath, "origin/main"); +} diff --git a/packages/dispatcher/test/pr-divergence-integration.test.ts b/packages/dispatcher/test/pr-divergence-integration.test.ts new file mode 100644 index 00000000..c89aa2e2 --- /dev/null +++ b/packages/dispatcher/test/pr-divergence-integration.test.ts @@ -0,0 +1,214 @@ +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { existsSync, mkdtempSync, realpathSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { gitOps, tryRebaseOntoMain } from "../src/reconcilers/pr-divergence.ts"; + +/** + * Integration tests for the rebase / merge helpers exercised against real `git` + * (no `gh`, no GitHub). The fixture builds a bare remote + a "main" working + * checkout (which pushes to it) + a "feature" worktree the helper rebases. + * Three rebase cases — clean fast-forward, non-FF without conflict, conflict + + * abort — plus the merge-commit fallback (Phase 3, exercised in a sibling + * describe block once the helper lands). + */ + +const GIT_ENV = { + ...process.env, + GIT_AUTHOR_NAME: "middle-test", + GIT_AUTHOR_EMAIL: "middle-test@example.invalid", + GIT_COMMITTER_NAME: "middle-test", + GIT_COMMITTER_EMAIL: "middle-test@example.invalid", +}; + +async function git(cwd: string, args: string[]): Promise<{ stdout: string; stderr: string }> { + const proc = Bun.spawn(["git", "-C", cwd, ...args], { + stdout: "pipe", + stderr: "pipe", + env: GIT_ENV, + }); + const [stdout, stderr] = await Promise.all([ + new Response(proc.stdout).text(), + new Response(proc.stderr).text(), + ]); + const code = await proc.exited; + if (code !== 0) { + throw new Error(`git ${args.join(" ")} failed (${code}): ${stderr.trim()}`); + } + return { stdout, stderr }; +} + +let scratch: string; +let remote: string; // bare repo +let work: string; // main-side working checkout (pushes to `remote`) +let worktree: string; // feature-side working checkout (the rebase target) + +async function writeAndCommit( + cwd: string, + file: string, + content: string, + message: string, +): Promise { + writeFileSync(join(cwd, file), content); + await git(cwd, ["add", file]); + await git(cwd, ["commit", "-m", message]); + return (await git(cwd, ["rev-parse", "HEAD"])).stdout.trim(); +} + +beforeEach(async () => { + scratch = realpathSync(mkdtempSync(join(tmpdir(), "middle-pr-div-int-"))); + remote = join(scratch, "remote.git"); + work = join(scratch, "work"); + worktree = join(scratch, "worktree"); + + // Bare remote: where both sides push/fetch. Pin its default branch to `main` + // so the first push lands as `main` regardless of host git defaults. + await git(scratch, ["init", "--bare", "--initial-branch=main", "remote.git"]); + + // Main-side checkout: seeds main, then makes future divergence by pushing + // additional commits to `origin/main`. Init standalone (cloning an empty bare + // repo fails when the requested ref doesn't exist yet) and wire its remote. + await git(scratch, ["init", "--initial-branch=main", "work"]); + await git(work, ["remote", "add", "origin", remote]); + await writeAndCommit(work, "README.md", "init\n", "init"); + await git(work, ["push", "-u", "origin", "main"]); + + // Feature-side checkout: where the helper actually runs. Cloned from the now- + // seeded remote; the branch is the managed `middle-issue-` convention. + await git(scratch, ["clone", "-b", "main", remote, "worktree"]); + await git(worktree, ["checkout", "-b", "middle-issue-32"]); +}); + +afterEach(() => { + rmSync(scratch, { recursive: true, force: true }); +}); + +/** + * Resolve the PR's worktree to the `worktree` checkout above, bypassing the + * worktree-root math. Tests inject this stub so the helper runs against the + * checkout we control. + */ +function deps(stub: { fetch?: typeof gitOps.fetch; rebase?: typeof gitOps.rebase } = {}) { + return { + github: { + async getPrHeadRef() { + return "middle-issue-32"; + }, + }, + git: { ...gitOps, ...stub }, + resolveRepoPath: () => work, + // Override createWorktree so a "missing" worktree is treated as the fixture. + createWorktree: async () => ({ + repoPath: work, + path: worktree, + branch: "middle-issue-32", + repo: "o/r", + unit: "issue-32", + }), + // Force the helper's resolved path TO the fixture worktree, regardless of + // what `worktreePathFor` would compute under `~/.middle/worktrees`. + worktreeRoot: scratch, + }; +} + +/** + * Plant the fixture worktree under the layout the helper expects + * (`//issue-/`) as a symlink to the real `worktree` checkout. + * `existsSync` on the symlink succeeds, the helper uses it, and `git -C` on the + * symlinked path follows through to the real fixture. + */ +async function aliasFixtureUnderRoot(repo: string, epic: number): Promise { + const target = join(scratch, repo, `issue-${epic}`); + const parent = join(scratch, repo); + await Bun.spawn(["mkdir", "-p", parent], { stderr: "ignore", stdout: "ignore" }).exited; + await Bun.spawn(["ln", "-s", worktree, target], { stderr: "ignore", stdout: "ignore" }).exited; +} + +describe("tryRebaseOntoMain — fixture repo", () => { + test("clean fast-forward: feature has no commits past old main; main advanced → rebase FFs", async () => { + // Main advances by one commit; feature is still at the seed. + await writeAndCommit(work, "main-file.txt", "main only\n", "main: add file"); + await git(work, ["push", "origin", "main"]); + + await aliasFixtureUnderRoot("o/r", 32); + const result = await tryRebaseOntoMain(deps(), "o/r", 999); + expect(result).toEqual({ ok: true }); + + // Feature now contains main's new file (fast-forwarded). + expect(existsSync(join(worktree, "main-file.txt"))).toBe(true); + }); + + test("non-FF, no conflict: feature edits A, main edits B, no shared paths → rebase replays cleanly", async () => { + // Feature edits its own file. + await writeAndCommit(worktree, "feature.txt", "feature\n", "feature: add"); + const featureSha = (await git(worktree, ["rev-parse", "HEAD"])).stdout.trim(); + + // Main advances on a disjoint path. + await writeAndCommit(work, "main.txt", "main\n", "main: add"); + await git(work, ["push", "origin", "main"]); + + await aliasFixtureUnderRoot("o/r", 32); + const result = await tryRebaseOntoMain(deps(), "o/r", 999); + expect(result).toEqual({ ok: true }); + + // Both files exist; HEAD has moved (rebase replayed feature on top of main). + expect(existsSync(join(worktree, "feature.txt"))).toBe(true); + expect(existsSync(join(worktree, "main.txt"))).toBe(true); + const newSha = (await git(worktree, ["rev-parse", "HEAD"])).stdout.trim(); + expect(newSha).not.toBe(featureSha); + }); + + test("conflict: feature + main both edit shared.txt → rebase aborts, paths reported, worktree clean", async () => { + // Seed a shared file on main first. + await writeAndCommit(work, "shared.txt", "line\n", "main: seed shared"); + await git(work, ["push", "origin", "main"]); + // Reset the feature worktree to that seed so both sides start from the same point. + await git(worktree, ["fetch", "origin", "main"]); + await git(worktree, ["reset", "--hard", "origin/main"]); + await git(worktree, ["checkout", "-B", "middle-issue-32"]); + + // Feature edits the shared line. + await writeAndCommit(worktree, "shared.txt", "feature edit\n", "feature: edit shared"); + const featureSha = (await git(worktree, ["rev-parse", "HEAD"])).stdout.trim(); + + // Main edits the SAME line. + await writeAndCommit(work, "shared.txt", "main edit\n", "main: edit shared"); + await git(work, ["push", "origin", "main"]); + + await aliasFixtureUnderRoot("o/r", 32); + const result = await tryRebaseOntoMain(deps(), "o/r", 999); + expect(result.ok).toBe(false); + if (!result.ok) expect(result.conflictingPaths).toEqual(["shared.txt"]); + + // Worktree is clean after abort: HEAD back at feature, no rebase state. + expect((await git(worktree, ["rev-parse", "HEAD"])).stdout.trim()).toBe(featureSha); + expect(existsSync(join(worktree, ".git", "rebase-merge"))).toBe(false); + expect(existsSync(join(worktree, ".git", "rebase-apply"))).toBe(false); + }); + + test("a non-managed head ref (not middle-issue-*) → ok:false with empty paths (skip signal)", async () => { + const skipDeps = { + ...deps(), + github: { + async getPrHeadRef() { + return "feature/random"; + }, + }, + }; + const result = await tryRebaseOntoMain(skipDeps, "o/r", 999); + expect(result).toEqual({ ok: false, conflictingPaths: [] }); + }); + + test("a missing PR (gateway returns null) → ok:false with empty paths (skip signal)", async () => { + const skipDeps = { + ...deps(), + github: { + async getPrHeadRef() { + return null; + }, + }, + }; + const result = await tryRebaseOntoMain(skipDeps, "o/r", 999); + expect(result).toEqual({ ok: false, conflictingPaths: [] }); + }); +}); diff --git a/packages/dispatcher/test/pr-divergence.test.ts b/packages/dispatcher/test/pr-divergence.test.ts index 6f76e3d1..a0afcf2d 100644 --- a/packages/dispatcher/test/pr-divergence.test.ts +++ b/packages/dispatcher/test/pr-divergence.test.ts @@ -10,7 +10,9 @@ import { type DivergenceGateway, getDivergenceState, type MergeabilityView, + parseEpicFromHeadRef, recordDivergenceState, + worktreePathFor, } from "../src/reconcilers/pr-divergence.ts"; let scratch: string; @@ -132,6 +134,35 @@ describe("classifyDivergence", () => { }); }); +describe("parseEpicFromHeadRef", () => { + test("parses `middle-issue-` to the integer N", () => { + expect(parseEpicFromHeadRef("middle-issue-32")).toBe(32); + expect(parseEpicFromHeadRef("middle-issue-1")).toBe(1); + expect(parseEpicFromHeadRef("middle-issue-12345")).toBe(12345); + }); + + test("a non-managed head ref → null (the helper skips it)", () => { + expect(parseEpicFromHeadRef("feature/foo")).toBe(null); + expect(parseEpicFromHeadRef("main")).toBe(null); + expect(parseEpicFromHeadRef("")).toBe(null); + }); + + test("a malformed managed ref → null (defends against an inadvertent rename)", () => { + expect(parseEpicFromHeadRef("middle-issue-")).toBe(null); + expect(parseEpicFromHeadRef("middle-issue-abc")).toBe(null); + expect(parseEpicFromHeadRef("middle-issue-32.5")).toBe(null); + // Negative / zero are not valid Epic numbers (issue numbers start at 1). + expect(parseEpicFromHeadRef("middle-issue-0")).toBe(null); + expect(parseEpicFromHeadRef("middle-issue--1")).toBe(null); + }); +}); + +describe("worktreePathFor", () => { + test("uses //issue- — the same layout createWorktree writes", () => { + expect(worktreePathFor("owner/repo", 32, "/wt-root")).toBe("/wt-root/owner/repo/issue-32"); + }); +}); + describe("recordDivergenceState", () => { test("accepts terminal-ish states (DEMOTED, SKIPPED) written by sibling phases", () => { recordDivergenceState(db, REPO, 90, "DEMOTED", 100); From 0d0a2647a944351e4e20f1c44fa3d1e82e8a7193 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 01:20:15 -0400 Subject: [PATCH 12/23] feat(reconcilers): -X ours merge fallback for rebase-loop conflicts (#171) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `tryMergeMainNewWorkAsBase(deps, repo, prNumber)` runs `git merge --no-edit --no-ff -X ours origin/main` so content collisions resolve new-work-as-base (the branch's version wins; main's net-new lands on top). Mirrors the rebase helper's shape: same worktree resolution, same GitResolutionResult contract, same abort-on-residual-conflict behavior — paths captured BEFORE `git merge --abort` so the worktree returns to a clean state. Integration tests cover both paths against a real fixture repo: - a same-line content collision (rebase would conflict — the fallback lands; feature's content wins; a merge commit lands in history with ≥2 parents) - a rename/rename conflict -X ours can't auto-resolve (abort, paths reported, no leftover .git/MERGE_HEAD) --no-ff is deliberate: even an FF-eligible merge produces a visible merge commit, so a reviewer can see main was folded in (the reconciliation is loud on purpose). --- .../src/reconcilers/pr-divergence.ts | 42 ++++++++++ .../test/pr-divergence-integration.test.ts | 76 ++++++++++++++++++- 2 files changed, 117 insertions(+), 1 deletion(-) diff --git a/packages/dispatcher/src/reconcilers/pr-divergence.ts b/packages/dispatcher/src/reconcilers/pr-divergence.ts index dd655826..bc49f25b 100644 --- a/packages/dispatcher/src/reconcilers/pr-divergence.ts +++ b/packages/dispatcher/src/reconcilers/pr-divergence.ts @@ -170,6 +170,11 @@ export type GitOps = { /** `git rebase `. Returns `{ ok: true }` on a clean rebase (incl. FF) * and `{ ok: false, conflictingPaths }` after aborting on conflict. */ rebase(cwd: string, upstream: string): Promise; + /** `git merge -X ours ` — *new-work-as-base* on a content collision: the + * branch's version wins, so main's net-new edits land on top. On a residual + * conflict `-X ours` can't auto-resolve (rare; rename/rename, modify/delete), + * aborts with `git merge --abort` and reports the unmerged paths. */ + mergeOurs(cwd: string, ref: string): Promise; }; async function spawnGit( @@ -214,6 +219,17 @@ export const gitOps: GitOps = { await spawnGit(cwd, ["rebase", "--abort"]); return { ok: false, conflictingPaths }; }, + async mergeOurs(cwd: string, ref: string): Promise { + // `--no-edit` keeps the run non-interactive when -X ours auto-resolves and a + // merge commit is produced. `--no-ff` ensures even a fast-forward-eligible + // merge produces a commit (the reconciliation is loud on purpose so a + // reviewer can see main was folded in). + const r = await spawnGit(cwd, ["merge", "--no-edit", "--no-ff", "-X", "ours", ref]); + if (r.exitCode === 0) return { ok: true }; + const conflictingPaths = await readConflictingPaths(cwd); + await spawnGit(cwd, ["merge", "--abort"]); + return { ok: false, conflictingPaths }; + }, }; /** Default root for `git worktree` dispatch units — must match `createWorktree`. */ @@ -300,3 +316,29 @@ export async function tryRebaseOntoMain( await deps.git.fetch(resolved.worktreePath, "main"); return deps.git.rebase(resolved.worktreePath, "origin/main"); } + +/** + * Merge-commit fallback when `tryRebaseOntoMain` keeps re-conflicting on the + * same hunks (CLAUDE.md → "Keeping the branch mergeable into main" → escape + * hatch). Performs a single `git merge -X ours origin/main` resolved + * **new-work-as-base**: on content collisions the branch's version wins, so + * main's net-new edits land cleanly on top. + * + * - `{ ok: true }` — the merge landed (clean or `-X ours`-resolved). + * - `{ ok: false, conflictingPaths }` — a residual conflict `-X ours` can't + * auto-resolve (structural: rename/rename, modify/delete, dir/file). The + * merge is aborted with `git merge --abort` so the worktree is clean. + * + * Mirrors `tryRebaseOntoMain`'s skip behavior for a non-managed head ref (the + * empty-paths case the trigger sibling distinguishes). + */ +export async function tryMergeMainNewWorkAsBase( + deps: WorktreeOpsDeps, + repo: string, + prNumber: number, +): Promise { + const resolved = await resolveWorktreePath(deps, repo, prNumber); + if (!resolved) return { ok: false, conflictingPaths: [] }; + await deps.git.fetch(resolved.worktreePath, "main"); + return deps.git.mergeOurs(resolved.worktreePath, "origin/main"); +} diff --git a/packages/dispatcher/test/pr-divergence-integration.test.ts b/packages/dispatcher/test/pr-divergence-integration.test.ts index c89aa2e2..5c5b328e 100644 --- a/packages/dispatcher/test/pr-divergence-integration.test.ts +++ b/packages/dispatcher/test/pr-divergence-integration.test.ts @@ -2,7 +2,11 @@ import { afterEach, beforeEach, describe, expect, test } from "bun:test"; import { existsSync, mkdtempSync, realpathSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { gitOps, tryRebaseOntoMain } from "../src/reconcilers/pr-divergence.ts"; +import { + gitOps, + tryMergeMainNewWorkAsBase, + tryRebaseOntoMain, +} from "../src/reconcilers/pr-divergence.ts"; /** * Integration tests for the rebase / merge helpers exercised against real `git` @@ -212,3 +216,73 @@ describe("tryRebaseOntoMain — fixture repo", () => { expect(result).toEqual({ ok: false, conflictingPaths: [] }); }); }); + +describe("tryMergeMainNewWorkAsBase — fixture repo", () => { + test("rebase would loop but merge -X ours lands cleanly (same line, feature wins)", async () => { + // Seed shared file on main first, reset feature so both start from one commit. + await writeAndCommit(work, "shared.txt", "line\n", "main: seed shared"); + await git(work, ["push", "origin", "main"]); + await git(worktree, ["fetch", "origin", "main"]); + await git(worktree, ["reset", "--hard", "origin/main"]); + await git(worktree, ["checkout", "-B", "middle-issue-32"]); + + // Same line, both sides. Rebase would conflict; merge -X ours auto-resolves. + await writeAndCommit(worktree, "shared.txt", "feature edit\n", "feature: edit shared"); + await writeAndCommit(work, "shared.txt", "main edit\n", "main: edit shared"); + await git(work, ["push", "origin", "main"]); + + await aliasFixtureUnderRoot("o/r", 32); + + // Sanity: rebase fails first (we exercised this in the rebase suite). + const rebaseResult = await tryRebaseOntoMain(deps(), "o/r", 999); + expect(rebaseResult.ok).toBe(false); + + // The fallback lands; feature's content wins (-X ours preserves the branch). + const mergeResult = await tryMergeMainNewWorkAsBase(deps(), "o/r", 999); + expect(mergeResult).toEqual({ ok: true }); + + const contents = await Bun.file(join(worktree, "shared.txt")).text(); + expect(contents).toBe("feature edit\n"); + + // A merge commit landed (not a fast-forward) — the reconciliation is visible + // in history so a reviewer can see main was folded in. + const parents = (await git(worktree, ["rev-list", "--parents", "-n", "1", "HEAD"])).stdout + .trim() + .split(/\s+/); + expect(parents.length).toBeGreaterThanOrEqual(3); // child + ≥2 parents + }); + + test("residual conflict -X ours can't auto-resolve (rename/rename) → abort, paths reported", async () => { + // Seed a baseline file, reset feature to it. + await writeAndCommit(work, "shared.txt", "baseline\n", "main: seed shared"); + await git(work, ["push", "origin", "main"]); + await git(worktree, ["fetch", "origin", "main"]); + await git(worktree, ["reset", "--hard", "origin/main"]); + await git(worktree, ["checkout", "-B", "middle-issue-32"]); + + // Feature renames shared.txt → feature-name.txt + await git(worktree, ["mv", "shared.txt", "feature-name.txt"]); + await git(worktree, ["commit", "-m", "feature: rename"]); + const featureSha = (await git(worktree, ["rev-parse", "HEAD"])).stdout.trim(); + + // Main renames shared.txt → main-name.txt — rename/rename is a structural + // conflict that -X ours cannot resolve. + await git(work, ["mv", "shared.txt", "main-name.txt"]); + await git(work, ["commit", "-m", "main: rename"]); + await git(work, ["push", "origin", "main"]); + + await aliasFixtureUnderRoot("o/r", 32); + const result = await tryMergeMainNewWorkAsBase(deps(), "o/r", 999); + expect(result.ok).toBe(false); + if (!result.ok) { + // The unmerged paths are whatever git's rename/rename resolver records; + // assert it surfaces at least one of the two renamed targets, so a real + // bug surfaces (an unconditionally-empty paths list). + expect(result.conflictingPaths.length).toBeGreaterThan(0); + } + + // Worktree clean after abort: HEAD back at feature, no leftover MERGE_HEAD. + expect((await git(worktree, ["rev-parse", "HEAD"])).stdout.trim()).toBe(featureSha); + expect(existsSync(join(worktree, ".git", "MERGE_HEAD"))).toBe(false); + }); +}); From 1edb47399d086353fa6470ab5d5b3fc44db1d99b Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 01:24:48 -0400 Subject: [PATCH 13/23] =?UTF-8?q?feat(reconcilers):=20applySuccess=20?= =?UTF-8?q?=E2=80=94=20force-push=20+=20announce=20+=20state=E2=86=92CLEAN?= =?UTF-8?q?=20(#172)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `applySuccess(deps, repo, prNumber, resolution, mainCommitSha)` lands a clean rebase or merge by pushing the worktree back to its PR branch, posting one announcement comment on the PR, and recording CLEAN in pr_divergence_state. Idempotent across consecutive calls for the same reconciliation: - Push is conditional: `git fetch origin ` first, then push only if local HEAD differs from origin/; force-with-lease never plain --force. - Comment is gated by a hidden HTML marker (``) embedded in the body — listed comments are scanned for it, so a second call with the same (sha, resolution) skips the post but a future main sha re-announces. - State row is upserted (idempotent by construction). Adds three GitOps methods backing this: `revParse(cwd, ref)`, `pushForceWithLease(cwd, branch)`, plus the `--no-edit --no-ff` already on `mergeOurs`. Three integration tests exercise the real-`git` paths against the fixture: push-happens + comment-once + double-call idempotence, sha-keyed re-announcement, and the non-managed head-ref no-op. --- .../src/reconcilers/pr-divergence.ts | 104 ++++++++++++++ .../test/pr-divergence-integration.test.ts | 131 ++++++++++++++++++ 2 files changed, 235 insertions(+) diff --git a/packages/dispatcher/src/reconcilers/pr-divergence.ts b/packages/dispatcher/src/reconcilers/pr-divergence.ts index bc49f25b..eb984892 100644 --- a/packages/dispatcher/src/reconcilers/pr-divergence.ts +++ b/packages/dispatcher/src/reconcilers/pr-divergence.ts @@ -175,6 +175,14 @@ export type GitOps = { * conflict `-X ours` can't auto-resolve (rare; rename/rename, modify/delete), * aborts with `git merge --abort` and reports the unmerged paths. */ mergeOurs(cwd: string, ref: string): Promise; + /** `git rev-parse ` — the SHA the ref points at, or null if it doesn't + * resolve (e.g. no remote-tracking ref yet). */ + revParse(cwd: string, ref: string): Promise; + /** `git push --force-with-lease origin ` — agent-managed branches only. + * Throws on push failure (a real conflict caught by the lease, or a permission + * / network error); `applySuccess` propagates so the trigger sibling retries + * next pass instead of silently advancing state. */ + pushForceWithLease(cwd: string, branch: string): Promise; }; async function spawnGit( @@ -230,6 +238,20 @@ export const gitOps: GitOps = { await spawnGit(cwd, ["merge", "--abort"]); return { ok: false, conflictingPaths }; }, + async revParse(cwd: string, ref: string): Promise { + // `--verify` keeps `rev-parse` from emitting a "fail-silent" fallback string + // for an unknown ref; we want null, not "". + const r = await spawnGit(cwd, ["rev-parse", "--verify", "--quiet", ref]); + if (r.exitCode !== 0) return null; + const sha = r.stdout.trim(); + return sha === "" ? null : sha; + }, + async pushForceWithLease(cwd: string, branch: string): Promise { + const r = await spawnGit(cwd, ["push", "--force-with-lease", "origin", branch]); + if (r.exitCode !== 0) { + throw new Error(`git push --force-with-lease origin ${branch} failed: ${r.stderr.trim()}`); + } + }, }; /** Default root for `git worktree` dispatch units — must match `createWorktree`. */ @@ -342,3 +364,85 @@ export async function tryMergeMainNewWorkAsBase( await deps.git.fetch(resolved.worktreePath, "main"); return deps.git.mergeOurs(resolved.worktreePath, "origin/main"); } + +// ── Phase 4: applySuccess ─────────────────────────────────────────────────── + +/** Resolution kind passed to {@link applySuccess}; mirrored verbatim in the PR + * comment so reviewers can see which path landed the reconciliation. */ +export type ReconciliationResolution = "rebased" | "merged-new-work-as-base"; + +/** The narrow comment surface `applySuccess` needs — listing for idempotency, + * posting for the one announcement. Matches the existing + * {@link "../github.ts".GitHubGateway} method names so the daemon-side + * composition is a thin `Pick`. */ +export type PrCommentGateway = { + listIssueComments(repo: string, issueNumber: number): Promise<{ body: string }[]>; + postComment(repo: string, issueNumber: number, body: string): Promise; +}; + +/** Deps for `applySuccess` — the union of worktree resolution (head ref + + * git) and PR-comment ops, plus the SQLite handle for the state update. */ +export type ApplySuccessDeps = WorktreeOpsDeps & { + db: Database; + github: PrHeadRefGateway & PrCommentGateway; + now?: () => number; +}; + +/** Render the hidden HTML marker that makes comment-posting idempotent across + * consecutive `applySuccess` calls for the *same* reconciliation + * (resolution + main sha). A future main sha re-allows a fresh announcement. */ +function reconciledMarker(resolution: ReconciliationResolution, mainCommitSha: string): string { + return ``; +} + +/** + * Apply the success path of a reconciliation: push the rebased / merged + * worktree back to its PR branch, post a single announcement on the PR, and + * record `CLEAN` in `pr_divergence_state`. Idempotent across consecutive + * invocations for the same reconciliation: + * + * - **Push:** `git fetch origin ` first, then push only if the local + * HEAD differs from `origin/` — so a re-call when the branch is + * already at the target state is a no-op (per #172's spec). + * - **Comment:** scans existing comments for the resolution+sha marker; posts + * only if absent. The marker is hidden HTML in the body, so the visible + * text stays clean. + * - **State row:** upserted to `CLEAN` — idempotent by construction. + * + * Force-pushing uses `--force-with-lease` (never plain `--force`), so a + * concurrent push from a human collaborator surfaces as a push failure rather + * than silently overwriting their work. + * + * A non-managed head ref (the PR isn't `middle-issue-`) short-circuits as a + * no-op — `applySuccess` is never the right action for a non-managed PR. + */ +export async function applySuccess( + deps: ApplySuccessDeps, + repo: string, + prNumber: number, + resolution: ReconciliationResolution, + mainCommitSha: string, +): Promise { + const resolved = await resolveWorktreePath(deps, repo, prNumber); + if (!resolved) return; + const branch = `${HEAD_REF_PREFIX}${resolved.epicNumber}`; + + // Sync the remote-tracking ref so the local-vs-remote comparison reflects + // the live origin state, not whatever was last cached. + await deps.git.fetch(resolved.worktreePath, branch); + const localSha = await deps.git.revParse(resolved.worktreePath, "HEAD"); + const remoteSha = await deps.git.revParse(resolved.worktreePath, `refs/remotes/origin/${branch}`); + if (localSha !== null && localSha !== remoteSha) { + await deps.git.pushForceWithLease(resolved.worktreePath, branch); + } + + const marker = reconciledMarker(resolution, mainCommitSha); + const existing = await deps.github.listIssueComments(repo, prNumber); + const alreadyPosted = existing.some((c) => (c.body ?? "").includes(marker)); + if (!alreadyPosted) { + const body = `🔁 Reconciled with main (${resolution}) after ${mainCommitSha.slice(0, 9)}\n\n${marker}`; + await deps.github.postComment(repo, prNumber, body); + } + + recordDivergenceState(deps.db, repo, prNumber, "CLEAN", (deps.now ?? Date.now)()); +} diff --git a/packages/dispatcher/test/pr-divergence-integration.test.ts b/packages/dispatcher/test/pr-divergence-integration.test.ts index 5c5b328e..988ed1aa 100644 --- a/packages/dispatcher/test/pr-divergence-integration.test.ts +++ b/packages/dispatcher/test/pr-divergence-integration.test.ts @@ -2,7 +2,11 @@ import { afterEach, beforeEach, describe, expect, test } from "bun:test"; import { existsSync, mkdtempSync, realpathSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; +import { type Database } from "bun:sqlite"; +import { openAndMigrate } from "../src/db.ts"; import { + applySuccess, + getDivergenceState, gitOps, tryMergeMainNewWorkAsBase, tryRebaseOntoMain, @@ -46,6 +50,7 @@ let scratch: string; let remote: string; // bare repo let work: string; // main-side working checkout (pushes to `remote`) let worktree: string; // feature-side working checkout (the rebase target) +let db: Database; // for applySuccess persistence checks async function writeAndCommit( cwd: string, @@ -81,9 +86,15 @@ beforeEach(async () => { // seeded remote; the branch is the managed `middle-issue-` convention. await git(scratch, ["clone", "-b", "main", remote, "worktree"]); await git(worktree, ["checkout", "-b", "middle-issue-32"]); + // Push the feature branch so `git fetch origin middle-issue-32` succeeds in + // applySuccess — the remote-tracking ref needs to exist before we compare. + await git(worktree, ["push", "-u", "origin", "middle-issue-32"]); + + db = openAndMigrate(join(scratch, "db.sqlite3")); }); afterEach(() => { + db.close(); rmSync(scratch, { recursive: true, force: true }); }); @@ -252,6 +263,9 @@ describe("tryMergeMainNewWorkAsBase — fixture repo", () => { expect(parents.length).toBeGreaterThanOrEqual(3); // child + ≥2 parents }); + // applySuccess integration tests live alongside the git helpers so they + // share the fixture; declared here so the shared describe block stays one + // unit. (See the `applySuccess` describe block below.) test("residual conflict -X ours can't auto-resolve (rename/rename) → abort, paths reported", async () => { // Seed a baseline file, reset feature to it. await writeAndCommit(work, "shared.txt", "baseline\n", "main: seed shared"); @@ -286,3 +300,120 @@ describe("tryMergeMainNewWorkAsBase — fixture repo", () => { expect(existsSync(join(worktree, ".git", "MERGE_HEAD"))).toBe(false); }); }); + +describe("applySuccess — fixture repo", () => { + /** + * Spy on PR-comment listing + posting. Mirrors the existing + * `GitHubGateway` subset {@link applySuccess} consumes. + */ + function makeCommentSpy(): { + listIssueComments: (repo: string, prNumber: number) => Promise<{ body: string }[]>; + postComment: (repo: string, prNumber: number, body: string) => Promise; + posted: string[]; + } { + const posted: string[] = []; + return { + posted, + async listIssueComments() { + return posted.map((body) => ({ body })); + }, + async postComment(_repo, _prNumber, body) { + posted.push(body); + }, + }; + } + + test("pushes the rebased branch, posts one PR comment, and records CLEAN — twice = idempotent", async () => { + // Make the feature branch ahead of origin/middle-issue-32: do a rebase + // first (simulating Phase 2 having run) so the local branch differs from + // its already-pushed remote. + await writeAndCommit(work, "main-file.txt", "main only\n", "main: add file"); + await git(work, ["push", "origin", "main"]); + await aliasFixtureUnderRoot("o/r", 32); + + const rebaseResult = await tryRebaseOntoMain(deps(), "o/r", 999); + expect(rebaseResult).toEqual({ ok: true }); + + const comments = makeCommentSpy(); + const baseDeps = deps(); + const successDeps = { + ...baseDeps, + db, + github: { ...baseDeps.github, ...comments }, + now: () => 1_700_000_000_000, + }; + + // First invocation: pushes (local was ahead) and posts the announcement. + await applySuccess(successDeps, "o/r", 999, "rebased", "abcdef1234567890"); + + expect(comments.posted.length).toBe(1); + expect(comments.posted[0]).toContain("🔁 Reconciled with main (rebased) after abcdef123"); + expect(comments.posted[0]).toContain(""); + expect(getDivergenceState(db, "o/r", 999)).toEqual({ + state: "CLEAN", + classifiedAt: 1_700_000_000_000, + }); + + // The branch landed on origin (force-with-lease push succeeded — the bare + // remote now has the rebased HEAD). + const localSha = (await git(worktree, ["rev-parse", "HEAD"])).stdout.trim(); + const remoteSha = (await git(remote, ["rev-parse", "middle-issue-32"])).stdout.trim(); + expect(remoteSha).toBe(localSha); + + // Second invocation for the same reconciliation: no double-comment, no + // error. The state row's classified_at re-stamps (upsert) so the row stays + // fresh; that's expected, not a duplication issue. + await applySuccess( + { ...successDeps, now: () => 1_700_000_001_000 }, + "o/r", + 999, + "rebased", + "abcdef1234567890", + ); + expect(comments.posted.length).toBe(1); + expect(getDivergenceState(db, "o/r", 999)?.classifiedAt).toBe(1_700_000_001_000); + }); + + test("a different mainCommitSha allows a fresh announcement (the marker is sha-keyed)", async () => { + // Feature is already in sync; no push needed. But a *new* reconciliation + // against a different main sha is a different event and should re-announce. + await aliasFixtureUnderRoot("o/r", 32); + const comments = makeCommentSpy(); + const baseDeps = deps(); + const successDeps = { + ...baseDeps, + db, + github: { ...baseDeps.github, ...comments }, + now: () => 100, + }; + + await applySuccess(successDeps, "o/r", 999, "merged-new-work-as-base", "aaaaaaaaa11111"); + await applySuccess(successDeps, "o/r", 999, "merged-new-work-as-base", "aaaaaaaaa11111"); + expect(comments.posted.length).toBe(1); // same sha → skipped + + await applySuccess(successDeps, "o/r", 999, "merged-new-work-as-base", "bbbbbbbbb22222"); + expect(comments.posted.length).toBe(2); // new sha → re-announced + expect(comments.posted[1]).toContain("merged-new-work-as-base"); + expect(comments.posted[1]).toContain("bbbbbbbbb"); + }); + + test("a non-managed head ref is a no-op (no push, no comment, no row)", async () => { + const comments = makeCommentSpy(); + const baseDeps = deps(); + const successDeps = { + ...baseDeps, + db, + github: { + ...baseDeps.github, + ...comments, + async getPrHeadRef() { + return "feature/random"; + }, + }, + now: () => 100, + }; + await applySuccess(successDeps, "o/r", 999, "rebased", "deadbeef00000"); + expect(comments.posted).toEqual([]); + expect(getDivergenceState(db, "o/r", 999)).toBe(null); + }); +}); From db2fc051125a597c73415a3c8d1488837ded16db Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 01:29:08 -0400 Subject: [PATCH 14/23] =?UTF-8?q?feat(reconcilers):=20applyDemoteToWork=20?= =?UTF-8?q?=E2=80=94=20flip=20draft=20+=20reopen=20sub-issue=20+=20dual-su?= =?UTF-8?q?rface=20escalate=20+=20re-enqueue=20(#173)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `applyDemoteToWork(deps, repo, prNumber, conflictingPaths)` escalates when both autonomous attempts (rebase + -X ours merge) have failed: 1. Resolves the Epic via the PR's `middle-issue-` head ref. 2. Skips entirely if the PR is already a draft — the demote has already landed. This gate survives a classifier overwrite of pr_divergence_state, since the live PR state is the source of truth for the action. 3. Converts the PR to draft. 4. Reopens the most-recently-closed sub-issue with an in-line escalation comment (or skips when no sub-issue is closed yet). 5. Posts a dual-surface escalation on both the Epic and the PR (CLAUDE.md review-feedback convention), embedding a hidden HTML marker keyed on the Epic number so a partial-retry doesn't pile on duplicates. 6. Re-enqueues the Epic through the injected `enqueueEpic` callback so the recommender's ranking still applies (Phase 6 wires it to a recommender run + scheduleAutoDispatch). 7. Records DEMOTED in pr_divergence_state. Six unit tests cover: full demote happy path (all calls in order), idempotent re-call via PR.isDraft, partial-retry marker safety, Epic with no closed sub-issues (graceful), non-managed head ref (no-op), missing PR (no-op). --- .../src/reconcilers/pr-divergence.ts | 140 +++++++++++ .../dispatcher/test/pr-divergence.test.ts | 219 ++++++++++++++++++ 2 files changed, 359 insertions(+) diff --git a/packages/dispatcher/src/reconcilers/pr-divergence.ts b/packages/dispatcher/src/reconcilers/pr-divergence.ts index eb984892..7aa05993 100644 --- a/packages/dispatcher/src/reconcilers/pr-divergence.ts +++ b/packages/dispatcher/src/reconcilers/pr-divergence.ts @@ -446,3 +446,143 @@ export async function applySuccess( recordDivergenceState(deps.db, repo, prNumber, "CLEAN", (deps.now ?? Date.now)()); } + +// ── Phase 5: applyDemoteToWork ────────────────────────────────────────────── + +/** One closed sub-issue under an Epic, as + * {@link ApplyDemoteGateway.listClosedSubIssues} returns them. `closedAt` is + * epoch ms or null if GitHub doesn't surface it (a hand-edited row). */ +export type ClosedSubIssue = { number: number; closedAt: number | null }; + +/** The narrow gateway `applyDemoteToWork` needs on top of comment posting: + * the PR's draft state, the draft-conversion mutation, the sub-issue listing, + * and the reopen call. Production wiring lives in Phase 6. */ +export type ApplyDemoteGateway = PrHeadRefGateway & + PrCommentGateway & { + /** Read the PR's draft status, or null if the PR doesn't exist. */ + getPullRequest(repo: string, prNumber: number): Promise<{ isDraft: boolean } | null>; + /** Convert a PR back to a draft. Caller already checked `isDraft = false`. */ + convertPrToDraft(repo: string, prNumber: number): Promise; + /** Closed sub-issues of an Epic, most-recently-closed first + * (caller relies on the ordering — don't return unsorted). */ + listClosedSubIssues(repo: string, epicNumber: number): Promise; + /** Reopen an issue and optionally post a comment in the same call. Idempotent + * on GitHub's side: reopening an already-open issue is a no-op. */ + reopenIssue(repo: string, issueNumber: number, options?: { comment?: string }): Promise; + }; + +/** Deps for `applyDemoteToWork` — gateway + db + the re-enqueue seam (caller + * wires it through the recommender entry point so ranking still applies). */ +export type ApplyDemoteDeps = { + db: Database; + github: ApplyDemoteGateway; + /** Re-enqueue the Epic for dispatch. Daemon-wired to a recommender run + + * `scheduleAutoDispatch(repo)` so the recommender's ranking still applies. + * The wiring is idempotent — the existing collision guard on the + * enqueue path no-ops a duplicate dispatch for the same Epic. */ + enqueueEpic: (repo: string, epicNumber: number) => Promise; + now?: () => number; +}; + +/** Hidden HTML marker that gates the dual-surface escalation comment per Epic. + * A retry after a partial failure (PR was demoted but a follow-on call lost + * the comment-post step) sees the marker and skips the duplicate. */ +function demoteMarker(epicNumber: number): string { + return ``; +} + +function renderDemoteEscalation( + epicNumber: number, + prNumber: number, + conflictingPaths: string[], +): string { + const pathsLine = + conflictingPaths.length > 0 + ? conflictingPaths.map((p) => `- \`${p}\``).join("\n") + : "_(no conflicting paths reported — the rebase and merge fallback both failed without surfacing unmerged files; investigate manually)_"; + return [ + `🛑 **Reconciliation escalation:** PR #${prNumber} for Epic #${epicNumber} could not be auto-reconciled with \`main\`.`, + ``, + `Both autonomous attempts failed:`, + `1. \`git rebase origin/main\` — conflicts`, + `2. \`git merge -X ours origin/main\` (new-work-as-base) — residual conflict`, + ``, + `Conflicting paths:`, + pathsLine, + ``, + `The PR has been flipped back to **draft** and the most-recently-closed sub-issue reopened so a fresh agent can pick up conflict resolution. The Epic has been re-enqueued through the recommender's ranking — it will be dispatched again when slots free up.`, + ``, + demoteMarker(epicNumber), + ].join("\n"); +} + +function renderSubIssueReopenComment(prNumber: number, epicNumber: number): string { + return `Reopened by the open-PR reconciler: PR #${prNumber} for Epic #${epicNumber} could not be auto-reconciled with \`main\`. See the escalation comment on the Epic and the PR for details.`; +} + +/** + * Demote a PR back to work when both autonomous reconciliation attempts (rebase + * + `-X ours` merge) have failed. Flips the PR to draft, reopens the most- + * recently-closed sub-issue of the Epic with an escalation comment, posts the + * same escalation on both the Epic and the PR (dual-surface per CLAUDE.md's + * review-feedback convention), re-enqueues the Epic through the recommender's + * entry point, and records `DEMOTED` in `pr_divergence_state`. + * + * Idempotency rests on two independent gates so a partial-retry doesn't pile + * on duplicates: + * - **PR.isDraft** — if the PR is already a draft, the demote has already + * landed; the whole function returns. This survives a classifier overwrite + * of the state row back to CONFLICTED (a re-classification under DEMOTED). + * - **comment marker** — the dual-surface comments include a hidden HTML + * marker keyed on the Epic number; on a retry that comes after the post + * succeeded but before the row was written, the listing-then-post sequence + * sees the marker and skips. (Re-`reopenIssue` and re-`enqueueEpic` are + * themselves idempotent — sub-issue reopen is a no-op on open, the enqueue + * collides against the existing-workflow guard.) + * + * Non-managed head refs (the PR isn't `middle-issue-`) short-circuit as a + * no-op — the reconciler is never the right hand for those. + */ +export async function applyDemoteToWork( + deps: ApplyDemoteDeps, + repo: string, + prNumber: number, + conflictingPaths: string[], +): Promise { + const headRef = await deps.github.getPrHeadRef(repo, prNumber); + if (!headRef) return; + const epicNumber = parseEpicFromHeadRef(headRef); + if (epicNumber === null) return; + + const pr = await deps.github.getPullRequest(repo, prNumber); + if (!pr) return; + if (pr.isDraft) return; // already demoted — fully idempotent + + await deps.github.convertPrToDraft(repo, prNumber); + + // Most-recently-closed sub-issue, if any. An Epic with no closed sub-issues + // (every phase is still open) means there's nothing to reopen — that's not + // an error; the demote still proceeds with the comments + re-enqueue. + const closedSubs = await deps.github.listClosedSubIssues(repo, epicNumber); + const targetSub = closedSubs[0]; // gateway contract: most-recently-closed first + if (targetSub) { + await deps.github.reopenIssue(repo, targetSub.number, { + comment: renderSubIssueReopenComment(prNumber, epicNumber), + }); + } + + // Dual-surface escalation. Same marker on both surfaces gates re-posts. + const escalationBody = renderDemoteEscalation(epicNumber, prNumber, conflictingPaths); + const marker = demoteMarker(epicNumber); + + for (const issueNumber of [prNumber, epicNumber]) { + const existing = await deps.github.listIssueComments(repo, issueNumber); + if (!existing.some((c) => (c.body ?? "").includes(marker))) { + await deps.github.postComment(repo, issueNumber, escalationBody); + } + } + + await deps.enqueueEpic(repo, epicNumber); + + recordDivergenceState(deps.db, repo, prNumber, "DEMOTED", (deps.now ?? Date.now)()); +} diff --git a/packages/dispatcher/test/pr-divergence.test.ts b/packages/dispatcher/test/pr-divergence.test.ts index a0afcf2d..c7c4ea25 100644 --- a/packages/dispatcher/test/pr-divergence.test.ts +++ b/packages/dispatcher/test/pr-divergence.test.ts @@ -5,8 +5,11 @@ import { tmpdir } from "node:os"; import { join } from "node:path"; import { openAndMigrate } from "../src/db.ts"; import { + type ApplyDemoteGateway, + applyDemoteToWork, classifyDivergence, classifyMergeability, + type ClosedSubIssue, type DivergenceGateway, getDivergenceState, type MergeabilityView, @@ -185,3 +188,219 @@ describe("recordDivergenceState", () => { expect(getDivergenceState(db, "owner-b/r", 90)?.state).toBe("BEHIND"); }); }); + +/** + * Build an `ApplyDemoteGateway` spy that records every call (so the test can + * assert exact call counts across consecutive invocations) and threads a + * settable `isDraft` for the PR — flipping it on the first call models how + * GitHub responds on the second. + */ +type DemoteSpy = { + gateway: ApplyDemoteGateway; + state: { + isDraft: boolean; + headRef: string | null; + /** Comments per issue number, keyed by issue number. */ + comments: Map; + closedSubs: ClosedSubIssue[]; + }; + calls: { + convertPrToDraft: Array<[string, number]>; + reopenIssue: Array<{ repo: string; issueNumber: number; comment: string | undefined }>; + listClosedSubIssues: Array<[string, number]>; + postComment: Array<{ repo: string; issueNumber: number; body: string }>; + }; +}; + +function makeDemoteSpy(over: Partial = {}): DemoteSpy { + const state: DemoteSpy["state"] = { + isDraft: false, + headRef: "middle-issue-32", + comments: new Map(), + closedSubs: [{ number: 50, closedAt: 1_700_000_000_000 }], + ...over, + }; + const calls: DemoteSpy["calls"] = { + convertPrToDraft: [], + reopenIssue: [], + listClosedSubIssues: [], + postComment: [], + }; + const gateway: ApplyDemoteGateway = { + async getPrHeadRef() { + return state.headRef; + }, + async getPullRequest() { + return { isDraft: state.isDraft }; + }, + async convertPrToDraft(repo, prNumber) { + calls.convertPrToDraft.push([repo, prNumber]); + state.isDraft = true; // model the live PR flipping + }, + async listClosedSubIssues(repo, epicNumber) { + calls.listClosedSubIssues.push([repo, epicNumber]); + return state.closedSubs; + }, + async reopenIssue(repo, issueNumber, options) { + calls.reopenIssue.push({ repo, issueNumber, comment: options?.comment }); + }, + async listIssueComments(_repo, issueNumber) { + return (state.comments.get(issueNumber) ?? []).map((body) => ({ body })); + }, + async postComment(repo, issueNumber, body) { + calls.postComment.push({ repo, issueNumber, body }); + const bucket = state.comments.get(issueNumber) ?? []; + bucket.push(body); + state.comments.set(issueNumber, bucket); + }, + }; + return { gateway, state, calls }; +} + +describe("applyDemoteToWork", () => { + test("flips PR draft, reopens sub-issue, posts dual-surface comment, re-enqueues, state→DEMOTED", async () => { + const spy = makeDemoteSpy(); + const enqueues: Array<[string, number]> = []; + await applyDemoteToWork( + { + db, + github: spy.gateway, + enqueueEpic: async (r, e) => { + enqueues.push([r, e]); + }, + now: () => 1_700_000_100_000, + }, + REPO, + 99, + ["packages/dispatcher/src/main.ts", "docs/README.md"], + ); + + expect(spy.calls.convertPrToDraft).toEqual([[REPO, 99]]); + expect(spy.calls.reopenIssue.length).toBe(1); + expect(spy.calls.reopenIssue[0]?.issueNumber).toBe(50); + expect(spy.calls.reopenIssue[0]?.comment).toContain("PR #99 for Epic #32"); + expect(spy.calls.postComment.length).toBe(2); + // Dual surface: one on the PR (99) and one on the Epic (32 — derived from head ref). + expect(new Set(spy.calls.postComment.map((c) => c.issueNumber))).toEqual(new Set([99, 32])); + // Conflicting paths are surfaced in the escalation body. + for (const post of spy.calls.postComment) { + expect(post.body).toContain("packages/dispatcher/src/main.ts"); + expect(post.body).toContain("docs/README.md"); + expect(post.body).toContain(""); + } + expect(enqueues).toEqual([[REPO, 32]]); + expect(getDivergenceState(db, REPO, 99)).toEqual({ + state: "DEMOTED", + classifiedAt: 1_700_000_100_000, + }); + }); + + test("idempotent: a second call sees PR.isDraft and skips everything", async () => { + const spy = makeDemoteSpy(); + const enqueues: Array<[string, number]> = []; + const deps = { + db, + github: spy.gateway, + enqueueEpic: async (r: string, e: number) => { + enqueues.push([r, e]); + }, + now: () => 100, + }; + + await applyDemoteToWork(deps, REPO, 99, ["a.txt"]); + await applyDemoteToWork(deps, REPO, 99, ["a.txt"]); // second call + + expect(spy.calls.convertPrToDraft.length).toBe(1); + expect(spy.calls.reopenIssue.length).toBe(1); + expect(spy.calls.postComment.length).toBe(2); + expect(enqueues.length).toBe(1); + }); + + test("partial-retry safety: existing marker on PR skips the duplicate PR comment, still posts on Epic", async () => { + // Simulate a crash after PR draft + PR comment but before Epic comment. On + // retry, the PR's marker is found and skipped; the Epic still needs the post. + // To trigger that path we keep PR.isDraft=false (so the function proceeds) + // and pre-seed the PR's comments with the marker. + const spy = makeDemoteSpy({ + comments: new Map([[99, ["…earlier escalation… "]]]), + }); + await applyDemoteToWork( + { + db, + github: spy.gateway, + enqueueEpic: async () => {}, + now: () => 100, + }, + REPO, + 99, + ["x"], + ); + // Only the Epic comment posts — the PR's existing marker gates the duplicate. + expect(spy.calls.postComment.length).toBe(1); + expect(spy.calls.postComment[0]?.issueNumber).toBe(32); + }); + + test("Epic with no closed sub-issues: still demotes + comments + enqueues; no reopen call", async () => { + const spy = makeDemoteSpy({ closedSubs: [] }); + const enqueues: Array<[string, number]> = []; + await applyDemoteToWork( + { + db, + github: spy.gateway, + enqueueEpic: async (r, e) => { + enqueues.push([r, e]); + }, + now: () => 100, + }, + REPO, + 99, + ["x"], + ); + expect(spy.calls.reopenIssue.length).toBe(0); + expect(spy.calls.convertPrToDraft.length).toBe(1); + expect(spy.calls.postComment.length).toBe(2); + expect(enqueues.length).toBe(1); + }); + + test("non-managed head ref → no-op (no draft, no comments, no enqueue, no row)", async () => { + const spy = makeDemoteSpy({ headRef: "feature/random" }); + const enqueues: Array<[string, number]> = []; + await applyDemoteToWork( + { + db, + github: spy.gateway, + enqueueEpic: async (r, e) => { + enqueues.push([r, e]); + }, + }, + REPO, + 99, + ["x"], + ); + expect(spy.calls.convertPrToDraft).toEqual([]); + expect(spy.calls.postComment).toEqual([]); + expect(enqueues).toEqual([]); + expect(getDivergenceState(db, REPO, 99)).toBe(null); + }); + + test("PR doesn't exist (gateway returns null) → no-op", async () => { + const spy = makeDemoteSpy(); + // Override getPullRequest to return null. + const gateway = { + ...spy.gateway, + getPullRequest: async () => null, + }; + await applyDemoteToWork( + { + db, + github: gateway, + enqueueEpic: async () => {}, + }, + REPO, + 99, + ["x"], + ); + expect(spy.calls.convertPrToDraft).toEqual([]); + expect(spy.calls.postComment).toEqual([]); + }); +}); From 143f5ea4cc1f0c09e181e891e8ca886d1efe9a72 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 01:36:48 -0400 Subject: [PATCH 15/23] feat(reconcilers): reconcileOpenPRs orchestrator + poller-tick wiring (#174) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `reconcileOpenPRs(deps, repo)` composes Phases 1-5 into one pass: listOpenManagedPrs → for each: classifyDivergence → CLEAN/UNKNOWN: pass → BEHIND/CONFLICTED: tryRebaseOntoMain → ok: applySuccess('rebased') → conflict: tryMergeMainNewWorkAsBase → ok: applySuccess('merged-new-work-as-base') → conflict: applyDemoteToWork(union of paths) - Rate-limit aware: skips the pass when GitHub budget < `rateLimitBuffer` (default 100, matches the resume poller). Returns `skippedForBudget: true`. - Burst-capped at `maxPrsPerPass` (default 25). Per-PR failures are isolated and logged; the pass continues. - Production `ghReconcilerGateway` exports the new gh-backed calls (listOpenManagedPrs, getMainCommitSha, getMergeability, getPrHeadRef, getPullRequest, convertPrToDraft, listClosedSubIssues, reopenIssue). Composed with `ghGitHub` for comment ops at the daemon-wiring site. Wiring: - `startPoller` gains a `ReconcilerHooks` arg with `perTickSweep` (per-tick sweep over every managed repo) and `onMergedTransition` (immediate sweep fired from inside `reconcileMergedParks` when a parked Epic's PR is observed transitioning to MERGED — divergence on siblings heals at the moment of merge, not up to a tick later). - `main.ts` builds `reconcileOpenPRsForRepo` (composing `ghGitHub` + `ghReconcilerGateway`, threading `runRecommenderForRepo` + `scheduleAutoDispatch` as the `enqueueEpic` seam so re-dispatch still routes through the recommender's ranking). Six added integration tests cover the end-to-end paths against the real fixture repo: BEHIND→rebase→applySuccess (+ idempotent re-tick reading CLEAN), CONFLICTED→merge-fallback, double-failure→demote, rate-limit floor short- circuit, CLEAN walk (no side effects), and listing-throws isolation. --- packages/dispatcher/src/main.ts | 79 ++++- packages/dispatcher/src/poller-cron.ts | 35 +- packages/dispatcher/src/poller.ts | 21 ++ .../src/reconcilers/pr-divergence.ts | 301 ++++++++++++++++++ .../test/pr-divergence-integration.test.ts | 295 +++++++++++++++++ 5 files changed, 715 insertions(+), 16 deletions(-) diff --git a/packages/dispatcher/src/main.ts b/packages/dispatcher/src/main.ts index fd751f70..4ea2f212 100644 --- a/packages/dispatcher/src/main.ts +++ b/packages/dispatcher/src/main.ts @@ -28,6 +28,7 @@ import { addRateLimitObserver, clearRateLimitObservers, getRateLimitState } from import { ghSurfaceProblem, resolveRecommenderOptions } from "./recommender-run.ts"; import { ghPollGateway } from "./poller-gateway.ts"; import { startPoller } from "./poller-cron.ts"; +import { ghReconcilerGateway, gitOps, reconcileOpenPRs } from "./reconcilers/pr-divergence.ts"; import { runRecommenderCronPass, startRecommenderCron } from "./recommender-cron.ts"; import { isPaused, listManagedRepos, registerManagedRepo } from "./repo-config.ts"; import { getSlotState, hasFreeSlot } from "./slots.ts"; @@ -591,19 +592,77 @@ export async function runDaemon(opts: RunDaemonOptions = {}): Promise { getAdapter, }); + // Open-PR divergence reconciler (Epic #168). Sweep every known managed repo + // for open PRs that drifted from main; rebase first, -X ours merge fallback + // second, demote-to-work as the escalation. Composed across the existing + // `ghGitHub` (comment ops) and the reconciler-specific `ghReconcilerGateway` + // (head ref, mergeability, draft conversion, sub-issue listing, reopen). + // Re-enqueue routes through the recommender so ranking still applies. + const reconcilerGithub = { ...ghGitHub, ...ghReconcilerGateway }; + async function reconcileOpenPRsForRepo(repo: string): Promise { + const repoPath = repoPaths.get(repo); + if (repoPath === undefined) return; + try { + const result = await reconcileOpenPRs( + { + db, + github: reconcilerGithub, + git: gitOps, + resolveRepoPath: () => repoPath, + worktreeRoot: config.global.worktreeRoot, + enqueueEpic: async (r, epicNumber) => { + const result = await runRecommenderForRepo(repoPaths.get(r) ?? repoPath); + if (result.status >= 400) { + console.error( + `[pr-divergence] re-enqueue recommender for ${r} #${epicNumber} failed (${result.status}): ${result.body}`, + ); + } + scheduleAutoDispatch(r); + }, + getRateLimit: () => ghPollGateway.getRateLimit(), + }, + repo, + ); + if (result.reconciled > 0 || result.skippedForBudget) { + console.log( + `[pr-divergence] ${repo}: reconciled=${result.reconciled} passed=${result.passed} skippedForBudget=${result.skippedForBudget}`, + ); + } + } catch (error) { + console.error(`[pr-divergence] ${repo} failed: ${(error as Error).message}`); + } + } + // GitHub poller: every 60s, for each parked workflow with an armed wait, fire // its resume signal when the unblocking event appears (a human reply, or a PR // review verdict). `fireSignal` delivers it to this engine — the one that now - // hosts the parked executions, so review-resume actually fires. - const stopPoller = await startPoller({ - db, - github: ghPollGateway, - fireSignal: (workflowId, payload) => engine.signal(workflowId, RESUME_EVENT, payload), - // Reconcile pass: when a parked Epic's PR has merged/closed, finalize the row - // and best-effort tear down its worktree (repo checkout from the registry). - removeWorktree: (repo, worktreePath) => - worktreePath ? pruneWorktreeAt(repoPaths.get(repo) ?? null, worktreePath) : Promise.resolve(), - }); + // hosts the parked executions, so review-resume actually fires. The poller + // tick also runs the open-PR divergence reconciler (Epic #168) once per + // managed repo; a MERGED-transition observed inside `reconcileMergedParks` + // additionally triggers an immediate sweep so siblings aren't left stale for + // up to a full interval. + const stopPoller = await startPoller( + { + db, + github: ghPollGateway, + fireSignal: (workflowId, payload) => engine.signal(workflowId, RESUME_EVENT, payload), + // Reconcile pass: when a parked Epic's PR has merged/closed, finalize the row + // and best-effort tear down its worktree (repo checkout from the registry). + removeWorktree: (repo, worktreePath) => + worktreePath + ? pruneWorktreeAt(repoPaths.get(repo) ?? null, worktreePath) + : Promise.resolve(), + }, + undefined, + { + perTickSweep: async () => { + for (const repo of repoPaths.keys()) { + await reconcileOpenPRsForRepo(repo); + } + }, + onMergedTransition: (repo) => reconcileOpenPRsForRepo(repo), + }, + ); // Recommender cron (#135): every minute, run the recommender for each managed // repo whose `[recommender] interval_minutes` has elapsed — the periodic source diff --git a/packages/dispatcher/src/poller-cron.ts b/packages/dispatcher/src/poller-cron.ts index 5d3b0d5e..f8aadba0 100644 --- a/packages/dispatcher/src/poller-cron.ts +++ b/packages/dispatcher/src/poller-cron.ts @@ -11,20 +11,36 @@ import { type PollerDeps, reconcileMergedParks, runPoller } from "./poller.ts"; */ export const POLLER_INTERVAL_MS = 120_000; +/** + * Extra reconciliation work the daemon hangs off each poller tick (Epic #168). + * `perTickSweep` runs after the resume poll + merged-parks reconciliation, once + * per tick. `onMergedTransition` is invoked from `reconcileMergedParks` whenever + * a parked Epic's PR is observed transitioning to MERGED — the daemon wires it + * to an *immediate* `reconcileOpenPRs` sweep so divergence on the sibling Epic + * PRs is healed at the moment of merge, not up to a tick later. + */ +export type ReconcilerHooks = { + perTickSweep?: () => Promise; + onMergedTransition?: (repo: string) => Promise; +}; + /** * Stand up the GitHub poller as a bunqueue cron: every `intervalMs` (default * {@link POLLER_INTERVAL_MS}) it runs one {@link runPoller} pass over parked * workflows with an armed wait, firing the resume signal when the unblocking * event appears, then runs one {@link reconcileMergedParks} pass to finalize - * parked workflows whose Epic PR has landed/closed. Returns a stop function that - * tears the cron down. Each pass is resilient on its own (per-workflow failures - * are isolated); this wrapper guards each so a thrown pass never crashes the - * cron worker — and isolates them from each other so a failed resume poll still - * lets reconciliation run, and vice versa. + * parked workflows whose Epic PR has landed/closed, then an optional + * `reconcilers.perTickSweep` for the open-PR divergence reconciler (Epic #168). + * + * Returns a stop function that tears the cron down. Each pass is resilient on + * its own (per-workflow failures are isolated); this wrapper guards each so a + * thrown pass never crashes the cron worker — and isolates them from each other + * so a failed resume poll still lets reconciliation run, and vice versa. */ export async function startPoller( deps: PollerDeps, intervalMs: number = POLLER_INTERVAL_MS, + reconcilers: ReconcilerHooks = {}, ): Promise<() => Promise> { const queue = new Bunqueue("middle-poller", { embedded: true, @@ -35,10 +51,17 @@ export async function startPoller( console.error(`[poller] pass failed: ${(error as Error).message}`); } try { - await reconcileMergedParks(deps); + await reconcileMergedParks({ ...deps, onMergedTransition: reconcilers.onMergedTransition }); } catch (error) { console.error(`[reconcile] pass failed: ${(error as Error).message}`); } + if (reconcilers.perTickSweep) { + try { + await reconcilers.perTickSweep(); + } catch (error) { + console.error(`[pr-divergence] tick sweep failed: ${(error as Error).message}`); + } + } }, }); await queue.every("poller-tick", intervalMs); diff --git a/packages/dispatcher/src/poller.ts b/packages/dispatcher/src/poller.ts index c5d710ab..bedd9e5f 100644 --- a/packages/dispatcher/src/poller.ts +++ b/packages/dispatcher/src/poller.ts @@ -352,6 +352,15 @@ export type ReconcileDeps = { rateLimitBuffer?: number; /** Cap on rows reconciled per pass. Defaults to {@link DEFAULT_MAX_POLLS_PER_PASS}. */ maxPollsPerPass?: number; + /** + * Fired once per repo whose parked workflow was finalized because its PR + * transitioned to MERGED. The daemon wires this to an immediate + * `reconcileOpenPRs` sweep (Epic #168), so divergence on sibling Epic PRs is + * healed at the moment of merge rather than up to a tick later. Per-repo + * de-duplication is the caller's job — multiple MERGED transitions on the + * same repo this pass fire `onMergedTransition` once per row. + */ + onMergedTransition?: (repo: string) => Promise; }; export async function reconcileMergedParks(deps: ReconcileDeps): Promise { @@ -394,6 +403,18 @@ export async function reconcileMergedParks(deps: ReconcileDeps): Promise ); } } + // Epic #168 hook: a MERGED transition is the moment divergence may have + // emerged on sibling Epic PRs. Best-effort and isolated — a throw here + // never blocks the rest of the parks pass. + if (life.state === "MERGED" && deps.onMergedTransition) { + try { + await deps.onMergedTransition(wf.repo); + } catch (error) { + console.error( + `[reconcile] onMergedTransition for ${wf.repo} failed (continuing): ${(error as Error).message}`, + ); + } + } } catch (error) { console.error( `[reconcile] failed for workflow ${wf.id} (${wf.repo}#${wf.epicNumber}): ${(error as Error).message}`, diff --git a/packages/dispatcher/src/reconcilers/pr-divergence.ts b/packages/dispatcher/src/reconcilers/pr-divergence.ts index 7aa05993..44919c3b 100644 --- a/packages/dispatcher/src/reconcilers/pr-divergence.ts +++ b/packages/dispatcher/src/reconcilers/pr-divergence.ts @@ -586,3 +586,304 @@ export async function applyDemoteToWork( recordDivergenceState(deps.db, repo, prNumber, "DEMOTED", (deps.now ?? Date.now)()); } + +// ── Phase 6: reconcileOpenPRs + production gateway ───────────────────────── + +/** Header info for a managed open PR — drives the per-PR reconciliation chain. */ +export type OpenManagedPr = { + prNumber: number; + /** Always starts with `middle-issue-`; the reconciler's listing filter enforces it. */ + headRefName: string; +}; + +/** The narrow GitHub surface the orchestrator needs on top of the per-phase + * gateways: listing the managed PRs to walk + reading the current main HEAD + * SHA (used by `applySuccess` so the comment names the main the reconciliation + * caught up to). */ +export type OrchestratorGateway = { + /** Open PRs whose head ref is a managed `middle-issue-` branch. + * Implementations list and filter — the gh PR search syntax can't strictly + * pin a prefix. Capped at 100 by `gh pr list --limit 100`; a backlog past + * that is the rate-limit floor's problem, not the orchestrator's. */ + listOpenManagedPrs(repo: string): Promise; + /** The current SHA of `main` on origin — embedded in `applySuccess`'s + * comment marker so a future reconciliation against a newer main re-announces. */ + getMainCommitSha(repo: string): Promise; +}; + +/** Composite gateway the orchestrator needs end-to-end. */ +export type ReconcilerGateway = OrchestratorGateway & DivergenceGateway & ApplyDemoteGateway; + +/** GitHub's REST budget status — mirrors {@link "../poller.ts".RateLimitStatus}. + * Re-stated here so the reconciler doesn't import from `poller.ts` for one type. */ +export type RestBudget = { remaining: number; resetAt: number }; + +/** Skip the pass when GitHub's remaining REST budget is below this. Matches + * the resume poller's default ({@link "../poller.ts".DEFAULT_RATE_LIMIT_BUFFER}) + * so the two reconcilers exercise the same restraint. */ +export const DEFAULT_RECONCILER_BUDGET_FLOOR = 100; + +/** Cap on PRs reconciled in one pass — bounds the burst when many PRs accumulate. */ +export const DEFAULT_MAX_PRS_PER_PASS = 25; + +/** Deps the orchestrator pulls together — the per-phase deps + gateway + + * rate-limit read + the recommender enqueue seam. Daemon-wired in Phase 6. */ +export type ReconcileOpenPRsDeps = WorktreeOpsDeps & { + db: Database; + github: ReconcilerGateway; + /** Re-enqueue the Epic for dispatch (drives `applyDemoteToWork`). */ + enqueueEpic: (repo: string, epicNumber: number) => Promise; + /** GitHub REST budget — checking it costs no budget. */ + getRateLimit: () => Promise; + /** Skip the pass when `getRateLimit().remaining < rateLimitBuffer`. */ + rateLimitBuffer?: number; + /** Cap on PRs reconciled per pass. */ + maxPrsPerPass?: number; + now?: () => number; +}; + +/** Counters returned from one `reconcileOpenPRs` pass. */ +export type ReconcileOpenPRsResult = { + /** PRs that the chain advanced (rebased / merged / demoted), summed. */ + reconciled: number; + /** PRs walked but left as-is (CLEAN or UNKNOWN — nothing to do this pass). */ + passed: number; + /** True when the whole pass was short-circuited by the rate-limit floor. */ + skippedForBudget: boolean; +}; + +/** + * Walk one repo's open managed PRs and apply the reconciliation chain: + * + * classifyDivergence → if BEHIND/CONFLICTED → + * tryRebaseOntoMain → ok → applySuccess('rebased') + * → conflict → + * tryMergeMainNewWorkAsBase → ok → applySuccess('merged-new-work-as-base') + * → conflict → applyDemoteToWork(union of paths) + * + * Per-PR failures are isolated and logged; the pass continues. Skipped wholesale + * when GitHub's REST budget is below `rateLimitBuffer` — the (free) `rate_limit` + * read is the only call that costs nothing. Per-pass burst is capped at + * `maxPrsPerPass`; the remainder is picked up next tick. + * + * Returns counters for logging/tests. Does not throw — the cron wrapper that + * calls this never has to guard. + */ +export async function reconcileOpenPRs( + deps: ReconcileOpenPRsDeps, + repo: string, +): Promise { + const buffer = deps.rateLimitBuffer ?? DEFAULT_RECONCILER_BUDGET_FLOOR; + const maxPerPass = deps.maxPrsPerPass ?? DEFAULT_MAX_PRS_PER_PASS; + + const budget = await deps.getRateLimit(); + if (budget.remaining < buffer) { + console.error( + `[pr-divergence] GitHub budget low (${budget.remaining} < ${buffer}); skipping pass — resets ${new Date(budget.resetAt).toISOString()}`, + ); + return { reconciled: 0, passed: 0, skippedForBudget: true }; + } + + let prs: OpenManagedPr[]; + try { + prs = await deps.github.listOpenManagedPrs(repo); + } catch (error) { + console.error( + `[pr-divergence] list open managed PRs for ${repo} failed: ${(error as Error).message}`, + ); + return { reconciled: 0, passed: 0, skippedForBudget: false }; + } + if (prs.length === 0) { + return { reconciled: 0, passed: 0, skippedForBudget: false }; + } + + // Fetched once per pass so applySuccess's comment marker is consistent across + // every PR the pass advances. Failures (transient GitHub) leave the marker + // unset — applySuccess simply skips its comment step in that case (the marker + // would be ambiguous), but the rebase/merge state already landed. + let mainSha: string | null = null; + try { + mainSha = await deps.github.getMainCommitSha(repo); + } catch (error) { + console.error(`[pr-divergence] read main SHA for ${repo} failed: ${(error as Error).message}`); + } + + let reconciled = 0; + let passed = 0; + + for (const pr of prs.slice(0, maxPerPass)) { + try { + const divergence = await classifyDivergence(deps, repo, pr.prNumber); + if (divergence === "CLEAN" || divergence === "UNKNOWN") { + passed++; + continue; + } + + // BEHIND or CONFLICTED — try rebase first. + const rebaseResult = await tryRebaseOntoMain(deps, repo, pr.prNumber); + if (rebaseResult.ok) { + if (mainSha) await applySuccess(deps, repo, pr.prNumber, "rebased", mainSha); + reconciled++; + continue; + } + + // Rebase loop — try -X ours merge fallback (new-work-as-base). + const mergeResult = await tryMergeMainNewWorkAsBase(deps, repo, pr.prNumber); + if (mergeResult.ok) { + if (mainSha) { + await applySuccess(deps, repo, pr.prNumber, "merged-new-work-as-base", mainSha); + } + reconciled++; + continue; + } + + // Both failed — demote. Union the conflict paths so the escalation surfaces + // every file either attempt tripped on. + const conflictingPaths = Array.from( + new Set([...rebaseResult.conflictingPaths, ...mergeResult.conflictingPaths]), + ); + await applyDemoteToWork( + { + db: deps.db, + github: deps.github, + enqueueEpic: deps.enqueueEpic, + now: deps.now, + }, + repo, + pr.prNumber, + conflictingPaths, + ); + reconciled++; + } catch (error) { + console.error( + `[pr-divergence] ${repo} PR #${pr.prNumber} reconciliation failed: ${(error as Error).message}`, + ); + } + } + + return { reconciled, passed, skippedForBudget: false }; +} + +// ── Production `gh`-backed gateway (for the daemon) ──────────────────────── + +async function ghSpawn( + argv: string[], +): Promise<{ stdout: string; stderr: string; exitCode: number }> { + const proc = Bun.spawn(["gh", ...argv], { + stdout: "pipe", + stderr: "pipe", + stdin: "ignore", + }); + const [stdout, stderr] = await Promise.all([ + new Response(proc.stdout).text(), + new Response(proc.stderr).text(), + ]); + return { stdout, stderr, exitCode: await proc.exited }; +} + +async function ghJson(argv: string[]): Promise { + const r = await ghSpawn(argv); + if (r.exitCode !== 0) { + throw new Error(`gh ${argv.join(" ")} failed: ${r.stderr.trim()}`); + } + return JSON.parse(r.stdout) as T; +} + +/** + * Production gateway backing {@link reconcileOpenPRs}. Composes with + * `ghGitHub`'s comment ops at the daemon-wiring site; methods that overlap + * (`listIssueComments`, `postComment`) are intentionally absent here so the + * daemon's spread keeps `ghGitHub` as the canonical comment poster. + */ +export const ghReconcilerGateway: Omit = { + async listOpenManagedPrs(repo) { + const rows = await ghJson>([ + "pr", + "list", + "--repo", + repo, + "--state", + "open", + "--json", + "number,headRefName", + "--limit", + "100", + ]); + return rows + .filter((p) => p.headRefName.startsWith(HEAD_REF_PREFIX)) + .map((p) => ({ prNumber: p.number, headRefName: p.headRefName })); + }, + async getMainCommitSha(repo) { + const r = await ghSpawn(["api", `/repos/${repo}/branches/main`, "--jq", ".commit.sha"]); + if (r.exitCode !== 0) return null; + const sha = r.stdout.trim(); + return sha === "" ? null : sha; + }, + async getMergeability(repo, prNumber) { + const r = await ghSpawn([ + "pr", + "view", + String(prNumber), + "--repo", + repo, + "--json", + "mergeable,mergeStateStatus", + ]); + if (r.exitCode !== 0) return null; + return JSON.parse(r.stdout) as MergeabilityView; + }, + async getPrHeadRef(repo, prNumber) { + const r = await ghSpawn([ + "pr", + "view", + String(prNumber), + "--repo", + repo, + "--json", + "headRefName", + "--jq", + ".headRefName", + ]); + if (r.exitCode !== 0) return null; + const ref = r.stdout.trim(); + return ref === "" ? null : ref; + }, + async getPullRequest(repo, prNumber) { + const r = await ghSpawn(["pr", "view", String(prNumber), "--repo", repo, "--json", "isDraft"]); + if (r.exitCode !== 0) return null; + return JSON.parse(r.stdout) as { isDraft: boolean }; + }, + async convertPrToDraft(repo, prNumber) { + const r = await ghSpawn(["pr", "ready", String(prNumber), "--repo", repo, "--undo"]); + if (r.exitCode !== 0) { + throw new Error(`gh pr ready --undo #${prNumber} failed: ${r.stderr.trim()}`); + } + }, + async listClosedSubIssues(repo, epicNumber) { + const rows = await ghJson>([ + "api", + `/repos/${repo}/issues/${epicNumber}/sub_issues`, + "--paginate", + "--jq", + '[.[] | select(.state == "closed") | {number, closed_at}]', + ]); + const out: ClosedSubIssue[] = rows.map((s) => ({ + number: s.number, + closedAt: s.closed_at ? Date.parse(s.closed_at) : null, + })); + // Newest-first by closed_at; nulls sink to the end so a hand-edited row + // doesn't shadow a real recent close. + out.sort((a, b) => (b.closedAt ?? 0) - (a.closedAt ?? 0)); + return out; + }, + async reopenIssue(repo, issueNumber, options) { + const argv = ["issue", "reopen", String(issueNumber), "--repo", repo]; + if (options?.comment !== undefined) { + argv.push("--comment", options.comment); + } + const r = await ghSpawn(argv); + if (r.exitCode !== 0) { + throw new Error(`gh issue reopen #${issueNumber} failed: ${r.stderr.trim()}`); + } + }, +}; diff --git a/packages/dispatcher/test/pr-divergence-integration.test.ts b/packages/dispatcher/test/pr-divergence-integration.test.ts index 988ed1aa..cd1d0e77 100644 --- a/packages/dispatcher/test/pr-divergence-integration.test.ts +++ b/packages/dispatcher/test/pr-divergence-integration.test.ts @@ -6,8 +6,14 @@ import { type Database } from "bun:sqlite"; import { openAndMigrate } from "../src/db.ts"; import { applySuccess, + type ClosedSubIssue, + type DivergenceState, getDivergenceState, gitOps, + type MergeabilityView, + type OpenManagedPr, + reconcileOpenPRs, + type ReconcilerGateway, tryMergeMainNewWorkAsBase, tryRebaseOntoMain, } from "../src/reconcilers/pr-divergence.ts"; @@ -417,3 +423,292 @@ describe("applySuccess — fixture repo", () => { expect(getDivergenceState(db, "o/r", 999)).toBe(null); }); }); + +describe("reconcileOpenPRs — end-to-end against the fixture repo", () => { + /** + * Build a fully-stubbed `ReconcilerGateway` keyed by PR number for the + * orchestrator's read paths, and recording counters for its write paths. + * Comments + open PR list + closed sub-issues are driven by the per-test + * setup; the git operations (rebase / merge / push) run against the real + * fixture worktree. + */ + function makeOrchestratorGateway(opts: { + openPrs: OpenManagedPr[]; + mergeability: Record; + headRefs: Record; + closedSubIssues?: ClosedSubIssue[]; + mainSha?: string; + }) { + const calls = { + listOpenManagedPrs: 0, + getMainCommitSha: 0, + getMergeability: 0 as number, + getPrHeadRef: 0 as number, + postComment: [] as { issueNumber: number; body: string }[], + convertPrToDraft: [] as number[], + reopenIssue: [] as { issueNumber: number; comment: string | undefined }[], + }; + const comments = new Map(); + const drafts = new Set(); + const gateway: ReconcilerGateway = { + async listOpenManagedPrs() { + calls.listOpenManagedPrs++; + return opts.openPrs; + }, + async getMainCommitSha() { + calls.getMainCommitSha++; + return opts.mainSha ?? "main-sha-123"; + }, + async getMergeability(_repo, prNumber) { + calls.getMergeability++; + return opts.mergeability[prNumber] ?? null; + }, + async getPrHeadRef(_repo, prNumber) { + calls.getPrHeadRef++; + return opts.headRefs[prNumber] ?? null; + }, + async getPullRequest(_repo, prNumber) { + return { isDraft: drafts.has(prNumber) }; + }, + async convertPrToDraft(_repo, prNumber) { + calls.convertPrToDraft.push(prNumber); + drafts.add(prNumber); + }, + async listClosedSubIssues() { + return opts.closedSubIssues ?? []; + }, + async reopenIssue(_repo, issueNumber, options) { + calls.reopenIssue.push({ issueNumber, comment: options?.comment }); + }, + async listIssueComments(_repo, issueNumber) { + return (comments.get(issueNumber) ?? []).map((body) => ({ body })); + }, + async postComment(_repo, issueNumber, body) { + calls.postComment.push({ issueNumber, body }); + const bucket = comments.get(issueNumber) ?? []; + bucket.push(body); + comments.set(issueNumber, bucket); + }, + }; + return { gateway, calls, comments, drafts }; + } + + test("BEHIND PR rebases cleanly on the next tick, applies success, and a re-tick is idempotent", async () => { + // Set up the fixture: main advances by one disjoint commit so the feature + // branch is BEHIND but conflict-free (the rebase helper's clean path). + await writeAndCommit(work, "main.txt", "main\n", "main: add file"); + await git(work, ["push", "origin", "main"]); + await aliasFixtureUnderRoot("o/r", 32); + + const fixture = makeOrchestratorGateway({ + openPrs: [{ prNumber: 100, headRefName: "middle-issue-32" }], + headRefs: { 100: "middle-issue-32" }, + mergeability: { + // First tick sees BEHIND; second tick sees CLEAN (the rebase landed). + 100: { mergeStateStatus: "BEHIND", mergeable: "MERGEABLE" }, + }, + }); + + const baseDeps = deps(); + const enqueues: Array<[string, number]> = []; + const orchDeps = { + ...baseDeps, + db, + github: fixture.gateway, + enqueueEpic: async (r: string, e: number) => { + enqueues.push([r, e]); + }, + getRateLimit: async () => ({ remaining: 5000, resetAt: 0 }), + now: () => 1_700_000_000_000, + }; + + const r1 = await reconcileOpenPRs(orchDeps, "o/r"); + expect(r1).toEqual({ reconciled: 1, passed: 0, skippedForBudget: false }); + // The rebase moved feature on top of main; applySuccess pushed and posted. + expect(fixture.calls.postComment.length).toBe(1); + expect(fixture.calls.postComment[0]?.issueNumber).toBe(100); + expect(fixture.calls.postComment[0]?.body).toContain("(rebased)"); + expect(getDivergenceState(db, "o/r", 100)?.state).toBe("CLEAN"); + + // Second tick: simulate GitHub now reporting CLEAN (the push landed there). + // The orchestrator classifies CLEAN → passed, no further side effects. + fixture.calls.postComment = []; // reset to verify no new posts + const orchDeps2 = { + ...orchDeps, + github: { + ...fixture.gateway, + async getMergeability() { + return { mergeStateStatus: "CLEAN", mergeable: "MERGEABLE" } as MergeabilityView; + }, + }, + }; + const r2 = await reconcileOpenPRs(orchDeps2, "o/r"); + expect(r2).toEqual({ reconciled: 0, passed: 1, skippedForBudget: false }); + expect(fixture.calls.postComment).toEqual([]); + expect(fixture.calls.convertPrToDraft).toEqual([]); + expect(enqueues).toEqual([]); + }); + + test("CONFLICTED PR rebase-fails → merge fallback lands → applySuccess('merged-new-work-as-base')", async () => { + // Same-line edits on both sides — rebase will conflict, -X ours will resolve. + await writeAndCommit(work, "shared.txt", "line\n", "main: seed shared"); + await git(work, ["push", "origin", "main"]); + await git(worktree, ["fetch", "origin", "main"]); + await git(worktree, ["reset", "--hard", "origin/main"]); + await git(worktree, ["checkout", "-B", "middle-issue-32"]); + await git(worktree, ["push", "-f", "origin", "middle-issue-32"]); + + await writeAndCommit(worktree, "shared.txt", "feature edit\n", "feature: edit shared"); + await writeAndCommit(work, "shared.txt", "main edit\n", "main: edit shared"); + await git(work, ["push", "origin", "main"]); + + await aliasFixtureUnderRoot("o/r", 32); + + const fixture = makeOrchestratorGateway({ + openPrs: [{ prNumber: 101, headRefName: "middle-issue-32" }], + headRefs: { 101: "middle-issue-32" }, + mergeability: { + 101: { mergeStateStatus: "DIRTY", mergeable: "CONFLICTING" }, + }, + }); + + const baseDeps = deps(); + const r = await reconcileOpenPRs( + { + ...baseDeps, + db, + github: fixture.gateway, + enqueueEpic: async () => {}, + getRateLimit: async () => ({ remaining: 5000, resetAt: 0 }), + }, + "o/r", + ); + expect(r.reconciled).toBe(1); + expect(fixture.calls.postComment.length).toBe(1); + expect(fixture.calls.postComment[0]?.body).toContain("(merged-new-work-as-base)"); + }); + + test("CONFLICTED PR both attempts fail (rename/rename) → applyDemoteToWork fires", async () => { + // Rename/rename: -X ours can't resolve. + await writeAndCommit(work, "shared.txt", "baseline\n", "main: seed shared"); + await git(work, ["push", "origin", "main"]); + await git(worktree, ["fetch", "origin", "main"]); + await git(worktree, ["reset", "--hard", "origin/main"]); + await git(worktree, ["checkout", "-B", "middle-issue-32"]); + await git(worktree, ["push", "-f", "origin", "middle-issue-32"]); + + await git(worktree, ["mv", "shared.txt", "feature-name.txt"]); + await git(worktree, ["commit", "-m", "feature: rename"]); + await git(work, ["mv", "shared.txt", "main-name.txt"]); + await git(work, ["commit", "-m", "main: rename"]); + await git(work, ["push", "origin", "main"]); + + await aliasFixtureUnderRoot("o/r", 32); + + const fixture = makeOrchestratorGateway({ + openPrs: [{ prNumber: 102, headRefName: "middle-issue-32" }], + headRefs: { 102: "middle-issue-32" }, + mergeability: { + 102: { mergeStateStatus: "DIRTY", mergeable: "CONFLICTING" }, + }, + closedSubIssues: [{ number: 50, closedAt: 1_700_000_000_000 }], + }); + + const baseDeps = deps(); + const enqueues: Array<[string, number]> = []; + const r = await reconcileOpenPRs( + { + ...baseDeps, + db, + github: fixture.gateway, + enqueueEpic: async (repo, epicNumber) => { + enqueues.push([repo, epicNumber]); + }, + getRateLimit: async () => ({ remaining: 5000, resetAt: 0 }), + }, + "o/r", + ); + expect(r.reconciled).toBe(1); + + // Demote landed: draft conversion + sub-issue reopen + dual-surface comments + enqueue. + expect(fixture.calls.convertPrToDraft).toEqual([102]); + expect(fixture.calls.reopenIssue.length).toBe(1); + expect(fixture.calls.reopenIssue[0]?.issueNumber).toBe(50); + expect(new Set(fixture.calls.postComment.map((c) => c.issueNumber))).toEqual( + new Set([102, 32]), + ); + for (const c of fixture.calls.postComment) { + expect(c.body).toContain(""); + } + expect(enqueues).toEqual([["o/r", 32]]); + expect(getDivergenceState(db, "o/r", 102)?.state).toBe("DEMOTED"); + }); + + test("rate-limit floor short-circuits the pass; no listing happens", async () => { + const fixture = makeOrchestratorGateway({ + openPrs: [], + headRefs: {}, + mergeability: {}, + }); + const baseDeps = deps(); + const r = await reconcileOpenPRs( + { + ...baseDeps, + db, + github: fixture.gateway, + enqueueEpic: async () => {}, + getRateLimit: async () => ({ remaining: 10, resetAt: Date.now() + 60_000 }), + rateLimitBuffer: 100, + }, + "o/r", + ); + expect(r).toEqual({ reconciled: 0, passed: 0, skippedForBudget: true }); + expect(fixture.calls.listOpenManagedPrs).toBe(0); + }); + + test("CLEAN PR → walked but unchanged; nothing posted, no state advance", async () => { + const fixture = makeOrchestratorGateway({ + openPrs: [{ prNumber: 103, headRefName: "middle-issue-32" }], + headRefs: { 103: "middle-issue-32" }, + mergeability: { 103: { mergeStateStatus: "CLEAN", mergeable: "MERGEABLE" } }, + }); + const baseDeps = deps(); + const r = await reconcileOpenPRs( + { + ...baseDeps, + db, + github: fixture.gateway, + enqueueEpic: async () => {}, + getRateLimit: async () => ({ remaining: 5000, resetAt: 0 }), + }, + "o/r", + ); + expect(r).toEqual({ reconciled: 0, passed: 1, skippedForBudget: false }); + expect(fixture.calls.postComment).toEqual([]); + // Classifier still wrote a row (the recording invariant from Phase 1). + expect(getDivergenceState(db, "o/r", 103)?.state).toBe("CLEAN" satisfies DivergenceState); + }); + + test("listOpenManagedPrs throws → pass returns 0s and logs, no orchestration", async () => { + const fixture = makeOrchestratorGateway({ + openPrs: [], + headRefs: {}, + mergeability: {}, + }); + fixture.gateway.listOpenManagedPrs = async () => { + throw new Error("transient gh outage"); + }; + const baseDeps = deps(); + const r = await reconcileOpenPRs( + { + ...baseDeps, + db, + github: fixture.gateway, + enqueueEpic: async () => {}, + getRateLimit: async () => ({ remaining: 5000, resetAt: 0 }), + }, + "o/r", + ); + expect(r).toEqual({ reconciled: 0, passed: 0, skippedForBudget: false }); + }); +}); From e50fc233c4fc2871ea8a7672d9d43d090a7a38ba Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 01:40:06 -0400 Subject: [PATCH 16/23] test(reconcilers): multi-PR orchestrator + onMergedTransition wiring tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acceptance-criteria coverage for Epic #168: - `reconcile.test.ts`: a `reconcileMergedParks` pass with multiple MERGED transitions fires `onMergedTransition` once per row (Epic #168 hook the daemon wires to an immediate `reconcileOpenPRs` sweep); a thrown hook is isolated and the merged-parks pass still finalizes every row. - `pr-divergence-integration.test.ts`: a two-PR pass exercises the iteration — one BEHIND PR rebases to CLEAN (applySuccess posted), one already-CLEAN PR is walked but unchanged. Proves the orchestrator's loop on the spec's "merging one triggers the reconciler; the other rebases" topology. --- .../test/pr-divergence-integration.test.ts | 42 +++++++++++++++++ packages/dispatcher/test/reconcile.test.ts | 45 +++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/packages/dispatcher/test/pr-divergence-integration.test.ts b/packages/dispatcher/test/pr-divergence-integration.test.ts index cd1d0e77..62aafbfc 100644 --- a/packages/dispatcher/test/pr-divergence-integration.test.ts +++ b/packages/dispatcher/test/pr-divergence-integration.test.ts @@ -689,6 +689,48 @@ describe("reconcileOpenPRs — end-to-end against the fixture repo", () => { expect(getDivergenceState(db, "o/r", 103)?.state).toBe("CLEAN" satisfies DivergenceState); }); + test("two open managed PRs in one pass — both walked, mix of CLEAN + BEHIND→rebased", async () => { + // Pass-through fixture: one PR is up-to-date (CLEAN), the other is BEHIND + // (will rebase via the fixture worktree). Proves the orchestrator iterates + // the list and applies the right chain per-PR — the spec's "merging one + // triggers the reconciler; the other rebases onto the new main" topology. + await writeAndCommit(work, "main.txt", "main\n", "main: add file"); + await git(work, ["push", "origin", "main"]); + await aliasFixtureUnderRoot("o/r", 32); + + const fixture = makeOrchestratorGateway({ + openPrs: [ + { prNumber: 200, headRefName: "middle-issue-32" }, // will rebase BEHIND→CLEAN + { prNumber: 201, headRefName: "middle-issue-99" }, // already CLEAN + ], + headRefs: { 200: "middle-issue-32", 201: "middle-issue-99" }, + mergeability: { + 200: { mergeStateStatus: "BEHIND", mergeable: "MERGEABLE" }, + 201: { mergeStateStatus: "CLEAN", mergeable: "MERGEABLE" }, + }, + }); + + const baseDeps = deps(); + const r = await reconcileOpenPRs( + { + ...baseDeps, + db, + github: fixture.gateway, + enqueueEpic: async () => {}, + getRateLimit: async () => ({ remaining: 5000, resetAt: 0 }), + }, + "o/r", + ); + expect(r).toEqual({ reconciled: 1, passed: 1, skippedForBudget: false }); + + // PR 200 got an applySuccess comment; PR 201 (CLEAN) got nothing. + expect(fixture.calls.postComment.length).toBe(1); + expect(fixture.calls.postComment[0]?.issueNumber).toBe(200); + // Both rows are persisted reflecting their classified state. + expect(getDivergenceState(db, "o/r", 200)?.state).toBe("CLEAN" satisfies DivergenceState); // applySuccess wrote CLEAN + expect(getDivergenceState(db, "o/r", 201)?.state).toBe("CLEAN" satisfies DivergenceState); // classifier wrote CLEAN + }); + test("listOpenManagedPrs throws → pass returns 0s and logs, no orchestration", async () => { const fixture = makeOrchestratorGateway({ openPrs: [], diff --git a/packages/dispatcher/test/reconcile.test.ts b/packages/dispatcher/test/reconcile.test.ts index 0e3486ad..8213f26e 100644 --- a/packages/dispatcher/test/reconcile.test.ts +++ b/packages/dispatcher/test/reconcile.test.ts @@ -142,6 +142,51 @@ describe("reconcileMergedParks", () => { expect(getWorkflow(db, id)?.state).toBe("waiting-human"); // untouched — budget-gated }); + test("fires onMergedTransition once per MERGED transition observed (Epic #168 wiring)", async () => { + seedParked(70); + seedParked(71); + seedParked(72); + const { deps } = makeDeps({ + 70: { number: 1, state: "MERGED" }, + 71: { number: 2, state: "CLOSED" }, // not MERGED → does NOT trigger the hook + 72: { number: 3, state: "MERGED" }, + }); + + const triggered: string[] = []; + await reconcileMergedParks({ + ...deps, + onMergedTransition: async (repo) => { + triggered.push(repo); + }, + }); + + // Two MERGED transitions observed → hook fired twice (per-pass de-dup is + // the caller's job; the daemon's reconcileOpenPRsForRepo is itself idempotent). + expect(triggered).toEqual([REPO, REPO]); + }); + + test("a thrown onMergedTransition is isolated — the merged-parks pass still finishes", async () => { + const idA = seedParked(75); + const idB = seedParked(76); + const { deps } = makeDeps({ + 75: { number: 4, state: "MERGED" }, + 76: { number: 5, state: "MERGED" }, + }); + const triggered: string[] = []; + expect( + await reconcileMergedParks({ + ...deps, + onMergedTransition: async () => { + triggered.push("called"); + throw new Error("downstream sweep boom"); + }, + }), + ).toBe(2); + expect(triggered.length).toBe(2); + expect(getWorkflow(db, idA)?.state).toBe("completed"); + expect(getWorkflow(db, idB)?.state).toBe("completed"); + }); + test("honors the per-pass burst cap", async () => { seedParked(60); seedParked(61); From 0a5316f59353248175537530b0447b8792ff9ea9 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 01:52:29 -0400 Subject: [PATCH 17/23] =?UTF-8?q?fix(reconcilers):=20self-review=20hardeni?= =?UTF-8?q?ng=20=E2=80=94=20distinguish=20skip=20vs=20fail,=20manual=20rec?= =?UTF-8?q?overy,=20observability?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-push internal code-review pass surfaced six adjacent edges in the same class — the reconciler's result/failure shapes leaking into wrong dispatch paths. Fix the class within the module's blast radius, each fix tested: 1. **`gitOps.rebase` / `mergeOurs` throw on non-conflict failures** — when the underlying command exits non-zero with zero unmerged paths (missing upstream, hook refusal, dirty worktree), it's a wiring failure, not a path-less conflict. Before: shaped as `{ok:false, conflictingPaths:[]}` indistinguishable from the non-managed-PR skip signal. After: throws; the orchestrator's per-PR try/catch logs and increments the new `failed` counter. 2. **`applySuccess` accepts `mainCommitSha: string | null`** — a transient `getMainCommitSha` failure (Phase 6) used to silently skip the entire applySuccess (push + comment + state→CLEAN), but the orchestrator still counted it as `reconciled`. After: push + state row are unconditional; only the comment step skips when the SHA is null (the marker would be ambiguous). A later pass with a readable SHA posts under the correct marker. 3. **`applyDemoteToWork` skips `reopenIssue` when the Epic already carries the demote marker** — a human who reviews the escalation, manually fixes the conflict, marks the PR ready, and closes the reopened sub-issue shouldn't have it re-opened by the next divergence. The PR.isDraft gate doesn't catch this (human un-drafted it); the Epic marker does. 4. **`listClosedSubIssues` defends against `NaN` from `Date.parse`** — malformed `closed_at` (hand-edited rows) now coerces to null instead of becoming a NaN that destabilizes the V8 sort. 5. **`getMergeability` / `getPullRequest` guard `JSON.parse`** via a new `safeJsonParse` helper — empty/malformed `gh` stdout (observed in the wild when auth warnings interleave) now returns null instead of throwing `SyntaxError`. `ghJson` similarly wraps for the throwing path. 6. **`reconcileOpenPRs` reports a `failed` counter** + logs when the `maxPrsPerPass` cap drops the tail of a large backlog. Distinguishes a pass of all-CLEAN from a pass of all-failures for observability. 7. **`main.ts` dedups `onMergedTransition` per pass** — a single `reconcileMergedParks` pass observing N MERGED transitions on the same repo used to fire N full-repo sweeps; now coalesced to one per tick. Five added tests pin the new contracts: null-mainCommitSha skip-comment path, non-conflict rebase throw, manual-recovery marker gate. Existing tests updated for the new `failed` field on the result shape. --- packages/dispatcher/src/main.ts | 22 ++- .../src/reconcilers/pr-divergence.ts | 167 +++++++++++++----- .../test/pr-divergence-integration.test.ts | 55 +++++- .../dispatcher/test/pr-divergence.test.ts | 39 ++++ 4 files changed, 234 insertions(+), 49 deletions(-) diff --git a/packages/dispatcher/src/main.ts b/packages/dispatcher/src/main.ts index 4ea2f212..05f3570d 100644 --- a/packages/dispatcher/src/main.ts +++ b/packages/dispatcher/src/main.ts @@ -660,7 +660,27 @@ export async function runDaemon(opts: RunDaemonOptions = {}): Promise { await reconcileOpenPRsForRepo(repo); } }, - onMergedTransition: (repo) => reconcileOpenPRsForRepo(repo), + // Per-repo de-dup: multiple MERGED transitions observed in one pass + // would otherwise fire N concurrent full-repo sweeps; sweep at most once + // per repo per pass. Cleared at the start of each invocation so the next + // pass picks up a fresh set. + onMergedTransition: (() => { + let firedThisPass = new Set(); + let resetTimer: ReturnType | null = null; + return async (repo: string) => { + if (firedThisPass.has(repo)) return; + firedThisPass.add(repo); + // Reset the dedup set on the next tick so subsequent passes don't + // inherit it; this is best-effort coalescing within one pass. + if (resetTimer === null) { + resetTimer = setTimeout(() => { + firedThisPass = new Set(); + resetTimer = null; + }, 0); + } + await reconcileOpenPRsForRepo(repo); + }; + })(), }, ); diff --git a/packages/dispatcher/src/reconcilers/pr-divergence.ts b/packages/dispatcher/src/reconcilers/pr-divergence.ts index 44919c3b..1c900da6 100644 --- a/packages/dispatcher/src/reconcilers/pr-divergence.ts +++ b/packages/dispatcher/src/reconcilers/pr-divergence.ts @@ -198,11 +198,14 @@ async function spawnGit( } /** - * The conflicting paths from a worktree mid-rebase, read via - * `git diff --name-only --diff-filter=U` (unmerged-only). Empty when no - * conflict markers exist — the rebase's own non-zero exit then has a different - * cause (a missing upstream, a hook refusal) which the helper surfaces as the - * empty-paths variant of the conflict result, never as a thrown error. + * The conflicting paths from a worktree mid-rebase/merge, read via + * `git diff --name-only --diff-filter=U` (unmerged-only). Returns the list of + * paths git considers unmerged right now — empty when none. + * + * The helpers below distinguish "non-zero exit AND no unmerged paths" (a real + * wiring failure: missing upstream, hook refusal, dirty worktree) from "non- + * zero exit AND ≥1 unmerged path" (a genuine merge conflict). The first throws; + * the second returns the paths so the caller can fall back / demote. */ async function readConflictingPaths(cwd: string): Promise { const r = await spawnGit(cwd, ["diff", "--name-only", "--diff-filter=U"]); @@ -225,6 +228,14 @@ export const gitOps: GitOps = { // index, after which `--diff-filter=U` returns nothing). const conflictingPaths = await readConflictingPaths(cwd); await spawnGit(cwd, ["rebase", "--abort"]); + if (conflictingPaths.length === 0) { + // Non-zero exit AND no unmerged files = wiring failure (missing + // upstream, hook refusal, dirty worktree). Surface the underlying error + // so the orchestrator's per-PR try/catch logs it and the pass continues; + // do NOT shape it as `{ok:false, conflictingPaths:[]}`, which the + // applyDemoteToWork path would read as a real (path-less) conflict. + throw new Error(`git rebase ${upstream} failed without unmerged files: ${r.stderr.trim()}`); + } return { ok: false, conflictingPaths }; }, async mergeOurs(cwd: string, ref: string): Promise { @@ -236,6 +247,11 @@ export const gitOps: GitOps = { if (r.exitCode === 0) return { ok: true }; const conflictingPaths = await readConflictingPaths(cwd); await spawnGit(cwd, ["merge", "--abort"]); + if (conflictingPaths.length === 0) { + // Same distinction as `rebase` above — surface a real failure rather than + // a path-less conflict shape. + throw new Error(`git merge ${ref} failed without unmerged files: ${r.stderr.trim()}`); + } return { ok: false, conflictingPaths }; }, async revParse(cwd: string, ref: string): Promise { @@ -406,7 +422,12 @@ function reconciledMarker(resolution: ReconciliationResolution, mainCommitSha: s * already at the target state is a no-op (per #172's spec). * - **Comment:** scans existing comments for the resolution+sha marker; posts * only if absent. The marker is hidden HTML in the body, so the visible - * text stays clean. + * text stays clean. When `mainCommitSha` is null (a transient + * `getMainCommitSha` failure during the same pass), the comment step is + * **skipped** — the marker would be ambiguous — but the push + state-row + * write still happen. A later pass with a readable main SHA posts the + * announcement; the sha-keyed marker prevents a duplicate once the SHA + * settles. * - **State row:** upserted to `CLEAN` — idempotent by construction. * * Force-pushing uses `--force-with-lease` (never plain `--force`), so a @@ -421,7 +442,7 @@ export async function applySuccess( repo: string, prNumber: number, resolution: ReconciliationResolution, - mainCommitSha: string, + mainCommitSha: string | null, ): Promise { const resolved = await resolveWorktreePath(deps, repo, prNumber); if (!resolved) return; @@ -436,12 +457,18 @@ export async function applySuccess( await deps.git.pushForceWithLease(resolved.worktreePath, branch); } - const marker = reconciledMarker(resolution, mainCommitSha); - const existing = await deps.github.listIssueComments(repo, prNumber); - const alreadyPosted = existing.some((c) => (c.body ?? "").includes(marker)); - if (!alreadyPosted) { - const body = `🔁 Reconciled with main (${resolution}) after ${mainCommitSha.slice(0, 9)}\n\n${marker}`; - await deps.github.postComment(repo, prNumber, body); + // Comment only when we have a main SHA to key the marker by; the next pass + // re-posts under the right SHA. Push + state-row write are unconditional — + // the rebase/merge already landed locally, so failing to comment shouldn't + // also fail to record the success. + if (mainCommitSha !== null) { + const marker = reconciledMarker(resolution, mainCommitSha); + const existing = await deps.github.listIssueComments(repo, prNumber); + const alreadyPosted = existing.some((c) => (c.body ?? "").includes(marker)); + if (!alreadyPosted) { + const body = `🔁 Reconciled with main (${resolution}) after ${mainCommitSha.slice(0, 9)}\n\n${marker}`; + await deps.github.postComment(repo, prNumber, body); + } } recordDivergenceState(deps.db, repo, prNumber, "CLEAN", (deps.now ?? Date.now)()); @@ -533,12 +560,21 @@ function renderSubIssueReopenComment(prNumber: number, epicNumber: number): stri * - **PR.isDraft** — if the PR is already a draft, the demote has already * landed; the whole function returns. This survives a classifier overwrite * of the state row back to CONFLICTED (a re-classification under DEMOTED). + * - **Epic-keyed marker on prior demote** — if the Epic already carries the + * demote marker from a previous incident, skip the sub-issue reopen even if + * the PR was un-drafted in the meantime (e.g. a human reviewed the + * escalation, manually fixed the conflict, marked the PR ready, and a fresh + * divergence emerged). Without this gate the reconciler would re-fire + * `reopenIssue` against a sub-issue the human had already closed. * - **comment marker** — the dual-surface comments include a hidden HTML * marker keyed on the Epic number; on a retry that comes after the post * succeeded but before the row was written, the listing-then-post sequence - * sees the marker and skips. (Re-`reopenIssue` and re-`enqueueEpic` are - * themselves idempotent — sub-issue reopen is a no-op on open, the enqueue - * collides against the existing-workflow guard.) + * sees the marker and skips. + * + * `enqueueEpic` is itself idempotent (the daemon-wired implementation collides + * against the existing-workflow guard) so a re-enqueue on the same Epic is + * safe; we still call it on each demote pass so the recommender's ranking gets + * a fresh nudge after the new divergence. * * Non-managed head refs (the PR isn't `middle-issue-`) short-circuit as a * no-op — the reconciler is never the right hand for those. @@ -560,23 +596,33 @@ export async function applyDemoteToWork( await deps.github.convertPrToDraft(repo, prNumber); - // Most-recently-closed sub-issue, if any. An Epic with no closed sub-issues - // (every phase is still open) means there's nothing to reopen — that's not - // an error; the demote still proceeds with the comments + re-enqueue. - const closedSubs = await deps.github.listClosedSubIssues(repo, epicNumber); - const targetSub = closedSubs[0]; // gateway contract: most-recently-closed first - if (targetSub) { - await deps.github.reopenIssue(repo, targetSub.number, { - comment: renderSubIssueReopenComment(prNumber, epicNumber), - }); + const marker = demoteMarker(epicNumber); + const epicComments = await deps.github.listIssueComments(repo, epicNumber); + const epicAlreadyDemoted = epicComments.some((c) => (c.body ?? "").includes(marker)); + + // Most-recently-closed sub-issue, if any. Skip the reopen when the Epic + // already carries a prior demote marker — a human may have manually fixed + // the conflict and closed that sub-issue; we don't fight their recovery. + // The fresh divergence still demotes the PR + posts the new escalation + + // re-enqueues; only the reopen is suppressed. + if (!epicAlreadyDemoted) { + const closedSubs = await deps.github.listClosedSubIssues(repo, epicNumber); + const targetSub = closedSubs[0]; // gateway contract: most-recently-closed first + if (targetSub) { + await deps.github.reopenIssue(repo, targetSub.number, { + comment: renderSubIssueReopenComment(prNumber, epicNumber), + }); + } } // Dual-surface escalation. Same marker on both surfaces gates re-posts. const escalationBody = renderDemoteEscalation(epicNumber, prNumber, conflictingPaths); - const marker = demoteMarker(epicNumber); for (const issueNumber of [prNumber, epicNumber]) { - const existing = await deps.github.listIssueComments(repo, issueNumber); + const existing = + issueNumber === epicNumber + ? epicComments + : await deps.github.listIssueComments(repo, issueNumber); if (!existing.some((c) => (c.body ?? "").includes(marker))) { await deps.github.postComment(repo, issueNumber, escalationBody); } @@ -648,6 +694,11 @@ export type ReconcileOpenPRsResult = { reconciled: number; /** PRs walked but left as-is (CLEAN or UNKNOWN — nothing to do this pass). */ passed: number; + /** PRs whose chain threw before completing — observability for transient + * GitHub / git failures the orchestrator's per-PR try/catch logged and + * isolated. Counted alongside `reconciled`/`passed` so a pass of all-failures + * is distinguishable from a pass of all-CLEAN. */ + failed: number; /** True when the whole pass was short-circuited by the rate-limit floor. */ skippedForBudget: boolean; }; @@ -681,7 +732,7 @@ export async function reconcileOpenPRs( console.error( `[pr-divergence] GitHub budget low (${budget.remaining} < ${buffer}); skipping pass — resets ${new Date(budget.resetAt).toISOString()}`, ); - return { reconciled: 0, passed: 0, skippedForBudget: true }; + return { reconciled: 0, passed: 0, failed: 0, skippedForBudget: true }; } let prs: OpenManagedPr[]; @@ -691,10 +742,15 @@ export async function reconcileOpenPRs( console.error( `[pr-divergence] list open managed PRs for ${repo} failed: ${(error as Error).message}`, ); - return { reconciled: 0, passed: 0, skippedForBudget: false }; + return { reconciled: 0, passed: 0, failed: 0, skippedForBudget: false }; } if (prs.length === 0) { - return { reconciled: 0, passed: 0, skippedForBudget: false }; + return { reconciled: 0, passed: 0, failed: 0, skippedForBudget: false }; + } + if (prs.length > maxPerPass) { + console.error( + `[pr-divergence] ${repo}: ${prs.length} managed PRs > ${maxPerPass} per-pass cap; processing the first ${maxPerPass} (remainder next tick)`, + ); } // Fetched once per pass so applySuccess's comment marker is consistent across @@ -710,6 +766,7 @@ export async function reconcileOpenPRs( let reconciled = 0; let passed = 0; + let failed = 0; for (const pr of prs.slice(0, maxPerPass)) { try { @@ -722,7 +779,9 @@ export async function reconcileOpenPRs( // BEHIND or CONFLICTED — try rebase first. const rebaseResult = await tryRebaseOntoMain(deps, repo, pr.prNumber); if (rebaseResult.ok) { - if (mainSha) await applySuccess(deps, repo, pr.prNumber, "rebased", mainSha); + // applySuccess pushes + records CLEAN regardless of mainSha; the + // comment step skips itself when mainSha is null. + await applySuccess(deps, repo, pr.prNumber, "rebased", mainSha); reconciled++; continue; } @@ -730,9 +789,7 @@ export async function reconcileOpenPRs( // Rebase loop — try -X ours merge fallback (new-work-as-base). const mergeResult = await tryMergeMainNewWorkAsBase(deps, repo, pr.prNumber); if (mergeResult.ok) { - if (mainSha) { - await applySuccess(deps, repo, pr.prNumber, "merged-new-work-as-base", mainSha); - } + await applySuccess(deps, repo, pr.prNumber, "merged-new-work-as-base", mainSha); reconciled++; continue; } @@ -755,13 +812,14 @@ export async function reconcileOpenPRs( ); reconciled++; } catch (error) { + failed++; console.error( `[pr-divergence] ${repo} PR #${pr.prNumber} reconciliation failed: ${(error as Error).message}`, ); } } - return { reconciled, passed, skippedForBudget: false }; + return { reconciled, passed, failed, skippedForBudget: false }; } // ── Production `gh`-backed gateway (for the daemon) ──────────────────────── @@ -786,7 +844,27 @@ async function ghJson(argv: string[]): Promise { if (r.exitCode !== 0) { throw new Error(`gh ${argv.join(" ")} failed: ${r.stderr.trim()}`); } - return JSON.parse(r.stdout) as T; + try { + return JSON.parse(r.stdout) as T; + } catch (error) { + throw new Error( + `gh ${argv.join(" ")} returned unparseable JSON (${(error as Error).message}): ${r.stdout.trim()}`, + ); + } +} + +/** Parse JSON without throwing — the caller distinguishes null (PR doesn't + * exist OR gh emitted malformed output) from a usable object. Empty `stdout` + * has been observed in the wild when `gh` interleaves auth-warning text on a + * successful-exit; we want to log and continue, not throw. */ +function safeJsonParse(stdout: string): T | null { + const trimmed = stdout.trim(); + if (trimmed === "") return null; + try { + return JSON.parse(trimmed) as T; + } catch { + return null; + } } /** @@ -830,7 +908,7 @@ export const ghReconcilerGateway: Omit(r.stdout); }, async getPrHeadRef(repo, prNumber) { const r = await ghSpawn([ @@ -851,7 +929,7 @@ export const ghReconcilerGateway: Omit(r.stdout); }, async convertPrToDraft(repo, prNumber) { const r = await ghSpawn(["pr", "ready", String(prNumber), "--repo", repo, "--undo"]); @@ -867,10 +945,15 @@ export const ghReconcilerGateway: Omit ({ - number: s.number, - closedAt: s.closed_at ? Date.parse(s.closed_at) : null, - })); + const out: ClosedSubIssue[] = rows.map((s) => { + // Coerce malformed `closed_at` (hand-edited rows, weird timezones) to + // null rather than NaN — NaN sort comparisons are V8-unstable. + const parsed = s.closed_at ? Date.parse(s.closed_at) : Number.NaN; + return { + number: s.number, + closedAt: Number.isFinite(parsed) ? parsed : null, + }; + }); // Newest-first by closed_at; nulls sink to the end so a hand-edited row // doesn't shadow a real recent close. out.sort((a, b) => (b.closedAt ?? 0) - (a.closedAt ?? 0)); diff --git a/packages/dispatcher/test/pr-divergence-integration.test.ts b/packages/dispatcher/test/pr-divergence-integration.test.ts index 62aafbfc..abbe10b7 100644 --- a/packages/dispatcher/test/pr-divergence-integration.test.ts +++ b/packages/dispatcher/test/pr-divergence-integration.test.ts @@ -232,6 +232,21 @@ describe("tryRebaseOntoMain — fixture repo", () => { const result = await tryRebaseOntoMain(skipDeps, "o/r", 999); expect(result).toEqual({ ok: false, conflictingPaths: [] }); }); + + test("non-conflict rebase failure (missing upstream) THROWS — not shaped as a path-less conflict", async () => { + // Make `git rebase` fail by passing a ref that doesn't exist on origin. + // The fixture has no `origin/does-not-exist`, so the rebase exits non-zero + // with no unmerged paths — the self-review hardening must surface this as + // a real error, not as `{ok:false, conflictingPaths:[]}` (which the + // orchestrator would mistake for a non-managed-PR skip). + await aliasFixtureUnderRoot("o/r", 32); + // Direct call into gitOps.rebase against a missing ref proves the contract: + // a non-conflict failure throws so the orchestrator's per-PR try/catch + // logs it, instead of papering over it as a path-less conflict result. + await expect(gitOps.rebase(worktree, "does-not-exist-ref")).rejects.toThrow( + /failed without unmerged files/, + ); + }); }); describe("tryMergeMainNewWorkAsBase — fixture repo", () => { @@ -403,6 +418,34 @@ describe("applySuccess — fixture repo", () => { expect(comments.posted[1]).toContain("bbbbbbbbb"); }); + test("null mainCommitSha skips the comment but still pushes and records CLEAN (self-review hardening)", async () => { + // Make the local branch ahead of origin so the push branch is exercised. + await writeAndCommit(work, "main-file.txt", "main only\n", "main: add file"); + await git(work, ["push", "origin", "main"]); + await aliasFixtureUnderRoot("o/r", 32); + const rebase = await tryRebaseOntoMain(deps(), "o/r", 999); + expect(rebase).toEqual({ ok: true }); + + const comments = makeCommentSpy(); + const baseDeps = deps(); + const successDeps = { + ...baseDeps, + db, + github: { ...baseDeps.github, ...comments }, + now: () => 5_000, + }; + + await applySuccess(successDeps, "o/r", 999, "rebased", null); + + expect(comments.posted).toEqual([]); // no marker → no comment posted + expect(getDivergenceState(db, "o/r", 999)).toEqual({ state: "CLEAN", classifiedAt: 5_000 }); + + // Push DID happen — the bare remote now has the rebased HEAD. + const localSha = (await git(worktree, ["rev-parse", "HEAD"])).stdout.trim(); + const remoteSha = (await git(remote, ["rev-parse", "middle-issue-32"])).stdout.trim(); + expect(remoteSha).toBe(localSha); + }); + test("a non-managed head ref is a no-op (no push, no comment, no row)", async () => { const comments = makeCommentSpy(); const baseDeps = deps(); @@ -523,7 +566,7 @@ describe("reconcileOpenPRs — end-to-end against the fixture repo", () => { }; const r1 = await reconcileOpenPRs(orchDeps, "o/r"); - expect(r1).toEqual({ reconciled: 1, passed: 0, skippedForBudget: false }); + expect(r1).toEqual({ reconciled: 1, passed: 0, failed: 0, skippedForBudget: false }); // The rebase moved feature on top of main; applySuccess pushed and posted. expect(fixture.calls.postComment.length).toBe(1); expect(fixture.calls.postComment[0]?.issueNumber).toBe(100); @@ -543,7 +586,7 @@ describe("reconcileOpenPRs — end-to-end against the fixture repo", () => { }, }; const r2 = await reconcileOpenPRs(orchDeps2, "o/r"); - expect(r2).toEqual({ reconciled: 0, passed: 1, skippedForBudget: false }); + expect(r2).toEqual({ reconciled: 0, passed: 1, failed: 0, skippedForBudget: false }); expect(fixture.calls.postComment).toEqual([]); expect(fixture.calls.convertPrToDraft).toEqual([]); expect(enqueues).toEqual([]); @@ -662,7 +705,7 @@ describe("reconcileOpenPRs — end-to-end against the fixture repo", () => { }, "o/r", ); - expect(r).toEqual({ reconciled: 0, passed: 0, skippedForBudget: true }); + expect(r).toEqual({ reconciled: 0, passed: 0, failed: 0, skippedForBudget: true }); expect(fixture.calls.listOpenManagedPrs).toBe(0); }); @@ -683,7 +726,7 @@ describe("reconcileOpenPRs — end-to-end against the fixture repo", () => { }, "o/r", ); - expect(r).toEqual({ reconciled: 0, passed: 1, skippedForBudget: false }); + expect(r).toEqual({ reconciled: 0, passed: 1, failed: 0, skippedForBudget: false }); expect(fixture.calls.postComment).toEqual([]); // Classifier still wrote a row (the recording invariant from Phase 1). expect(getDivergenceState(db, "o/r", 103)?.state).toBe("CLEAN" satisfies DivergenceState); @@ -721,7 +764,7 @@ describe("reconcileOpenPRs — end-to-end against the fixture repo", () => { }, "o/r", ); - expect(r).toEqual({ reconciled: 1, passed: 1, skippedForBudget: false }); + expect(r).toEqual({ reconciled: 1, passed: 1, failed: 0, skippedForBudget: false }); // PR 200 got an applySuccess comment; PR 201 (CLEAN) got nothing. expect(fixture.calls.postComment.length).toBe(1); @@ -751,6 +794,6 @@ describe("reconcileOpenPRs — end-to-end against the fixture repo", () => { }, "o/r", ); - expect(r).toEqual({ reconciled: 0, passed: 0, skippedForBudget: false }); + expect(r).toEqual({ reconciled: 0, passed: 0, failed: 0, skippedForBudget: false }); }); }); diff --git a/packages/dispatcher/test/pr-divergence.test.ts b/packages/dispatcher/test/pr-divergence.test.ts index c7c4ea25..7c0a8945 100644 --- a/packages/dispatcher/test/pr-divergence.test.ts +++ b/packages/dispatcher/test/pr-divergence.test.ts @@ -383,6 +383,45 @@ describe("applyDemoteToWork", () => { expect(getDivergenceState(db, REPO, 99)).toBe(null); }); + test("manual recovery: an Epic that already carries the demote marker skips the reopen call (self-review hardening)", async () => { + // Scenario: prior demote landed → marker on Epic 32. Human reviewed, + // manually fixed conflicts, marked PR ready, closed the reopened sub-issue. + // Now a fresh divergence emerges and reconciler runs applyDemoteToWork. + // PR.isDraft = false (human marked ready), so the early-exit doesn't fire, + // but the Epic-marker gate must suppress the reopen so the human's manual + // sub-issue close isn't undone. + const spy = makeDemoteSpy({ + comments: new Map([ + [32, ["…earlier demote escalation… "]], + ]), + }); + const enqueues: Array<[string, number]> = []; + await applyDemoteToWork( + { + db, + github: spy.gateway, + enqueueEpic: async (r, e) => { + enqueues.push([r, e]); + }, + now: () => 100, + }, + REPO, + 99, + ["x"], + ); + + // PR is re-drafted + state→DEMOTED + Epic is re-enqueued (all expected on a + // fresh divergence) — but reopenIssue must NOT fire (the marker says we + // already escalated this Epic; don't fight the human's manual recovery). + expect(spy.calls.convertPrToDraft).toEqual([[REPO, 99]]); + expect(spy.calls.reopenIssue).toEqual([]); + expect(enqueues).toEqual([[REPO, 32]]); + expect(getDivergenceState(db, REPO, 99)?.state).toBe("DEMOTED"); + // The marker on the Epic also gates the duplicate Epic comment — only PR + // gets a fresh comment (its marker is absent). + expect(new Set(spy.calls.postComment.map((c) => c.issueNumber))).toEqual(new Set([99])); + }); + test("PR doesn't exist (gateway returns null) → no-op", async () => { const spy = makeDemoteSpy(); // Override getPullRequest to return null. From ab77f8f0fb81377812d6a6d372ca722f7e6b7d65 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 01:58:29 -0400 Subject: [PATCH 18/23] fix(reconcilers): move onMergedTransition dedup inside reconcileMergedParks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second internal review pass caught a race in the call-site timer dedup: `setTimeout(..., 0)` resets the dedup set as soon as the event loop services macrotasks, which happens between `await` points inside the iteration loop — so a pass with N MERGED rows on the same repo could still fire a second sweep. Fix by moving the dedup to the natural per-pass boundary inside `reconcileMergedParks` (a `Set` constructed at the start of the loop). The caller's only job is now to forward the hook; no timer involved. - `poller.ts`: `reconcileMergedParks` tracks fired repos in `mergedRepos`; docstring on `onMergedTransition` updated to "at most once per repo per pass" with rationale. - `main.ts`: dropped the call-site IIFE timer; `onMergedTransition` is a thin forwarder. - `reconcile.test.ts`: existing tests updated to assert single-fire dedup (two MERGED rows, one repo → triggered.length === 1). - `pr-divergence-integration.test.ts`: new test exercises the orchestrator's `failed` counter (a per-PR throw increments it, the pass continues for subsequent PRs) — was previously only pinned at 0 across green paths. - `pr-divergence.ts`: tightened `applySuccess` docstring on the null-mainSha path: a later pass announces against whatever main it then reads, not necessarily the SHA this pass would have used. --- packages/dispatcher/src/main.ts | 26 +++---------- packages/dispatcher/src/poller.ts | 28 ++++++++----- .../src/reconcilers/pr-divergence.ts | 7 ++-- .../test/pr-divergence-integration.test.ts | 39 +++++++++++++++++++ packages/dispatcher/test/reconcile.test.ts | 13 ++++--- 5 files changed, 75 insertions(+), 38 deletions(-) diff --git a/packages/dispatcher/src/main.ts b/packages/dispatcher/src/main.ts index 05f3570d..c5a94ac4 100644 --- a/packages/dispatcher/src/main.ts +++ b/packages/dispatcher/src/main.ts @@ -660,27 +660,11 @@ export async function runDaemon(opts: RunDaemonOptions = {}): Promise { await reconcileOpenPRsForRepo(repo); } }, - // Per-repo de-dup: multiple MERGED transitions observed in one pass - // would otherwise fire N concurrent full-repo sweeps; sweep at most once - // per repo per pass. Cleared at the start of each invocation so the next - // pass picks up a fresh set. - onMergedTransition: (() => { - let firedThisPass = new Set(); - let resetTimer: ReturnType | null = null; - return async (repo: string) => { - if (firedThisPass.has(repo)) return; - firedThisPass.add(repo); - // Reset the dedup set on the next tick so subsequent passes don't - // inherit it; this is best-effort coalescing within one pass. - if (resetTimer === null) { - resetTimer = setTimeout(() => { - firedThisPass = new Set(); - resetTimer = null; - }, 0); - } - await reconcileOpenPRsForRepo(repo); - }; - })(), + // Per-pass dedup lives inside `reconcileMergedParks` (it's the natural + // boundary; a caller-side timer would race against the loop's await + // points). Here we just forward to the per-repo sweep — at most one fire + // per repo per pass is already guaranteed by the upstream contract. + onMergedTransition: (repo) => reconcileOpenPRsForRepo(repo), }, ); diff --git a/packages/dispatcher/src/poller.ts b/packages/dispatcher/src/poller.ts index bedd9e5f..65c236c7 100644 --- a/packages/dispatcher/src/poller.ts +++ b/packages/dispatcher/src/poller.ts @@ -353,12 +353,13 @@ export type ReconcileDeps = { /** Cap on rows reconciled per pass. Defaults to {@link DEFAULT_MAX_POLLS_PER_PASS}. */ maxPollsPerPass?: number; /** - * Fired once per repo whose parked workflow was finalized because its PR - * transitioned to MERGED. The daemon wires this to an immediate - * `reconcileOpenPRs` sweep (Epic #168), so divergence on sibling Epic PRs is - * healed at the moment of merge rather than up to a tick later. Per-repo - * de-duplication is the caller's job — multiple MERGED transitions on the - * same repo this pass fire `onMergedTransition` once per row. + * Fired *at most once per repo per pass* when a parked workflow's PR is + * observed transitioning to MERGED. The daemon wires this to an immediate + * `reconcileOpenPRs` sweep (Epic #168) so divergence on sibling Epic PRs is + * healed at the moment of merge rather than up to a tick later. Per-pass + * de-duplication is enforced inside `reconcileMergedParks` (not the + * caller) — a timer- or microtask-based dedup at the call site would race + * against the `await` boundaries inside the iteration loop. */ onMergedTransition?: (repo: string) => Promise; }; @@ -379,6 +380,12 @@ export async function reconcileMergedParks(deps: ReconcileDeps): Promise } let reconciled = 0; + // Per-pass dedup for the `onMergedTransition` hook. Done here (not at the + // call site) because a timer- or microtask-based dedup in the caller would + // race against the `await deps.removeWorktree` / `await deps.onMergedTransition` + // boundaries below — a `setTimeout(0)` reset can fire between iterations and + // let a second sweep through for the same repo. + const mergedRepos = new Set(); for (const wf of parked.slice(0, maxPerPass)) { try { const life = await deps.github.findEpicPrLifecycle(wf.repo, wf.epicNumber); @@ -404,9 +411,12 @@ export async function reconcileMergedParks(deps: ReconcileDeps): Promise } } // Epic #168 hook: a MERGED transition is the moment divergence may have - // emerged on sibling Epic PRs. Best-effort and isolated — a throw here - // never blocks the rest of the parks pass. - if (life.state === "MERGED" && deps.onMergedTransition) { + // emerged on sibling Epic PRs. Fires at most once per repo per pass + // (`mergedRepos` guard) so a pass with N MERGED rows on the same repo + // doesn't fire N concurrent reconcile sweeps. Best-effort and isolated — + // a throw here never blocks the rest of the parks pass. + if (life.state === "MERGED" && deps.onMergedTransition && !mergedRepos.has(wf.repo)) { + mergedRepos.add(wf.repo); try { await deps.onMergedTransition(wf.repo); } catch (error) { diff --git a/packages/dispatcher/src/reconcilers/pr-divergence.ts b/packages/dispatcher/src/reconcilers/pr-divergence.ts index 1c900da6..564f5a1f 100644 --- a/packages/dispatcher/src/reconcilers/pr-divergence.ts +++ b/packages/dispatcher/src/reconcilers/pr-divergence.ts @@ -425,9 +425,10 @@ function reconciledMarker(resolution: ReconciliationResolution, mainCommitSha: s * text stays clean. When `mainCommitSha` is null (a transient * `getMainCommitSha` failure during the same pass), the comment step is * **skipped** — the marker would be ambiguous — but the push + state-row - * write still happen. A later pass with a readable main SHA posts the - * announcement; the sha-keyed marker prevents a duplicate once the SHA - * settles. + * write still happen. A later reconciliation pass announces against + * *whatever main it reads then* (which may be a newer SHA than the one + * this pass would have used); the sha-keyed marker prevents a duplicate + * when the SHA matches and allows a fresh announcement when main moved. * - **State row:** upserted to `CLEAN` — idempotent by construction. * * Force-pushing uses `--force-with-lease` (never plain `--force`), so a diff --git a/packages/dispatcher/test/pr-divergence-integration.test.ts b/packages/dispatcher/test/pr-divergence-integration.test.ts index abbe10b7..941fb4fe 100644 --- a/packages/dispatcher/test/pr-divergence-integration.test.ts +++ b/packages/dispatcher/test/pr-divergence-integration.test.ts @@ -774,6 +774,45 @@ describe("reconcileOpenPRs — end-to-end against the fixture repo", () => { expect(getDivergenceState(db, "o/r", 201)?.state).toBe("CLEAN" satisfies DivergenceState); // classifier wrote CLEAN }); + test("per-PR throw increments `failed` and the pass continues on subsequent PRs (self-review hardening)", async () => { + // Two PRs: the first throws inside the chain, the second runs cleanly. + // The failed counter must increment, the pass must not abort, and the + // second PR's outcome must be unaffected. + await aliasFixtureUnderRoot("o/r", 32); + + const fixture = makeOrchestratorGateway({ + openPrs: [ + { prNumber: 300, headRefName: "middle-issue-32" }, + { prNumber: 301, headRefName: "middle-issue-99" }, + ], + headRefs: { 300: "middle-issue-32", 301: "middle-issue-99" }, + mergeability: { + 300: { mergeStateStatus: "CLEAN", mergeable: "MERGEABLE" }, + 301: { mergeStateStatus: "CLEAN", mergeable: "MERGEABLE" }, + }, + }); + // Make the first PR throw during classification. + const originalGetMergeability = fixture.gateway.getMergeability; + fixture.gateway.getMergeability = async (repo, prNumber) => { + if (prNumber === 300) throw new Error("transient classify boom"); + return originalGetMergeability(repo, prNumber); + }; + + const baseDeps = deps(); + const r = await reconcileOpenPRs( + { + ...baseDeps, + db, + github: fixture.gateway, + enqueueEpic: async () => {}, + getRateLimit: async () => ({ remaining: 5000, resetAt: 0 }), + }, + "o/r", + ); + // The throwing PR shows up as `failed: 1`; the clean PR is `passed: 1`. + expect(r).toEqual({ reconciled: 0, passed: 1, failed: 1, skippedForBudget: false }); + }); + test("listOpenManagedPrs throws → pass returns 0s and logs, no orchestration", async () => { const fixture = makeOrchestratorGateway({ openPrs: [], diff --git a/packages/dispatcher/test/reconcile.test.ts b/packages/dispatcher/test/reconcile.test.ts index 8213f26e..23fafd1a 100644 --- a/packages/dispatcher/test/reconcile.test.ts +++ b/packages/dispatcher/test/reconcile.test.ts @@ -142,7 +142,7 @@ describe("reconcileMergedParks", () => { expect(getWorkflow(db, id)?.state).toBe("waiting-human"); // untouched — budget-gated }); - test("fires onMergedTransition once per MERGED transition observed (Epic #168 wiring)", async () => { + test("fires onMergedTransition at most once per repo per pass (Epic #168 wiring)", async () => { seedParked(70); seedParked(71); seedParked(72); @@ -160,9 +160,10 @@ describe("reconcileMergedParks", () => { }, }); - // Two MERGED transitions observed → hook fired twice (per-pass de-dup is - // the caller's job; the daemon's reconcileOpenPRsForRepo is itself idempotent). - expect(triggered).toEqual([REPO, REPO]); + // Two MERGED rows on the same repo → hook fires once (dedup inside + // `reconcileMergedParks` — a per-pass Set guards against the burst that + // a caller-side timer would race against the await points and miss). + expect(triggered).toEqual([REPO]); }); test("a thrown onMergedTransition is isolated — the merged-parks pass still finishes", async () => { @@ -182,7 +183,9 @@ describe("reconcileMergedParks", () => { }, }), ).toBe(2); - expect(triggered.length).toBe(2); + // Per-pass dedup: same repo, two MERGED rows → hook fires once but the + // throw is still isolated and both rows finalize. + expect(triggered.length).toBe(1); expect(getWorkflow(db, idA)?.state).toBe("completed"); expect(getWorkflow(db, idB)?.state).toBe("completed"); }); From 58c53557fb7eff2415d3e614c81876188c3139ea Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 02:01:34 -0400 Subject: [PATCH 19/23] test(reconcilers): cover the merge-twin non-conflict throw path (symmetric to rebase) The rebase non-conflict throw is asserted; `gitOps.mergeOurs` has the same hardening but no test pinned it. A regression that dropped the throw on the merge twin would have slipped through. Mirror the rebase assertion against a missing ref. --- .../dispatcher/test/pr-divergence-integration.test.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/dispatcher/test/pr-divergence-integration.test.ts b/packages/dispatcher/test/pr-divergence-integration.test.ts index 941fb4fe..4144624f 100644 --- a/packages/dispatcher/test/pr-divergence-integration.test.ts +++ b/packages/dispatcher/test/pr-divergence-integration.test.ts @@ -247,6 +247,16 @@ describe("tryRebaseOntoMain — fixture repo", () => { /failed without unmerged files/, ); }); + + test("non-conflict merge failure (missing ref) THROWS — symmetric to the rebase hardening", async () => { + // Symmetric coverage for `gitOps.mergeOurs`: the same "non-zero exit AND + // no unmerged paths" shape must throw, not silently map to a path-less + // conflict result. Without this assertion, a regression that dropped the + // throw on the merge twin would slip through. + await expect(gitOps.mergeOurs(worktree, "does-not-exist-ref")).rejects.toThrow( + /failed without unmerged files/, + ); + }); }); describe("tryMergeMainNewWorkAsBase — fixture repo", () => { From 730e6c4da58bc24ab7d11d3b721a6a8c3d64338e Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 02:51:11 -0400 Subject: [PATCH 20/23] =?UTF-8?q?fix(adapters):=20exact-key=20registry=20l?= =?UTF-8?q?ookup=20=E2=80=94=20Map,=20not=20plain=20object?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A plain-object REGISTRY lets `getAdapter("toString")` resolve the inherited `Object.prototype.toString` instead of throwing — `getAdapter`'s `adapter === undefined` guard only catches truly-missing keys, not inherited ones. Callers that validate names by calling `getAdapter` and treating thrown errors as "unknown" (e.g. `resolveRecommenderOptions`) would let a non- `AgentAdapter` value through. Switch to `Map` so lookups are exact-key by construction; `knownAdapters`, `isKnownAdapter`, `getAdapter` all use the Map's API. Regression tests parameterize over the suspect prototype keys (`toString`, `constructor`, `hasOwnProperty`, `__proto__`). Addresses CodeRabbit on PR #175. --- packages/dispatcher/src/adapters.ts | 19 ++++++++++++------- .../test/adapter-conformance.test.ts | 17 ++++++++++++++++- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/packages/dispatcher/src/adapters.ts b/packages/dispatcher/src/adapters.ts index e819a907..fc569ad5 100644 --- a/packages/dispatcher/src/adapters.ts +++ b/packages/dispatcher/src/adapters.ts @@ -9,19 +9,24 @@ import { claudeAdapter } from "@middle/adapter-claude"; import { codexAdapter } from "@middle/adapter-codex"; import type { AgentAdapter } from "@middle/core"; -const REGISTRY: Readonly> = { - claude: claudeAdapter, - codex: codexAdapter, -}; +// `Map` (not a plain object) so lookups never resolve inherited keys: an +// attacker-controlled name like `toString` or `constructor` would otherwise hit +// `Object.prototype` and bypass `getAdapter`'s `undefined` guard, returning a +// non-`AgentAdapter` value to callers that validate names by side-effecting +// `getAdapter`. +const REGISTRY = new Map([ + ["claude", claudeAdapter], + ["codex", codexAdapter], +]); /** The names of every implemented adapter. */ export function knownAdapters(): string[] { - return Object.keys(REGISTRY); + return [...REGISTRY.keys()]; } /** Whether `name` resolves to an implemented adapter. */ export function isKnownAdapter(name: string): boolean { - return Object.hasOwn(REGISTRY, name); + return REGISTRY.has(name); } /** @@ -30,7 +35,7 @@ export function isKnownAdapter(name: string): boolean { * non-throwing check guard with {@link isKnownAdapter} first. */ export function getAdapter(name: string): AgentAdapter { - const adapter = REGISTRY[name]; + const adapter = REGISTRY.get(name); if (adapter === undefined) { throw new Error(`unknown adapter: ${name} (known: ${knownAdapters().join(", ")})`); } diff --git a/packages/dispatcher/test/adapter-conformance.test.ts b/packages/dispatcher/test/adapter-conformance.test.ts index 706dda98..63e31240 100644 --- a/packages/dispatcher/test/adapter-conformance.test.ts +++ b/packages/dispatcher/test/adapter-conformance.test.ts @@ -3,7 +3,7 @@ import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { isNormalizedEvent } from "@middle/core"; -import { getAdapter, knownAdapters } from "../src/adapters.ts"; +import { getAdapter, isKnownAdapter, knownAdapters } from "../src/adapters.ts"; // The proof that the `AgentAdapter` abstraction holds across both adapters: the // SAME sequence of interface calls is driven against every registered adapter @@ -33,6 +33,21 @@ test("the registry knows both adapters", () => { expect(knownAdapters().sort()).toEqual(["claude", "codex"]); }); +// Regression: a plain-object registry would inherit `Object.prototype` keys +// (`toString`, `constructor`, `hasOwnProperty`, …) and `getAdapter("toString")` +// would return the inherited function instead of throwing. Lookups must be +// exact-key — the registry is a `Map` for this reason. +describe("registry lookup is exact-key (no prototype walk)", () => { + for (const protoKey of ["toString", "constructor", "hasOwnProperty", "__proto__"]) { + test(`getAdapter(${JSON.stringify(protoKey)}) throws unknown-adapter`, () => { + expect(() => getAdapter(protoKey)).toThrow(/unknown adapter/); + }); + test(`isKnownAdapter(${JSON.stringify(protoKey)}) is false`, () => { + expect(isKnownAdapter(protoKey)).toBe(false); + }); + } +}); + describe.each(knownAdapters())("AgentAdapter contract — %s", (name) => { const adapter = getAdapter(name); From 56dbd63952dd914f2f931111590216a1b1399af7 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 02:51:25 -0400 Subject: [PATCH 21/23] fix(dispatcher): daemon adapter gate honors config.enabled across all entry points MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `isKnownAdapter` gates only on "implemented", so a disabled-but-implemented adapter could slip through the daemon's manual-dispatch route, the recommender validator, and the docs validator — all daemon-side entry points reachable below the CLI's enabled-check (a direct `/control/dispatch` POST or a dashboard call). Each surface needs the same gate. - `dispatchEpicManual` + the hook-server's `/control/dispatch` route now share a single source of truth (`adapterRejectionReason` in main.ts), wired via `ControlPlane.adapterRejection: (name) => string | null`. The new contract collapses the gate + the diagnostic into one method, so both surfaces report "adapter codex is disabled in config" vs "unknown adapter: ghost" identically — previously the route always said "unknown" for both, which is misleading. - `resolveRecommenderOptions` and `resolveDocumentationOptions` get the same enabled-check in line with the main.ts gate. Tests: - `main.test.ts` — daemon-spawn integration test exercises the disabled and unknown cases via real `/control/dispatch` POSTs, asserting both the 400 status and the JSON body's distinct wording. - `control-routes.test.ts` — unit test for the route's body validation pinning the same disabled-vs-unknown distinction. - `recommender-run.test.ts` / `documentation-run.test.ts` — resolver-level coverage that a disabled adapter is refused with the matching message. Addresses CodeRabbit on PR #175 (main.ts:348, main.ts:479); also resolves the same class of bug one step over in the recommender + docs validators (surfaced by the internal clean-eyes pass). --- packages/dispatcher/src/documentation-run.ts | 7 +++ packages/dispatcher/src/hook-server.ts | 20 ++++--- packages/dispatcher/src/main.ts | 26 +++++++-- packages/dispatcher/src/recommender-run.ts | 7 +++ .../dispatcher/test/control-routes.test.ts | 32 ++++++++++- .../dispatcher/test/documentation-run.test.ts | 27 +++++++++- packages/dispatcher/test/main.test.ts | 53 +++++++++++++++++++ .../dispatcher/test/recommender-run.test.ts | 45 ++++++++++++++++ 8 files changed, 205 insertions(+), 12 deletions(-) diff --git a/packages/dispatcher/src/documentation-run.ts b/packages/dispatcher/src/documentation-run.ts index 576fd344..b32ec4e8 100644 --- a/packages/dispatcher/src/documentation-run.ts +++ b/packages/dispatcher/src/documentation-run.ts @@ -88,6 +88,13 @@ export async function resolveDocumentationOptions( } catch (error) { return { ok: false, error: (error as Error).message }; } + // Dispatchable = implemented (above) AND enabled in config — mirror the + // daemon's manual-dispatch + recommender-validator gates so a + // `[docs] adapter = "x"` pointing at a disabled adapter can't sneak through + // the daemon's documentation route below the CLI's enabled-check. + if (!(config.adapters[adapterName]?.enabled ?? false)) { + return { ok: false, error: `adapter ${adapterName} is disabled in config` }; + } let target: DocsTargetSummary; try { diff --git a/packages/dispatcher/src/hook-server.ts b/packages/dispatcher/src/hook-server.ts index 2357700e..7e7d61d2 100644 --- a/packages/dispatcher/src/hook-server.ts +++ b/packages/dispatcher/src/hook-server.ts @@ -39,8 +39,14 @@ export type ControlPlane = { hub: EventHub; /** Reported by `/health` so a client can confirm compatibility. */ version: string; - /** Whether `name` is a dispatchable adapter (body validation). */ - knownAdapter: (name: string) => boolean; + /** + * Body-validation for the adapter name: returns a 400 error message when + * `name` is not dispatchable (unknown OR disabled), or `null` when it is. + * Daemon wiring distinguishes the two cases in the message so a user trying + * to dispatch on a disabled adapter sees the real reason instead of a + * misleading "unknown adapter" message. + */ + adapterRejection: (name: string) => string | null; /** * Atomically start a dispatch on the daemon's long-lived engine and resolve * the workflow id — or resolve `null` if the Epic already has an active @@ -398,10 +404,12 @@ export class HookServer implements SessionGate { if (typeof epicNumber !== "number" || !Number.isInteger(epicNumber) || epicNumber < 1) { return this.#badRequest("epicNumber must be an integer >= 1"); } - if (typeof adapter !== "string" || !control.knownAdapter(adapter)) { - return this.#badRequest( - `unknown adapter: ${typeof adapter === "string" ? adapter : "(missing)"}`, - ); + if (typeof adapter !== "string") { + return this.#badRequest("unknown adapter: (missing)"); + } + const reject = control.adapterRejection(adapter); + if (reject !== null) { + return this.#badRequest(reject); } const dispatchInput = { repo: normalizedRepo, repoPath, epicNumber, adapter }; diff --git a/packages/dispatcher/src/main.ts b/packages/dispatcher/src/main.ts index c5a94ac4..fbb0ce5a 100644 --- a/packages/dispatcher/src/main.ts +++ b/packages/dispatcher/src/main.ts @@ -99,6 +99,22 @@ function readVersion(): string { export async function runDaemon(opts: RunDaemonOptions = {}): Promise { const config = loadConfig({ globalPath: process.env.MIDDLE_CONFIG }); + // The single source of truth for "is this adapter dispatchable right now, + // and if not, why?" Used by `dispatchEpicManual` (the dashboard's direct + // hostExtras path) and the hook server's `/control/dispatch` route via + // `ControlPlane.adapterRejection`, so both surfaces gate AND format the + // diagnostic identically — the user trying to dispatch on a disabled adapter + // sees the real reason, never a misleading "unknown" message. `isKnownAdapter` + // alone gates only on "implemented"; the CLI does its own gating, but a + // direct route POST or a dashboard call lands here without the CLI's check. + const adapterRejectionReason = (name: string): string | null => { + if (!isKnownAdapter(name)) return `unknown adapter: ${name}`; + if (!(config.adapters[name]?.enabled ?? false)) { + return `adapter ${name} is disabled in config`; + } + return null; + }; + mkdirSync(dirname(config.global.dbPath), { recursive: true }); const db = openAndMigrate(config.global.dbPath); @@ -345,8 +361,9 @@ export async function runDaemon(opts: RunDaemonOptions = {}): Promise { if (repoPath === undefined) { return { status: 400, body: JSON.stringify({ error: `unknown repo: ${normalizedRepo}` }) }; } - if (!isKnownAdapter(adapter)) { - return { status: 400, body: JSON.stringify({ error: `unknown adapter: ${adapter}` }) }; + const reject = adapterRejectionReason(adapter); + if (reject !== null) { + return { status: 400, body: JSON.stringify({ error: reject }) }; } const input = { repo: normalizedRepo, repoPath, epicNumber, adapter }; if (!slotAvailable(input)) { @@ -476,7 +493,10 @@ export async function runDaemon(opts: RunDaemonOptions = {}): Promise { const control: ControlPlane = { hub, version, - knownAdapter: isKnownAdapter, + // The control-route guard mirrors `dispatchEpicManual` — only configured, + // enabled, implemented adapters are dispatchable from `/control/dispatch`, + // and the message distinguishes "unknown" from "disabled" identically. + adapterRejection: adapterRejectionReason, // A route dispatch is a manual `mm dispatch` — recorded `source: 'manual'`. startDispatch: (input) => startDispatchImpl(input, "manual"), // Manual dispatch respects slot limits (the loop does its own accounting). diff --git a/packages/dispatcher/src/recommender-run.ts b/packages/dispatcher/src/recommender-run.ts index 1411af91..f1cf505b 100644 --- a/packages/dispatcher/src/recommender-run.ts +++ b/packages/dispatcher/src/recommender-run.ts @@ -118,6 +118,13 @@ export async function resolveRecommenderOptions( } catch (error) { return { ok: false, error: (error as Error).message }; } + // Dispatchable = implemented (above) AND enabled in config — mirror the + // daemon's manual-dispatch gate so a `[recommender] adapter = "x"` pointing + // at a disabled adapter can't sneak through the `/trigger/recommender` + // entry point (the CLI gates earlier, the dashboard hits this directly). + if (!(config.adapters[adapterName]?.enabled ?? false)) { + return { ok: false, error: `adapter ${adapterName} is disabled in config` }; + } const schemaPath = join(repoPath, "schemas", "state-issue.v1.md"); if (!existsSync(schemaPath)) { return { diff --git a/packages/dispatcher/test/control-routes.test.ts b/packages/dispatcher/test/control-routes.test.ts index cd8ec709..36e96485 100644 --- a/packages/dispatcher/test/control-routes.test.ts +++ b/packages/dispatcher/test/control-routes.test.ts @@ -19,7 +19,7 @@ function makeControl(overrides: Partial = {}): ControlPlane { return { hub, version: "1.2.3", - knownAdapter: (name) => name === "claude", + adapterRejection: (name) => (name === "claude" ? null : `unknown adapter: ${name}`), startDispatch: async (input) => { // `startDispatch` is the single source of truth for the 409 guard: a // colliding Epic resolves `null`. The stub drives that off `collisionEpics`. @@ -101,6 +101,36 @@ describe("HookServer control routes", () => { expect(startCalls).toEqual([]); }); + test("POST /control/dispatch surfaces the disabled-vs-unknown distinction in the 400 body", async () => { + // `ControlPlane.adapterRejection` is the single source of truth for the + // wording, so the route's 400 message must reflect *why* the adapter was + // rejected — never the misleading "unknown adapter" for a disabled-but- + // implemented adapter. The wiring in main.ts threads `adapterRejectionReason`. + startWith( + makeControl({ + adapterRejection: (name) => { + if (name === "claude") return null; + if (name === "codex") return `adapter ${name} is disabled in config`; + return `unknown adapter: ${name}`; + }, + }), + ); + + const disabled = await fetch(`${base}/control/dispatch`, { + method: "POST", + body: JSON.stringify({ repo: "o/r", repoPath: "/abs", epicNumber: 1, adapter: "codex" }), + }); + expect(disabled.status).toBe(400); + expect(await disabled.json()).toEqual({ error: "adapter codex is disabled in config" }); + + const unknown = await fetch(`${base}/control/dispatch`, { + method: "POST", + body: JSON.stringify({ repo: "o/r", repoPath: "/abs", epicNumber: 1, adapter: "ghost" }), + }); + expect(unknown.status).toBe(400); + expect(await unknown.json()).toEqual({ error: "unknown adapter: ghost" }); + }); + test("POST /control/dispatch refuses with 429 when no slot is available (manual respects limits)", async () => { startWith(makeControl({ slotAvailable: () => false })); const res = await fetch(`${base}/control/dispatch`, { diff --git a/packages/dispatcher/test/documentation-run.test.ts b/packages/dispatcher/test/documentation-run.test.ts index c2ee2b2f..85ea0198 100644 --- a/packages/dispatcher/test/documentation-run.test.ts +++ b/packages/dispatcher/test/documentation-run.test.ts @@ -99,7 +99,10 @@ function baseOptions( }; } -/** A minimal config with the global defaults and the given `[docs]` block. */ +/** A minimal config with the global defaults and the given `[docs]` block. + * The adapter table mirrors the production-loaded defaults (`loadConfig` fills + * these from the schema) so `resolveDocumentationOptions`'s enabled-gate sees + * realistic data. */ function configWith(docs?: MiddleConfig["docs"]): MiddleConfig { return { global: { @@ -110,7 +113,10 @@ function configWith(docs?: MiddleConfig["docs"]): MiddleConfig { worktreeRoot: join(scratch, "worktrees"), dbPath: join(scratch, "db.sqlite3"), }, - adapters: {}, + adapters: { + claude: { enabled: true, binary: "claude", extraArgs: [] }, + codex: { enabled: true, binary: "codex", extraArgs: [] }, + }, dashboard: { windowed: false, theme: "auto" }, docs, }; @@ -182,6 +188,23 @@ describe("resolveDocumentationOptions", () => { if (!result.ok) expect(result.error).toContain("unknown adapter: ghost"); }); + test("rejects an implemented-but-disabled adapter — mirrors the daemon's dispatch gate", async () => { + // The daemon's manual-dispatch route already gates on configured+enabled+ + // implemented (`adapterRejectionReason` in main.ts); the docs validator must match so + // a `[docs] adapter = "x"` pointing at a disabled adapter can't sneak in + // through `/control/dispatch` (CLI gates earlier, the dashboard hits here). + const config = configWith({ + enabled: true, + intervalMinutes: 60, + adapter: "codex", + write: false, + }); + config.adapters.codex = { enabled: false, binary: "codex", extraArgs: [] }; + const result = await resolveDocumentationOptions(repoPath, config, stubAdapter); + expect(result.ok).toBe(false); + if (!result.ok) expect(result.error).toContain("adapter codex is disabled in config"); + }); + test("resolves the markdown fallback target for a plain repo", async () => { const result = await resolveDocumentationOptions(repoPath, configWith(undefined), stubAdapter); expect(result.ok).toBe(true); diff --git a/packages/dispatcher/test/main.test.ts b/packages/dispatcher/test/main.test.ts index 36538ed0..b5b2be47 100644 --- a/packages/dispatcher/test/main.test.ts +++ b/packages/dispatcher/test/main.test.ts @@ -181,6 +181,59 @@ describe("dispatcher main", () => { } }, 20_000); + test("daemon rejects a disabled adapter on /control/dispatch (configured+enabled+implemented gate)", async () => { + // Override codex to disabled in this test's config; claude stays on (default). + writeFileSync( + configPath, + [ + "[global]", + "dispatcher_port = 0", + `db_path = "${join(dir, "db.sqlite3")}"`, + `worktree_root = "${join(dir, "worktrees")}"`, + `log_dir = "${join(dir, "logs")}"`, + "", + "[adapters.codex]", + "enabled = false", + "", + ].join("\n"), + ); + const repoPath = join(realpathSync(dir), "repo-disabled"); + expect( + await Bun.spawn(["git", "init", repoPath], { stdout: "ignore", stderr: "ignore" }).exited, + ).toBe(0); + + const { proc, port } = await startDaemon(); + try { + const base = `http://127.0.0.1:${port}`; + const body = (adapter: string) => + JSON.stringify({ repo: "o/r", repoPath, epicNumber: 88, adapter }); + + // Disabled-but-implemented adapter — must 400 with a "disabled" reason, + // NOT "unknown adapter": the route reaches the daemon below the CLI gate + // (a direct POST or a dashboard call), so the daemon owns this check. + const disabled = await fetch(`${base}/control/dispatch`, { + method: "POST", + body: body("codex"), + }); + expect(disabled.status).toBe(400); + expect(await disabled.json()).toEqual({ error: "adapter codex is disabled in config" }); + + // Unimplemented adapter — still 400, with the distinct "unknown" wording + // (so an operator can tell a typo from a deliberate disable). + const unknown = await fetch(`${base}/control/dispatch`, { + method: "POST", + body: body("nonexistent"), + }); + expect(unknown.status).toBe(400); + expect(await unknown.json()).toEqual({ error: "unknown adapter: nonexistent" }); + + proc.kill("SIGTERM"); + expect(await proc.exited).toBe(0); + } finally { + proc.kill("SIGKILL"); + } + }, 20_000); + test("two concurrent dispatches of the same Epic: exactly one starts, the other 409s", async () => { const repoPath = join(realpathSync(dir), "repo-concurrent"); const gitEnv = { diff --git a/packages/dispatcher/test/recommender-run.test.ts b/packages/dispatcher/test/recommender-run.test.ts index ab3948b4..e2984db3 100644 --- a/packages/dispatcher/test/recommender-run.test.ts +++ b/packages/dispatcher/test/recommender-run.test.ts @@ -6,10 +6,12 @@ import type { AgentAdapter, HookPayload } from "@middle/core"; import { renderStateIssue } from "@middle/state-issue"; import { openAndMigrate } from "../src/db.ts"; import type { SessionGate } from "../src/hook-server.ts"; +import type { MiddleConfig } from "@middle/core"; import { dispatchRecommender, type DispatchRecommenderOptions, type RecommenderRunOverrides, + resolveRecommenderOptions, } from "../src/recommender-run.ts"; import type { RecommenderContext } from "../src/workflows/recommender.ts"; @@ -204,3 +206,46 @@ describe("dispatchRecommender — enqueues a recommender workflow (read-only)", expect(calls).toEqual([]); }); }); + +describe("resolveRecommenderOptions", () => { + function configWithAdapters(overrides?: Partial): MiddleConfig { + return { + global: { + dispatcherPort: 0, + maxConcurrent: 4, + defaultAdapter: "claude", + logDir: "/tmp/logs", + worktreeRoot: join(scratch, "worktrees"), + dbPath: join(scratch, "db.sqlite3"), + }, + adapters: { + claude: { enabled: true, binary: "claude", extraArgs: [] }, + codex: { enabled: true, binary: "codex", extraArgs: [] }, + ...overrides, + }, + dashboard: { windowed: false, theme: "auto" }, + stateIssue: { number: 1, label: "agent:dispatch" }, + }; + } + + test("rejects an implemented-but-disabled adapter — mirrors the daemon's dispatch gate", async () => { + // Same class of bug as the main.ts manual-dispatch check: a daemon route + // (`/trigger/recommender` or the dashboard's "run recommender now") hits + // this validator below the CLI's enabled-check, so the validator must + // refuse a disabled adapter rather than silently dispatching on it. + const schemaDir = join(repoPath, "schemas"); + await Bun.write(join(schemaDir, "state-issue.v1.md"), "schema stub\n"); + const config = configWithAdapters({ + codex: { enabled: false, binary: "codex", extraArgs: [] }, + }); + config.recommender = { + enabled: true, + adapter: "codex", + intervalMinutes: 15, + autoDispatch: false, + }; + const result = await resolveRecommenderOptions(repoPath, config, () => stubAdapter()); + expect(result.ok).toBe(false); + if (!result.ok) expect(result.error).toContain("adapter codex is disabled in config"); + }); +}); From 162966098d561997f15b3db7deeae163f50f3646 Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 02:51:33 -0400 Subject: [PATCH 22/23] fix(poller): align POLLER_INTERVAL_MS with the CLAUDE.md 60s cadence contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dispatcher's CLAUDE.md pins the cron cadence at `WATCHDOG_INTERVAL_MS = 30s` and `POLLER_INTERVAL_MS = 60s`, but the code literal had drifted to 120s and silently doubled production cadence — the cron uses this constant when `startPoller`'s `intervalMs` is omitted, which is the daemon's call shape. Resyncing to 60s heals MERGED-transition divergence within one tick per Epic #168's intent and keeps the doc + code aligned. Add a regression test pinning the value so a future shift surfaces immediately. Addresses CodeRabbit on PR #175. --- packages/dispatcher/src/poller-cron.ts | 17 ++++++++++------- packages/dispatcher/test/poller-cron.test.ts | 12 ++++++++++++ 2 files changed, 22 insertions(+), 7 deletions(-) create mode 100644 packages/dispatcher/test/poller-cron.test.ts diff --git a/packages/dispatcher/src/poller-cron.ts b/packages/dispatcher/src/poller-cron.ts index f8aadba0..7be55710 100644 --- a/packages/dispatcher/src/poller-cron.ts +++ b/packages/dispatcher/src/poller-cron.ts @@ -2,14 +2,17 @@ import { Bunqueue } from "bunqueue/client"; import { type PollerDeps, reconcileMergedParks, runPoller } from "./poller.ts"; /** - * Default cadence for the GitHub poller. Much slower than the watchdog (30s) — - * a human reply or a review verdict is not latency-sensitive, and a gentler - * cadence is kinder to GitHub rate limits. The poller spends ~1 `gh` call per - * parked workflow per tick and has no backoff yet (see #122), so a conservative - * default keeps a many-parked-workflow / multi-repo deployment well clear of - * the 5000/hr ceiling and of secondary (burst) limits. Override via `startPoller`. + * Default cadence for the GitHub poller. Slower than the watchdog (30s) — a + * human reply or a review verdict isn't latency-sensitive at the second scale, + * and a gentler cadence is kinder to GitHub rate limits. The poller spends ~1 + * `gh` call per parked workflow per tick and has no backoff yet (see #122), so + * 60s keeps a many-parked-workflow / multi-repo deployment well clear of the + * 5000/hr ceiling and of secondary (burst) limits while still healing + * MERGED-transition divergence within one tick (Epic #168). Override via + * `startPoller`. Pinned by the dispatcher's CLAUDE.md cadence contract — keep + * the value and the doc in sync there. */ -export const POLLER_INTERVAL_MS = 120_000; +export const POLLER_INTERVAL_MS = 60_000; /** * Extra reconciliation work the daemon hangs off each poller tick (Epic #168). diff --git a/packages/dispatcher/test/poller-cron.test.ts b/packages/dispatcher/test/poller-cron.test.ts new file mode 100644 index 00000000..3811d3f2 --- /dev/null +++ b/packages/dispatcher/test/poller-cron.test.ts @@ -0,0 +1,12 @@ +import { expect, test } from "bun:test"; +import { POLLER_INTERVAL_MS } from "../src/poller-cron.ts"; + +// The dispatcher's CLAUDE.md fixes the cron cadence at WATCHDOG_INTERVAL_MS = 30s +// and POLLER_INTERVAL_MS = 60s. Earlier the file's literal had drifted to 120s +// and silently doubled production cadence (the cron uses this constant when +// `startPoller`'s `intervalMs` is omitted, which is the daemon's call shape). +// Pin the value here so a future shift surfaces as a test failure, not a slow +// reconciler the next reviewer has to rediscover. +test("POLLER_INTERVAL_MS matches the dispatcher CLAUDE.md cadence contract (60s)", () => { + expect(POLLER_INTERVAL_MS).toBe(60_000); +}); From 2d9930e7a514c3d79ffbc11a53d9cbb7d50e4f2c Mon Sep 17 00:00:00 2001 From: Justin Walsh Date: Thu, 28 May 2026 02:51:52 -0400 Subject: [PATCH 23/23] fix(reconcilers): applyDemoteToWork per-step idempotency + gateway 404 vs failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related hardenings on the `pr-divergence` reconciler, plus a quick-win migration index. **applyDemoteToWork — partial-retry safe.** The pre-existing `if (pr.isDraft) return` short-circuit meant a prior attempt that crashed AFTER `convertPrToDraft` but BEFORE the reopen/comment/enqueue/ state-write steps could never finish remediation on retry — the next pass would see `isDraft=true` and bail. Replace with per-step idempotency: skip only the draft-flip step when already drafted; downstream remediation runs to completion, gated by its existing markers (Epic comment marker, dual- surface comment markers, the daemon's existing-workflow guard on enqueue). Two tests: the renamed "per-step idempotency" test (re-enqueue fires every pass — docstring contract), and a new "partial-retry: prior attempt left PR drafted only" test that asserts every downstream step still runs. **Gateway — distinguish "PR not found" from real failures.** `getMainCommitSha` / `getMergeability` / `getPrHeadRef` / `getPullRequest` collapsed every non-zero `gh` exit into `null`, hiding transport / auth / rate-limit failures as benign UNKNOWN no-ops. Each now consults `ghStderrIsNotFound` (exported for tests; matches only the two stable `gh` not-found phrasings — `Could not resolve to a …` and `HTTP 404` — deliberately narrow so a `secret not found, push declined` can't silently masquerade as a 404). Real failures throw with stderr in the message, propagating to the orchestrator's per-PR `failed++` counter. `classifyDivergence`'s docstring calls out the consequence: a transport failure leaves the state row at its prior observation rather than overwriting with stale UNKNOWN. **Migration nitpick — index `classified_at`.** Recency / staleness scans on `pr_divergence_state` are its other query shape; without an index they'd force a table scan as the table grows. Addresses CodeRabbit on PR #175 (pr-divergence.ts:597, pr-divergence.ts:913, 006_pr_divergence_state.sql:27 nitpick). --- .../db/migrations/006_pr_divergence_state.sql | 5 ++ .../src/reconcilers/pr-divergence.ts | 71 ++++++++++++++--- .../dispatcher/test/pr-divergence.test.ts | 79 ++++++++++++++++++- 3 files changed, 142 insertions(+), 13 deletions(-) diff --git a/packages/dispatcher/src/db/migrations/006_pr_divergence_state.sql b/packages/dispatcher/src/db/migrations/006_pr_divergence_state.sql index 71cecffc..aa0fb830 100644 --- a/packages/dispatcher/src/db/migrations/006_pr_divergence_state.sql +++ b/packages/dispatcher/src/db/migrations/006_pr_divergence_state.sql @@ -25,3 +25,8 @@ CREATE TABLE pr_divergence_state ( ); CREATE INDEX idx_pr_divergence_state ON pr_divergence_state(state); +-- Recency/staleness scans (e.g. "rows last classified > N seconds ago") are +-- the table's other query shape; index `classified_at` so they don't degrade +-- to a full scan as the table grows. +CREATE INDEX idx_pr_divergence_state_classified_at + ON pr_divergence_state(classified_at); diff --git a/packages/dispatcher/src/reconcilers/pr-divergence.ts b/packages/dispatcher/src/reconcilers/pr-divergence.ts index 564f5a1f..560c3067 100644 --- a/packages/dispatcher/src/reconcilers/pr-divergence.ts +++ b/packages/dispatcher/src/reconcilers/pr-divergence.ts @@ -88,6 +88,12 @@ export function classifyMergeability(view: MergeabilityView | null): DivergenceS * One `gh` call per PR (`mergeable,mergeStateStatus` on `pr view`), so a sweep * over `N` open PRs costs `N` REST calls — the rate-limit ceiling * (Phase 6) is what bounds bursts. + * + * A `getMergeability` transport/auth failure propagates as a thrown error (the + * gateway distinguishes that from a missing PR — see `ghStderrIsNotFound`). The + * orchestrator's per-PR try/catch counts that as `failed`, and the state row + * stays at its previous observation rather than being overwritten with a stale + * UNKNOWN — preserving the failure signal for the operator. */ export async function classifyDivergence( deps: { db: Database; github: DivergenceGateway; now?: () => number }, @@ -556,11 +562,14 @@ function renderSubIssueReopenComment(prNumber: number, epicNumber: number): stri * review-feedback convention), re-enqueues the Epic through the recommender's * entry point, and records `DEMOTED` in `pr_divergence_state`. * - * Idempotency rests on two independent gates so a partial-retry doesn't pile - * on duplicates: - * - **PR.isDraft** — if the PR is already a draft, the demote has already - * landed; the whole function returns. This survives a classifier overwrite - * of the state row back to CONFLICTED (a re-classification under DEMOTED). + * Idempotency is **per-step**, never a function-wide short-circuit — a partial + * prior attempt (e.g. the PR got flipped to draft but the next call crashed + * before reopen/comment/enqueue/state-write) must still be able to finish + * remediation on retry. Each step has its own gate so duplicates don't pile on: + * - **PR.isDraft** — if the PR is already a draft, skip the `convertPrToDraft` + * step; the rest of the function still runs so a partial prior attempt can + * complete. This also survives a classifier overwrite of the state row back + * to CONFLICTED (a re-classification under DEMOTED). * - **Epic-keyed marker on prior demote** — if the Epic already carries the * demote marker from a previous incident, skip the sub-issue reopen even if * the PR was un-drafted in the meantime (e.g. a human reviewed the @@ -593,9 +602,15 @@ export async function applyDemoteToWork( const pr = await deps.github.getPullRequest(repo, prNumber); if (!pr) return; - if (pr.isDraft) return; // already demoted — fully idempotent - await deps.github.convertPrToDraft(repo, prNumber); + // Skip only the draft-flip step on a partial-retry, not the rest of + // remediation — a prior attempt that crashed AFTER converting to draft but + // BEFORE the reopen/comment/enqueue/state-write must still be able to finish + // those steps on the next pass. Duplicates downstream are gated per-step + // (Epic marker, comment marker, enqueueEpic's own existing-workflow guard). + if (!pr.isDraft) { + await deps.github.convertPrToDraft(repo, prNumber); + } const marker = demoteMarker(epicNumber); const epicComments = await deps.github.listIssueComments(repo, epicNumber); @@ -868,6 +883,28 @@ function safeJsonParse(stdout: string): T | null { } } +/** + * Distinguish `gh`'s "this PR / branch doesn't exist" exits (which the gateway + * surfaces as `null` so the orchestrator can pass the PR for the pass) from + * transport/auth/rate-limit/syntax failures (which must throw so the per-PR + * try/catch increments `failed` and the operator sees the real error). The + * not-found shape is stable across `gh`'s outputs — both the GraphQL `Could + * not resolve to a …` phrasing and the REST `HTTP 404` phrasing appear in + * stderr. Anything else is a real failure. + * + * Deliberately narrow: we match only the two `gh`-known not-found prefixes, + * never a bare `"not found"` substring (which could appear inside a push- + * protection rejection or a secret-scanning notice and silently mask a real + * failure as a PR-missing no-op). + * + * Exported for unit tests; the predicate is the load-bearing seam between + * "return null" and "throw" across every gateway method. + */ +export function ghStderrIsNotFound(stderr: string): boolean { + const s = stderr.toLowerCase(); + return s.includes("could not resolve to a") || s.includes("http 404"); +} + /** * Production gateway backing {@link reconcileOpenPRs}. Composes with * `ghGitHub`'s comment ops at the daemon-wiring site; methods that overlap @@ -894,7 +931,10 @@ export const ghReconcilerGateway: Omit(r.stdout); }, async getPrHeadRef(repo, prNumber) { @@ -923,13 +966,19 @@ export const ghReconcilerGateway: Omit(r.stdout); }, async convertPrToDraft(repo, prNumber) { diff --git a/packages/dispatcher/test/pr-divergence.test.ts b/packages/dispatcher/test/pr-divergence.test.ts index 7c0a8945..8c2c3cd5 100644 --- a/packages/dispatcher/test/pr-divergence.test.ts +++ b/packages/dispatcher/test/pr-divergence.test.ts @@ -12,6 +12,7 @@ import { type ClosedSubIssue, type DivergenceGateway, getDivergenceState, + ghStderrIsNotFound, type MergeabilityView, parseEpicFromHeadRef, recordDivergenceState, @@ -295,7 +296,12 @@ describe("applyDemoteToWork", () => { }); }); - test("idempotent: a second call sees PR.isDraft and skips everything", async () => { + test("per-step idempotency: a second call skips draft-flip + reopen + comments via markers (but still re-enqueues)", async () => { + // After the first call lands, the second call must NOT pile on duplicates + // for the steps gated by GitHub state (draft flip, sub-issue reopen, both + // surfaces' escalation comments). Re-enqueue still fires on every pass — + // it's the recommender's "fresh divergence" nudge and is itself idempotent + // at the daemon (existing-workflow guard), per the function's contract. const spy = makeDemoteSpy(); const enqueues: Array<[string, number]> = []; const deps = { @@ -310,10 +316,45 @@ describe("applyDemoteToWork", () => { await applyDemoteToWork(deps, REPO, 99, ["a.txt"]); await applyDemoteToWork(deps, REPO, 99, ["a.txt"]); // second call + // Marker-gated steps fired exactly once across two calls. expect(spy.calls.convertPrToDraft.length).toBe(1); expect(spy.calls.reopenIssue.length).toBe(1); expect(spy.calls.postComment.length).toBe(2); - expect(enqueues.length).toBe(1); + // Re-enqueue fires every pass — docstring contract. + expect(enqueues.length).toBe(2); + }); + + test("partial-retry: prior attempt left the PR drafted but did not reopen / comment / enqueue — second pass completes remediation", async () => { + // Models a crash between `convertPrToDraft` and the next step on the first + // attempt: GitHub already sees the PR as a draft, but our spy's state shows + // no reopen, no comments, no enqueue, no row. The second pass must NOT + // short-circuit on `pr.isDraft` (the pre-fix behaviour) — it must finish + // every remaining remediation step. + const spy = makeDemoteSpy({ isDraft: true }); + const enqueues: Array<[string, number]> = []; + await applyDemoteToWork( + { + db, + github: spy.gateway, + enqueueEpic: async (r, e) => { + enqueues.push([r, e]); + }, + now: () => 1_700_000_200_000, + }, + REPO, + 99, + ["packages/x.ts"], + ); + + // The draft-flip step was skipped — the PR was already draft. + expect(spy.calls.convertPrToDraft).toEqual([]); + // …but every downstream step ran to completion. + expect(spy.calls.reopenIssue.length).toBe(1); + expect(spy.calls.reopenIssue[0]?.issueNumber).toBe(50); + expect(spy.calls.postComment.length).toBe(2); + expect(new Set(spy.calls.postComment.map((c) => c.issueNumber))).toEqual(new Set([99, 32])); + expect(enqueues).toEqual([[REPO, 32]]); + expect(getDivergenceState(db, REPO, 99)?.state).toBe("DEMOTED"); }); test("partial-retry safety: existing marker on PR skips the duplicate PR comment, still posts on Epic", async () => { @@ -443,3 +484,37 @@ describe("applyDemoteToWork", () => { expect(spy.calls.postComment).toEqual([]); }); }); + +describe("ghStderrIsNotFound", () => { + // The not-found shapes the production gateway's `getMergeability` / + // `getPrHeadRef` / `getPullRequest` / `getMainCommitSha` recognize as "PR or + // branch doesn't exist → null". Anything else must throw so the orchestrator's + // per-PR try/catch surfaces transport/auth/rate-limit failures as `failed++` + // instead of silently passing the PR through as UNKNOWN. + for (const stderr of [ + "Could not resolve to a PullRequest with the number 99.", + "Could not resolve to a Branch with the name 'main'.", + "HTTP 404: Not Found (https://api.github.com/...)", + "graphql: Could not resolve to a Repository", + ]) { + test(`recognizes not-found: ${JSON.stringify(stderr.slice(0, 40))}`, () => { + expect(ghStderrIsNotFound(stderr)).toBe(true); + }); + } + + for (const stderr of [ + "error connecting to api.github.com: dial tcp: connection refused", + "HTTP 401: Bad credentials", + "HTTP 403: API rate limit exceeded", + "HTTP 502: Bad Gateway", + "gh: command failed (oauth token expired)", + "could not deserialize response", // close-but-not — must NOT be treated as 404 + "remote: secret not found, push declined", // push-protection: NOT a 404 + "Not Found", // bare phrase alone — too ambiguous; require an `HTTP 404` or `Could not resolve` prefix + "", + ]) { + test(`treats non-404 failure as throw-worthy: ${JSON.stringify(stderr.slice(0, 40))}`, () => { + expect(ghStderrIsNotFound(stderr)).toBe(false); + }); + } +});