Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
e5fcf78
docs(adapters): implementation plan for the CodexAdapter Epic
thejustinwalsh May 26, 2026
67ece1c
feat(adapters): implement the CodexAdapter
thejustinwalsh May 26, 2026
d312004
feat(adapters): per-CLI adapter selection across implementer + recomm…
thejustinwalsh May 26, 2026
6142e96
test(adapters): prove the AgentAdapter abstraction holds across both …
thejustinwalsh May 26, 2026
420e199
fix(adapters): self-review hardening — enabled-aware dispatch + word-…
thejustinwalsh May 26, 2026
aae7c26
docs(adapters): record empty resume — re-parked on the live-run crite…
thejustinwalsh May 26, 2026
47e2bb5
docs(adapters): diagnose resume loop as a channel mismatch — re-park …
thejustinwalsh May 26, 2026
4330461
docs(adapters): correct resume-loop premise — Codex now installed; re…
thejustinwalsh May 26, 2026
e62f1c6
docs(reconcilers): plan the open-PR divergence reconciler (Epic #168)
thejustinwalsh May 28, 2026
a620ac0
feat(reconcilers): classify open-PR divergence against main (#169)
thejustinwalsh May 28, 2026
efa77e2
feat(reconcilers): rebase helper for an open Epic PR's worktree (#170)
thejustinwalsh May 28, 2026
0d0a264
feat(reconcilers): -X ours merge fallback for rebase-loop conflicts (…
thejustinwalsh May 28, 2026
1edb473
feat(reconcilers): applySuccess — force-push + announce + state→CLEAN…
thejustinwalsh May 28, 2026
db2fc05
feat(reconcilers): applyDemoteToWork — flip draft + reopen sub-issue …
thejustinwalsh May 28, 2026
143f5ea
feat(reconcilers): reconcileOpenPRs orchestrator + poller-tick wiring…
thejustinwalsh May 28, 2026
e50fc23
test(reconcilers): multi-PR orchestrator + onMergedTransition wiring …
thejustinwalsh May 28, 2026
0a5316f
fix(reconcilers): self-review hardening — distinguish skip vs fail, m…
thejustinwalsh May 28, 2026
ab77f8f
fix(reconcilers): move onMergedTransition dedup inside reconcileMerge…
thejustinwalsh May 28, 2026
58c5355
test(reconcilers): cover the merge-twin non-conflict throw path (symm…
thejustinwalsh May 28, 2026
730e6c4
fix(adapters): exact-key registry lookup — Map, not plain object
thejustinwalsh May 28, 2026
56dbd63
fix(dispatcher): daemon adapter gate honors config.enabled across all…
thejustinwalsh May 28, 2026
1629660
fix(poller): align POLLER_INTERVAL_MS with the CLAUDE.md 60s cadence …
thejustinwalsh May 28, 2026
2d9930e
fix(reconcilers): applyDemoteToWork per-step idempotency + gateway 40…
thejustinwalsh May 28, 2026
0cdc570
merge: integrate main's recent restructure into PR #175
thejustinwalsh May 29, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions packages/adapters/codex/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@
"main": "src/index.ts",
"dependencies": {
"@middle/core": "workspace:*"
},
"devDependencies": {
"smol-toml": "^1.6.1"
}
}
107 changes: 107 additions & 0 deletions packages/adapters/codex/src/classify.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";
import type {
BlockedSentinel,
HookPayload,
RateLimitDetection,
StopClassification,
} from "@middle/core";

/**
 * Codex's rate-limit signal. The spec's deliberately generous starting pattern,
 * to be tightened as Codex's real usage-limit messages are observed on live runs.
 *
 * Matched case-insensitively (`i` flag); any one alternative is enough:
 * - `rate.?limit` — "rate" and "limit" separated by at most one character
 *   ("rate limit", "rate-limit", "rate_limit", "ratelimit").
 * - `\b429\b` — the HTTP status, word-boundaried so it is not matched as an
 *   incidental substring. A transcript tail is full of line numbers, hashes,
 *   and byte counts ("line 4290", "commit 4291ab"), and a false `rate-limited`
 *   would halt a healthy agent.
 * - `too many requests` — the literal phrase.
 *
 * Unlike Claude's, the pattern carries no reset timestamp (Codex's format
 * hasn't surfaced one), so `resetAt` defaults to "unknown". The regex has no
 * `g`/`y` flag, so `.test()` keeps no state between calls.
 */
const RATE_LIMIT_RE = /rate.?limit|\b429\b|too many requests/i;

/**
 * Decide what a turn-end (`Stop`) hook means for the dispatcher.
 *
 * Checks run in strict priority order; the first hit wins:
 * 1. `opts.sentinelPresent` → `asked-question`, with the best-effort parsed
 *    contents of `.middle/blocked.json` (may be `null`).
 * 2. The transcript tail matches the Codex rate-limit pattern → `rate-limited`
 *    with `resetAt: "unknown"`.
 * 3. `.middle/done.json` exists → `done`.
 * 4. `.middle/failed.json` exists → `failed`, with its parsed reason.
 * 5. Otherwise → `bare-stop`.
 *
 * The sentinel handling matches the Claude adapter's: the `.middle/*.json`
 * files are written by the universal skill, not the CLI, so only the rate-limit
 * pattern is Codex-specific. Every sentinel path is rooted at
 * `<worktree>/.middle/` and never at `payload.cwd`, because the agent may have
 * `cd`'d into a subdirectory.
 *
 * @param opts.payload - The hook payload (not consulted by this classifier).
 * @param opts.transcriptPath - Transcript file whose tail is scanned for a rate-limit signal.
 * @param opts.sentinelPresent - Whether the caller already observed the blocked sentinel.
 * @param opts.worktree - Worktree root that anchors the `.middle/` directory.
 * @returns The classification for this stop.
 */
export function classifyStop(opts: {
  payload: HookPayload;
  transcriptPath: string;
  sentinelPresent: boolean;
  worktree: string;
}): StopClassification {
  const sentinelDir = join(opts.worktree, ".middle");
  const inSentinelDir = (file: string): string => join(sentinelDir, file);

  // A pending question outranks everything else, including a rate limit.
  if (opts.sentinelPresent) {
    const sentinelPath = inSentinelDir("blocked.json");
    const sentinel = readBlockedSentinel(sentinelPath);
    return { kind: "asked-question", sentinelPath, sentinel };
  }

  const transcriptTail = readTail(opts.transcriptPath);
  if (RATE_LIMIT_RE.test(transcriptTail)) {
    return { kind: "rate-limited", resetAt: "unknown" };
  }

  if (existsSync(inSentinelDir("done.json"))) {
    return { kind: "done" };
  }

  const failedPath = inSentinelDir("failed.json");
  return existsSync(failedPath)
    ? { kind: "failed", reason: readFailedReason(failedPath) }
    : { kind: "bare-stop" };
}

/**
 * Turn-end rate-limit probe. Applies the same Codex pattern to the transcript
 * tail as `classifyStop`, but independently of that function's priority order,
 * so the dispatcher can refresh `rate_limit_state` on every stop even when the
 * stop was classified as a higher-priority kind.
 *
 * @param opts.payload - The hook payload (not consulted by this detector).
 * @param opts.transcriptPath - Transcript file whose tail is scanned.
 * @returns A detection with `resetAt: "unknown"` and `source: "stop-hook"`,
 *   or `null` when the tail carries no rate-limit signal.
 */
export function detectRateLimit(opts: {
  payload: HookPayload;
  transcriptPath: string;
}): RateLimitDetection | null {
  const transcriptTail = readTail(opts.transcriptPath);
  return RATE_LIMIT_RE.test(transcriptTail)
    ? { resetAt: "unknown", source: "stop-hook" }
    : null;
}

/**
 * Return the last 8192 UTF-16 code units of the file at `path`, or the whole
 * file when it is shorter. Best-effort: any read failure (missing file,
 * permissions, ...) yields the empty string rather than throwing.
 */
function readTail(path: string): string {
  const tailLength = 8192;

  let contents: string;
  try {
    contents = readFileSync(path, "utf8");
  } catch {
    return "";
  }

  // Short files are returned whole; longer ones are cut down to their tail.
  if (contents.length <= tailLength) return contents;
  return contents.slice(-tailLength);
}

/**
 * Tolerantly load the `.middle/blocked.json` question sentinel.
 *
 * The file's mere presence is what classifies a Stop as `asked-question`; its
 * contents are best-effort. `null` is returned when the file is missing,
 * unreadable, not valid JSON, or has no non-empty string `question`.
 *
 * Optional fields are copied only when well-formed: `context` must be a string,
 * and `kind` is kept only when it is exactly `"complexity"`.
 *
 * @param path - Absolute path of the sentinel file.
 * @returns The parsed sentinel, or `null` when nothing usable could be read.
 */
function readBlockedSentinel(path: string): BlockedSentinel | null {
  try {
    const raw: unknown = JSON.parse(readFileSync(path, "utf8"));
    // Destructuring stays inside the `try`: a JSON `null` throws here and must
    // resolve to `null` like every other malformed payload.
    const { question, context, kind } = raw as Record<string, unknown>;

    if (typeof question !== "string" || question === "") return null;

    const sentinel: BlockedSentinel = { question };
    if (typeof context === "string") sentinel.context = context;
    if (kind === "complexity") sentinel.kind = "complexity";
    return sentinel;
  } catch {
    return null;
  }
}

/**
 * Read the failure reason from the `.middle/failed.json` sentinel.
 *
 * Best-effort: the generic `"agent reported failure"` is returned when the file
 * is missing, unreadable, not valid JSON, or has no usable `reason`. A `reason`
 * that is empty or whitespace-only counts as unusable, so a `failed`
 * classification never carries a blank explanation. This mirrors how
 * `readBlockedSentinel` rejects an empty `question`.
 *
 * @param path - Absolute path of the sentinel file.
 * @returns The reported reason verbatim, or the generic fallback.
 */
function readFailedReason(path: string): string {
  const fallback = "agent reported failure";
  try {
    const parsed = JSON.parse(readFileSync(path, "utf8")) as { reason?: unknown };
    // A JSON `null` payload throws on this property access and lands in `catch`.
    const reason = parsed.reason;
    return typeof reason === "string" && reason.trim().length > 0 ? reason : fallback;
  } catch {
    return fallback;
  }
}
87 changes: 87 additions & 0 deletions packages/adapters/codex/src/hooks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { chmod, mkdir } from "node:fs/promises";
import { dirname, join } from "node:path";
import type { InstallHookOpts, NormalizedEvent } from "@middle/core";
import { HOOK_SH, PR_READY_GATE_SH } from "@middle/core";

/**
 * Map each Codex hook event name to the normalized taxonomy. Names come from the
 * build spec's "Normalized event taxonomy" table ("Trigger (Codex)" column):
 * `startup`/`turn-start`/`command`/`turn-end`/`shutdown`, with the `command`
 * event distinguished into pre / success / failure. Codex has no equivalent of
 * Claude's `Notification` or `SubagentStop`, so `agent.notification` /
 * `agent.subagent-stopped` are not emitted.
 *
 * Two entries are load-bearing for dispatch: `startup → session.started`
 * (carries the rollout path, triggers launch→drive) and `turn-end →
 * agent.stopped` (the turn boundary `classifyStop` reacts to).
 *
 * Each tuple is `[codexEvent, normalized]`. `installHooks` walks the list in
 * order and writes one `[[hooks.<codexEvent>]]` table per entry, passing
 * `normalized` as the hook script's argument. The `"command"` entry also gets
 * the PR-ready gate as a second table, so that exact string is significant.
 */
const CODEX_EVENT_MAP: ReadonlyArray<[codexEvent: string, normalized: NormalizedEvent]> = [
["startup", "session.started"],
["turn-start", "turn.started"],
["command", "tool.pre"],
["command-success", "tool.post"],
["command-failure", "tool.failed"],
["turn-end", "agent.stopped"],
["shutdown", "session.ended"],
];

/**
 * Render `value` as a TOML basic (double-quoted) string.
 *
 * Backslash and double quote are escaped, and so is every control character a
 * TOML basic string may not contain raw (U+0000–U+0008, U+000A–U+001F, U+007F).
 * Without that, a value containing a newline, such as an unusual worktree path,
 * would produce a config file that fails to parse. Backspace, newline, form
 * feed and carriage return use TOML's short escapes; other control characters
 * use `\uXXXX`. Tab is legal inside a basic string and is left as-is.
 *
 * @param value - Arbitrary text to embed.
 * @returns The quoted, escaped TOML string literal.
 */
function tomlString(value: string): string {
  const escaped = value.replace(/[\\"\x00-\x08\x0A-\x1F\x7F]/g, (ch) => {
    switch (ch) {
      case "\\":
        return "\\\\";
      case '"':
        return '\\"';
      case "\b":
        return "\\b";
      case "\n":
        return "\\n";
      case "\f":
        return "\\f";
      case "\r":
        return "\\r";
      default:
        // Remaining control characters have no short form in TOML.
        return `\\u${ch.charCodeAt(0).toString(16).toUpperCase().padStart(4, "0")}`;
    }
  });
  return `"${escaped}"`;
}

/**
 * Install the complete Codex hook setup into a worktree.
 *
 * Three files are written:
 * - the universal `hook.sh` at `<worktree>/<hookScriptPath>`,
 * - the PR-ready gate script `pr-ready-gate.sh` next to it (both are
 *   single-sourced from `@middle/core` and shared verbatim with the Claude
 *   adapter), and
 * - `<worktree>/.codex/config.toml`, which sets the auto-mode policy and
 *   registers every taxonomy event in a `[hooks]` block.
 *
 * Auto mode lives in config, not the launch command (per spec):
 * `approval_policy = "never"` + `sandbox = "workspace-write"` let the session
 * run unattended.
 *
 * Every hook command runs the script **through `sh`** with an **absolute**,
 * double-quoted path, for the same reasons as the Claude adapter. `sh` executes
 * the file regardless of its exec bit, so a missing bit can't wedge the
 * blocking command-gate. The absolute path resolves from whatever subdirectory
 * the agent has `cd`'d into. The PR-ready gate is a second hook on the
 * `command` (pre) event, after the heartbeat, mirroring Claude's two
 * PreToolUse hooks; the gate script self-filters to `gh pr ready`.
 *
 * The exact `[hooks]` schema is a live-run tightening point (see
 * `planning/issues/60/decisions.md`); the array-of-tables shape is the baseline.
 *
 * @param opts - Worktree root and the hook script's worktree-relative path.
 */
export async function installHooks(opts: InstallHookOpts): Promise<void> {
  const hookScript = join(opts.worktree, opts.hookScriptPath);
  const scriptsDir = dirname(hookScript);
  const gateScript = join(scriptsDir, "pr-ready-gate.sh");

  // Scripts first: the heartbeat hook, then the gate beside it.
  await mkdir(scriptsDir, { recursive: true });
  await Bun.write(hookScript, HOOK_SH);
  await chmod(hookScript, 0o755);
  await Bun.write(gateScript, PR_READY_GATE_SH);
  await chmod(gateScript, 0o755);

  const preamble: string[] = [
    "# middle-managed Codex configuration for headless dispatch.",
    "# Auto mode: no approval prompts, workspace-write sandbox.",
    'approval_policy = "never"',
    'sandbox = "workspace-write"',
    "",
  ];

  const hookTables = CODEX_EVENT_MAP.flatMap(([codexEvent, normalized]): string[] => {
    const heartbeat = [
      `[[hooks.${codexEvent}]]`,
      `command = ${tomlString(`sh "${hookScript}" ${normalized}`)}`,
      "",
    ];
    if (codexEvent !== "command") return heartbeat;
    // The blocking PR-ready gate rides the pre-command event, second so the
    // heartbeat stays first (the gate self-filters to `gh pr ready`).
    return [
      ...heartbeat,
      `[[hooks.${codexEvent}]]`,
      `command = ${tomlString(`sh "${gateScript}"`)}`,
      "",
    ];
  });

  const configDir = join(opts.worktree, ".codex");
  await mkdir(configDir, { recursive: true });
  const configText = `${[...preamble, ...hookTables].join("\n")}\n`;
  await Bun.write(join(configDir, "config.toml"), configText);
}
109 changes: 103 additions & 6 deletions packages/adapters/codex/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,115 @@
* @packageDocumentation
* @module @middle/adapter-codex
*
* The `AgentAdapter` implementation for the Codex CLI. Stub — source lands in
* build-spec Phase 10.
* The `AgentAdapter` implementation for the Codex CLI: launch command, auto-mode
* confirmation, transcript reads, stop classification, and rate-limit detection.
* Mirrors the Claude adapter; the per-CLI differences (config-driven auto mode,
* the `.codex/config.toml` `[hooks]` block, the rollout transcript format, and
* the rate-limit pattern) are isolated here behind the shared interface.
*
* Public surface:
* - None yet (stub).
* - `codexAdapter` — the `AgentAdapter` the dispatcher consumes
* - `detectNeedsLogin` — pane probe for a not-authenticated session
*
* Where things live:
* - `index.ts` — placeholder export until Phase 10.
* - `index.ts` — the adapter object + auto-mode confirmation (`enterAutoMode`)
* - `classify.ts` — stop classification + rate-limit detection
* - `hooks.ts` — `.codex/config.toml` hook installation
* - `prompt.ts` — the launch prompt text
* - `transcript.ts` — rollout-path resolution + state reads
*
* Gotchas:
* - None.
* - Auto mode is config-driven (`approval_policy = "never"` in `.codex/config.toml`),
* not a launch flag — so `enterAutoMode` sends no keystrokes; it only fails fast
* on a not-logged-in pane. Codex's observable bits (hook names, rollout format,
* rate-limit message, force-include syntax) are start-generous baselines pending
* live observation — see `planning/issues/60/decisions.md`.
*
* claude-md: false
*/
export {};
import type { AgentAdapter } from "@middle/core";
import { capturePane } from "@middle/core";
import { classifyStop, detectRateLimit } from "./classify.ts";
import { installHooks } from "./hooks.ts";
import { buildPromptText } from "./prompt.ts";
import { readTranscriptState, resolveTranscriptPath } from "./transcript.ts";

/**
 * Case-insensitive pattern for the messages a not-authenticated Codex prints:
 * a "please (run) (codex) login / sign in" request, a "not logged in /
 * authenticated / signed in" notice, a "set OPENAI_API_KEY" hint, or an
 * "invalid api key / credentials" error.
 */
const NEEDS_LOGIN_RE =
  /please\s+(?:run\s+)?(?:codex\s+)?(?:login|sign[ -]?in)|not\s+(?:logged\s+in|authenticated|signed\s+in)|set\s+openai_api_key|invalid\s+(?:api\s+key|credentials)/i;

/**
 * Report whether captured pane text contains a "you need to log in" message.
 *
 * @param paneContent - Text captured from the session's tmux pane.
 * @returns `true` when any needs-login phrase appears anywhere in the text.
 */
export function detectNeedsLogin(paneContent: string): boolean {
  return paneContent.search(NEEDS_LOGIN_RE) !== -1;
}

/**
 * Boot-detection window in milliseconds (90 s): the longest `enterAutoMode`
 * keeps polling the pane before giving up. Covers Codex's boot before the
 * startup hook fires.
 */
const BOOT_DETECT_TIMEOUT_MS = 90_000;
/** Pause in milliseconds between successive pane captures in `enterAutoMode`. */
const BOOT_POLL_INTERVAL_MS = 200;

/**
 * Confirm a freshly launched Codex session is ready to run unattended.
 *
 * Claude has to dismiss folder-trust and bypass dialogs. Codex's auto mode
 * comes entirely from `.codex/config.toml` (`approval_policy = "never"`,
 * `sandbox = "workspace-write"`), so there are no dialogs to answer and no
 * keystrokes are sent. The only job here is to fail fast on a not-authenticated
 * pane, so a dispatch against an unconfigured Codex gives a useful error
 * instead of hanging on the startup-hook timeout.
 *
 * The pane is polled until one of these happens:
 * - capture fails (session gone) → log and return;
 * - the pane shows a login prompt → throw;
 * - the pane has any non-whitespace content → return (Codex has booted);
 * - the boot window elapses → log and return.
 *
 * NOTE (tightening point): if a live Codex turns out to show a first-run trust /
 * onboarding prompt, the keystroke handling for it is added here.
 *
 * @param opts.sessionName - tmux session to probe.
 * @throws Error when the pane shows that Codex is not authenticated.
 */
async function enterAutoMode(opts: { sessionName: string }): Promise<void> {
  const label = `codex:${opts.sessionName}`;
  const giveUpAt = Date.now() + BOOT_DETECT_TIMEOUT_MS;

  for (let now = Date.now(); now < giveUpAt; now = Date.now()) {
    const snapshot = await capturePane(opts.sessionName);

    if (snapshot === null) {
      console.error(`[${label}] enterAutoMode: capture-pane failed (session gone) — stopping`);
      return;
    }

    if (detectNeedsLogin(snapshot)) {
      throw new Error(
        "codex is not authenticated — run `codex login` (or set OPENAI_API_KEY) in a normal terminal, then retry the dispatch",
      );
    }

    // Anything visible that isn't a login prompt means Codex reached its own
    // prompt; config already put it in auto mode, so nothing needs sending.
    const hasBooted = snapshot.trim().length > 0;
    if (hasBooted) return;

    await Bun.sleep(BOOT_POLL_INTERVAL_MS);
  }

  console.error(`[${label}] enterAutoMode: boot window (${BOOT_DETECT_TIMEOUT_MS}ms) elapsed`);
}

/**
 * The {@link AgentAdapter} for the Codex CLI, as consumed by the dispatcher.
 *
 * It launches Codex interactively (`codex`, no `exec`), with auto mode and the
 * sandbox supplied by `.codex/config.toml` rather than flags. It waits for
 * `session.started` as the ready signal and delegates hook installation,
 * prompt text, readiness confirmation, transcript reads, stop classification
 * and rate-limit detection to the sibling modules.
 */
export const codexAdapter: AgentAdapter = {
  name: "codex",
  readyEvent: "session.started",
  installHooks,
  buildLaunchCommand: (opts) => {
    // Interactive — no `exec`, no prompt. approval_policy/sandbox live in
    // .codex/config.toml (written by installHooks), not the command line. Env is
    // injected by tmux at spawn time; caller overrides are spread last so they
    // win over the session defaults.
    const env = {
      MIDDLE_SESSION: opts.sessionName,
      MIDDLE_SESSION_TOKEN: opts.sessionToken,
      ...opts.envOverrides,
    };
    return { argv: ["codex"], env };
  },
  buildPromptText,
  enterAutoMode,
  resolveTranscriptPath,
  readTranscriptState,
  classifyStop,
  detectRateLimit,
};
33 changes: 33 additions & 0 deletions packages/adapters/codex/src/prompt.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import type { BuildPromptOpts } from "@middle/core";

/**
 * Produce the single line of text that `send-keys` types into a Codex tmux
 * session to start or continue the agent.
 *
 * The contract mirrors the Claude adapter's. The dispatch model is
 * adapter-agnostic: one one-line submission invokes the skill and
 * `@`-references the on-disk brief. The skills are mirrored into
 * `.codex/skills/` at bootstrap, so the same slash-command invocation resolves.
 *
 * - `initial`: a slash command that force-invokes the implementing skill on the
 *   Epic; the skill reads `.middle/prompt.md` itself.
 * - `resume` / `answer`: a short instruction plus an `@`-reference that
 *   force-includes the on-disk brief.
 * - `recommender` / `docs`: force-invokes the repo-level skill with the
 *   assembled context `@`-referenced.
 *
 * NOTE (tightening point): Codex's exact skill-invocation + force-include syntax
 * is verified on a live run (see `planning/issues/60/decisions.md`). The `@`-path
 * reference and slash-command form are the parity baseline.
 *
 * @param opts - Prompt kind plus the Epic number or prompt-file path it needs.
 * @returns The literal text to send to the session.
 */
export function buildPromptText(opts: BuildPromptOpts): string {
  switch (opts.kind) {
    case "initial": {
      const epicRef = `#${opts.epicNumber}`;
      return `/implementing-github-issues implement ${epicRef}`;
    }
    case "resume": {
      const briefRef = `@${opts.promptFile}`;
      return `Resuming this workstream — re-read the linked context and continue. ${briefRef}`;
    }
    case "answer": {
      const briefRef = `@${opts.promptFile}`;
      return `A human answered your open question — read the answer and continue. ${briefRef}`;
    }
    case "recommender": {
      const contextRef = `@${opts.promptFile}`;
      return `/recommending-github-issues ${contextRef}`;
    }
    case "docs": {
      const contextRef = `@${opts.promptFile}`;
      return `/documenting-the-repo ${contextRef}`;
    }
  }
}
Loading