diff --git a/README.md b/README.md index 7ac742f305..1993830b4e 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,14 @@ --- +> **This is `Ddell12/archon-symphony`, a fork of [`coleam00/Archon`](https://github.com/coleam00/Archon).** +> +> The fork adds **Symphony** — an autonomous tracker-driven dispatcher. It polls Linear and GitHub for issues matching a configured state, claims dispatch slots, and runs Archon workflows per issue. A `/symphony` kanban in the web UI shows every dispatch with a deep link into the workflow-run drill-through. +> +> See [`packages/symphony/README.md`](packages/symphony/README.md) for setup. Everything below is upstream Archon; the fork's value-add lives on top. + +--- + Archon is a workflow engine for AI coding agents. Define your development processes as YAML workflows - planning, implementation, validation, code review, PR creation - and run them reliably across all your projects. Like what Dockerfiles did for infrastructure and GitHub Actions did for CI/CD - Archon does for AI coding workflows. Think n8n, but for software development. diff --git a/bun.lock b/bun.lock index 7f15ead093..1c3f2b7f64 100644 --- a/bun.lock +++ b/bun.lock @@ -23,7 +23,7 @@ }, "packages/adapters": { "name": "@archon/adapters", - "version": "0.3.9", + "version": "0.3.10", "dependencies": { "@archon/core": "workspace:*", "@archon/git": "workspace:*", @@ -41,7 +41,7 @@ }, "packages/cli": { "name": "@archon/cli", - "version": "0.3.9", + "version": "0.3.10", "bin": { "archon": "./src/cli.ts", }, @@ -63,7 +63,7 @@ }, "packages/core": { "name": "@archon/core", - "version": "0.3.9", + "version": "0.3.10", "dependencies": { "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", @@ -83,7 +83,7 @@ }, "packages/docs-web": { "name": "@archon/docs-web", - "version": "0.3.9", + "version": "0.3.10", "dependencies": { "@astrojs/starlight": "^0.38.0", "astro": "^6.1.0", @@ -92,7 +92,7 @@ }, "packages/git": { "name": "@archon/git", - "version": "0.3.9", + "version": "0.3.10", "dependencies": { "@archon/paths": "workspace:*", }, @@ -102,7 +102,7 @@ }, "packages/isolation": { "name": "@archon/isolation", - "version": "0.3.9", + "version": "0.3.10", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", @@ -113,7 +113,7 @@ }, "packages/paths": { "name": "@archon/paths", - "version": "0.3.9", + "version": "0.3.10", "dependencies": { "dotenv": "^17", "pino": "^9", @@ -126,7 +126,7 @@ }, "packages/providers": { "name": "@archon/providers", - "version": "0.3.9", + "version": "0.3.10", "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.2.121", "@archon/paths": "workspace:*", @@ -144,13 +144,14 @@ }, "packages/server": { "name": "@archon/server", - "version": "0.3.9", + "version": "0.3.10", "dependencies": { "@archon/adapters": "workspace:*", "@archon/core": "workspace:*", "@archon/git": "workspace:*", "@archon/paths": "workspace:*", "@archon/providers": "workspace:*", + "@archon/symphony": "workspace:*", "@archon/workflows": "workspace:*", "@hono/zod-openapi": "^0.19.6", "dotenv": "^17.2.3", @@ -161,9 +162,23 @@ "@types/node": "^22.0.0", }, }, + "packages/symphony": { + "name": "@archon/symphony", + "version": "0.3.10", + "dependencies": { + "@archon/core": "workspace:*", + "@archon/paths": "workspace:*", + "@archon/workflows": "workspace:*", + "@octokit/rest": "^22.0.0", + "graphql-request": "^7.2.0", + }, + "peerDependencies": { + "typescript": "^5.0.0", + }, + }, "packages/web": { "name": "@archon/web", - "version": "0.3.9", + "version": "0.3.10", "dependencies": 
{ "@dagrejs/dagre": "^2.0.4", "@radix-ui/react-alert-dialog": "^1.1.15", @@ -215,7 +230,7 @@ }, "packages/workflows": { "name": "@archon/workflows", - "version": "0.3.9", + "version": "0.3.10", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", @@ -277,6 +292,8 @@ "@archon/server": ["@archon/server@workspace:packages/server"], + "@archon/symphony": ["@archon/symphony@workspace:packages/symphony"], + "@archon/web": ["@archon/web@workspace:packages/web"], "@archon/workflows": ["@archon/workflows@workspace:packages/workflows"], @@ -549,6 +566,8 @@ "@grammyjs/types": ["@grammyjs/types@3.26.0", "", {}, "sha512-jlnyfxfev/2o68HlvAGRocAXgdPPX5QabG7jZlbqC2r9DZyWBfzTlg+nu3O3Fy4EhgLWu28hZ/8wr7DsNamP9A=="], + "@graphql-typed-document-node/core": ["@graphql-typed-document-node/core@3.2.0", "", { "peerDependencies": { "graphql": "^0.8.0 || ^0.9.0 || ^0.10.0 || ^0.11.0 || ^0.12.0 || ^0.13.0 || ^14.0.0 || ^15.0.0 || ^16.0.0 || ^17.0.0" } }, "sha512-mB9oAsNCm9aM3/SOv4YtBMqZbYj10R7dkq8byBqxGY/ncFwhf2oQzMV+LCRlWoDSEBJ3COiR1yeDvMtsoOsuFQ=="], + "@hono/node-server": ["@hono/node-server@1.19.11", "", { "peerDependencies": { "hono": "^4" } }, "sha512-dr8/3zEaB+p0D2n/IUrlPF1HZm586qgJNXK1a9fhg/PzdtkK7Ksd5l312tJX2yBuALqDYBlG20QEbayqPyxn+g=="], "@hono/zod-openapi": ["@hono/zod-openapi@0.19.10", "", { "dependencies": { "@asteasolutions/zod-to-openapi": "^7.3.0", "@hono/zod-validator": "^0.7.1", "openapi3-ts": "^4.5.0" }, "peerDependencies": { "hono": ">=4.3.6", "zod": ">=3.0.0" } }, "sha512-dpoS6DenvoJyvxtQ7Kd633FRZ/Qf74+4+o9s+zZI8pEqnbjdF/DtxIib08WDpCaWabMEJOL5TXpMgNEZvb7hpA=="], @@ -1739,6 +1758,8 @@ "graphql": ["graphql@16.13.1", "", {}, "sha512-gGgrVCoDKlIZ8fIqXBBb0pPKqDgki0Z/FSKNiQzSGj2uEYHr1tq5wmBegGwJx6QB5S5cM0khSBpi/JFHMCvsmQ=="], + "graphql-request": ["graphql-request@7.4.0", "", { "dependencies": { "@graphql-typed-document-node/core": "^3.2.0" }, "peerDependencies": { "graphql": "14 - 16" } }, "sha512-xfr+zFb/QYbs4l4ty0dltqiXIp07U6sl+tOKAb0t50/EnQek6CVVBLjETXi+FghElytvgaAWtIOt3EV7zLzIAQ=="], + "h3": ["h3@1.15.11", "", { "dependencies": { "cookie-es": "^1.2.3", "crossws": "^0.3.5", "defu": "^6.1.6", "destr": "^2.0.5", "iron-webcrypto": "^1.2.1", "node-mock-http": "^1.0.4", "radix3": "^1.1.2", "ufo": "^1.6.3", "uncrypto": "^0.1.3" } }, "sha512-L3THSe2MPeBwgIZVSH5zLdBBU90TOxarvhK9d04IDY2AmVS8j2Jz2LIWtwsGOU3lu2I5jCN7FNvVfY2+XyF+mg=="], "has-flag": ["has-flag@4.0.0", "", {}, "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ=="], diff --git a/docs/symphoney-legacy/CLAUDE.md b/docs/symphoney-legacy/CLAUDE.md new file mode 100644 index 0000000000..249867fc5e --- /dev/null +++ b/docs/symphoney-legacy/CLAUDE.md @@ -0,0 +1,128 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## What this repo is + +A TypeScript implementation of the [Symphony Service Specification](https://github.com/openai/symphony/blob/main/SPEC.md) — a long-running daemon that polls a Linear-compatible tracker, creates per-issue workspaces, and runs coding-agent sessions against them. Targets **REQUIRED conformance + the OPTIONAL HTTP API**; the SSH worker extension is intentionally out of scope. `SPEC.md` (in repo root) is the source of truth — line references in code comments (e.g. `SPEC.md:1808-1862`) point at the relevant clause. + +`PARITY_REPORT.md` documents the gap between this build and the official OpenAI Symphony reference; read it before adding features. 
+ +## Commands + +```sh +pnpm install +pnpm dev # tsx, no build; uses ./WORKFLOW.md +pnpm dev path/to/WORKFLOW.md +pnpm dev --port 4000 # also start dashboard at http://127.0.0.1:4000/ +pnpm build # emit dist/ +pnpm start # run built artifact +pnpm typecheck # tsc --noEmit +pnpm test # vitest run (unit + integration) +pnpm test:watch +pnpm exec vitest run test/unit/orchestrator-dispatch.test.ts # single file +pnpm exec vitest run -t "dispatches" # by test name +pnpm exec tsx scripts/smoke-claude.ts # real-SDK Claude smoke +pnpm exec tsx scripts/smoke-linear-graphql.ts # Linear API smoke + +# Web UI (Next 16 kanban — pnpm workspace at web/) +pnpm web:dev # next dev on :3000 +pnpm web:build # next build → web/out/ +pnpm web:typecheck +pnpm dev:all # parallel: daemon + web dev +pnpm build:all # daemon dist/ + web out/ +``` + +Both `dev` and `start` auto-load `.env` via Node's `--env-file-if-exists` flag. `LINEAR_API_KEY` lives there (gitignored). Requires Node ≥22; the CLI binary is `bin/symphony` (loads `dist/src/index.js`, so build first when invoking via `npx symphony`). + +## Web UI + +Forked from `cursor/cookbook/sdk/agent-kanban` (Next.js 16 + React 19 + Tailwind 4 + shadcn + Base UI + Phosphor). Lives in `web/` as a separate pnpm workspace package named `@symphony/web`. Talks to the daemon over the same HTTP API documented in `src/server/http.ts`. + +- **Dev (two processes):** first time, `cp web/.env.local.example web/.env.local`. Start the daemon with `pnpm dev --port 4000`, then `pnpm web:dev` (override the port with `PORT=3001 pnpm web:dev`). The Next dev server proxies `/api/*` → `http://127.0.0.1:4000/api/*` (see `web/next.config.ts`), so the kanban hits the daemon **same-origin** through the dev server — no CORS, and it doesn't matter whether you load via `localhost:3000` or `127.0.0.1:3000`. Override the proxy target with `SYMPHONEY_DAEMON_URL` if the daemon runs elsewhere. +- **Prod (single process):** `pnpm build:all` produces `web/out/` (Next static export). The daemon's `src/service.ts:resolveWebRoot` looks for `web/out` next to the running source/dist, and when found mounts it at `/*` in `src/server/http.ts` via `serveStatic`. Falls back to the legacy `src/server/dashboard.ts` HTML when `web/out` is missing. +- **Endpoints the kanban consumes:** `GET /api/v1/state`, `GET /api/v1/issues?states=...`, `GET /api/v1/repositories`, `POST /api/v1/refresh`, `POST /api/v1/dispatch`. The `/dispatch` route is backed by `Orchestrator.requestImmediateDispatch` which still respects slot caps, blockers, claimed/running de-dupe, and the active-state requirement. +- **Polling, not SSE.** `web/src/lib/symphony/use-kanban.ts` polls every 5s with visibility pause. The orchestrator has an internal `onObserve` observer hook (`src/orchestrator/orchestrator.ts:176`) that's the natural future SSE wiring point — defer until Wave 2.3. +- **Group-by toggle:** `web/src/lib/symphony/group.ts` exposes `groupOptions` for `lifecycle` (Symphoney runtime), `status` (Linear state), `repository`. Repository groups collapse to one column until per-repo workflows ship in Wave 0; sourced from optional `tracker.repository` config in `WORKFLOW.md`. +- **Security note:** the dispatch endpoint is unauthenticated. Until Wave 2.3 (Cloudflare Tunnel + Cloudflare Access) lands, **bind the daemon to `127.0.0.1` only** — never expose it on a LAN, public IP, or Tunnel. Optional stop-gap: gate `/api/v1/dispatch` behind a `SYMPHONY_DISPATCH_TOKEN` shared-secret header. 
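  A minimal sketch of that stop-gap (the header name and this exact wiring are assumptions, not something `src/server/http.ts` has today), using a Hono middleware like the one the real app could register:

  ```ts
  import { Hono } from "hono";

  const app = new Hono(); // in practice, the existing app from src/server/http.ts
  const dispatchToken = process.env.SYMPHONY_DISPATCH_TOKEN;

  // Shared-secret gate for the dispatch route; a no-op when no token is configured.
  app.use("/api/v1/dispatch", async (c, next) => {
    if (!dispatchToken) return next(); // gate disabled
    if (c.req.header("x-symphony-dispatch-token") !== dispatchToken) {
      return c.json({ error: "unauthorized" }, 401);
    }
    await next();
  });
  ```

  The kanban would then need to send the same header when it calls `POST /api/v1/dispatch`; everything else stays loopback-only as described above.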
+- **Workspace plumbing gotcha:** Next 16 + Turbopack misdetects the project root in pnpm workspaces. `web/next.config.ts` pins `turbopack.root` and `outputFileTracingRoot` to the workspace root (parent of `web/`) — see the comment in that file before changing it. + +## Architecture + +The service is built around a single **Orchestrator** that owns all mutable state and is driven by config snapshots from a hot-reloading workflow file. + +### Boot sequence (`src/index.ts` → `src/service.ts`) + +1. CLI parses `[workflowPath] [--port] [--log-level]`. +2. `startService` resolves the workflow, starts a `chokidar` watcher, validates the initial snapshot, builds the tracker / workspace manager / agent client, constructs the `Orchestrator`, runs `startupCleanup()`, calls `start()`, and conditionally starts the HTTP server. +3. SIGINT/SIGTERM trigger an awaited `service.stop()` that aborts running workers, closes the watcher, and shuts down the HTTP server. + +### Prod vs dev checkouts (Wave 0.5) + +Two clones of `Ddell12/symphoney-codex`, each with a distinct role: + +- **`~/symphony-dev/symphoney-codex`** — owns agent worktrees. `WORKFLOW.md`'s `after_create` runs `git worktree add` against this checkout, so per-issue worktrees branched as `sym/` live under `~/symphony_workspaces/` and share the dev repo's object database. Override the path with the `SYMPHONY_DEV_REPO` env var (used in tests). +- **`~/symphony-prod/symphoney-codex`** — runs the daemon (`pnpm start` or the launchd plist from Wave 2.2). Update only via explicit `git pull && pnpm build && launchctl kickstart`. Never run the daemon from `~/symphony-dev/`: agent worktrees mutate that checkout's index/working trees. + +`safety.ts` only constrains workspace paths; it isn't an OS sandbox. If you need a hard guarantee, run agent workers under a separate OS user. + +### Config snapshot model — *the load-bearing pattern* + +`WORKFLOW.md` is Markdown with optional YAML front matter (config) + a Liquid prompt template body. `src/workflow/parse.ts` splits it; `src/config/snapshot.ts:buildSnapshot` produces a fully-resolved, immutable `ConfigSnapshot`. The `chokidar` watcher (`src/workflow/watch.ts`) rebuilds the snapshot on file change and exposes `current()` to consumers. + +**Critical rule:** every consumer (orchestrator, tracker, workspace manager, agent) reads `getSnapshot()` *per call*, never caches the snapshot reference. This keeps reload semantics correct. `service.ts` builds a `trackerProxy` that re-resolves the tracker on each method call for exactly this reason — copy that pattern when wiring new dependencies. + +`snapshot.agent.turn_timeout_ms` and `stall_timeout_ms` are derived per-backend (codex vs claude) at build time, so orchestrator code reads `snap.agent.*` and stays backend-agnostic. + +### Orchestrator (`src/orchestrator/`) + +Single class, single tick loop: + +- `runTick()` — `reconcileRunningIssues` → validate dispatch config → `tracker.fetchCandidateIssues` → `sortForDispatch` → `eligibilityForDispatch` → `dispatchIssue` for as many as fit in `availableGlobalSlots` / per-state slots → `notifyObservers` → reschedule. +- `dispatchIssue` claims the issue, builds a `RunningEntry`, and spawns `runWorker` as a detached promise; the promise's resolution/rejection routes into `onWorkerExit` which schedules retries via `state.retry_attempts`. +- `runWorker` creates the workspace, runs `before_run`, calls `agent.startSession`, then loops turns. 
Turn 1 renders the Liquid prompt; **turns 2..N send only `snap.agent.continuation_prompt`** (per `SPEC.md:633-634` — see `PARITY_REPORT.md` §2). After every turn it refreshes issue state and breaks if no longer active or `max_turns` hit. +- `reconcileRunningIssues` does stall detection (`stall_timeout_ms` from last codex event) AND tracker-state reconciliation (terminal → abort + remove workspace; non-active → abort). +- All timers are injectable (`scheduleTimeout` / `cancelTimeout` / `now` deps) so tests can drive the clock — see `test/integration/orchestrator.test.ts`. + +`src/orchestrator/state.ts` defines `OrchestratorState` (running/claimed/completed sets, retry_attempts map, codex_totals, codex_rate_limits). `dispatch.ts` holds the slot-accounting and eligibility predicates. `retry.ts` computes backoff by `DelayKind` ("continuation" | "failure"). + +### Agent backends (`src/agent/`) + +Two implementations behind a common `AgentClient` interface (`client.ts`): + +- **`StdioCodexClient`** (`stdio-client.ts`) — drives the `codex app-server` CLI subprocess over stdio JSON-RPC. Default backend. +- **`ClaudeAgentClient`** (`claude-client.ts`) — uses `@anthropic-ai/claude-agent-sdk` (^0.2.122). Two auth paths: subscription/OAuth (`force_subscription_auth: true`, requires `claude login`) or API key (`ANTHROPIC_API_KEY`). + +`factory.ts:createAgentClient` picks the backend from `snapshot.agent.backend` and pre-warms the Claude SDK via `startup()` (≈20× cold-start latency reduction). The orchestrator never branches on backend kind — it only consumes `AgentClient` / `AgentSession` / `AgentEvent`. + +`AgentEvent` (`events.ts`) is the unified shape: spec-listed event names (`session_started`, `turn_completed`, etc.) flow through `applyAgentEvent` which updates token deltas (monotonic; ignores out-of-order decreases) and emits structured `agent_event` pino lines for the events in `LOGGED_AGENT_EVENTS`. + +`linear-graphql-tool.ts` is the optional client-side tool extension (`SPEC.md:1056-1087`) — gives the agent a way to transition the issue out of an active state so the worker loop terminates before `max_turns`. Without it, the loop runs to `max_turns` (see `PARITY_REPORT.md` §1). + +### Tracker (`src/tracker/`) + +`LinearTracker` issues GraphQL via `graphql-request` against the Linear API. Three required methods: `fetchCandidateIssues`, `fetchIssueStatesByIds`, `fetchIssuesByStates`. `normalize.ts` converts raw Linear shapes to the spec `Issue` shape. The Linear `Project.team` → `teams` connection quirk is handled there. + +### HTTP API (`src/server/http.ts`) + +Hono app served via `@hono/node-server`. Routes: + +- `GET /` → minimal HTML dashboard (`dashboard.ts`). +- `GET /api/v1/state` → orchestrator snapshot. +- `GET /api/v1/` → per-issue running/retry detail, `404` if unknown. +- `POST /api/v1/refresh` → calls `orchestrator.requestRefresh()` (coalesces). + +### Workspace manager (`src/workspace/`) + +`createForIssue(identifier)` creates `//`. `safety.ts` enforces that the resolved path stays under the configured root (defends against `../`). Hooks (`after_create`, `before_run`, `after_run`, `before_remove`) are executed via `runHook` with a configurable `timeout_ms`. + +## Tests + +Vitest, `pool: "forks"`, 15s timeout. `test/unit/` covers parsers, normalizers, dispatch math, hook safety. 
`test/integration/` exercises the orchestrator with `test/helpers/fake-tracker.ts` + a fake agent client and asserts retry behavior, reload behavior, the HTTP API, and the Claude backend wiring with a mocked SDK. + +## Conventions + +- ESM only (`"type": "module"`, `module: "NodeNext"`). All imports use the `.js` extension even for `.ts` files. `verbatimModuleSyntax` is off but `isolatedModules` is on. +- `noUncheckedIndexedAccess: true` — array/record indexing returns `T | undefined`. Many existing files lean on this; preserve the pattern. +- Logging is `pino` with structured fields; child loggers carry `issue_id` / `issue_identifier` context. Spec-listed agent events get explicit `agent_event` log lines (see `LOGGED_AGENT_EVENTS` in `orchestrator.ts`). +- Reference `SPEC.md` line numbers in comments when implementing spec-driven behavior; don't restate the spec, point at it. diff --git a/docs/symphoney-legacy/PARITY_REPORT.md b/docs/symphoney-legacy/PARITY_REPORT.md new file mode 100644 index 0000000000..135ccb8a5f --- /dev/null +++ b/docs/symphoney-legacy/PARITY_REPORT.md @@ -0,0 +1,133 @@ +# Symphoney-codex parity report + +How this build compares to the official OpenAI Symphony reference, and what it would take to close the gap. Every claim below is grounded in `SPEC.md` (line references inline) or in the implementation under `src/`. + +## TL;DR + +The orchestrator core is **spec-conformant**. Workers loop, retry, reconcile, emit events, and serve the HTTP API exactly as the spec prescribes. The gap that makes day-to-day runs feel broken — the 20-turn loop you saw — is **a missing extension, not a missing fix**. The spec defines an opt-in mechanism (`linear_graphql` client-side tool) that lets the agent move the issue out of an active state when it's done. Without it, the worker has no way to know "work is complete" and must run until `agent.max_turns`. The official Symphony ships with that extension wired up; symphoney-codex does not. + +## Root cause: the 20-turn loop + +**What you observed (APP-267):** worker wrote `hello.txt` on turn 1, then ran 19 more turns, hit `max_turns: 20`, exited normally, and was about to be dispatched again. + +**Why the spec says this is correct:** `SPEC.md:1808-1862` describes `run_agent_attempt`. After every successful turn, the worker: +1. Calls `tracker.fetch_issue_states_by_ids` to refresh the issue (`SPEC.md:1843`) +2. Breaks if the state is no longer in `tracker.active_states` (`SPEC.md:1851-1852`) +3. Otherwise breaks at `max_turns` + +The implementation matches: `src/orchestrator/orchestrator.ts:396-409` runs the same refresh/break logic. + +**What the spec leaves to extensions:** `SPEC.md:38-42` declares Symphony "a scheduler/runner and tracker reader" and explicitly delegates ticket writes to the agent: *"Ticket writes (state transitions, comments, PR links) are typically performed by the coding agent using tools available in the workflow/runtime environment."* The standardized mechanism is the optional `linear_graphql` client-side tool extension (`SPEC.md:1047-1087`). With it, the agent can run a Linear `issueUpdate` mutation as its final step, the orchestrator's next refresh sees a non-active state, the loop breaks, and a continuation retry releases the claim. + +Without `linear_graphql`, the only way the loop ends short of `max_turns` is **out-of-band reconciliation** — a human moves the issue, or a separate process does. 
`SPEC.md:41` calls this out: *"A successful run can end at a workflow-defined handoff state (for example `Human Review`), not necessarily `Done`."* + +**Verdict:** the loop isn't a bug in the orchestrator. It's the absence of the optional extension. The official Symphony solves it by shipping `linear_graphql`. + +## In-scope gaps to reach official-Symphony behavior + +These are the changes I'd land, in order, to make this build feel like the OpenAI build. + +### 1. Implement the `linear_graphql` client-side tool extension *(highest leverage)* + +Spec contract: `SPEC.md:1056-1087`. Single tool, single GraphQL operation per call, reuses the configured Linear endpoint and auth, returns structured success/error payload. + +Surface for both backends: + +- **Codex backend** (`src/agent/stdio-client.ts`): advertise the tool via the targeted Codex protocol's tool-registration mechanism. Match the exact input/output shape from the spec. Route invocations to a small executor that wraps `LinearTracker.graphql(...)`. +- **Claude backend** (`src/agent/claude-adapter.ts:34`): the SDK `mcpServers` slot is already plumbed but never populated. Add an in-process MCP server (FastMCP-style) that exposes the same `linear_graphql` tool. Pass it via `ClaudeAdapterOptions.mcpServers`. Add `mcp__symphony__linear_graphql` to `claude.allowed_tools` defaults. + +Update the WORKFLOW.md prompt template so the agent knows the tool exists and is expected to transition the issue when work is complete. Without that prompt instruction the tool would be present but never used. + +This single change closes the 20-turn loop, plus enables comments, PR-link writes, and richer state transitions (`Human Review`, custom workflow states). + +### 2. Continuation-turn prompt differentiation + +`SPEC.md:633-634`: *"Continuation turns SHOULD send only continuation guidance to the existing thread, not resend the original task prompt that is already present in thread history."* + +Current behavior: `src/orchestrator/orchestrator.ts:368-376` re-renders the full prompt template every turn, passing `turn_number` and `attempt` as variables. The shipped `WORKFLOW.md` template doesn't branch on those — turn 7 sends the same wall of text as turn 1. + +Two ways to fix: + +- **Template-driven (lighter touch):** update `WORKFLOW.example.md` and `WORKFLOW.md` to branch with `{% if turn_number > 1 %}continuation guidance only{% else %}full prompt{% endif %}`. Document the convention. +- **Orchestrator-driven (cleaner):** when `turn_number > 1`, skip rendering and send a fixed continuation prompt (e.g., "Continue. If the issue is complete, transition it via `linear_graphql` and stop."). Keep the workflow template responsible only for turn 1. + +The Claude SDK is hit hardest by the duplicate prompts: it resumes the same thread (`options.resume`) and the SDK already replays the original prompt internally, so resending it adds tokens for no gain. Worth fixing for both backends. + +### 3. Emit per-event log lines on the daemon log channel + +`SPEC.md:1006-1019` lists the events Symphony emits (`session_started`, `turn_completed`, `turn_failed`, …). The implementation captures them via `applyAgentEvent` (`src/orchestrator/orchestrator.ts:436`), records them on the running entry, and surfaces them through the HTTP API's `last_event` field. But the **pino daemon log only logs scheduling/lifecycle events** (`dispatch_started`, `worker_completed`, `retry_scheduled`); it does not emit a structured pino line per agent event. 
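For concreteness, the kind of line the fix amounts to — a hedged sketch only, with the event shape, the `LOGGED_AGENT_EVENTS` subset, and the helper name all illustrative:

```ts
import type { Logger } from "pino";

// Illustrative subset of the spec-listed event names worth a daemon-log line.
const LOGGED_AGENT_EVENTS = new Set(["session_started", "turn_completed", "turn_failed"]);

// Called from applyAgentEvent after the running entry has been updated.
// `logger` is the run's child logger (already carries issue_id / issue_identifier).
function logAgentEvent(
  logger: Logger,
  event: { name: string; turn_id?: string; usage?: unknown },
): void {
  if (!LOGGED_AGENT_EVENTS.has(event.name)) return;
  logger.info({ event: event.name, turn_id: event.turn_id, usage: event.usage }, "agent_event");
}
```

With something like that in place, `tail -f` on the daemon log plus a grep for `agent_event` gives the per-turn visibility the smoke run below was missing.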
+ +The smoke run for goal #2 made this concrete: I tail-grepped the daemon log for `turn_completed` and got nothing, even though the events were arriving. They're observable through `/api/v1/state`, but not through `tail -f log`. + +Fix: in `applyAgentEvent`, emit a `logger.info({ event, turn_id, usage, … }, "agent_event")` line for spec-listed events. Cheap, no schema change, makes operator debugging dramatically easier. + +### 4. Auto-transition issues to `In Progress` on dispatch *(arguably should be agent-driven, see note)* + +`SPEC.md` example response on line 1403 shows a running issue with `state: "In Progress"`. In practice the official Symphony tends to start issues in `Todo`, dispatch them, and they show as `In Progress` while running. Two ways that happens: + +- **Agent-driven:** the agent moves Todo → In Progress as its first act after the prompt loads. Requires `linear_graphql`, so this rolls into change #1. +- **Orchestrator-driven:** the dispatcher mutates the tracker on claim. Spec is ambiguous — Section 7.1 covers internal claim states but doesn't require a tracker mutation. `SPEC.md:1202-1209` actively discourages this: *"Symphony does not require first-class tracker write APIs in the orchestrator."* + +Recommendation: stick with agent-driven via `linear_graphql`. Don't add tracker writes to the orchestrator. + +### 5. Tighten `usage` accounting for the Claude backend + +`src/agent/claude-adapter.ts:307-313` (`readUsageFromResult`) reads only `SDKResultMessage.usage.input_tokens` and `output_tokens`. The Claude SDK additionally reports `cache_creation_input_tokens` and `cache_read_input_tokens`, which are dropped. After the smoke run the orchestrator's running totals will systematically under-count tokens for any prompt that cache-hits. + +Spec impact: minor. `SPEC.md:13.5` (referenced by `events.ts`) tells us to prefer absolute thread totals, not cache deltas. But the Codex backend also reports cache totals, so omitting them on the Claude side is a backend-asymmetry bug. Add the cache fields to the `TokenUsage` shape and aggregate them. + +### 6. Workflow-template documentation: explain `linear_graphql` is the exit signal + +Once #1 ships, the example workflow should make this loud. Current `WORKFLOW.example.md` has no instruction to the agent about how to terminate a run. Add a section that says, in effect: *"When the work is complete, call `linear_graphql` to set state to Done (or your handoff state). Otherwise the worker will keep prompting you up to `agent.max_turns` times."* + +## Out-of-scope items (deferred per `memory/spec-scope.md`) + +These were explicitly left out at planning time. Listing them so they're not lost. + +### A. SSH worker extension (`SPEC.md` Appendix A) + +Out of scope. Lets workers run on a remote host instead of localhost. Requires SSH transport, remote workspace lifecycle, port-forwarded health. Significant surface area. Not needed for single-machine use. + +### B. `linear_graphql` *(now in-scope per recommendation #1)* + +Originally deferred because the spec calls it OPTIONAL. Recommending we move it to in-scope: it's the lowest-cost change with the highest behavioral impact, and lots of subtle UX problems (the 20-turn loop, missing state transitions, no agent-side commenting) all collapse into this one feature. + +### C. Durable retry persistence + +Out of scope. `state.retry_attempts` lives in process memory; a daemon restart drops the queue. 
Spec doesn't require persistence (`SPEC.md` Section 7 describes the state machine but doesn't mandate a persistent store). If we ever run as an unattended service, this becomes worth adding (SQLite or a JSON snapshot). + +### D. Pluggable trackers beyond Linear + +Out of scope. The `Tracker` interface in `src/tracker/types.ts` is open enough to accept a non-Linear adapter, but only the Linear implementation ships. Adding GitHub Issues / Jira / Plane is additive but not on the path to OpenAI-Symphony parity (the official build is also Linear-first). + +### E. First-class tracker write APIs in the orchestrator + +Out of scope and **should stay that way**. `SPEC.md:1202-1209` and the explicit TODO at `SPEC.md:2098` argue against it. The right place for ticket writes is the agent's tool surface (i.e., #1 above). + +## Spec-conformant but worth polishing + +Small items. Each is a few lines, no architectural risk. + +- **Event-shape coverage in adapters:** `src/agent/events.ts` lists `turn_ended_with_error`, `approval_auto_approved`, `unsupported_tool_call` as types but neither the Codex adapter nor the Claude adapter actually emits them. Either populate them at the right protocol seams or trim the type union. +- **`session_id` cosmetic:** `claude-client.ts:112` composes `session_id = "-"`. Spec confirms this format (`SPEC.md:966`). The "init" placeholder used before the first turn (`-init`) is non-standard — fine for internal use but worth documenting. +- **Workspace persistence on success:** `SPEC.md:1131` says workspaces are intentionally preserved after successful runs. Implementation does this, but `~/symphony_workspaces/` will accumulate forever. Consider a `workspace.retention_days` config or a `pnpm symphoney prune` script. +- **Pre-warm telemetry:** the `claude_sdk_startup_prewarm_done` log is emitted but the duration isn't measured. One-liner: wrap the call in a `Date.now()` delta and log `duration_ms` alongside. +- **Two stray Claude SDK subprocesses observed during the goal-#2 run** were from `~/Symphony/` (a separate fork on this machine), not symphoney-codex. Not actionable in this repo, but worth knowing if you `pkill -f claude-agent-sdk`. + +## Recommended order of operations + +If you want to ship parity in the smallest number of PRs: + +1. **PR 1 — `linear_graphql` MCP for the Claude backend** (closes the 20-turn loop end-to-end on this build's primary path; `mcpServers` slot already exists). +2. **PR 2 — `linear_graphql` for the Codex backend** (matches the official reference for Codex users). +3. **PR 3 — Continuation-prompt differentiation** (template-driven is the cheaper option; orchestrator-driven is cleaner). +4. **PR 4 — Per-event pino log lines** (operator UX; trivial change). +5. **PR 5 — Cache-token accounting in the Claude usage path** (cleanup). +6. **PR 6 — Workflow-template docs and the example update** (so users discover the agent-side state transition convention). + +Items 1-4 are the difference between "a build that runs but loops" and "a build that feels like the OpenAI Symphony." Items 5-6 are polish. + +## Appendix: what was directly verified this session + +- **Goal #1** — adapter-level smoke (`scripts/smoke-claude.ts`): PASS. Thread-id format, turn outcome, file write, event flow all match spec. OAuth path works (Keychain entry `Claude Code-credentials`). +- **Goal #2** — daemon end-to-end against APP-267 in Symphony Smoke: PASS for the dispatch path; the 20-turn cap was the observed pain point analyzed above. 
APP-267 was transitioned to Done by hand at the end of the run; WORKFLOW.md was reverted to the codex backend. diff --git a/docs/symphoney-legacy/PRD.md b/docs/symphoney-legacy/PRD.md new file mode 100644 index 0000000000..32da264221 --- /dev/null +++ b/docs/symphoney-legacy/PRD.md @@ -0,0 +1,245 @@ +# Symphoney — Product Requirements + +> Source-of-truth for the product vision. Companion to `SPEC.md` (technical contract), `ROADMAP.md` (sequenced delivery plan), and `PARITY_REPORT.md` (gap with the OpenAI reference). When the three disagree on *what* we're building, this document wins; when they disagree on *how*, `SPEC.md` wins. + +--- + +## What we're building + +**Symphoney is a personal AI engineering team that runs 24/7 on a Mac mini.** It picks issues out of a Linear backlog, runs Codex or Claude coding-agent sessions inside per-issue git worktrees, and ships every dispatch back as a reviewable GitHub PR with a Linear backlink — controllable from Slack on any device. + +The build is the operator's reference implementation of the [OpenAI Symphony Service Specification](https://github.com/openai/symphony/blob/main/SPEC.md), extended with a kanban control plane, a Slack control surface, and Cloudflare-fronted hosting so the product is usable from a phone in a meeting. The orchestrator core is spec-conformant today; the remaining roadmap turns it into a daily-driver personal automation tool. + +**North star: Symphony works on itself.** Every roadmap item from Wave 1 onward ships as a Symphoney-dispatched PR. If the product can't ship its own next feature with a human reviewing the diff and merging, it isn't done. + +--- + +## Who this is for + +A single technical builder running their own backlog. One operator, one Mac mini, one Linear workspace, one GitHub repo per workflow. The product is deliberately not multi-tenant. There is no team plan, no shared dashboard, no per-user auth model. If a second person ever needs access, they get it via Cloudflare Access on the same single-user instance. + +--- + +## What the app does + +- **Polls Linear on a fixed cadence** for issues in active states (`Todo`, `In Progress`) and dispatches the eligible ones into per-issue workspaces, respecting concurrency caps and blocker chains. +- **Creates a per-issue git worktree** branched as `sym/` from a dedicated dev checkout, so every dispatch starts in a known-good, isolated workspace. +- **Runs a coding agent inside the workspace** — Codex over stdio JSON-RPC by default, optionally Claude via the Anthropic Agent SDK — driving multi-turn sessions until the agent transitions the issue out of an active state or hits the turn cap. +- **Lets the agent transition Linear state directly** via the `linear_graphql` client-side tool, so a dispatch can self-terminate at `Done`, `Human Review`, or any workflow-defined handoff. +- **Publishes a GitHub PR on success** with a `gh pr create` flow: typecheck must pass, branch must be `sym/`, working tree must be clean, branch must be ahead of `origin/main`. The PR URL is posted back to Linear as a comment. +- **Hot-reloads its own configuration** when `WORKFLOW.md` changes on disk, so prompt edits, concurrency caps, hook scripts, and tracker settings update without a restart. +- **Surfaces live state** through an HTTP API (`/api/v1/state`, `/api/v1/`, `/api/v1/dispatch`, `/api/v1/refresh`) and a kanban dashboard at `127.0.0.1:4000` with running issues, retry queue, token totals, and a one-click immediate-dispatch action. 
+- **Recovers from transient failures** with exponential backoff on dispatch failures and a fixed cadence on continuation turns, capped by `max_retry_backoff_ms`. +- **Reconciles tracker state every tick** — if an issue moves to a terminal state during a run, the worker is aborted and its workspace cleaned up. +- **Will be controllable from Slack** (`/symphony status`, `@symphony work on ENG-123`, in-thread cancel) so the operator can claim work or check status from their phone. + +--- + +## Already shipped (don't re-spec) + +The following are done in the current checkout and are out of scope for any future milestone: + +- **Spec-conformant orchestrator core.** Polling tick, dispatch math (priority asc, null-last, oldest-first, identifier tiebreak), eligibility checks (active state + blockers terminal + slot available), per-issue retry queue with `DelayKind` (`continuation` vs `failure`) backoff, reconciliation on every tick, abort on terminal-state transition, stall detection, startup cleanup. +- **Dual agent backends.** Codex stdio JSON-RPC (`StdioCodexClient`) and Claude Agent SDK (`ClaudeAgentClient`) behind a unified `AgentClient` interface. Backend selected by `snapshot.agent.backend`. Cache-token accounting present on both. +- **`linear_graphql` client-side tool extension.** Wired for both backends. Lets the agent run a single Linear GraphQL operation per call (typically `issueUpdate` to transition state) so dispatches can self-terminate before `max_turns`. +- **Continuation prompt differentiation.** Turn 1 renders the full Liquid prompt; turns 2..N send only `agent.continuation_prompt`. No more 20-turn loops on simple tasks. +- **Hot-reloading config snapshot.** `WORKFLOW.md` is parsed into an immutable `ConfigSnapshot`; `chokidar` rebuilds it on file change. Every consumer reads `getSnapshot()` per call so reloads take effect immediately. +- **HTTP API.** `GET /` (legacy dashboard) plus `/api/v1/{state,issues,repositories,version,refresh,dispatch,}`. Static-export of the kanban served at `/*` when `web/out` is built. +- **Kanban control plane (web/).** Next.js 16 + React 19 + Tailwind 4 + shadcn. Polling at 5s with visibility pause. Group-by `lifecycle | status | repository`. One-click immediate dispatch. +- **Workspace lifecycle hooks.** `after_create`, `before_run`, `after_run`, `before_remove` with `WORKSPACE_PATH`, `ISSUE_ID`, `ISSUE_IDENTIFIER`, `ISSUE_TITLE`, `ATTEMPT`, `WORKFLOW_PATH` env propagation. Path-safety constraints on workspace root. +- **Wave 0 bootstrap.** Worktree-based hooks against a dedicated `~/symphony-dev/symphoney-codex` checkout, prod/dev split documented, first-class PR publisher with loud failures, first-dispatch ceremony config (`max_concurrent_agents: 1`, `max_turns: 12`). +- **Structured agent-event logging.** Spec-listed events (`session_started`, `turn_completed`, etc.) emitted as `agent_event` pino lines for grep-friendly operator debugging. + +--- + +## Out of scope (won't ship) + +- **Multi-user auth, team plans, role-based access control.** Single-operator product. Auth boundary, when it arrives, is Cloudflare Access on top of single-user. +- **SSH worker extension** (Symphony spec Appendix A). Localhost-only execution. +- **Generic webhook system.** Symphoney isn't a workflow engine. Slack and Cloudflare Tunnel are bespoke integrations. +- **Built-in metrics stack** (Prometheus, Grafana, OpenTelemetry collector). Operate from launchd logs + outbound heartbeat to a hosted uptime monitor. +- **Drag-to-reorder columns or cards** in the kanban. 
Linear is the source of truth for state transitions. +- **Editing or creating issues from the kanban.** Linear is the source of truth for issue content. +- **Pluggable trackers beyond Linear.** The `Tracker` interface allows it; only Linear ships. +- **First-class tracker write APIs in the orchestrator.** Per spec, ticket writes belong to the agent's tool surface (`linear_graphql`), not the orchestrator. +- **Mobile native app.** The phone-first surface is Slack and the responsive kanban behind Cloudflare Access. No iOS/Android app. +- **Per-user model selection or fine-tuning.** Backend and model are workflow-level settings, not per-issue. +- **Voice input.** Slash commands and threaded replies are sufficient. + +--- + +## Tech stack + +- **Runtime:** Node ≥22, TypeScript ESM-only (`module: NodeNext`, `verbatimModuleSyntax` off, `isolatedModules` on, `noUncheckedIndexedAccess: true`), pnpm 10. +- **Daemon:** Hono HTTP server via `@hono/node-server`, `chokidar` watcher, `pino` structured logging, `graphql-request` for Linear, `better-sqlite3` (planned, Wave 1.1) for durable run state. +- **Agent backends:** `codex app-server` CLI subprocess over stdio JSON-RPC (default) or `@anthropic-ai/claude-agent-sdk` ^0.2.122 (selectable). Both expose `linear_graphql` as a client-side tool. +- **Web:** Next.js 16 + React 19 + Tailwind 4 + shadcn + Base UI + Phosphor. Static-export at `web/out/` mounted by the daemon in prod; dev rewrites `/api/*` → `http://127.0.0.1:4000` for same-origin requests. +- **Hosting:** Mac mini under `launchd` (user LaunchAgent), Cloudflare Tunnel (`cloudflared`) → `symphony.` → `127.0.0.1:4000`, Cloudflare Access protecting dashboard routes, `/slack/*` bypassed and verified via Slack signed requests. +- **Tooling:** `gh` CLI for PR creation, `git worktree` for workspace isolation, Vitest for unit + integration tests, `tsx` for dev-mode no-build runs. + +--- + +## External integrations + +| Integration | Purpose | Credentials needed | Status | +|---|---|---|---| +| **Linear** | Tracker source-of-truth; the agent also calls `linear_graphql` to transition state and post comments | `LINEAR_API_KEY` in `.env` | Live | +| **GitHub (via gh CLI)** | Branch push + PR creation + PR-URL backlink | `gh auth status` (OAuth, browser flow) | Live | +| **OpenAI Codex CLI** | Default agent backend over stdio JSON-RPC | None at daemon level (Codex manages its own auth) | Live | +| **Anthropic Claude Agent SDK** | Optional agent backend | `claude login` (OAuth/subscription) **or** `ANTHROPIC_API_KEY` | Live | +| **Slack** | Phone-first control plane (`/symphony status`, `@symphony work on …`) | Slack app: signing secret + bot token + slash command + Events API URL | Wave 2.1 | +| **Cloudflare Tunnel + Access** | Public HTTPS for Slack webhooks, auth for dashboard | Cloudflare account, domain on Cloudflare DNS, `cloudflared` token | Wave 2.3 | +| **Outbound uptime heartbeat** | Dead-man's switch | Healthchecks.io (or equivalent) ping URL | Wave 2.4 | + +--- + +## Conceptual data model — what the daemon needs to remember + +Today, most of this lives in process memory; Wave 1.1 moves the durable parts to a SQLite store next to the workspace root. 
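For a concrete flavor of the Wave 1.1 store, a sketch using `better-sqlite3` (the path, table names, and columns are illustrative, derived from the entities listed below — not a committed schema):

```ts
import Database from "better-sqlite3";

// One file next to the workspace root; WAL so the dashboard can read while the daemon writes.
const db = new Database(`${process.env.HOME}/symphony_workspaces/runs.db`);
db.pragma("journal_mode = WAL");

db.exec(`
  CREATE TABLE IF NOT EXISTS runs (
    id               INTEGER PRIMARY KEY,
    issue_id         TEXT NOT NULL,
    issue_identifier TEXT NOT NULL,
    attempt          INTEGER NOT NULL,
    status           TEXT NOT NULL,   -- running | completed | failed | interrupted
    started_at       TEXT NOT NULL,
    ended_at         TEXT,
    publish_result   TEXT             -- PR URL, 'no_changes', or a failure string
  );
  CREATE TABLE IF NOT EXISTS turns (
    id            INTEGER PRIMARY KEY,
    run_id        INTEGER NOT NULL REFERENCES runs(id),
    turn_number   INTEGER NOT NULL,
    outcome       TEXT,               -- completed | aborted | failed
    input_tokens  INTEGER DEFAULT 0,
    output_tokens INTEGER DEFAULT 0
  );
  CREATE TABLE IF NOT EXISTS agent_events (
    id      INTEGER PRIMARY KEY,
    run_id  INTEGER NOT NULL REFERENCES runs(id),
    name    TEXT NOT NULL,            -- session_started, turn_completed, ...
    at      TEXT NOT NULL,
    payload TEXT                      -- JSON blob
  );
`);
db.pragma("user_version = 1"); // the "tiny migration path" from Milestone 1
```

The entity descriptions below remain the source of truth for what actually needs persisting; the sketch only shows the shape.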
+ +### Issue *(read from Linear, normalized)* +- `id` — Linear's UUID (used for issue-level mutations) +- `identifier` — human-readable key like `APP-123`; drives branch and workspace names +- `title`, `description` — what the agent is told to work on +- `priority` — drives dispatch sort order (asc, null-last) +- `state` — current workflow state name (matched against active/terminal sets) +- `branch_name` — agent's preferred branch hint, if any +- `url` — link back to the Linear issue +- `labels` — arbitrary string tags +- `blocked_by` — list of upstream issues; dispatch waits until each is in a terminal state +- `created_at`, `updated_at` — timestamps for sort tiebreaks and reconciliation + +### Run *(per-dispatch attempt)* +- Which issue, when started, current attempt number +- Worker promise + abort controller (in-memory only) +- Codex/Claude session id and thread id (for resume on continuation turns) +- Codex app-server PID, last event name, last event payload, last event timestamp +- Token totals: input, output, cache-creation input, cache-read input, total — monotonic; out-of-order decreases ignored +- Last-reported-to-tracker totals (so deltas can be aggregated even if events arrive out of order) +- Turn count +- Cancel-requested flag (set by `requestImmediateDispatch` cancel path or kanban cancel button) +- Publish result — PR URL, `no_changes` skip marker, or `failed: ` string + +### Turn *(one prompt → response cycle inside a run)* +- Run id, turn number, started/ended timestamps, outcome (`completed | aborted | failed`) +- Prompt sent (full template on turn 1; continuation prompt only on 2..N) +- Token usage delta for this turn + +### Agent event *(the spec's session/turn lifecycle protocol)* +- Run id, turn id, event name (`session_started`, `turn_completed`, `turn_failed`, `agent_message`, `tool_call_started`, `tool_call_completed`, etc.) +- Timestamp, structured payload (varies by event) +- Logged to pino as a structured `agent_event` line; persisted to SQLite from Wave 1.1 + +### Workspace +- Issue identifier → absolute path under `~/symphony_workspaces/` +- Implementation: a git worktree on branch `sym/` from `~/symphony-dev/symphoney-codex` +- Lifecycle: created on dispatch, persisted across runs, removed on terminal-state reconciliation + +### Retry queue entry +- Issue id, attempt number, delay kind (`continuation` | `failure`), due-at timestamp, last error code/message +- Continuation: fixed 1000 ms; failure: 10000 × 2^(n-1) capped by `max_retry_backoff_ms` + +### Config snapshot *(immutable, rebuilt on `WORKFLOW.md` change)* +- Tracker config (kind, project, repository, active/terminal states), polling interval, workspace root, hooks, agent caps and backend selection, codex/claude per-backend settings + +### Rate-limit signals *(read from Linear / agent backends)* +- Surfaced on `/api/v1/state.rate_limits`; used to decide whether to backpressure dispatch + +--- + +## Milestones + +The roadmap waves are the milestones. Each wave is a working session for an agent (or for the operator, in Wave 0's case). Later waves assume earlier ones have shipped. + +--- + +### Milestone 0 — Bootstrap self-work safely **(SHIPPED)** + +What this milestone delivered: Symphoney can dispatch issues against itself without trashing the prod checkout, and every successful dispatch produces a reviewable PR. + +**What got built** +- A dedicated "Symphony" Linear project (slugId `60aa12712181`) on the `dell-omni-group` org, separate from the Smoke sandbox. 
+- Hook env-var plumbing: `WORKSPACE_PATH`, `ISSUE_ID`, `ISSUE_IDENTIFIER`, `ISSUE_TITLE`, `ATTEMPT`, `WORKFLOW_PATH` flow through `after_create`, `before_run`, `after_run`, `before_remove`. +- Worktree-based workspaces: `after_create` runs `git worktree add` against `~/symphony-dev/symphoney-codex`, with a branch-exists guard so attempt N≥2 reuses the branch. +- `before_run` runs `pnpm install --frozen-lockfile && pnpm typecheck` so the agent starts in a known-good workspace. +- First-class PR publisher (`src/publisher/pr.ts`): rev-parse → status clean → log ahead → typecheck → gh auth status → push → `gh pr create` → Linear backlink comment. Loud failures, no auto-retry. +- Prod/dev checkout split documented in `CLAUDE.md`; daemon runs from `~/symphony-prod/symphoney-codex`. +- First-dispatch ceremony config: `max_concurrent_agents: 1`, `max_turns: 12` until three clean dogfood PRs land. + +**Done when** ✅ A handwritten Linear issue in the Symphony project gets picked up, an agent commits inside `~/symphony_workspaces//`, and a PR opens at `Ddell12/symphoney-codex` with the PR URL posted back to the Linear issue. + +--- + +### Milestone 1 — Durable state and history **(NEXT — Wave 1.1 is the first dogfood dispatch)** + +What this milestone delivers: cheap, high-leverage substrate for restart recovery, dashboard history, and the eval suite. + +**What gets built** +- Persist run state to SQLite (`runs.db` next to workspace root) with `runs`, `turns`, `agent_events`, and a `schema_meta` migration table. WAL journal mode for read concurrency. +- Startup recovery: load non-terminal runs, reconcile their Linear states, mark stale rows as `interrupted`, never duplicate-dispatch issues already inactive. +- Event-shape coverage cleanup: every event in the union is either emitted by at least one adapter with tests, or removed from the union. +- Kanban surface: an `interrupted` lifecycle column with a "resume" button calling `requestImmediateDispatch`. + +**Explicitly NOT in this milestone** +- Migration tooling beyond `PRAGMA user_version`. No Knex / Prisma / Drizzle. +- Multi-database support. SQLite only. +- Time-series storage of token totals. Last-known totals, plus deltas in `agent_events`, are sufficient. + +**Done when** Killing the daemon mid-run and restarting shows the run as `interrupted` in the kanban with full turn history; retry counts and token totals survive; restart never dispatches an issue that's already terminal in Linear. + +--- + +### Milestone 2 — Slack as the control plane + +What this milestone delivers: usable from a phone in a meeting. The product earns the description "personal AI engineering team" only after this milestone ships. + +**What gets built** +- A single Slack app with three primitives: + - `/symphony status` → Block Kit message with running issues, lifecycle pills, token meter, and links to dashboard + PRs. + - `@symphony work on ENG-123` → claim + dispatch immediately, bypassing the polling cadence (still respects slot caps + blockers). + - Threaded run output → bot posts the agent's plan as a thread reply on dispatch and the PR URL on completion. `@symphony cancel` in-thread aborts the run. +- 24/7 hosting on the Mac mini under `launchd` (`WorkingDirectory`, `ProgramArguments`, env-file load, `KeepAlive`, log paths). +- Cloudflare Tunnel from `cloudflared` outbound on the mini → `symphony.` → `127.0.0.1:4000`. `cloudflared` itself runs under `launchd`. 
+- Cloudflare Access protecting the dashboard routes; `/slack/*` bypassed and verified via Slack signed requests. +- `GET /healthz` with non-sensitive status (uptime, last-poll-age, last tracker error, running count, SQLite health) plus an outbound heartbeat to Healthchecks.io. + +**Explicitly NOT in this milestone** +- Slack modals, multi-channel routing, per-user prefs, voice input, message scheduling, slash subcommands beyond the three primitives. +- A native iOS/Android app. +- Tailscale (separate decision, only if SSH-from-anywhere ever matters). +- Detailed health data on the unauthenticated `/healthz` path. Use the heartbeat for liveness; gate detail behind Access. + +**Done when** From a phone, the operator can run `/symphony status` in a meeting, type `@symphony work on APP-300`, see the plan thread, and either let it run or `@symphony cancel`. The mini reboots cleanly under launchd. The kanban is reachable on the phone behind Cloudflare Access. + +--- + +### Milestone 3 — Output quality + +What this milestone delivers: every dispatch is trustworthy enough to merge without a careful diff read. Pick one sub-item at a time and let it bake before adding the next. + +**What gets built (in order, one at a time)** +- **Validation gate before completion (3.1):** before the agent calls `linear_graphql` to transition the issue, `pnpm typecheck && pnpm test` must pass. Either prompt-driven (the agent runs them) or orchestrator-driven (run them after a turn that looks complete; on failure, send stderr as the next continuation prompt). +- **Plan-then-execute split (3.2):** turn 1 produces a markdown checklist plan, posted to Linear before execution. Subsequent turns execute one item at a time. Conservative checklist parser that doesn't block runs on extraction failure. +- **Golden eval suite (3.3):** five closed Linear issues with known-good PRs replayed offline through `pnpm eval`. Compare patch size, touched files, validation result, expected-files-changed. No LLM grading. Weekly cadence via launchd. +- **Linear UX polish (3.4):** plan-as-comment before execution, per-turn progress comments (test markdown support first), PR body auto-injects `Fixes ENG-123` for Linear's GitHub auto-link, register Symphoney as a Linear Agent user with delegation-based dispatch. + +**Explicitly NOT in this milestone** +- LLM-as-judge evaluation. Deterministic artifacts only. +- Custom validation runners beyond `pnpm typecheck && pnpm test`. The workflow defines the validation command. +- Auto-merging PRs. Human still presses the merge button. + +**Done when** Three consecutive Symphoney-dispatched PRs land on `main` without a human pushing fixup commits, and the eval suite catches the next regression that would have shipped. + +--- + +## Vision-level success criteria + +Symphoney is "done" for the operator's purposes when **all** of the following are true: + +1. The operator can describe a feature in a Linear issue from their phone, walk away, and find a mergeable PR waiting when they next open GitHub. +2. The mini has been up for ≥30 days without a manual restart, and the heartbeat hasn't paged. +3. The eval suite has caught at least one regression that the operator didn't catch by reading the diff. +4. The last three roadmap items shipped were dispatched by Symphoney itself, with the operator only reviewing and merging. +5. Operating cost is dominated by agent token spend, not infrastructure. ($0 for hosting; $X for Codex/Claude usage.) 
+ +When all five are true, Symphoney has cleared the bar of "personal AI engineering team that runs 24/7." Until then, it's a tool that the operator is still feeding by hand. diff --git a/docs/symphoney-legacy/README.md b/docs/symphoney-legacy/README.md new file mode 100644 index 0000000000..94e7dacf05 --- /dev/null +++ b/docs/symphoney-legacy/README.md @@ -0,0 +1,29 @@ +# symphoney-legacy + +Reference snapshot of `Ddell12/symphoney-codex` (the standalone TypeScript Symphony daemon) as of **2026-04-30**, captured before that repo is archived. Phase 2–4 of the consolidation port code from this snapshot into `packages/symphony/`; once Phase 5 lands and `symphoney-codex` is deleted, this directory is the only remaining record of the pre-fork codebase. + +## What's here + +- **`plans/2026-04-30-archon-symphony-consolidation.md`** — master 6-phase consolidation plan. Phases 0–1 already shipped; Phase 2 onward consumes this. +- **`incidents/2026-04-30-app-273-data-loss.md`** — incident report for the reconcile-terminal data-loss bug fixed in Phase 0 (commit `fa70be2` in `symphoney-codex`, dogfood-verified via APP-291). +- **`SPEC.md`** — OpenAI's canonical Symphony Service Specification. Source of truth for tracker/orchestrator/agent semantics; symphoney-codex's source files reference it by line number (e.g. `SPEC.md:633-634` in `runWorker`). +- **`WORKFLOW.md`** — the production daemon's runtime config (YAML front matter + Liquid prompt). Phase 2 turns this into `~/.archon/symphony.yaml` (no `agent:` block; per-state `workflow:` key). +- **`WORKFLOW.example.md`** — sample/template config. +- **`PARITY_REPORT.md`** — gap analysis vs OpenAI's Elixir reference impl. Phase 5 appends a "deprecated in favor of archon-symphony" section. +- **`ROADMAP.md`**, **`WEB_GAPS.md`**, **`PRD.md`** — broader product/engineering context. +- **`CLAUDE.md`** — symphoney-codex's architecture reference (the ESM/`.js`-imports/`pool: forks`/etc. conventions). Phase 2 should consult this when porting orchestrator + tracker. +- **`symphoney-readme.md`** — top-level README. +- **`src/`** — full source snapshot. Phase 2 ports `tracker/`, `orchestrator/`, `config/snapshot.ts`, `workflow/parse.ts` into `packages/symphony/src/`. `publisher/pr.ts` and `agent/linear-graphql-tool.ts` may also port (Phase 3 decides). `agent/{stdio-client,claude-client,claude-adapter,fake-client}.ts` and `agent/factory.ts` are NOT ported — Archon's `packages/providers/` already covers these. `server/http.ts` and `server/dashboard.ts` are NOT ported — Archon's `packages/server/` owns the HTTP layer. +- **`test/`** — full test snapshot. Patterns to mirror: `test/integration/orchestrator.test.ts` (polling + dispatch + retry), `test/integration/orchestrator-reconcile-publish.test.ts` (Phase 0 publish-before-remove tests), `test/helpers/fake-tracker.ts`. Tests use `vitest`; archon uses `bun:test` — port the patterns, not the syntax. +- **`scripts/smoke-claude.ts`**, **`scripts/smoke-linear-graphql.ts`** — real-API smoke scripts. Useful as references for Phase 2's tracker validation; archon has its own provider smoke scripts so don't port directly. + +## What's NOT here (intentionally) + +- `node_modules/`, `dist/`, `pnpm-lock.yaml`, `package.json`, `tsconfig.json`, `vitest.config.ts` — pnpm/Vitest mechanics. Archon uses Bun. +- `web/` — the Next.js kanban being retired. Phase 4 ports its grouping/transform helpers into `packages/web/src/components/symphony/` directly from the live source while symphoney-codex still exists. 
+- `bin/symphony` — symphoney's CLI entry point. Archon owns its own CLI. +- `.env` — credentials. Use `~/.archon/symphony.yaml` + your local `.env` instead. + +## Stable artifact warning + +This snapshot is **frozen at the point it was committed**. If any of these files materially diverge in symphoney-codex before that repo is archived, the user is responsible for re-syncing. The plan file in particular gets edited as phases land — keep the symphoney-codex copy authoritative until Phase 5. diff --git a/docs/symphoney-legacy/ROADMAP.md b/docs/symphoney-legacy/ROADMAP.md new file mode 100644 index 0000000000..295761e8e5 --- /dev/null +++ b/docs/symphoney-legacy/ROADMAP.md @@ -0,0 +1,208 @@ +# Symphony Roadmap + +Long-arc plan for moving this build from "spec-conformant prototype" to "personal tool that feels production." Read top-to-bottom; each wave assumes the previous is done. File:line references point at the current code so future-you can find the touch-points fast. + +Companion docs: `SPEC.md` is the source of truth. `PARITY_REPORT.md` is historical and now stale in a few places: `linear_graphql`, continuation prompts, structured agent-event logs, Claude cache-token accounting, and `before_remove` hooks are already implemented in this checkout. + +--- + +## Goals + +- Run 24/7 in the background, controllable from Slack on any device. +- Ensure every dispatch produces reviewable output or fails loudly. +- Persist enough state that restarts are invisible and history is queryable. +- Improve output quality gradually: validation gates, plan-then-execute, eval suite. + +Out of scope (deliberately): metrics stack, multi-user auth, generic webhook system, the SSH worker extension from the spec. + +**North star:** Symphony works on itself. Wave 0 is the hand-coded bootstrap that makes that safe; every later item should ship as a Linear issue dispatched through Symphony and reviewed as a PR. + +--- + +## Wave 0 - Bootstrap self-work safely + +Goal: by the end of Wave 0, the next remaining roadmap item is a Linear issue, an agent picks it up in a real git worktree, and the output becomes a PR against `Ddell12/symphoney-codex`. + +Run Wave 0 by hand. Do not seed issues for items already implemented. + +### 0.1 Dedicated Linear project +- Create a new Linear project **"Symphony"** in the `dell-omni-group` org. Do not reuse the Symphony Smoke sandbox (`d0ef0b50e836` in current `WORKFLOW.md`). +- States must include `Todo`, `In Progress`, `Done`, `Cancelled`. Active states should stay `Todo` + `In Progress`; terminal states should include `Done`, `Closed`, `Cancelled`, `Canceled`, `Duplicate`. +- Update `WORKFLOW.md:tracker.project_slug` to the new project's `slugId`. + +### 0.2 Seed only remaining work +- Do **not** create issues for old Wave 1.1 or 1.2. They are already shipped: +- `linear_graphql` exists in `src/agent/linear-graphql-tool.ts` and is wired into `src/agent/stdio-client.ts` + `src/agent/claude-client.ts`. +- Continuation prompts already use `snap.agent.continuation_prompt` on turns 2..N in `src/orchestrator/orchestrator.ts:392-405`. +- Structured `agent_event` pino lines and Claude cache-token accounting are already present. +- Seed the first real queue in this order: hook context/worktree hardening, PR publisher/backlink flow, SQLite persistence, `/healthz`, Slack control plane. + +### 0.3 Make hooks workspace-aware before relying on hook scripts +- Current hook execution can accept `env`, but call sites do not pass issue/workspace variables. 
Do not write hooks that assume `$WORKSPACE_PATH` or `$ISSUE_IDENTIFIER` until this is fixed. +- Extend hook call sites to pass at least `WORKSPACE_PATH`, `ISSUE_ID`, `ISSUE_IDENTIFIER`, `ISSUE_TITLE`, `ATTEMPT`, and `WORKFLOW_PATH`. +- Where: `src/workspace/manager.ts:createForIssue`, `src/workspace/manager.ts:removeForIssue`, `src/orchestrator/orchestrator.ts:runWorker`, and `src/workspace/hooks.ts`. + +### 0.4 Workspace = git worktree, not blank dir +- Replace the current `after_create` hook (`WORKFLOW.md:23-25`) only after 0.3 exists: + ```sh + git -C ~/symphony-dev/symphoney-codex fetch origin main + git -C ~/symphony-dev/symphoney-codex worktree add "$WORKSPACE_PATH" -b "sym/$ISSUE_IDENTIFIER" origin/main + ``` +- Extend `before_run` to run `pnpm install --frozen-lockfile` and `pnpm typecheck` so the agent starts in a known-good workspace. +- Add `before_remove` cleanup: + ```sh + git -C ~/symphony-dev/symphoney-codex worktree remove --force "$WORKSPACE_PATH" + ``` +- Caveat: `--force` discards uncommitted workspace changes. Only remove worktrees after PR publishing/backlinking has succeeded, or when intentionally cleaning terminal/stale work. +- `src/workspace/safety.ts` pins workspace paths under `workspace.root`, but it is path-safety only. It is not an OS sandbox. + +### 0.5 Separate prod and dev checkouts +- **`~/symphony-dev/symphoney-codex`**: the checkout whose git repository owns agent worktrees. Do not run the daemon from here. +- **`~/symphony-prod/symphoney-codex`**: the checkout the daemon and `pnpm start` run from. Update only through explicit `git pull && pnpm build && launchctl kickstart`. +- Correct model: linked worktrees share the git object database and refs, but each worktree has its own working tree and index. The real prod risk is not a shared index; it is running the daemon from a checkout that agents can edit or reload underneath it. +- Agents can still read or mutate prod paths if host permissions allow it. If prod isolation matters, run the daemon and agent workers under separate OS users or a real sandbox. + +### 0.6 PR creation and Linear backlink flow +- Do not implement this as a bare `after_run` hook that calls `linear_graphql`. `after_run` runs after `session.stop()`, is best-effort, and cannot feed results back into the same agent session. +- Implement a first-class PR publisher in daemon code or a dedicated script invoked by daemon code. It should: +- Verify the worktree is on branch `sym/`. +- Require either committed changes ahead of `origin/main` or a deliberate "no changes" result. +- Run `pnpm typecheck` at minimum before publishing. +- Push with `git push -u origin "sym/$ISSUE_IDENTIFIER"`. +- Run `gh pr create --fill --base main --head "sym/$ISSUE_IDENTIFIER" --body "Fixes $ISSUE_IDENTIFIER\n\nDispatched by Symphony."`. +- Post the PR URL back to Linear using daemon-owned GraphQL/tracker code, or have the agent post it before it transitions the issue. +- Fail loudly if `gh` is not authenticated, the branch is dirty, validation fails, or no PR URL is produced. + +### 0.7 First-dispatch ceremony +- Set `agent.max_concurrent_agents: 1` and `agent.max_turns: 12` for the first three dogfood runs. Watch them end-to-end. +- First dispatch after Wave 0: **Wave 1.1 SQLite persistence**. +- Second dispatch: **Wave 2.4 `/healthz` + heartbeat** if operational visibility is the bottleneck, or **Wave 3.1 validation gate** if output quality is the bottleneck. +- After three successful dogfood PRs, restore `max_concurrent_agents: 4` and `max_turns: 20`. 
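+One possible shape for the publisher described in 0.6, written as daemon-owned TypeScript rather than an `after_run` hook. This is a hedged sketch: the function name, argument names, and error messages are illustrative, it assumes `git`, `gh`, and `pnpm` are on `PATH`, and the `gh pr create` flags simply mirror the command listed in 0.6.
+
+```ts
+import { execFileSync } from "node:child_process";
+
+// Illustrative publisher: push the issue branch, open a PR, and fail loudly
+// on any precondition problem instead of swallowing errors. The deliberate
+// "no changes" path from 0.6 is omitted for brevity.
+function publishPr(workspacePath: string, issueIdentifier: string): string {
+  const run = (cmd: string, args: string[]) =>
+    execFileSync(cmd, args, { cwd: workspacePath, encoding: "utf8" }).trim();
+
+  const branch = run("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
+  if (branch !== `sym/${issueIdentifier}`) {
+    throw new Error(`expected branch sym/${issueIdentifier}, found ${branch}`);
+  }
+  if (run("git", ["status", "--porcelain"]) !== "") {
+    throw new Error("worktree is dirty; commit or discard changes before publishing");
+  }
+  if (run("git", ["rev-list", "--count", "origin/main..HEAD"]) === "0") {
+    throw new Error("no commits ahead of origin/main; nothing to publish");
+  }
+
+  run("pnpm", ["typecheck"]); // minimum validation gate before publishing
+  run("git", ["push", "-u", "origin", `sym/${issueIdentifier}`]);
+
+  // Flags mirror the gh command given in 0.6; gh prints the PR URL on stdout.
+  const prUrl = run("gh", [
+    "pr", "create", "--fill", "--base", "main",
+    "--head", `sym/${issueIdentifier}`,
+    "--body", `Fixes ${issueIdentifier}\n\nDispatched by Symphony.`,
+  ]);
+  if (!prUrl.startsWith("https://")) {
+    throw new Error(`gh pr create did not return a PR URL: ${prUrl}`);
+  }
+  return prUrl; // caller posts this back to Linear via daemon-owned tracker code
+}
+```
+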
+ +### Safety notes +- Agents edit assigned worktrees, not the prod checkout. Enforce this with filesystem permissions if you need a hard guarantee. +- Do not queue issues that say "modify the running daemon and reload." Changes ship via PR -> merge -> manual prod restart. +- If a self-modification PR breaks `main`, prod is unaffected until you explicitly pull and restart prod. + +--- + +## Wave 1 - Durable state and history + +Cheap, high-leverage substrate for Slack, dashboard history, restart recovery, and evals. + +### 1.1 Persist run state to SQLite +- **Why:** runtime state is still in-memory in `src/orchestrator/state.ts:49-56`; restart loses running entries, retry counts, and token totals. +- **What:** one `runs.db` next to the workspace root. Tables: `runs`, `turns`, `agent_events`, and a tiny `schema_meta`/`PRAGMA user_version` migration path. +- Use `better-sqlite3`, but do not treat "no migration framework" as "no migrations." Add idempotent migrations and set `PRAGMA journal_mode = WAL` for dashboard/daemon read concurrency. +- Acceptance: kill the daemon mid-run, restart, dashboard/API shows the run as `interrupted` with full turn history; retry counts and token totals survive. + +### 1.2 Startup recovery semantics +- On startup, load non-terminal runs from SQLite, reconcile their Linear states, and mark stale running rows as `interrupted`. +- Clear stale in-memory claims from the previous process; do not dispatch duplicate work for issues already inactive or terminal in Linear. +- Add tests that simulate daemon crash after turn events are written but before `worker_exit_normal`. + +### 1.3 Event-shape coverage cleanup +- Remaining parity gap: adapter coverage for `turn_ended_with_error`, `approval_auto_approved`, and related protocol-specific events is still uneven. +- Where: `src/agent/events.ts`, `src/agent/stdio-client.ts:577-610`, `src/agent/claude-adapter.ts`. +- Acceptance: event union members are either emitted by at least one adapter with tests or removed/documented as unsupported. + +--- + +## Wave 2 - Slack as the control plane + +Goal: usable from a phone in a meeting. Single Slack app, not a platform. + +### 2.1 Slack bot - three primitives only +- **`/symphony status`** -> Block Kit message with running issues, phase pill, token meter, and links to dashboard/PRs. +- **`@symphony work on ENG-123`** -> claim + dispatch immediately, bypassing polling. +- **Threaded run output** -> bot posts plan as a thread reply on dispatch and summary/PR URL on completion. User can `@symphony cancel` in-thread. +- Use standard Block Kit first. Slack Card and Alert blocks exist, but verify surface support in Block Kit Builder before using them in messages. Work Objects are a separate unfurl/flexpane feature; use them later for Symphony/Linear URL previews, not as a blocker for v1 status messages. +- Required implementation details: verify Slack signed requests using the raw body, respond to slash commands within Slack's 3-second ack window, handle retries idempotently, and add tests for signature failure. +- Do not build yet: modals, multi-channel routing, per-user prefs, voice input. One channel + DMs is enough. + +### 2.2 24/7 hosting on the Mac mini +- **Decision:** Mac mini, not MacBook (sleeps), not VPS (Claude OAuth and local-worktree simplicity matter more than cloud portability). +- Use `launchd`. A user `LaunchAgent` is fine if the Mac mini is logged in; use a `LaunchDaemon` only if it must run before user login. 
+- The plist should set `WorkingDirectory`, tokenized `ProgramArguments`, `EnvironmentVariables`/env file loading strategy, `KeepAlive`, `StandardOutPath`, and `StandardErrorPath`. +- `pino-roll` is not currently a dependency. Either add it deliberately or rely on `launchd` log paths plus macOS log rotation/newsyslog. +- Verify before committing: `claude login` works on the mini and the selected agent backend picks up OAuth credentials in the launchd environment. + +### 2.3 Cloudflare Tunnel + Cloudflare Access +- **Decision:** Cloudflare Tunnel is the right fit for Slack webhooks because Slack needs a public HTTPS URL and `cloudflared` can expose `127.0.0.1:4000` with outbound-only connectivity. +- Setup: `cloudflared` outbound from the mini -> `symphony.yourdomain.com` -> `127.0.0.1:4000`. Run `cloudflared` itself under launchd. +- Protect dashboard routes with Cloudflare Access. Bypass Access only for Slack webhook routes such as `/slack/*`, and rely on Slack signature verification there. +- Cloudflare One's free tier is currently suitable for small personal use, but re-check pricing/seat limits before depending on it for more users. +- Optional: add Tailscale separately only if you want SSH-from-anywhere into the mini. Do not add it just for Symphony HTTP access. + +### 2.4 Healthcheck + dead-man's switch +- Add `GET /healthz`. It should return non-sensitive status: process uptime, config loaded, last successful poll age, last tracker error, running count, and SQLite health. +- Prefer an outbound heartbeat to healthchecks.io/UptimeRobot-style monitoring if possible; it avoids exposing unauthenticated health data publicly. +- If external polling is used, expose `/healthz` through Cloudflare with a shared secret/header or a narrow bypass rule. Do not put detailed dashboard state on the unauthenticated health path. + +--- + +## Wave 3 - Output quality + +Pick one and let it bake before adding the next. These are what eventually push toward "consistently production-quality." + +### 3.1 Validation gate before completion +- Do **not** implement this as `after_run`. `after_run` happens after the session is stopped and failures are ignored, so it cannot feed stderr back as a continuation turn. +- Implement validation before final completion/PR publishing. Options: +- Prompt-only v1: require the agent to run `pnpm typecheck && pnpm test` before calling `linear_graphql` to transition the issue. +- Orchestrator v2: after a turn that appears complete, run validation while the session is still active; on failure, send stderr as the next continuation prompt and keep the issue active. +- Acceptance: agent is not considered done until validation passes or the run explicitly fails with reviewable logs. + +### 3.2 Plan-then-execute split +- Turn 1 produces a structured plan as a markdown checklist; subsequent turns execute one item at a time. +- Post the plan to Linear before execution so humans can cancel or redirect early. +- Where: prompt template in `WORKFLOW.md` plus a small parser for checklist extraction. Keep the parser conservative; if extraction fails, continue without blocking the run. + +### 3.3 Golden eval suite +- Pick 5 closed Linear issues with known-good PRs. `pnpm eval` replays them through Symphony offline with a fake tracker and compares the produced patch to the merged patch. +- Do not grade with an LLM. Track regression on prompt/model/runtime changes with deterministic artifacts: patch size, touched files, validation result, and whether expected files changed. 
+- Cadence: weekly via launchd or CI, not an unspecified `/schedule` skill. + +### 3.4 Linear UX polish + +- Plan-as-comment before execution. +- Per-turn progress comments. Test Linear markdown support before relying on collapsed `<details>
`; if unsupported, use compact status comments. +- PR body auto-injects `Fixes ENG-123` so Linear's GitHub integration auto-links. +- Register Symphony as a first-class Linear Agent user. Linear supports agent delegation, but the current tracker only filters by project/state; add tracker support for agent/delegation filtering before making assignment the dispatch trigger. + +--- + +## What to pick up first + +**Wave 0, hand-coded, one session.** Required scope: hook context env, worktree setup, prod/dev checkout split, and a real PR/backlink publisher that fails loudly. + +Then dispatch **Wave 1.1 SQLite persistence** as the first dogfood issue. The old first issues (`linear_graphql` and continuation prompts) are already done. + +After Wave 1.1 ships and bakes, choose one: +- **Wave 2** if the bottleneck is "I can't see/control it from my phone." +- **Wave 3.1** if the bottleneck is "the output isn't trustworthy yet." + +Do not do both at once. + +--- + +## Current Implementation Status + +Verified in the current checkout: + +1. `linear_graphql` client-side tool exists and is wired for both Codex stdio and Claude MCP. +2. Continuation turns use `agent.continuation_prompt`. +3. Spec-listed agent events get structured `agent_event` pino lines. +4. Claude usage includes cache creation/read token fields. +5. `before_remove` hook support exists. +6. HTTP dashboard/API exists at `/`, `/api/v1/state`, `/api/v1/refresh`, and `/api/v1/:identifier`. +7. No SQLite persistence exists yet. +8. No Slack routes exist yet. +9. No `/healthz` exists yet. + +Verification commands: + +```sh +pnpm typecheck +pnpm test +``` + +`pnpm test` requires permission to bind localhost for `test/integration/http.test.ts`; without that, sandboxed runs can fail with `listen EPERM`. diff --git a/docs/symphoney-legacy/SPEC.md b/docs/symphoney-legacy/SPEC.md new file mode 100644 index 0000000000..adb8bc59a8 --- /dev/null +++ b/docs/symphoney-legacy/SPEC.md @@ -0,0 +1,2169 @@ +# Symphony Service Specification + +Status: Draft v1 (language-agnostic) + +Purpose: Define a service that orchestrates coding agents to get project work done. + +## Normative Language + +The key words `MUST`, `MUST NOT`, `REQUIRED`, `SHOULD`, `SHOULD NOT`, `RECOMMENDED`, `MAY`, and +`OPTIONAL` in this document are to be interpreted as described in RFC 2119. + +`Implementation-defined` means the behavior is part of the implementation contract, but this +specification does not prescribe one universal policy. Implementations MUST document the selected +behavior. + +## 1. Problem Statement + +Symphony is a long-running automation service that continuously reads work from an issue tracker +(Linear in this specification version), creates an isolated workspace for each issue, and runs a +coding agent session for that issue inside the workspace. + +The service solves four operational problems: + +- It turns issue execution into a repeatable daemon workflow instead of manual scripts. +- It isolates agent execution in per-issue workspaces so agent commands run only inside per-issue + workspace directories. +- It keeps the workflow policy in-repo (`WORKFLOW.md`) so teams version the agent prompt and runtime + settings with their code. +- It provides enough observability to operate and debug multiple concurrent agent runs. + +Implementations are expected to document their trust and safety posture explicitly. 
This +specification does not require a single approval, sandbox, or operator-confirmation policy; some +implementations target trusted environments with a high-trust configuration, while others require +stricter approvals or sandboxing. + +Important boundary: + +- Symphony is a scheduler/runner and tracker reader. +- Ticket writes (state transitions, comments, PR links) are typically performed by the coding agent + using tools available in the workflow/runtime environment. +- A successful run can end at a workflow-defined handoff state (for example `Human Review`), not + necessarily `Done`. + +## 2. Goals and Non-Goals + +### 2.1 Goals + +- Poll the issue tracker on a fixed cadence and dispatch work with bounded concurrency. +- Maintain a single authoritative orchestrator state for dispatch, retries, and reconciliation. +- Create deterministic per-issue workspaces and preserve them across runs. +- Stop active runs when issue state changes make them ineligible. +- Recover from transient failures with exponential backoff. +- Load runtime behavior from a repository-owned `WORKFLOW.md` contract. +- Expose operator-visible observability (at minimum structured logs). +- Support tracker/filesystem-driven restart recovery without requiring a persistent database; exact + in-memory scheduler state is not restored. + +### 2.2 Non-Goals + +- Rich web UI or multi-tenant control plane. +- Prescribing a specific dashboard or terminal UI implementation. +- General-purpose workflow engine or distributed job scheduler. +- Built-in business logic for how to edit tickets, PRs, or comments. (That logic lives in the + workflow prompt and agent tooling.) +- Mandating strong sandbox controls beyond what the coding agent and host OS provide. +- Mandating a single default approval, sandbox, or operator-confirmation posture for all + implementations. + +## 3. System Overview + +### 3.1 Main Components + +1. `Workflow Loader` + - Reads `WORKFLOW.md`. + - Parses YAML front matter and prompt body. + - Returns `{config, prompt_template}`. + +2. `Config Layer` + - Exposes typed getters for workflow config values. + - Applies defaults and environment variable indirection. + - Performs validation used by the orchestrator before dispatch. + +3. `Issue Tracker Client` + - Fetches candidate issues in active states. + - Fetches current states for specific issue IDs (reconciliation). + - Fetches terminal-state issues during startup cleanup. + - Normalizes tracker payloads into a stable issue model. + +4. `Orchestrator` + - Owns the poll tick. + - Owns the in-memory runtime state. + - Decides which issues to dispatch, retry, stop, or release. + - Tracks session metrics and retry queue state. + +5. `Workspace Manager` + - Maps issue identifiers to workspace paths. + - Ensures per-issue workspace directories exist. + - Runs workspace lifecycle hooks. + - Cleans workspaces for terminal issues. + +6. `Agent Runner` + - Creates workspace. + - Builds prompt from issue + workflow template. + - Launches the coding agent app-server client. + - Streams agent updates back to the orchestrator. + +7. `Status Surface` (OPTIONAL) + - Presents human-readable runtime status (for example terminal output, dashboard, or other + operator-facing view). + +8. `Logging` + - Emits structured runtime logs to one or more configured sinks. + +### 3.2 Abstraction Levels + +Symphony is easiest to port when kept in these layers: + +1. `Policy Layer` (repo-defined) + - `WORKFLOW.md` prompt body. 
+ - Team-specific rules for ticket handling, validation, and handoff. + +2. `Configuration Layer` (typed getters) + - Parses front matter into typed runtime settings. + - Handles defaults, environment tokens, and path normalization. + +3. `Coordination Layer` (orchestrator) + - Polling loop, issue eligibility, concurrency, retries, reconciliation. + +4. `Execution Layer` (workspace + agent subprocess) + - Filesystem lifecycle, workspace preparation, coding-agent protocol. + +5. `Integration Layer` (Linear adapter) + - API calls and normalization for tracker data. + +6. `Observability Layer` (logs + OPTIONAL status surface) + - Operator visibility into orchestrator and agent behavior. + +### 3.3 External Dependencies + +- Issue tracker API (Linear for `tracker.kind: linear` in this specification version). +- Local filesystem for workspaces and logs. +- OPTIONAL workspace population tooling (for example Git CLI, if used). +- Coding-agent executable that supports the targeted Codex app-server mode. +- Host environment authentication for the issue tracker and coding agent. + +## 4. Core Domain Model + +### 4.1 Entities + +#### 4.1.1 Issue + +Normalized issue record used by orchestration, prompt rendering, and observability output. + +Fields: + +- `id` (string) + - Stable tracker-internal ID. +- `identifier` (string) + - Human-readable ticket key (example: `ABC-123`). +- `title` (string) +- `description` (string or null) +- `priority` (integer or null) + - Lower numbers are higher priority in dispatch sorting. +- `state` (string) + - Current tracker state name. +- `branch_name` (string or null) + - Tracker-provided branch metadata if available. +- `url` (string or null) +- `labels` (list of strings) + - Normalized to lowercase. +- `blocked_by` (list of blocker refs) + - Each blocker ref contains: + - `id` (string or null) + - `identifier` (string or null) + - `state` (string or null) +- `created_at` (timestamp or null) +- `updated_at` (timestamp or null) + +#### 4.1.2 Workflow Definition + +Parsed `WORKFLOW.md` payload: + +- `config` (map) + - YAML front matter root object. +- `prompt_template` (string) + - Markdown body after front matter, trimmed. + +#### 4.1.3 Service Config (Typed View) + +Typed runtime values derived from `WorkflowDefinition.config` plus environment resolution. + +Examples: + +- poll interval +- workspace root +- active and terminal issue states +- concurrency limits +- coding-agent executable/args/timeouts +- workspace hooks + +#### 4.1.4 Workspace + +Filesystem workspace assigned to one issue identifier. + +Fields (logical): + +- `path` (absolute workspace path) +- `workspace_key` (sanitized issue identifier) +- `created_now` (boolean, used to gate `after_create` hook) + +#### 4.1.5 Run Attempt + +One execution attempt for one issue. + +Fields (logical): + +- `issue_id` +- `issue_identifier` +- `attempt` (integer or null, `null` for first run, `>=1` for retries/continuation) +- `workspace_path` +- `started_at` +- `status` +- `error` (OPTIONAL) + +#### 4.1.6 Live Session (Agent Session Metadata) + +State tracked while a coding-agent subprocess is running. 
+ +Fields: + +- `session_id` (string, `-`) +- `thread_id` (string) +- `turn_id` (string) +- `codex_app_server_pid` (string or null) +- `last_codex_event` (string/enum or null) +- `last_codex_timestamp` (timestamp or null) +- `last_codex_message` (summarized payload) +- `codex_input_tokens` (integer) +- `codex_output_tokens` (integer) +- `codex_total_tokens` (integer) +- `last_reported_input_tokens` (integer) +- `last_reported_output_tokens` (integer) +- `last_reported_total_tokens` (integer) +- `turn_count` (integer) + - Number of coding-agent turns started within the current worker lifetime. + +#### 4.1.7 Retry Entry + +Scheduled retry state for an issue. + +Fields: + +- `issue_id` +- `identifier` (best-effort human ID for status surfaces/logs) +- `attempt` (integer, 1-based for retry queue) +- `due_at_ms` (monotonic clock timestamp) +- `timer_handle` (runtime-specific timer reference) +- `error` (string or null) + +#### 4.1.8 Orchestrator Runtime State + +Single authoritative in-memory state owned by the orchestrator. + +Fields: + +- `poll_interval_ms` (current effective poll interval) +- `max_concurrent_agents` (current effective global concurrency limit) +- `running` (map `issue_id -> running entry`) +- `claimed` (set of issue IDs reserved/running/retrying) +- `retry_attempts` (map `issue_id -> RetryEntry`) +- `completed` (set of issue IDs; bookkeeping only, not dispatch gating) +- `codex_totals` (aggregate tokens + runtime seconds) +- `codex_rate_limits` (latest rate-limit snapshot from agent events) + +### 4.2 Stable Identifiers and Normalization Rules + +- `Issue ID` + - Use for tracker lookups and internal map keys. +- `Issue Identifier` + - Use for human-readable logs and workspace naming. +- `Workspace Key` + - Derive from `issue.identifier` by replacing any character not in `[A-Za-z0-9._-]` with `_`. + - Use the sanitized value for the workspace directory name. +- `Normalized Issue State` + - Compare states after `lowercase`. +- `Session ID` + - Compose from coding-agent `thread_id` and `turn_id` as `-`. + +## 5. Workflow Specification (Repository Contract) + +### 5.1 File Discovery and Path Resolution + +Workflow file path precedence: + +1. Explicit application/runtime setting (set by CLI startup path). +2. Default: `WORKFLOW.md` in the current process working directory. + +Loader behavior: + +- If the file cannot be read, return `missing_workflow_file` error. +- The workflow file is expected to be repository-owned and version-controlled. + +### 5.2 File Format + +`WORKFLOW.md` is a Markdown file with OPTIONAL YAML front matter. + +Design note: + +- `WORKFLOW.md` SHOULD be self-contained enough to describe and run different workflows (prompt, + runtime settings, hooks, and tracker selection/config) without requiring out-of-band + service-specific configuration. + +Parsing rules: + +- If file starts with `---`, parse lines until the next `---` as YAML front matter. +- Remaining lines become the prompt body. +- If front matter is absent, treat the entire file as prompt body and use an empty config map. +- YAML front matter MUST decode to a map/object; non-map YAML is an error. +- Prompt body is trimmed before use. + +Returned workflow object: + +- `config`: front matter root object (not nested under a `config` key). +- `prompt_template`: trimmed Markdown body. + +### 5.3 Front Matter Schema + +Top-level keys: + +- `tracker` +- `polling` +- `workspace` +- `hooks` +- `agent` +- `codex` + +Unknown keys SHOULD be ignored for forward compatibility. 
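+Non-normative illustration: a TypeScript sketch of this front matter as a typed view. Every group is optional in `WORKFLOW.md` and falls back to the defaults documented in 5.3.1–5.3.6 and 6.4; the interface name is an assumption, not part of the spec.
+
+```ts
+// Sketch of the parsed front matter as a typed config view (non-normative).
+interface WorkflowFrontMatter {
+  tracker?: {
+    kind?: "linear";
+    endpoint?: string;          // default https://api.linear.app/graphql for linear
+    api_key?: string;           // literal token or "$LINEAR_API_KEY"-style indirection
+    project_slug?: string;
+    active_states?: string[];   // default ["Todo", "In Progress"]
+    terminal_states?: string[]; // default ["Closed", "Cancelled", "Canceled", "Duplicate", "Done"]
+  };
+  polling?: { interval_ms?: number };          // default 30000
+  workspace?: { root?: string };               // normalized to an absolute path
+  hooks?: {
+    after_create?: string;
+    before_run?: string;
+    after_run?: string;
+    before_remove?: string;
+    timeout_ms?: number;                       // default 60000
+  };
+  agent?: {
+    max_concurrent_agents?: number;            // default 10
+    max_turns?: number;                        // default 20
+    max_retry_backoff_ms?: number;             // default 300000
+    max_concurrent_agents_by_state?: Record<string, number>;
+  };
+  codex?: {
+    command?: string;                          // default "codex app-server"
+    approval_policy?: string;                  // pass-through Codex value
+    thread_sandbox?: string;                   // pass-through Codex value
+    turn_sandbox_policy?: string;              // pass-through Codex value
+    turn_timeout_ms?: number;                  // default 3600000
+    read_timeout_ms?: number;                  // default 5000
+    stall_timeout_ms?: number;                 // default 300000
+  };
+}
+```
+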
+ +Note: + +- The workflow front matter is extensible. Extensions MAY define additional top-level keys without + changing the core schema above. +- Extensions SHOULD document their field schema, defaults, validation rules, and whether changes + apply dynamically or require restart. + +#### 5.3.1 `tracker` (object) + +Fields: + +- `kind` (string) + - REQUIRED for dispatch. + - Current supported value: `linear` +- `endpoint` (string) + - Default for `tracker.kind == "linear"`: `https://api.linear.app/graphql` +- `api_key` (string) + - MAY be a literal token or `$VAR_NAME`. + - Canonical environment variable for `tracker.kind == "linear"`: `LINEAR_API_KEY`. + - If `$VAR_NAME` resolves to an empty string, treat the key as missing. +- `project_slug` (string) + - REQUIRED for dispatch when `tracker.kind == "linear"`. +- `active_states` (list of strings) + - Default: `Todo`, `In Progress` +- `terminal_states` (list of strings) + - Default: `Closed`, `Cancelled`, `Canceled`, `Duplicate`, `Done` + +#### 5.3.2 `polling` (object) + +Fields: + +- `interval_ms` (integer) + - Default: `30000` + - Changes SHOULD be re-applied at runtime and affect future tick scheduling without restart. + +#### 5.3.3 `workspace` (object) + +Fields: + +- `root` (path string or `$VAR`) + - Default: `/symphony_workspaces` + - `~` is expanded. + - Relative paths are resolved relative to the directory containing `WORKFLOW.md`. + - The effective workspace root is normalized to an absolute path before use. + +#### 5.3.4 `hooks` (object) + +Fields: + +- `after_create` (multiline shell script string, OPTIONAL) + - Runs only when a workspace directory is newly created. + - Failure aborts workspace creation. +- `before_run` (multiline shell script string, OPTIONAL) + - Runs before each agent attempt after workspace preparation and before launching the coding + agent. + - Failure aborts the current attempt. +- `after_run` (multiline shell script string, OPTIONAL) + - Runs after each agent attempt (success, failure, timeout, or cancellation) once the workspace + exists. + - Failure is logged but ignored. +- `before_remove` (multiline shell script string, OPTIONAL) + - Runs before workspace deletion if the directory exists. + - Failure is logged but ignored; cleanup still proceeds. +- `timeout_ms` (integer, OPTIONAL) + - Default: `60000` + - Applies to all workspace hooks. + - Invalid values fail configuration validation. + - Changes SHOULD be re-applied at runtime for future hook executions. + +#### 5.3.5 `agent` (object) + +Fields: + +- `max_concurrent_agents` (integer) + - Default: `10` + - Changes SHOULD be re-applied at runtime and affect subsequent dispatch decisions. +- `max_turns` (positive integer) + - Default: `20` + - Limits the number of coding-agent turns within one worker session. + - Invalid values fail configuration validation. +- `max_retry_backoff_ms` (integer) + - Default: `300000` (5 minutes) + - Changes SHOULD be re-applied at runtime and affect future retry scheduling. +- `max_concurrent_agents_by_state` (map `state_name -> positive integer`) + - Default: empty map. + - State keys are normalized (`lowercase`) for lookup. + - Invalid entries (non-positive or non-numeric) are ignored. + +#### 5.3.6 `codex` (object) + +Fields: + +For Codex-owned config values such as `approval_policy`, `thread_sandbox`, and +`turn_sandbox_policy`, supported values are defined by the targeted Codex app-server version. 
+Implementors SHOULD treat them as pass-through Codex config values rather than relying on a +hand-maintained enum in this spec. To inspect the installed Codex schema, run +`codex app-server generate-json-schema --out ` and inspect the relevant definitions referenced +by `v2/ThreadStartParams.json` and `v2/TurnStartParams.json`. Implementations MAY validate these +fields locally if they want stricter startup checks. + +- `command` (string shell command) + - Default: `codex app-server` + - The runtime launches this command via `bash -lc` in the workspace directory. + - The launched process MUST speak a compatible app-server protocol over stdio. +- `approval_policy` (Codex `AskForApproval` value) + - Default: implementation-defined. +- `thread_sandbox` (Codex `SandboxMode` value) + - Default: implementation-defined. +- `turn_sandbox_policy` (Codex `SandboxPolicy` value) + - Default: implementation-defined. +- `turn_timeout_ms` (integer) + - Default: `3600000` (1 hour) +- `read_timeout_ms` (integer) + - Default: `5000` +- `stall_timeout_ms` (integer) + - Default: `300000` (5 minutes) + - If `<= 0`, stall detection is disabled. + +### 5.4 Prompt Template Contract + +The Markdown body of `WORKFLOW.md` is the per-issue prompt template. + +Rendering requirements: + +- Use a strict template engine (Liquid-compatible semantics are sufficient). +- Unknown variables MUST fail rendering. +- Unknown filters MUST fail rendering. + +Template input variables: + +- `issue` (object) + - Includes all normalized issue fields, including labels and blockers. +- `attempt` (integer or null) + - `null`/absent on first attempt. + - Integer on retry or continuation run. + +Fallback prompt behavior: + +- If the workflow prompt body is empty, the runtime MAY use a minimal default prompt + (`You are working on an issue from Linear.`). +- Workflow file read/parse failures are configuration/validation errors and SHOULD NOT silently fall + back to a prompt. + +### 5.5 Workflow Validation and Error Surface + +Error classes: + +- `missing_workflow_file` +- `workflow_parse_error` +- `workflow_front_matter_not_a_map` +- `template_parse_error` (during prompt rendering) +- `template_render_error` (unknown variable/filter, invalid interpolation) + +Dispatch gating behavior: + +- Workflow file read/YAML errors block new dispatches until fixed. +- Template errors fail only the affected run attempt. + +## 6. Configuration Specification + +### 6.1 Configuration Resolution Pipeline + +Configuration is resolved in this order: + +1. Select the workflow file path (explicit runtime setting, otherwise cwd default). +2. Parse YAML front matter into a raw config map. +3. Apply built-in defaults for missing OPTIONAL fields. +4. Resolve `$VAR_NAME` indirection only for config values that explicitly contain `$VAR_NAME`. +5. Coerce and validate typed values. + +Environment variables do not globally override YAML values. They are used only when a config value +explicitly references them. + +Value coercion semantics: + +- Path/command fields support: + - `~` home expansion + - `$VAR` expansion for env-backed path values + - Apply expansion only to values intended to be local filesystem paths; do not rewrite URIs or + arbitrary shell command strings. +- Relative `workspace.root` values resolve relative to the directory containing the selected + `WORKFLOW.md`. + +### 6.2 Dynamic Reload Semantics + +Dynamic reload is REQUIRED: + +- The software MUST detect `WORKFLOW.md` changes. 
+- On change, it MUST re-read and re-apply workflow config and prompt template without restart. +- The software MUST attempt to adjust live behavior to the new config (for example polling + cadence, concurrency limits, active/terminal states, codex settings, workspace paths/hooks, and + prompt content for future runs). +- Reloaded config applies to future dispatch, retry scheduling, reconciliation decisions, hook + execution, and agent launches. +- Implementations are not REQUIRED to restart in-flight agent sessions automatically when config + changes. +- Extensions that manage their own listeners/resources (for example an HTTP server port change) MAY + require restart unless the implementation explicitly supports live rebind. +- Implementations SHOULD also re-validate/reload defensively during runtime operations (for example + before dispatch) in case filesystem watch events are missed. +- Invalid reloads MUST NOT crash the service; keep operating with the last known good effective + configuration and emit an operator-visible error. + +### 6.3 Dispatch Preflight Validation + +This validation is a scheduler preflight run before attempting to dispatch new work. It validates +the workflow/config needed to poll and launch workers, not a full audit of all possible workflow +behavior. + +Startup validation: + +- Validate configuration before starting the scheduling loop. +- If startup validation fails, fail startup and emit an operator-visible error. + +Per-tick dispatch validation: + +- Re-validate before each dispatch cycle. +- If validation fails, skip dispatch for that tick, keep reconciliation active, and emit an + operator-visible error. + +Validation checks: + +- Workflow file can be loaded and parsed. +- `tracker.kind` is present and supported. +- `tracker.api_key` is present after `$` resolution. +- `tracker.project_slug` is present when REQUIRED by the selected tracker kind. +- `codex.command` is present and non-empty. + +### 6.4 Core Config Fields Summary (Cheat Sheet) + +This section is intentionally redundant so a coding agent can implement the config layer quickly. +Extension fields are documented in the extension section that defines them. Core conformance does +not require recognizing or validating extension fields unless that extension is implemented. 
+ +- `tracker.kind`: string, REQUIRED, currently `linear` +- `tracker.endpoint`: string, default `https://api.linear.app/graphql` when `tracker.kind=linear` +- `tracker.api_key`: string or `$VAR`, canonical env `LINEAR_API_KEY` when `tracker.kind=linear` +- `tracker.project_slug`: string, REQUIRED when `tracker.kind=linear` +- `tracker.active_states`: list of strings, default `["Todo", "In Progress"]` +- `tracker.terminal_states`: list of strings, default `["Closed", "Cancelled", "Canceled", "Duplicate", "Done"]` +- `polling.interval_ms`: integer, default `30000` +- `workspace.root`: path resolved to absolute, default `/symphony_workspaces` +- `hooks.after_create`: shell script or null +- `hooks.before_run`: shell script or null +- `hooks.after_run`: shell script or null +- `hooks.before_remove`: shell script or null +- `hooks.timeout_ms`: integer, default `60000` +- `agent.max_concurrent_agents`: integer, default `10` +- `agent.max_turns`: integer, default `20` +- `agent.max_retry_backoff_ms`: integer, default `300000` (5m) +- `agent.max_concurrent_agents_by_state`: map of positive integers, default `{}` +- `codex.command`: shell command string, default `codex app-server` +- `codex.approval_policy`: Codex `AskForApproval` value, default implementation-defined +- `codex.thread_sandbox`: Codex `SandboxMode` value, default implementation-defined +- `codex.turn_sandbox_policy`: Codex `SandboxPolicy` value, default implementation-defined +- `codex.turn_timeout_ms`: integer, default `3600000` +- `codex.read_timeout_ms`: integer, default `5000` +- `codex.stall_timeout_ms`: integer, default `300000` + +## 7. Orchestration State Machine + +The orchestrator is the only component that mutates scheduling state. All worker outcomes are +reported back to it and converted into explicit state transitions. + +### 7.1 Issue Orchestration States + +This is not the same as tracker states (`Todo`, `In Progress`, etc.). This is the service's internal +claim state. + +1. `Unclaimed` + - Issue is not running and has no retry scheduled. + +2. `Claimed` + - Orchestrator has reserved the issue to prevent duplicate dispatch. + - In practice, claimed issues are either `Running` or `RetryQueued`. + +3. `Running` + - Worker task exists and the issue is tracked in `running` map. + +4. `RetryQueued` + - Worker is not running, but a retry timer exists in `retry_attempts`. + +5. `Released` + - Claim removed because issue is terminal, non-active, missing, or retry path completed without + re-dispatch. + +Important nuance: + +- A successful worker exit does not mean the issue is done forever. +- The worker MAY continue through multiple back-to-back coding-agent turns before it exits. +- After each normal turn completion, the worker re-checks the tracker issue state. +- If the issue is still in an active state, the worker SHOULD start another turn on the same live + coding-agent thread in the same workspace, up to `agent.max_turns`. +- The first turn SHOULD use the full rendered task prompt. +- Continuation turns SHOULD send only continuation guidance to the existing thread, not resend the + original task prompt that is already present in thread history. +- Once the worker exits normally, the orchestrator still schedules a short continuation retry + (about 1 second) so it can re-check whether the issue remains active and needs another worker + session. + +### 7.2 Run Attempt Lifecycle + +A run attempt transitions through these phases: + +1. `PreparingWorkspace` +2. `BuildingPrompt` +3. `LaunchingAgentProcess` +4. 
`InitializingSession` +5. `StreamingTurn` +6. `Finishing` +7. `Succeeded` +8. `Failed` +9. `TimedOut` +10. `Stalled` +11. `CanceledByReconciliation` + +Distinct terminal reasons are important because retry logic and logs differ. + +### 7.3 Transition Triggers + +- `Poll Tick` + - Reconcile active runs. + - Validate config. + - Fetch candidate issues. + - Dispatch until slots are exhausted. + +- `Worker Exit (normal)` + - Remove running entry. + - Update aggregate runtime totals. + - Schedule continuation retry (attempt `1`) after the worker exhausts or finishes its in-process + turn loop. + +- `Worker Exit (abnormal)` + - Remove running entry. + - Update aggregate runtime totals. + - Schedule exponential-backoff retry. + +- `Codex Update Event` + - Update live session fields, token counters, and rate limits. + +- `Retry Timer Fired` + - Re-fetch active candidates and attempt re-dispatch, or release claim if no longer eligible. + +- `Reconciliation State Refresh` + - Stop runs whose issue states are terminal or no longer active. + +- `Stall Timeout` + - Kill worker and schedule retry. + +### 7.4 Idempotency and Recovery Rules + +- The orchestrator serializes state mutations through one authority to avoid duplicate dispatch. +- `claimed` and `running` checks are REQUIRED before launching any worker. +- Reconciliation runs before dispatch on every tick. +- Restart recovery is tracker-driven and filesystem-driven (without a durable orchestrator DB). +- Startup terminal cleanup removes stale workspaces for issues already in terminal states. + +## 8. Polling, Scheduling, and Reconciliation + +### 8.1 Poll Loop + +At startup, the service validates config, performs startup cleanup, schedules an immediate tick, and +then repeats every `polling.interval_ms`. + +The effective poll interval SHOULD be updated when workflow config changes are re-applied. + +Tick sequence: + +1. Reconcile running issues. +2. Run dispatch preflight validation. +3. Fetch candidate issues from tracker using active states. +4. Sort issues by dispatch priority. +5. Dispatch eligible issues while slots remain. +6. Notify observability/status consumers of state changes. + +If per-tick validation fails, dispatch is skipped for that tick, but reconciliation still happens +first. + +### 8.2 Candidate Selection Rules + +An issue is dispatch-eligible only if all are true: + +- It has `id`, `identifier`, `title`, and `state`. +- Its state is in `active_states` and not in `terminal_states`. +- It is not already in `running`. +- It is not already in `claimed`. +- Global concurrency slots are available. +- Per-state concurrency slots are available. +- Blocker rule for `Todo` state passes: + - If the issue state is `Todo`, do not dispatch when any blocker is non-terminal. + +Sorting order (stable intent): + +1. `priority` ascending (1..4 are preferred; null/unknown sorts last) +2. `created_at` oldest first +3. `identifier` lexicographic tie-breaker + +### 8.3 Concurrency Control + +Global limit: + +- `available_slots = max(max_concurrent_agents - running_count, 0)` + +Per-state limit: + +- `max_concurrent_agents_by_state[state]` if present (state key normalized) +- otherwise fallback to global limit + +The runtime counts issues by their current tracked state in the `running` map. + +### 8.4 Retry and Backoff + +Retry entry creation: + +- Cancel any existing retry timer for the same issue. +- Store `attempt`, `identifier`, `error`, `due_at_ms`, and new timer handle. 
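+Non-normative illustration: a TypeScript sketch of the retry-entry bookkeeping above, using the backoff formula given just below (a short fixed delay after a clean exit, exponential backoff for failures). The map, function, and callback names are assumptions, not spec requirements.
+
+```ts
+// Illustrative retry scheduling (non-normative).
+interface RetryEntry {
+  issueId: string;
+  identifier: string;
+  attempt: number;          // 1-based for the retry queue
+  dueAtMs: number;          // monotonic clock timestamp
+  timer: ReturnType<typeof setTimeout>;
+  error: string | null;
+}
+
+const retryAttempts = new Map<string, RetryEntry>();
+
+function scheduleRetry(
+  issueId: string,
+  identifier: string,
+  attempt: number,
+  error: string | null,
+  opts: { cleanExit: boolean; maxRetryBackoffMs: number },
+  onDue: (issueId: string) => void,
+): void {
+  // Cancel any existing retry timer for the same issue.
+  const existing = retryAttempts.get(issueId);
+  if (existing) clearTimeout(existing.timer);
+
+  const delayMs = opts.cleanExit
+    ? 1_000 // short continuation retry after a normal worker exit
+    : Math.min(10_000 * 2 ** (attempt - 1), opts.maxRetryBackoffMs);
+
+  const timer = setTimeout(() => onDue(issueId), delayMs);
+  retryAttempts.set(issueId, {
+    issueId,
+    identifier,
+    attempt,
+    dueAtMs: performance.now() + delayMs, // performance.now() is monotonic
+    timer,
+    error,
+  });
+}
+```
+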
+ +Backoff formula: + +- Normal continuation retries after a clean worker exit use a short fixed delay of `1000` ms. +- Failure-driven retries use `delay = min(10000 * 2^(attempt - 1), agent.max_retry_backoff_ms)`. +- Power is capped by the configured max retry backoff (default `300000` / 5m). + +Retry handling behavior: + +1. Fetch active candidate issues (not all issues). +2. Find the specific issue by `issue_id`. +3. If not found, release claim. +4. If found and still candidate-eligible: + - Dispatch if slots are available. + - Otherwise requeue with error `no available orchestrator slots`. +5. If found but no longer active, release claim. + +Note: + +- Terminal-state workspace cleanup is handled by startup cleanup and active-run reconciliation + (including terminal transitions for currently running issues). +- Retry handling mainly operates on active candidates and releases claims when the issue is absent, + rather than performing terminal cleanup itself. + +### 8.5 Active Run Reconciliation + +Reconciliation runs every tick and has two parts. + +Part A: Stall detection + +- For each running issue, compute `elapsed_ms` since: + - `last_codex_timestamp` if any event has been seen, else + - `started_at` +- If `elapsed_ms > codex.stall_timeout_ms`, terminate the worker and queue a retry. +- If `stall_timeout_ms <= 0`, skip stall detection entirely. + +Part B: Tracker state refresh + +- Fetch current issue states for all running issue IDs. +- For each running issue: + - If tracker state is terminal: terminate worker and clean workspace. + - If tracker state is still active: update the in-memory issue snapshot. + - If tracker state is neither active nor terminal: terminate worker without workspace cleanup. +- If state refresh fails, keep workers running and try again on the next tick. + +### 8.6 Startup Terminal Workspace Cleanup + +When the service starts: + +1. Query tracker for issues in terminal states. +2. For each returned issue identifier, remove the corresponding workspace directory. +3. If the terminal-issues fetch fails, log a warning and continue startup. + +This prevents stale terminal workspaces from accumulating after restarts. + +## 9. Workspace Management and Safety + +### 9.1 Workspace Layout + +Workspace root: + +- `workspace.root` (normalized absolute path) + +Per-issue workspace path: + +- `/` + +Workspace persistence: + +- Workspaces are reused across runs for the same issue. +- Successful runs do not auto-delete workspaces. + +### 9.2 Workspace Creation and Reuse + +Input: `issue.identifier` + +Algorithm summary: + +1. Sanitize identifier to `workspace_key`. +2. Compute workspace path under workspace root. +3. Ensure the workspace path exists as a directory. +4. Mark `created_now=true` only if the directory was created during this call; otherwise + `created_now=false`. +5. If `created_now=true`, run `after_create` hook if configured. + +Notes: + +- This section does not assume any specific repository/VCS workflow. +- Workspace preparation beyond directory creation (for example dependency bootstrap, checkout/sync, + code generation) is implementation-defined and is typically handled via hooks. + +### 9.3 OPTIONAL Workspace Population (Implementation-Defined) + +The spec does not require any built-in VCS or repository bootstrap behavior. + +Implementations MAY populate or synchronize the workspace using implementation-defined logic and/or +hooks (for example `after_create` and/or `before_run`). 
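+Non-normative illustration: one concrete reading of the creation/reuse algorithm in 9.2, with the identifier sanitization rule from 4.2. The helper names and the injected `runHook` callback are assumptions; hook execution details are governed by 9.4.
+
+```ts
+import { mkdir, stat } from "node:fs/promises";
+import { join, resolve } from "node:path";
+
+// Sanitize per 4.2: any character outside [A-Za-z0-9._-] becomes "_".
+function workspaceKey(identifier: string): string {
+  return identifier.replace(/[^A-Za-z0-9._-]/g, "_");
+}
+
+// Illustrative ensure-workspace step (non-normative). runHook is assumed to
+// execute the configured shell hook with the workspace as cwd and the
+// configured hook timeout.
+async function ensureWorkspace(
+  root: string,
+  identifier: string,
+  afterCreateHook: string | null,
+  runHook: (script: string, cwd: string) => Promise<void>,
+): Promise<{ path: string; createdNow: boolean }> {
+  const path = join(resolve(root), workspaceKey(identifier));
+  let createdNow = false;
+  try {
+    await stat(path); // workspaces are reused across runs for the same issue
+  } catch {
+    await mkdir(path, { recursive: true });
+    createdNow = true;
+  }
+  // after_create runs only when the directory was created by this call.
+  if (createdNow && afterCreateHook) await runHook(afterCreateHook, path);
+  return { path, createdNow };
+}
+```
+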
+ +Failure handling: + +- Workspace population/synchronization failures return an error for the current attempt. +- If failure happens while creating a brand-new workspace, implementations MAY remove the partially + prepared directory. +- Reused workspaces SHOULD NOT be destructively reset on population failure unless that policy is + explicitly chosen and documented. + +### 9.4 Workspace Hooks + +Supported hooks: + +- `hooks.after_create` +- `hooks.before_run` +- `hooks.after_run` +- `hooks.before_remove` + +Execution contract: + +- Execute in a local shell context appropriate to the host OS, with the workspace directory as + `cwd`. +- On POSIX systems, `sh -lc