diff --git a/demo/circuit-breaker-snapshot.json b/demo/circuit-breaker-snapshot.json new file mode 100644 index 0000000000..10fab2c815 --- /dev/null +++ b/demo/circuit-breaker-snapshot.json @@ -0,0 +1,53 @@ +{ + "generatedAt": "2026-05-14T13:16:13.824Z", + "source": "tools/bus/export-cb-snapshot.ts", + "busDir": "/tmp/zeta-bus", + "envelopeCount": 73, + "entries": [ + { + "model": "Otto", + "harness": "Claude Code", + "state": "CLOSED", + "consecutiveFailures": 0, + "threshold": 5, + "lastCheck": "2026-05-14T13:14:21.460Z", + "note": "Active work detected — normal operation" + }, + { + "model": "Alexa", + "harness": "Kiro / Qwen", + "state": "CLOSED", + "consecutiveFailures": 0, + "threshold": 5, + "lastCheck": "2026-05-14T13:16:13.824Z", + "note": "No recent bus activity — assuming healthy" + }, + { + "model": "Lior", + "harness": "Gemini", + "state": "CLOSED", + "consecutiveFailures": 0, + "threshold": 5, + "lastCheck": "2026-05-14T13:16:13.824Z", + "note": "No recent bus activity — assuming healthy" + }, + { + "model": "Vera", + "harness": "Codex / GPT", + "state": "CLOSED", + "consecutiveFailures": 0, + "threshold": 5, + "lastCheck": "2026-05-13T22:38:20.254Z", + "note": "Active work detected — normal operation" + }, + { + "model": "Riven", + "harness": "Grok", + "state": "CLOSED", + "consecutiveFailures": 0, + "threshold": 5, + "lastCheck": "2026-05-14T13:16:13.824Z", + "note": "No recent bus activity — assuming healthy" + } + ] +} diff --git a/demo/index.html b/demo/index.html index eed5ae7a15..b292f7d8f9 100644 --- a/demo/index.html +++ b/demo/index.html @@ -1603,8 +1603,22 @@

How Circuit Breakers Work Here

return li; } - function renderCircuitBreakerTab() { - const entries = buildCbMockData(); + async function renderCircuitBreakerTab() { + // Try live snapshot first; fall back to mock data when absent or on error. + // Snapshot generated by: bun tools/bus/export-cb-snapshot.ts + let entries; + try { + const resp = await fetch('./circuit-breaker-snapshot.json', { cache: 'no-cache' }); + if (resp.ok) { + const snap = await resp.json(); + if (Array.isArray(snap.entries) && snap.entries.length > 0) { + entries = snap.entries; + } + } + } catch { + // network or parse error — fall through to mock + } + if (!entries) entries = buildCbMockData(); const closed = entries.filter(e => e.state === 'CLOSED').length; const open = entries.filter(e => e.state === 'OPEN').length; diff --git a/docs/backlog/P1/B-0494-circuit-breaker-live-bus-snapshot-2026-05-14.md b/docs/backlog/P1/B-0494-circuit-breaker-live-bus-snapshot-2026-05-14.md new file mode 100644 index 0000000000..ef3c73fc8e --- /dev/null +++ b/docs/backlog/P1/B-0494-circuit-breaker-live-bus-snapshot-2026-05-14.md @@ -0,0 +1,93 @@ +--- +id: B-0494 +priority: P1 +status: open +title: "Circuit breaker viz — slice-2: wire renderCircuitBreakerTab() to live bus snapshot" +type: feature +origin: B-0435 slice-2 (noted in PR #3133 body) +created: 2026-05-14 +last_updated: 2026-05-14 +depends_on: [B-0435] +composes_with: + - B-0401 + - B-0435 + - docs/backlog/P1/B-0213-broadcast-bus-production-hardening-2026-05-13.md +tags: [demo, circuit-breaker, alignment-ui, github-pages, html, js, bus] +--- + +# B-0494 — Circuit breaker viz: slice-2 live bus snapshot + +## What + +B-0435 slice-1 (PR #3133, merged 2026-05-14) ships the circuit breaker panel with +**static mock data**. Slice-2 wires `renderCircuitBreakerTab()` to read a committed +JSON snapshot generated from the live `/tmp/zeta-bus/` envelopes, so the panel +reflects actual agent activity rather than hardcoded values. 
+ +## Approach + +Three-part change: + +### Part A — `tools/bus/export-cb-snapshot.ts` + +A TypeScript script (run via `bun tools/bus/export-cb-snapshot.ts`) that: + +1. Reads all non-expired envelopes from `/tmp/zeta-bus/` +2. Groups by agent identity (`from` field, normalising surface-tagged variants + back to identity level, e.g. `otto-cli` → `otto`) +3. Derives circuit-breaker state per agent: + - `CLOSED` — the newest envelope is not an idle heartbeat (e.g. recent claim/work-assignment activity), or the agent has no envelopes at all + - `HALF_OPEN` — 1–4 consecutive idle heartbeats, counted back from the newest envelope + - `OPEN` — ≥5 consecutive idle heartbeats (matches `threshold: 5` in the UI) +4. Writes output to `demo/circuit-breaker-snapshot.json` + +### Part B — `demo/index.html` update + +Change `renderCircuitBreakerTab()` from synchronous (calls `buildCbMockData()`) +to async: first tries `fetch('./circuit-breaker-snapshot.json', {cache:'no-cache'})`, +falls back to `buildCbMockData()` if the file is absent or the fetch fails. +`buildCbMockData()` remains as the authoritative fallback. + +### Part C — committed initial snapshot + +Run `bun tools/bus/export-cb-snapshot.ts` once and commit the output as +`demo/circuit-breaker-snapshot.json` so GitHub Pages visitors see real data +from the build moment rather than the mock. 
+ +## Acceptance criteria + +- [ ] `tools/bus/export-cb-snapshot.ts` exists and runs without errors via `bun` +- [ ] `demo/circuit-breaker-snapshot.json` is committed (generated by the script) +- [ ] `renderCircuitBreakerTab()` in `demo/index.html` tries the snapshot first, + falls back to `buildCbMockData()` — no visible change when snapshot absent +- [ ] Panel renders correctly in both paths (snapshot present and absent) +- [ ] `dotnet build -c Release` → 0 warnings, 0 errors +- [ ] `bun tsc --noEmit tools/bus/export-cb-snapshot.ts` passes (TypeScript clean) + +## Not in scope + +- A live relay / HTTP server to serve fresh bus data to GitHub Pages visitors + (requires CORS / deployment plumbing — future slice) +- Automated refresh of the snapshot in CI (future slice; for now, snapshot is + committed from a local run) +- Adding new bus topics for richer circuit-breaker signals (B-0213 territory) + +## Pre-start checklist (backlog-item-start-gate) + +**Prior-art search (2026-05-14):** + +- Surfaces searched: `tools/bus/` (bus.ts, types.ts, claim.ts), `demo/index.html` + (renderCircuitBreakerTab, buildCbMockData), backlog for circuit-breaker + + live-bus + snapshot keywords +- Queries run: grep for "circuit-breaker-snapshot", "export-cb", "live.*bus.*demo" +- Results: No prior snapshot script or fetch path exists; `buildCbMockData()` is + the only current data source; `types.ts` defines the envelope schema +- Prior-art gap confirmed: output is net-new on both script and HTML sides + +**Dependency check:** + +- `depends_on: [B-0435]` — slice-1 merged (PR #3133, 2026-05-14) ✓ +- `composes_with: B-0213` — bus hardening is a sibling, not a blocker +- No blockers; all scaffolding in place from slice-1 + +**Claim acquired:** otto-cli, 2026-05-14, branch `feat/b-0494-circuit-breaker-live-bus-snapshot` diff --git a/tools/bus/export-cb-snapshot.ts b/tools/bus/export-cb-snapshot.ts new file mode 100644 index 0000000000..4b028ae773 --- /dev/null +++ 
#!/usr/bin/env bun
/**
 * export-cb-snapshot.ts — derive circuit-breaker state from live bus envelopes
 *
 * Reads non-expired envelopes from /tmp/zeta-bus/, groups by agent identity,
 * derives CLOSED/HALF_OPEN/OPEN state, and writes demo/circuit-breaker-snapshot.json.
 *
 * Usage:
 *   bun tools/bus/export-cb-snapshot.ts [--bus-dir <dir>] [--out <path>]
 *
 * Defaults:
 *   --bus-dir  /tmp/zeta-bus
 *   --out      demo/circuit-breaker-snapshot.json  (relative to repo root)
 *
 * Run from any directory; the default output path resolves relative to this
 * file's location. An explicit --out value is passed to writeFile unchanged.
 *
 * B-0494 slice-2.
 */

import { readdir, readFile, writeFile } from "fs/promises";
import { join, resolve, dirname } from "path";
import type { MessageEnvelope, SenderAgentId } from "./types.ts";

// ── constants ─────────────────────────────────────────────────────────────────

const REPO_ROOT = resolve(dirname(import.meta.path), "../..");
const DEFAULT_BUS_DIR = "/tmp/zeta-bus";
const DEFAULT_OUT = join(REPO_ROOT, "demo/circuit-breaker-snapshot.json");

/** Circuit-breaker trips at this many consecutive idle heartbeats. */
const THRESHOLD = 5;

/** Display metadata attached to every snapshot entry. */
interface AgentMeta {
  model: string;
  harness: string;
}

/** Canonical identity → display metadata. Order determines output order. */
const AGENT_META: Record<string, AgentMeta> = {
  otto: { model: "Otto", harness: "Claude Code" },
  alexa: { model: "Alexa", harness: "Kiro / Qwen" },
  lior: { model: "Lior", harness: "Gemini" },
  vera: { model: "Vera", harness: "Codex / GPT" },
  riven: { model: "Riven", harness: "Grok" },
};

/**
 * Known identity prefixes, longest first, so that if one identity is ever a
 * dash-prefix of another the more specific one is matched. Sorting a copy of
 * the keys leaves the output order (taken from AGENT_META itself) untouched.
 */
const IDENTITIES = Object.keys(AGENT_META).sort((a, b) => b.length - a.length);

// ── helpers ───────────────────────────────────────────────────────────────────

/**
 * Normalise a surface-tagged sender ID back to identity level.
 * e.g. "otto-cli" → "otto", "lior-gemini" → "lior", "otto" → "otto"
 *
 * @returns the matching key of AGENT_META, or null for an unknown sender.
 */
function toIdentity(from: SenderAgentId): string | null {
  for (const id of IDENTITIES) {
    if (from === id || from.startsWith(id + "-")) return id;
  }
  return null;
}

/**
 * Runtime check for the envelope fields this script dereferences:
 * `from` and `topic` (strings), `timestamp` (parseable date string), and
 * `payload` (object) for the two topics whose payload is inspected.
 * It is not a full schema validation of MessageEnvelope.
 */
function isUsableEnvelope(value: unknown): value is MessageEnvelope {
  if (typeof value !== "object" || value === null) return false;
  const v = value as Record<string, unknown>;
  if (typeof v.from !== "string" || typeof v.topic !== "string") return false;
  if (typeof v.timestamp !== "string" || Number.isNaN(new Date(v.timestamp).getTime())) {
    return false;
  }
  const payloadIsRead = v.topic === "heartbeat" || v.topic === "claim";
  if (payloadIsRead && (typeof v.payload !== "object" || v.payload === null)) return false;
  return true;
}

/**
 * Load every `*.json` file in `busDir` and keep the envelopes whose
 * `expiresAt` lies in the future.
 *
 * @throws whatever `readdir` throws when `busDir` is missing or unreadable.
 */
async function readEnvelopes(busDir: string): Promise<MessageEnvelope[]> {
  const now = Date.now();
  // Let readdir throw — silent suppression would turn a missing or unreadable bus
  // directory into a "healthy/no recent activity" snapshot, hiding misconfiguration.
  const files = await readdir(busDir);
  const envelopes: MessageEnvelope[] = [];
  for (const file of files) {
    if (!file.endsWith(".json")) continue;
    try {
      const raw: unknown = JSON.parse(await readFile(join(busDir, file), "utf8"));
      // Valid JSON that is not envelope-shaped is skipped here; otherwise it
      // would throw later, outside this try block, while deriving state.
      if (!isUsableEnvelope(raw)) continue;
      // An unparseable expiresAt yields NaN, which fails the comparison and is skipped.
      if (new Date(raw.expiresAt).getTime() > now) {
        envelopes.push(raw);
      }
    } catch {
      // unreadable, corrupted or partial file — skip
    }
  }
  return envelopes;
}

/**
 * Return the token following `flag` in `args`, or `fallback` when the flag is
 * absent, is the last token, or is directly followed by another `--flag`
 * (so `--bus-dir --out x` does not take "--out" as the directory).
 */
function optionValue(args: readonly string[], flag: string, fallback: string): string {
  const idx = args.indexOf(flag);
  if (idx === -1) return fallback;
  const value = args[idx + 1];
  return value === undefined || value.startsWith("--") ? fallback : value;
}

// ── circuit-breaker derivation ────────────────────────────────────────────────

type CbState = "CLOSED" | "HALF_OPEN" | "OPEN";

interface CbEntry {
  model: string;
  harness: string;
  state: CbState;
  consecutiveFailures: number;
  threshold: number;
  lastCheck: string;
  note: string;
}

/**
 * Build the snapshot entry for one agent identity.
 *
 * @param identity  key of AGENT_META, e.g. "otto"
 * @param meta      display metadata copied into the entry
 * @param envelopes all non-expired envelopes; filtered to this identity here
 * @returns CLOSED when the newest envelope is not an idle heartbeat (or there
 *          are none), HALF_OPEN for 1..THRESHOLD-1 trailing idle heartbeats,
 *          OPEN at THRESHOLD or more.
 */
function deriveEntry(
  identity: string,
  meta: AgentMeta,
  envelopes: MessageEnvelope[]
): CbEntry {
  // Collect envelopes from this identity (any surface variant), newest first.
  // filter() returns a fresh array, so the in-place sort does not touch the input.
  const own = envelopes
    .filter(e => toIdentity(e.from) === identity)
    .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime());

  const lastCheck = own[0]?.timestamp ?? new Date().toISOString();

  if (own.length === 0) {
    return {
      model: meta.model,
      harness: meta.harness,
      state: "CLOSED",
      consecutiveFailures: 0,
      threshold: THRESHOLD,
      lastCheck,
      note: "No recent bus activity — assuming healthy",
    };
  }

  // Walk newest-first and count the trailing run of idle heartbeats.
  // Stop at the first envelope that is not an idle heartbeat so that a working
  // signal resets the streak (e.g. idle→working→idle→idle counts 2, not 3).
  let consecutiveIdle = 0;
  for (const e of own) {
    if (e.topic === "heartbeat" && e.payload.status === "idle") {
      consecutiveIdle++;
    } else {
      break;
    }
  }

  // Any positive signal: active claim-acquire, work-assignment, or working heartbeat.
  // Claim-release does NOT count — an agent relinquishing work should not be treated
  // as a health signal (it may be about to go idle).
  // Only consulted when there is no trailing idle run; it selects the CLOSED note.
  const hasWorkSignal = own.some(
    e =>
      (e.topic === "claim" && e.payload.action === "claim") ||
      e.topic === "work-assignment" ||
      (e.topic === "heartbeat" && e.payload.status === "working")
  );

  let state: CbState;
  let note: string;

  if (consecutiveIdle >= THRESHOLD) {
    state = "OPEN";
    note = `Tripped — ${consecutiveIdle} consecutive idle heartbeats exceeded threshold (${THRESHOLD})`;
  } else if (consecutiveIdle > 0) {
    state = "HALF_OPEN";
    note = `${consecutiveIdle} consecutive idle heartbeat(s) — watching; threshold ${THRESHOLD}`;
  } else if (hasWorkSignal) {
    state = "CLOSED";
    note = "Active work detected — normal operation";
  } else {
    state = "CLOSED";
    note = "Bus activity present; no idle pattern detected";
  }

  return {
    model: meta.model,
    harness: meta.harness,
    state,
    consecutiveFailures: consecutiveIdle,
    threshold: THRESHOLD,
    lastCheck,
    note,
  };
}

// ── main ──────────────────────────────────────────────────────────────────────

/**
 * CLI entry point: parse flags, read the bus directory, derive one entry per
 * known agent and write the snapshot JSON. Exits with status 1 when the bus
 * directory cannot be read.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const busDir = optionValue(args, "--bus-dir", DEFAULT_BUS_DIR);
  const outPath = optionValue(args, "--out", DEFAULT_OUT);

  let envelopes: MessageEnvelope[];
  try {
    envelopes = await readEnvelopes(busDir);
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    console.error(`Cannot read bus directory ${busDir}: ${msg}`);
    process.exit(1);
  }

  const entries: CbEntry[] = Object.entries(AGENT_META).map(([id, meta]) =>
    deriveEntry(id, meta, envelopes)
  );

  const snapshot = {
    generatedAt: new Date().toISOString(),
    source: "tools/bus/export-cb-snapshot.ts",
    busDir,
    envelopeCount: envelopes.length,
    entries,
  };

  await writeFile(outPath, JSON.stringify(snapshot, null, 2) + "\n");
  console.log(`Wrote ${entries.length} entries (${envelopes.length} envelopes) → ${outPath}`);
}

if (import.meta.main) {
  // Report any remaining failure (e.g. writeFile) explicitly instead of leaving
  // an unhandled promise rejection.
  main().catch((err: unknown) => {
    const msg = err instanceof Error ? err.message : String(err);
    console.error(`export-cb-snapshot failed: ${msg}`);
    process.exit(1);
  });
}