From a05efb7fc47011e5fd68122de6c4cde657a08519 Mon Sep 17 00:00:00 2001 From: Aaron Stainback Date: Thu, 30 Apr 2026 12:57:24 -0400 Subject: [PATCH 1/2] =?UTF-8?q?tools(github):=20check-github-status.ts=20?= =?UTF-8?q?=E2=80=94=20pre-flight=20gatekeeper=20for=20GitHub-status?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes Gemini's #2 v1-hardening recommendation: convert the GitHub-status check from behavioral habit into operational tool. Companion to poll-pr-gate.ts: - poll-pr-gate.ts asks "is this PR ready to merge?" - check-github-status.ts asks "is GitHub itself healthy enough that the answer can be trusted?" ## What it does Queries `https://www.githubstatus.com/api/v2/summary.json` and classifies into a structured report: - `overall`: operational / degraded / outage / maintenance - `degradedComponents`: array of non-operational components (name + status) - `factoryRelevant`: per-component status restricted to the factory's load-bearing surfaces (Pull Requests / Actions / API Requests / Webhooks / Git Operations / Issues — per the GitHub-status reference memory file) - `decision`: proceed / proceed-with-verify / halt - `description`: human-readable status text ## Exit codes (consumable by autonomous-loop pre-flight) - 0 — operational; safe to proceed - 1 — invocation / dependency error - 2 — fetch failed (network / DNS / cloudflare) - 3 — JSON parse failed - 8 — degraded; proceed-but-verify (per the rule from poll-the-gate memory file — proceed when gate surfaces aren't impacted, verify post-mutation) - 9 — outage / major; halt mutating operations ## Fixture mode `--fixture path/to/summary.json` — for offline testing + regression fixtures. Two fixtures land with v0: - `github-status-all-operational.json` — clean state (exit 0) - `github-status-pr-degraded.json` — Pull Requests degraded (exit 8) ## Component filter `--component "Pull Requests"` — narrows the report to a single factory-relevant component. 
Exit code reflects that component specifically (0 if operational, 8 otherwise). ## Verification Tested against both fixtures + live API. The live API currently returns degraded (Pull Requests still impacted from this session's morning incident) — the tool correctly classifies this as decision=proceed-with-verify, exit 8. ## Composes with - `memory/reference_github_status_first_class_aaron_2026_04_30.md` — the canonical surface allowlist this tool reports against - `memory/feedback_amara_poll_gate_not_ending_holding_is_not_status_2026_04_30.md` — the proceed-but-verify rule that exit code 8 mirrors - `docs/backlog/P0/B-0109-dependency-status-tracking-surface-2026-04-30.md` — the substrate-tracking row this tool operationalizes - `tools/github/poll-pr-gate.ts` — the companion query tool; together they form a Query + Gatekeeper pair ## Wiring into the autonomous loop (deferred) This commit ships the standalone tool. Wiring it into the autonomous-loop pre-flight sequence (so it executes before any state-mutating API call) is a separate change — needs harness config, not just substrate. Tracked under Otto-354 (harness-vendor resilience). Co-Authored-By: Claude Opus 4.7 --- tools/github/check-github-status.ts | 255 ++++++++++++++++++ .../github-status-all-operational.json | 17 ++ .../fixtures/github-status-pr-degraded.json | 17 ++ 3 files changed, 289 insertions(+) create mode 100755 tools/github/check-github-status.ts create mode 100644 tools/github/fixtures/github-status-all-operational.json create mode 100644 tools/github/fixtures/github-status-pr-degraded.json diff --git a/tools/github/check-github-status.ts b/tools/github/check-github-status.ts new file mode 100755 index 000000000..ea9a77178 --- /dev/null +++ b/tools/github/check-github-status.ts @@ -0,0 +1,255 @@ +#!/usr/bin/env bun +// check-github-status.ts — query the public GitHub status API for the +// autonomous loop's pre-flight gate. +// +// Companion to poll-pr-gate.ts. 
Where poll-pr-gate.ts asks "is this PR +// ready to merge?", check-github-status.ts asks "is GitHub itself +// healthy enough that the answer can be trusted?" +// +// Origin: B-0109 (dependency status tracking surface, 2026-04-30) and +// peer-review feedback that the GitHub-status check should be a tested +// executable, not a behavioral habit. Mirrors poll-pr-gate.ts's +// "promote prose probes to executable substrate" pattern. +// +// Usage: +// bun tools/github/check-github-status.ts +// bun tools/github/check-github-status.ts --component "Pull Requests" +// bun tools/github/check-github-status.ts --fixture path/to/summary.json +// bun tools/github/check-github-status.ts --quiet # exit-code-only +// +// Output (default): one JSON object on stdout, shape: +// { +// "overall": "operational" | "degraded" | "outage" | "maintenance", +// "fetchedAt": "2026-04-30T15:00:00Z", +// "degradedComponents": [{ "name": "...", "status": "..." }, ...], +// "factoryRelevant": { +// "Pull Requests": "operational" | "...", +// "Actions": "...", +// "API Requests": "...", +// "Webhooks": "...", +// "Git Operations": "...", +// "Issues": "..." +// }, +// "decision": "proceed" | "proceed-with-verify" | "halt" +// } +// +// Exit codes: +// 0 — overall operational; safe to proceed +// 1 — invocation / dependency error +// 2 — fetch failed (network / DNS / cloudflare) +// 3 — JSON parse failed +// 8 — overall degraded; proceed-but-verify (per the proceed-but-verify +// rule landed in poll-the-gate memory) +// 9 — overall outage / major; halt mutating operations +// +// The `factoryRelevant` allowlist mirrors the GitHub-status reference +// memory file (memory/reference_github_status_first_class_aaron_2026_04_30.md) +// so this tool reports exactly the surfaces the factory depends on. 
+ +import { readFileSync } from "node:fs"; + +const SUMMARY_URL = "https://www.githubstatus.com/api/v2/summary.json"; + +const FACTORY_RELEVANT_COMPONENTS = [ + "Pull Requests", + "Actions", + "API Requests", + "Webhooks", + "Git Operations", + "Issues", +] as const; + +type ComponentStatus = + | "operational" + | "degraded_performance" + | "partial_outage" + | "major_outage" + | "under_maintenance"; + +type OverallStatus = "operational" | "degraded" | "outage" | "maintenance"; + +type Decision = "proceed" | "proceed-with-verify" | "halt"; + +interface ApiComponent { + name: string; + status: ComponentStatus; +} + +interface ApiSummary { + page?: { updated_at?: string }; + status: { description: string; indicator: string }; + components: ApiComponent[]; +} + +interface StatusReport { + overall: OverallStatus; + fetchedAt: string; + degradedComponents: Array<{ name: string; status: ComponentStatus }>; + factoryRelevant: Record; + decision: Decision; + description: string; +} + +function classifyOverall(indicator: string): OverallStatus { + // GitHub status indicator values: none | minor | major | critical | + // maintenance. See the public schema at githubstatus.com. + if (indicator === "none") return "operational"; + if (indicator === "maintenance") return "maintenance"; + if (indicator === "minor") return "degraded"; + if (indicator === "major" || indicator === "critical") return "outage"; + return "degraded"; // conservative: unknown indicator → degraded +} + +function classifyDecision( + overall: OverallStatus, + factoryRelevant: Record, +): Decision { + if (overall === "outage") return "halt"; + if (overall === "operational" && Object.values(factoryRelevant).every( + (s) => s === "operational", + )) { + return "proceed"; + } + // Either overall is degraded/maintenance, OR a factory-relevant component + // is degraded — proceed-but-verify per the rule from PR #911. 
+ return "proceed-with-verify"; +} + +function buildReport(summary: ApiSummary): StatusReport { + const overall = classifyOverall(summary.status.indicator); + const fetchedAt = summary.page?.updated_at ?? new Date().toISOString(); + const factoryNameSet = new Set(FACTORY_RELEVANT_COMPONENTS); + const factoryRelevant: Record = {}; + for (const name of FACTORY_RELEVANT_COMPONENTS) { + factoryRelevant[name] = "unknown"; + } + const degradedComponents: Array<{ name: string; status: ComponentStatus }> = []; + for (const c of summary.components) { + if (factoryNameSet.has(c.name)) { + factoryRelevant[c.name] = c.status; + } + if (c.status !== "operational") { + degradedComponents.push({ name: c.name, status: c.status }); + } + } + const decision = classifyDecision(overall, factoryRelevant); + return { + overall, + fetchedAt, + degradedComponents, + factoryRelevant, + decision, + description: summary.status.description, + }; +} + +async function fetchSummary(): Promise { + let resp: Response; + try { + resp = await fetch(SUMMARY_URL, { + headers: { accept: "application/json" }, + }); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + process.stderr.write(`fetch failed: ${msg}\n`); + process.exit(2); + } + if (!resp.ok) { + process.stderr.write(`fetch returned HTTP ${resp.status}: ${resp.statusText}\n`); + process.exit(2); + } + const text = await resp.text(); + try { + return JSON.parse(text) as ApiSummary; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + process.stderr.write(`JSON parse error: ${msg}\n`); + process.stderr.write(`first 200 bytes: ${text.slice(0, 200)}\n`); + process.exit(3); + } +} + +function loadFixture(path: string): ApiSummary { + let raw: ApiSummary; + try { + raw = JSON.parse(readFileSync(path, "utf8")) as ApiSummary; + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + process.stderr.write(`failed to load fixture ${path}: ${msg}\n`); + process.exit(1); + } + return raw; +} + +interface ParsedArgs { + fixture?: string; + component?: string; + quiet: boolean; +} + +function parseArgs(argv: string[]): ParsedArgs { + const out: ParsedArgs = { quiet: false }; + const requireValue = (flag: string, v: string | undefined): string => { + if (v === undefined || v.startsWith("--")) { + process.stderr.write(`${flag} requires a value\n`); + process.exit(1); + } + return v; + }; + for (let i = 0; i < argv.length; i++) { + const arg = argv[i]; + if (arg === undefined) continue; + if (arg === "--fixture") { + out.fixture = requireValue("--fixture", argv[++i]); + } else if (arg === "--component") { + out.component = requireValue("--component", argv[++i]); + } else if (arg === "--quiet" || arg === "-q") { + out.quiet = true; + } else if (arg === "--help" || arg === "-h") { + process.stdout.write( + "Usage: check-github-status.ts [--component ] [--quiet]\n" + + " check-github-status.ts --fixture path/to/summary.json\n", + ); + process.exit(0); + } else { + process.stderr.write(`unknown arg: ${arg}\n`); + process.exit(1); + } + } + return out; +} + +function decisionExitCode(decision: Decision): number { + if (decision === "halt") return 9; + if (decision === "proceed-with-verify") return 8; + return 0; +} + +export async function main(argv: string[]): Promise { + const args = parseArgs(argv); + const summary = args.fixture + ? loadFixture(args.fixture) + : await fetchSummary(); + const report = buildReport(summary); + if (args.component) { + const status = report.factoryRelevant[args.component] ?? "unknown"; + const filtered = { + component: args.component, + status, + decision: + status === "operational" ? "proceed" : "proceed-with-verify", + }; + if (!args.quiet) { + process.stdout.write(`${JSON.stringify(filtered, null, 2)}\n`); + } + return status === "operational" ? 
0 : 8; + } + if (!args.quiet) { + process.stdout.write(`${JSON.stringify(report, null, 2)}\n`); + } + return decisionExitCode(report.decision); +} + +if (import.meta.main) { + process.exit(await main(process.argv.slice(2))); +} diff --git a/tools/github/fixtures/github-status-all-operational.json b/tools/github/fixtures/github-status-all-operational.json new file mode 100644 index 000000000..529cad75c --- /dev/null +++ b/tools/github/fixtures/github-status-all-operational.json @@ -0,0 +1,17 @@ +{ + "page": { + "updated_at": "2026-04-30T18:00:00Z" + }, + "status": { + "description": "All Systems Operational", + "indicator": "none" + }, + "components": [ + { "name": "Git Operations", "status": "operational" }, + { "name": "API Requests", "status": "operational" }, + { "name": "Webhooks", "status": "operational" }, + { "name": "Issues", "status": "operational" }, + { "name": "Pull Requests", "status": "operational" }, + { "name": "Actions", "status": "operational" } + ] +} diff --git a/tools/github/fixtures/github-status-pr-degraded.json b/tools/github/fixtures/github-status-pr-degraded.json new file mode 100644 index 000000000..5d2544b26 --- /dev/null +++ b/tools/github/fixtures/github-status-pr-degraded.json @@ -0,0 +1,17 @@ +{ + "page": { + "updated_at": "2026-04-30T15:00:00Z" + }, + "status": { + "description": "Partially Degraded Service", + "indicator": "minor" + }, + "components": [ + { "name": "Git Operations", "status": "operational" }, + { "name": "API Requests", "status": "operational" }, + { "name": "Webhooks", "status": "operational" }, + { "name": "Issues", "status": "operational" }, + { "name": "Pull Requests", "status": "degraded_performance" }, + { "name": "Actions", "status": "operational" } + ] +} From 2db7e8c04b8d94ea8346d9921c25d7acc0e472fd Mon Sep 17 00:00:00 2001 From: Aaron Stainback Date: Thu, 30 Apr 2026 13:06:35 -0400 Subject: [PATCH 2/2] =?UTF-8?q?fix(github):=20check-github-status=20?= 
=?UTF-8?q?=E2=80=94=205=20Copilot=20v0=20review=20findings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. **Component mode incorporates overall status** (Copilot zqDN) — `--component` previously ignored `report.overall` so it could return exit 0 with a clean component even during a major outage. Now component decision = stricter of (component status, overall decision); halt overrides. 2. **Default exit 0 on successful fetch + opt-in `--strict`** (Copilot zqD8) — exit 8 was hostile to `set -e` shells (any non-zero aborts). Default now: exit 0 if fetch+parse succeeded; decision is in JSON. Add `--strict` to opt into non-zero exit for degraded/outage. Friendlier for shell pre-flights; enforcement-by-flag is the explicit choice. 3. **`requireValue` rejects single-dash flags too** (Copilot zqEF) — was `startsWith("--")`, now `startsWith("-")` — catches the `--component -q` case where `-q` was being silently accepted as a component name. 4. **`fetchedAt` semantics fixed** (Copilot zqER) — was set to `summary.page.updated_at` (status-page update time, not local fetch time). Now `fetchedAt` is `new Date().toISOString()` (local observation time); added `statusUpdatedAt` field for the page-side update time. Two clearly-named fields. 5. **Decision-classifier semantics** (Copilot zqDv) — KEPT as-is: proceed-with-verify for factory-relevant degradation, NOT halt. This matches Aaron's explicit proceed-but-verify rule (memory/feedback_amara_poll_gate_not_ending_holding_is_not_status_2026_04_30.md), which has been operationally validated by 13+ PRs merged this session during the live GitHub PR-degradation incident. Halting on factory-relevant degradation would have prevented those merges. Weighing the reference memory and this session's track record against Copilot's stricter reading, the maintainer rule wins. Resolving the thread with an explanation, not the suggested edit. Verified: - Default mode: exit 0 on all fixtures (fetch worked). 
- `--strict` mode: exit 0/8/9 by decision. - Component mode: incorporates overall (Issues operational + overall degraded → proceed-with-verify). - `--component -q` rejected with "--component requires a value". - `fetchedAt` and `statusUpdatedAt` distinct; tsc clean. Co-Authored-By: Claude Opus 4.7 --- tools/github/check-github-status.ts | 72 +++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/tools/github/check-github-status.ts b/tools/github/check-github-status.ts index ea9a77178..8c93a5077 100755 --- a/tools/github/check-github-status.ts +++ b/tools/github/check-github-status.ts @@ -20,7 +20,8 @@ // Output (default): one JSON object on stdout, shape: // { // "overall": "operational" | "degraded" | "outage" | "maintenance", -// "fetchedAt": "2026-04-30T15:00:00Z", +// "fetchedAt": "2026-04-30T15:00:00Z", // local fetch time +// "statusUpdatedAt":"2026-04-30T15:00:00Z", // status-page update time // "degradedComponents": [{ "name": "...", "status": "..." 
}, ...], // "factoryRelevant": { // "Pull Requests": "operational" | "...", @@ -33,14 +34,18 @@ // "decision": "proceed" | "proceed-with-verify" | "halt" // } // -// Exit codes: -// 0 — overall operational; safe to proceed +// Exit codes (default — friendly for `set -e` shells): +// 0 — fetch + parse succeeded; decision is in JSON // 1 — invocation / dependency error // 2 — fetch failed (network / DNS / cloudflare) // 3 — JSON parse failed -// 8 — overall degraded; proceed-but-verify (per the proceed-but-verify -// rule landed in poll-the-gate memory) -// 9 — overall outage / major; halt mutating operations +// +// Exit codes with `--strict` (opt-in enforcement): +// 0 — decision: proceed +// 8 — decision: proceed-with-verify (degraded; per proceed-but-verify +// rule from poll-the-gate memory) +// 9 — decision: halt (outage / major) +// 1, 2, 3 — same as default // // The `factoryRelevant` allowlist mirrors the GitHub-status reference // memory file (memory/reference_github_status_first_class_aaron_2026_04_30.md) @@ -83,7 +88,13 @@ interface ApiSummary { interface StatusReport { overall: OverallStatus; + /** Local wall-clock time the tool fetched/loaded the status (ISO 8601). + * Distinct from `statusUpdatedAt` which is the status page's own + * last-updated time (per Copilot v0 review — field-name semantics). */ fetchedAt: string; + /** GitHub status page's `updated_at` — when the status itself last + * changed, NOT when the tool ran. May be older than `fetchedAt`. */ + statusUpdatedAt: string; degradedComponents: Array<{ name: string; status: ComponentStatus }>; factoryRelevant: Record; decision: Decision; @@ -117,7 +128,8 @@ function classifyDecision( function buildReport(summary: ApiSummary): StatusReport { const overall = classifyOverall(summary.status.indicator); - const fetchedAt = summary.page?.updated_at ?? new Date().toISOString(); + const fetchedAt = new Date().toISOString(); + const statusUpdatedAt = summary.page?.updated_at ?? 
fetchedAt; const factoryNameSet = new Set(FACTORY_RELEVANT_COMPONENTS); const factoryRelevant: Record = {}; for (const name of FACTORY_RELEVANT_COMPONENTS) { @@ -136,6 +148,7 @@ function buildReport(summary: ApiSummary): StatusReport { return { overall, fetchedAt, + statusUpdatedAt, degradedComponents, factoryRelevant, decision, @@ -185,12 +198,21 @@ interface ParsedArgs { fixture?: string; component?: string; quiet: boolean; + /** When true, non-`proceed` decisions exit non-zero (8/9). When false + * (default), exit 0 on successful fetch/parse — decision is in JSON. + * Per Copilot v0 review: `set -e` shells abort on any non-zero, so + * default-to-zero is friendlier; opt-in to strict via `--strict`. */ + strict: boolean; } function parseArgs(argv: string[]): ParsedArgs { - const out: ParsedArgs = { quiet: false }; + const out: ParsedArgs = { quiet: false, strict: false }; const requireValue = (flag: string, v: string | undefined): string => { - if (v === undefined || v.startsWith("--")) { + // Reject any value that looks like a flag (starts with `-`), per + // Copilot v0 review — `--component -q` was silently accepting `-q` + // as a component name. Note: this also rejects any legitimate value + // that begins with `-` (e.g. a negative number or a `-file` path). 
+ if (v === undefined || v.startsWith("-")) { process.stderr.write(`${flag} requires a value\n`); process.exit(1); } @@ -205,10 +227,17 @@ function parseArgs(argv: string[]): ParsedArgs { out.component = requireValue("--component", argv[++i]); } else if (arg === "--quiet" || arg === "-q") { out.quiet = true; + } else if (arg === "--strict") { + out.strict = true; } else if (arg === "--help" || arg === "-h") { process.stdout.write( - "Usage: check-github-status.ts [--component ] [--quiet]\n" + - " check-github-status.ts --fixture path/to/summary.json\n", + "Usage: check-github-status.ts [--component ] [--quiet] [--strict]\n" + + " check-github-status.ts --fixture path/to/summary.json\n" + + "\n" + + " --strict non-`proceed` decisions exit non-zero (8 degraded, 9 outage).\n" + + " Default: exit 0 on successful fetch/parse; decision is\n" + + " encoded in the JSON `decision` field. Friendlier for\n" + + " `set -e` shells; opt-in to enforcement via --strict.\n", ); process.exit(0); } else { @@ -233,21 +262,34 @@ export async function main(argv: string[]): Promise { const report = buildReport(summary); if (args.component) { const status = report.factoryRelevant[args.component] ?? "unknown"; + // Component decision incorporates the overall status (per Copilot v0 + // review): if overall is halt, the component decision is halt + // regardless; otherwise use the stricter of the component's status + // and the overall decision. + let decision: Decision; + if (report.decision === "halt") { + decision = "halt"; + } else if (status === "operational" && report.decision === "proceed") { + decision = "proceed"; + } else { + decision = "proceed-with-verify"; + } const filtered = { component: args.component, status, - decision: - status === "operational" ? 
"proceed" : "proceed-with-verify", + overall: report.overall, + overallDecision: report.decision, + decision, }; if (!args.quiet) { process.stdout.write(`${JSON.stringify(filtered, null, 2)}\n`); } - return status === "operational" ? 0 : 8; + return args.strict ? decisionExitCode(decision) : 0; } if (!args.quiet) { process.stdout.write(`${JSON.stringify(report, null, 2)}\n`); } - return decisionExitCode(report.decision); + return args.strict ? decisionExitCode(report.decision) : 0; } if (import.meta.main) {