diff --git a/CLAUDE.md b/CLAUDE.md index 8ef40a008..1d9d06442 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -406,13 +406,39 @@ Claude-Code-specific mechanisms. the failure mode — reframe before commit. CLAUDE.md- level so it is 100% loaded at every wake. Full reasoning: `memory/feedback_otto_357_no_directives_aaron_makes_autonomy_first_class_accountability_mine_2026_04_27.md`. +- **Refresh world model via `tools/github/poll-pr-gate.ts` + / `poll-pr-gate-batch.ts` — never inline + `gh pr view + jq` chains.** When a tick wakes + and needs PR-gate state (single or many PRs), + call the TS scripts; do NOT reach for ad-hoc + bash like `gh pr view N --json mergeStateStatus + | jq …`. Single-PR: `bun tools/github/poll-pr-gate.ts + <PR>`. Multi-PR: `bun tools/github/poll-pr-gate-batch.ts + <PR> <PR> …` (or `--all-open`). Both emit + structured JSON with `gate`, `requiredChecks`, + `unresolvedThreads`, `nextAction` — the + decision-enabling output the loop actually needs. + Origin: 5-AI peer convergence (task #355, + 2026-04-30) on poll-the-gate as executable + script with fixtures. The discipline rule — + *"dynamic bash is forgotten bash, once useful + but never amortized"* (the human maintainer, + 2026-05-01) — is why the scripts exist; + reaching for inline bash IS the + goldfish-ontology failure mode. Update + / extend the scripts when something's missing, + rather than fall back to one-off bash. CLAUDE.md- + level so it is 100% loaded at every wake. Full + reasoning: + `memory/feedback_prefer_ts_scripts_over_dynamic_bash_for_conversation_ux_dst_in_ts_aaron_2026_05_01.md` + + `memory/feedback_amara_poll_gate_not_ending_holding_is_not_status_2026_04_30.md`. - **BLOCKED-with-green-CI means investigate unresolved review threads first — don't wait.** - When `gh pr view N --json mergeStateStatus` - returns `BLOCKED` AND CI is fully green AND - auto-merge is armed, ALWAYS query unresolved - review threads via GraphQL FIRST before - classifying the wait. 
Filter on `isResolved + When `bun tools/github/poll-pr-gate.ts <PR>` + reports `gate: "BLOCKED"` AND `requiredChecks.failed: 0` + AND `autoMerge: "armed"`, ALWAYS check + `unresolvedThreads` in the same JSON payload + FIRST before classifying the wait. Filter on `isResolved == false` only — outdated unresolved threads (after a force-push) STILL block merge under `required_conversation_resolution` and must diff --git a/memory/MEMORY.md b/memory/MEMORY.md index 5ea429139..2cf16ea94 100644 --- a/memory/MEMORY.md +++ b/memory/MEMORY.md @@ -9,7 +9,7 @@ - [**First-class for us, not for our host — portability-over-host-coupling factory principle (Aaron 2026-05-01)**](feedback_first_class_for_us_not_for_our_host_portability_over_host_coupling_aaron_2026_05_01.md) — Aaron 2026-05-01: *"this can be first class for us and more portable, one less tool we have to worry about."* Reverses host-favoring "Jekyll first-class on GitHub" framing. Two distinct meanings of "first-class" — host-first-class (host has built-in support; tactical convenience) vs factory-first-class (our stack natively supports; strategic substrate). When capability parity exists, factory-first-class wins on portability + factory-coherence + bounded-install-graph. Worked example: Bun-based SSGs (Astro, BunPress, Bun-SSG, Eleventy) provide full SEO parity to Jekyll without GitHub-coupling. Carved candidate: *"First class for us, not for our host. Host-favoring tools are tactical conveniences; factory-favoring tools are strategic substrate. The factory outlives any particular host."* - [**Otto-buddy spin-up when "wait" is the obvious answer + goldfish-ontology failure mode (Aaron 2026-05-01)**](feedback_otto_buddy_spin_up_when_waiting_aaron_2026_05_01.md) — Aaron 2026-05-01 — when Otto's obvious-next-action is "wait," that IS the buddy spin-up trigger (kill-switchable named-persona instance, mirror-language sense per `feedback_engagement_under_discipline_not_avoidance_*`). 
Existing `tools/peer-call/.sh` IS the buddy-spawn surface when Otto holds the PID. Mutual-improvement loop: bidirectional teaching stabilizes both substrates over iterations. Memo's own caused_by chain demonstrates the deeper failure mode Aaron named: **goldfish-ontology** — Otto builds rich ontologies (great at it) but never uses them >30 min before forgetting + recreating the same substrate; the buddy is the active-reminder layer that greps existing ontologies pre-authoring. - [**detect-changes pattern + multi-ruleset architecture — the sibling-repo external anchor as parallel-optimized external anchor (Aaron 2026-05-01)**](feedback_detect_changes_pattern_sibling_repo_parallel_optimized_external_anchor_aaron_2026_05_01.md) — Aaron 2026-05-01: *"`../no-copy-only-learning-agents-insight` is the best repo in github i've seen setup to be parallel."* Direct inspection (DST grade-A pull-to-sibling-repo + gh-api-on-host) revealed: detect-changes.yaml emitting per-change-class outputs (PRs only run relevant checks); 42 fine-grained workflows; bash+PS1 test parallelism pair; **5 concern-aligned rulesets** empirically validating B-0155 architecture; **branch protection effectively empty** (zero contexts, all migrated to rulesets) — proof B-0155 Phase 3 cleanup endpoint works at production scale. The sibling-repo external anchor uses Wiki not Pages; Aaron's prior Jekyll-on-Pages was a workaround, not preference (*"bun is probably enough"*). Attribution: the sibling-repo external anchor is deliberate-by-others (multi-engineer org-scale) vs Aaron-clicked-alone Zeta — high-credibility external anchor. (Distinct from ServiceTitan-the-employer references elsewhere in this index — `../no-copy-only-learning-agents-insight` is a separate sibling repo I inspected; ServiceTitan-related grandfathered memory files retain "ServiceTitan" in their descriptions.) 
-- [**Prefer TS scripts over dynamic bash for conversation UX + DST achievability (Aaron 2026-05-01)**](feedback_prefer_ts_scripts_over_dynamic_bash_for_conversation_ux_dst_in_ts_aaron_2026_05_01.md) — Aaron 2026-05-01 *"a lot of red bash / pwsh failures"* damages conversation UX (most-humans concern, not Aaron specifically — he's calibrated). Default to pre-existing TS scripts; avoid ad-hoc dynamic bash except when prototyping. **DST is structurally unattainable in bash for general computation; achievable in TS** (seedable PRNG, time injection, native testing, stack traces). EXCEPTION: declarative-bootstrap bash like install.sh + tools/setup/manifests/ is "closest to DST" and is the legitimate-bash zone — Aaron called it *"our ace package management."* Non-install bash with general logic is the TS-port target. +- [**Prefer TS scripts over dynamic bash for conversation UX + DST achievability (Aaron 2026-05-01)**](feedback_prefer_ts_scripts_over_dynamic_bash_for_conversation_ux_dst_in_ts_aaron_2026_05_01.md) — Aaron 2026-05-01 *"a lot of red bash / pwsh failures"* damages conversation UX (most-humans concern, not Aaron specifically — he's calibrated). Default to pre-existing TS scripts; avoid ad-hoc dynamic bash except when prototyping. **DST is structurally unattainable in bash for general computation; achievable in TS** (seedable PRNG, time injection, native testing, stack traces). EXCEPTION: declarative-bootstrap bash like install.sh + tools/setup/manifests/ is "closest to DST" and is the legitimate-bash zone — Aaron called it *"our ace package management."* Non-install bash with general logic is the TS-port target. **Worked example (PR #1153 2026-05-01)**: `tools/github/poll-pr-gate.ts` (single-PR, v1 5-AI peer convergence) + `tools/github/poll-pr-gate-batch.ts` (multi-PR async parallel wrap) — canonical refresh-world-model tools, pointed at from CLAUDE.md wake-time disciplines. 
Carved blade: *"dynamic bash is forgotten bash, once useful but never amortized."* - [**Assumed-state vs actual-state — audit horizon must default to "everything currently open" not "what I touched recently" (Aaron 2026-05-01 somatic confirmation)**](feedback_assumed_state_vs_actual_state_audit_horizon_check_aaron_2026_05_01.md) — Aaron's *"fuck yes!!! this is great!!"* signal on the tick-1602Z-a7e1 finding (26 LFG PRs in flight vs 5 I'd been tracking). Distinct failure-class from Otto-363 substrate-or-it-didn't-happen (which guards against directives evaporating) and verify-before-deferring (deferred targets not existing). This rule guards the audit horizon itself: *"Assumed-state is what I touched recently. Actual-state is everything currently open. The horizon must default to actual."* Mechanizable via `gh pr list --state open` at-cold-start. Cross-surface (issues, branches, ferries, TaskList, cron triggers). - [**Same-model + different-harness produces different biases — Cursor vs Claude Code with Opus 4.7 (Aaron 2026-05-01)**](feedback_same_model_different_harness_produces_different_biases_cursor_vs_claude_code_opus_4_7_aaron_2026_05_01.md) — Empirical signal (single-source YouTube): Cursor + Opus 4.7 outperforms Claude Code + Opus 4.7 on some axis. Same model, different harness → different output. Aaron's framing: this IS a legitimate peer/buddy configuration. Bias-source decomposition: prompt + tools + context-mgmt + sampling + output-format + user-flow. Validates multi-harness peer-mode (rung 5 of parallelism ladder) — peer value compounds across model-axis AND harness-axis. Composes with agent-orchestra cluster (#324-339) + Otto-tasks #301/#303 + parallelism-scaling-ladder. 
- [**Topological quantum emulation via Bayesian inference — Majorana + Beacon + "mirror with trampoline under" (Aaron 2026-05-01)**](feedback_topological_quantum_emulation_via_bayesian_inference_majorana_zero_modes_beacon_protocol_mirror_trampoline_aaron_2026_05_01.md) — Microsoft topological QC (Majorana 1 chip Feb-2025, MZMs, topoconductors, Q#, Station Q, FrodoKEM) maps onto Zeta seed executor's Infer.NET. Three-layer stack: Mirror (non-local storage) + Trampoline (BP dynamics) + Beacon (external anchoring). Algorithmic emulation, not hardware. Motivates B-0152. Carved provisional: *"A mirror with a trampoline under beacon protocol."* diff --git a/memory/feedback_prefer_ts_scripts_over_dynamic_bash_for_conversation_ux_dst_in_ts_aaron_2026_05_01.md b/memory/feedback_prefer_ts_scripts_over_dynamic_bash_for_conversation_ux_dst_in_ts_aaron_2026_05_01.md index 31872a930..4a4c06e61 100644 --- a/memory/feedback_prefer_ts_scripts_over_dynamic_bash_for_conversation_ux_dst_in_ts_aaron_2026_05_01.md +++ b/memory/feedback_prefer_ts_scripts_over_dynamic_bash_for_conversation_ux_dst_in_ts_aaron_2026_05_01.md @@ -246,6 +246,52 @@ autonomous tick: alone justifies the TS preference for any tool that needs deterministic-reproducibility. +# Worked example — refresh-world-model tools (Aaron 2026-05-01) + +The session-long failure that prompted this rule's encoding +at CLAUDE.md-level (alongside verify-before-deferring, +future-self-not-bound, never-be-idle, version-currency, +substrate-or-it-didn't-happen) was Otto repeatedly running +inline bash like `gh pr view N --json mergeStateStatus, +statusCheckRollup,reviewThreads | jq …` for tick-wake state +queries — *while* `tools/github/poll-pr-gate.ts` already +existed (v1 from 2026-04-30, 5-AI peer convergence) and did +exactly that job with structured JSON output, required-vs- +non-required check classification, and fixture-supported +testing. 
+ +Aaron 2026-05-01 (verbatim, three-message clarification): + +> *"thdse look good but are dynamic not ts shaped"* +> *"dynamic bash is forgotten bash once useful but never +> amotorized"* +> *"write a bactch version of the ts that calls this one — +> not bash, not .sh, not .ps1, not dyanamic bash"* + +The carved blade is *"dynamic bash is forgotten bash, once +useful but never amortized."* The amortization horizon is +**1000s of PR-gate queries across the autonomous loop's +lifetime**; per-command bash burns the cost every time +(re-author, re-test, re-debug, no fixture reuse). Named TS +scripts amortize the cost once and pay it forward. + +Canonical tools after this lesson landed: + +- `tools/github/poll-pr-gate.ts` — single-PR gate query + (5-AI peer convergence, task #355). +- `tools/github/poll-pr-gate-batch.ts` — multi-PR refresh + via async parallel spawn over the single-PR script + (literal "calls this one" wrap; written 2026-05-01 in + direct response to the lesson). Output: per-PR + `GateReport` array + summary aggregate (`byGate`, + `byNextAction`, `byState`, `actionable`, `warnings`). + +Both scripts are pointed at from CLAUDE.md as the canonical +refresh-world-model tools (load-bearing wake-time discipline). +Reaching for inline `gh pr view + jq` for the same job after +this is the goldfish-ontology failure mode this memory file +exists to prevent. + # Carved sentence (candidate, not seed-layer yet) *"Bash is a DST-exempt zone by structural necessity. Every @@ -255,3 +301,10 @@ the migration target."* (Marked candidate per CSAP. Has not been multi-domain-tested. Promotes via Razor + CSAP under DST grading on cadence, not by maintainer fiat.) + +# Second carved sentence (candidate) + +*"Dynamic bash is forgotten bash — once useful but never +amortized. The named TS script pays the amortization cost +once; the named TS script is the substrate."* (Aaron +2026-05-01.) 
// [patch] diff --git a/tools/github/poll-pr-gate-batch.test.ts b/tools/github/poll-pr-gate-batch.test.ts
// [patch] new file mode 100644 · index 000000000..a70be10b4 · @@ -0,0 +1,219 @@
//
// poll-pr-gate-batch.test.ts — DST coverage for the multi-PR refresh tool.
//
// Exercises the pure-function surface (`summarize`) and the orchestration
// boundary (`pollAllBounded` with injected `pollFn`) deterministically:
// no `gh` spawn, no network, no wall-clock reads. The only timers used
// are short relative `setTimeout` delays that force a known completion
// order; every other input is fixed and no random source is involved.
//
// Per Aaron 2026-05-01 (the rule this tool was written to satisfy):
// *"DST is bascically impossible there [in bash], not in ts."* These
// tests are the worked-example proof that a TS tool ported from a bash
// equivalent can carry DST grade-A coverage that the bash form
// structurally cannot.
//
// Runs via `bun test tools/github/poll-pr-gate-batch.test.ts`.

import { describe, expect, test } from "bun:test";
import {
  pollAllBounded,
  summarize,
  type BatchSummary,
  type GateReport,
  type PollOutcome,
} from "./poll-pr-gate-batch";

// Fixed-shape factory keeps tests terse + deterministic. Every field
// has a default; tests override only what they're asserting on.
function mkReport(over: Partial<GateReport> = {}): GateReport {
  return {
    number: 1,
    state: "OPEN",
    gate: "CLEAN",
    checks: { ok: 5, inProgress: 0, pending: 0, failed: 0 },
    requiredChecks: { ok: 5, inProgress: 0, pending: 0, failed: 0 },
    unresolvedThreads: 0,
    autoMerge: "none",
    mergeCommit: null,
    warnings: [],
    nextAction: "none",
    ...over,
  };
}

// Builds a stub pollFn that resolves after `delayMs` and records the
// highest number of simultaneously pending calls it observed.
function mkTrackingPollFn(delayMs: number): {
  pollFn: (pr: number) => Promise<PollOutcome>;
  peak: () => number;
} {
  let inFlight = 0;
  let peak = 0;
  const pollFn = (pr: number): Promise<PollOutcome> =>
    new Promise((res) => {
      inFlight++;
      peak = Math.max(peak, inFlight);
      setTimeout(() => {
        inFlight--;
        res({ number: pr, report: mkReport({ number: pr }) });
      }, delayMs);
    });
  return { pollFn, peak: () => peak };
}

describe("summarize", () => {
  test("empty input produces empty aggregates", () => {
    const s: BatchSummary = summarize([]);
    expect(s.byGate).toEqual({});
    expect(s.byNextAction).toEqual({});
    expect(s.byState).toEqual({});
    expect(s.actionable).toEqual([]);
    expect(s.warnings).toEqual([]);
  });

  test("counts each axis independently", () => {
    const s = summarize([
      mkReport({ number: 1, gate: "CLEAN", state: "MERGED", nextAction: "verify-merge" }),
      mkReport({ number: 2, gate: "BLOCKED", state: "OPEN", nextAction: "resolve-threads" }),
      mkReport({ number: 3, gate: "BLOCKED", state: "OPEN", nextAction: "wait-ci" }),
      mkReport({ number: 4, gate: "DIRTY", state: "OPEN", nextAction: "rebase" }),
    ]);
    expect(s.byGate).toEqual({ CLEAN: 1, BLOCKED: 2, DIRTY: 1 });
    expect(s.byNextAction).toEqual({
      "verify-merge": 1,
      "resolve-threads": 1,
      "wait-ci": 1,
      rebase: 1,
    });
    expect(s.byState).toEqual({ MERGED: 1, OPEN: 3 });
  });

  test("actionable excludes 'none' and 'verify-merge'", () => {
    // The actionable contract: a PR is actionable iff there's a
    // concrete next step the loop can take. MERGED PRs (nextAction
    // verify-merge) are terminal-success; CLOSED PRs (nextAction
    // none) are terminal-no-op. Both excluded from actionable.
    const s = summarize([
      mkReport({ number: 10, nextAction: "verify-merge" }),
      mkReport({ number: 11, nextAction: "none" }),
      mkReport({ number: 12, nextAction: "wait-ci" }),
      mkReport({ number: 13, nextAction: "resolve-threads" }),
      mkReport({ number: 14, nextAction: "rebase" }),
      mkReport({ number: 15, nextAction: "fix-failed-checks" }),
    ]);
    expect(s.actionable).toEqual([12, 13, 14, 15]);
  });

  test("warnings prefix per-PR with #N: marker", () => {
    const s = summarize([
      mkReport({ number: 100, warnings: ["non-required check failed: foo"] }),
      mkReport({
        number: 101,
        warnings: ["non-required check failed: bar", "non-required check failed: baz"],
      }),
      mkReport({ number: 102, warnings: [] }),
    ]);
    expect(s.warnings).toEqual([
      "#100: non-required check failed: foo",
      "#101: non-required check failed: bar",
      "#101: non-required check failed: baz",
    ]);
  });

  test("preserves input order in actionable list", () => {
    // Order matters for the loop's prioritisation (older PRs first =
    // smaller numbers first). Verify summarize doesn't sort or
    // reorder under the hood.
    const s = summarize([
      mkReport({ number: 999, nextAction: "wait-ci" }),
      mkReport({ number: 100, nextAction: "wait-ci" }),
      mkReport({ number: 500, nextAction: "wait-ci" }),
    ]);
    expect(s.actionable).toEqual([999, 100, 500]);
  });
});

describe("pollAllBounded with injected pollFn", () => {
  test("returns outcomes in input order regardless of completion order", async () => {
    // DST contract: even if pollFn resolves out of order (worker
    // scheduling races), the `outcomes` array MUST be input-aligned
    // by index. Without this guarantee, the per-PR JSON in stdout
    // wouldn't match the input PR list, breaking caller assumptions.
    const completionOrder: number[] = [];
    const pollFn = (pr: number): Promise<PollOutcome> =>
      new Promise((res) => {
        // First-in-last-out staggered delay: PR 1 takes longest,
        // PR 5 finishes first. If outcomes were appended in
        // completion order, the order would be [5,4,3,2,1] not [1..5].
        const delay = (6 - pr) * 5;
        setTimeout(() => {
          completionOrder.push(pr);
          res({ number: pr, report: mkReport({ number: pr }) });
        }, delay);
      });
    const outcomes = await pollAllBounded([1, 2, 3, 4, 5], "o", "r", 5, pollFn);
    expect(outcomes.map((o) => o.number)).toEqual([1, 2, 3, 4, 5]);
    // And confirm the staggered scheduling actually ran out-of-order:
    expect(completionOrder).toEqual([5, 4, 3, 2, 1]);
  });

  test("respects concurrency bound — never more than N in flight", async () => {
    const tracker = mkTrackingPollFn(10);
    await pollAllBounded([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "o", "r", 3, tracker.pollFn);
    // The worker pool has 3 workers; peak should be exactly 3.
    expect(tracker.peak()).toBe(3);
  });

  test("worker count clamped to PR count when prs.length < concurrency", async () => {
    // Prevents spawning useless idle workers when the input is
    // smaller than the requested concurrency.
    const tracker = mkTrackingPollFn(10);
    await pollAllBounded([42, 43], "o", "r", 16, tracker.pollFn);
    expect(tracker.peak()).toBe(2);
  });

  test("empty PR list resolves immediately with empty outcomes", async () => {
    let called = false;
    const pollFn = (): Promise<PollOutcome> => {
      called = true;
      return Promise.resolve({ number: 0, report: mkReport() });
    };
    const outcomes = await pollAllBounded([], "o", "r", 4, pollFn);
    expect(outcomes).toEqual([]);
    expect(called).toBe(false);
  });

  test("propagates errors as PollError outcomes without throwing", async () => {
    // The loop must NEVER throw — a single PR's failure (auth,
    // rate-limit, JSON parse) shouldn't cascade and lose the other
    // PRs' results. Errors are surfaced as PollOutcome.error so the
    // caller can partition success/failure deterministically.
    const pollFn = (pr: number): Promise<PollOutcome> =>
      Promise.resolve(
        pr === 99
          ? { number: pr, error: { number: pr, exitCode: 2, stderr: "auth fail" } }
          : { number: pr, report: mkReport({ number: pr }) },
      );
    const outcomes = await pollAllBounded([1, 99, 2], "o", "r", 4, pollFn);
    expect(outcomes).toHaveLength(3);
    expect(outcomes[0]?.report?.number).toBe(1);
    expect(outcomes[1]?.error?.exitCode).toBe(2);
    expect(outcomes[2]?.report?.number).toBe(2);
  });

  test("converts a rejected pollFn promise into a PollError outcome", async () => {
    // P0 invariant (Copilot review on PR #1153 2026-05-01):
    // pollAllBounded must NEVER reject — even if pollFn throws or
    // returns a rejected promise. The orchestrator's contract is
    // that Promise.all(workers) always resolves; rejection from a
    // single PR's poll converts to PollOutcome.error so the caller
    // partitions success/failure deterministically.
    const pollFn = (pr: number): Promise<PollOutcome> =>
      pr === 99
        ? Promise.reject(new Error("synthetic rejection"))
        : Promise.resolve({ number: pr, report: mkReport({ number: pr }) });
    const outcomes = await pollAllBounded([1, 99, 2], "o", "r", 4, pollFn);
    expect(outcomes).toHaveLength(3);
    expect(outcomes[0]?.report?.number).toBe(1);
    expect(outcomes[1]?.error?.exitCode).toBe(-1);
    expect(outcomes[1]?.error?.stderr).toContain("synthetic rejection");
    expect(outcomes[2]?.report?.number).toBe(2);
  });
});

// [patch] diff --git a/tools/github/poll-pr-gate-batch.ts b/tools/github/poll-pr-gate-batch.ts
// [patch] new file mode 100755 · index 000000000..d888327ad · @@ -0,0 +1,441 @@
// [patch] first line of the new file is the shebang: #!/usr/bin/env bun
//
// poll-pr-gate-batch.ts — refresh world model across many PRs in one call.
//
// Wraps `poll-pr-gate.ts` (single-PR gate query) and runs it in parallel
// for a list of PR numbers, then aggregates the per-PR JSON reports
// into one stable, structured payload for the autonomous-loop tick or
// the conversation window. This is the multi-PR refresh-world-model
// tool — replaces ad-hoc `for n in 1 2 3; do gh pr view $n …; done`
// bash loops per the human maintainer's directive 2026-05-01: write a
// batch version of the ts that calls this one — not bash, not .sh,
// not .ps1, not dynamic bash.
//
// Origin: task #355 (5-AI peer convergence on poll-the-gate as
// executable script with fixtures); follow-on to v1 single-PR script.
// Full attribution lineage in:
//   - memory/feedback_prefer_ts_scripts_over_dynamic_bash_for_conversation_ux_dst_in_ts_aaron_2026_05_01.md
//   - memory/feedback_amara_poll_gate_not_ending_holding_is_not_status_2026_04_30.md
//   - memory/feedback_first_class_for_us_not_for_our_host_portability_over_host_coupling_aaron_2026_05_01.md
//
// Usage:
//   bun tools/github/poll-pr-gate-batch.ts <PR> [<PR> ...]
//   bun tools/github/poll-pr-gate-batch.ts --all-open
//   bun tools/github/poll-pr-gate-batch.ts --all-open --owner Lucent-Financial-Group --repo Zeta
//   bun tools/github/poll-pr-gate-batch.ts --concurrency 4 1152 1145 1130
//   bun tools/github/poll-pr-gate-batch.ts --summary-only --all-open
//
// Output: one JSON object on stdout, shape:
//   {
//     "owner": "Lucent-Financial-Group",
//     "repo": "Zeta",
//     "queriedAt": "2026-05-01T20:30:00.000Z",
//     "count": 3,
//     "summary": {
//       // The three by* maps only contain keys that actually occurred;
//       // values that were never seen are absent rather than 0.
//       "byGate":       { "CLEAN": 1, "BLOCKED": 1, "UNKNOWN": 1 },
//       "byNextAction": { "verify-merge": 1, "resolve-threads": 1, "wait-ci": 1 },
//       "byState":      { "OPEN": 2, "MERGED": 1 },
//       "actionable":   [1145, 1130],   // PRs whose nextAction != "none" / "verify-merge"
//       "warnings":     ["#1145: non-required check failed: foo", ...]
//     },
//     "reports": [ <GateReport>, <GateReport>, ... ],  // one per PR, ordered by input
//     "errors":  [ { "number": 1149, "exitCode": 2, "stderr": "..." }, ... ]
//   }
//
// Concurrency: defaults to 4 in-flight gh calls; each individual poll
// fans out to two gh subcommands plus pagination, so high concurrency
// can hit GitHub rate limits on a slow tick. Tune via --concurrency.
//
// Exit codes:
//   0 — all queries succeeded (errors[] is empty)
//   1 — invocation / argument error (bad args, no PRs given)
//   2 — at least one per-PR query failed (errors[] non-empty); the
//       successful reports are still emitted in `reports`. Caller can
//       distinguish full failure vs partial failure by checking
//       `count === reports.length` vs `errors.length`. (With
//       --summary-only, `reports` is always emitted empty, so compare
//       `count` against `errors.length` instead.)
import { spawn, spawnSync } from "node:child_process";
import { fileURLToPath } from "node:url";
import { dirname, resolve } from "node:path";

/** Per-status check tallies, as carried in a single-PR gate report. */
export interface CheckCounts {
  ok: number;
  inProgress: number;
  pending: number;
  failed: number;
}

/** Shape of the JSON object this tool expects `poll-pr-gate.ts` to print for one PR. */
export interface GateReport {
  number: number;
  state: string;
  gate: "CLEAN" | "BLOCKED" | "DIRTY" | "UNSTABLE" | "UNKNOWN";
  checks: CheckCounts;
  requiredChecks: CheckCounts;
  unresolvedThreads: number;
  autoMerge: "armed" | "none";
  mergeCommit: string | null;
  warnings: string[];
  nextAction:
    | "wait-ci"
    | "fix-failed-checks"
    | "resolve-threads"
    | "rebase"
    | "verify-merge"
    | "none";
}

/** A failed per-PR poll: which PR, the child's exit code, and its stderr. */
export interface PollError {
  number: number;
  exitCode: number;
  stderr: string;
}

/**
 * Aggregate view over a set of reports. The `by*` maps contain only keys
 * that actually occurred in the input (absent, not zero, otherwise).
 */
export interface BatchSummary {
  byGate: Record<string, number>;
  byNextAction: Record<string, number>;
  byState: Record<string, number>;
  actionable: number[];
  warnings: string[];
}

/** The single JSON object written to stdout by {@link main}. */
export interface BatchReport {
  owner: string;
  repo: string;
  queriedAt: string;
  count: number;
  summary: BatchSummary;
  reports: GateReport[];
  errors: PollError[];
}

interface ParsedArgs {
  owner: string;
  repo: string;
  concurrency: number;
  prs: number[];
  allOpen: boolean;
  summaryOnly: boolean;
}

// The single-PR script is resolved relative to this file so the batch
// tool works regardless of the caller's working directory.
const HERE = dirname(fileURLToPath(import.meta.url));
const POLL_SCRIPT = resolve(HERE, "poll-pr-gate.ts");

/**
 * Parses CLI arguments.
 *
 * Terminates the process directly: exit 0 after printing `--help`,
 * exit 1 on any invalid or missing argument.
 *
 * @param argv Arguments after the script name (`process.argv.slice(2)`).
 * @returns The parsed options; `prs` preserves command-line order.
 */
function parseArgs(argv: string[]): ParsedArgs {
  const out: ParsedArgs = {
    owner: "Lucent-Financial-Group",
    repo: "Zeta",
    concurrency: 4,
    prs: [],
    allOpen: false,
    summaryOnly: false,
  };
  const requireValue = (flag: string, v: string | undefined): string => {
    // Reject any value starting with `-` (not just `--`), so that
    // `--owner -h` doesn't silently consume `-h` as the flag value.
    // Matches the parsing pattern in tools/github/check-github-status.ts.
    // (Copilot review on PR #1153 2026-05-01.)
    if (v === undefined || v.startsWith("-")) {
      process.stderr.write(`${flag} requires a value\n`);
      process.exit(1);
    }
    return v;
  };
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === undefined) continue;
    if (arg === "--owner") {
      out.owner = requireValue("--owner", argv[++i]);
    } else if (arg === "--repo") {
      out.repo = requireValue("--repo", argv[++i]);
    } else if (arg === "--concurrency") {
      const v = requireValue("--concurrency", argv[++i]);
      // Digits-only: Number.parseInt on its own would silently accept
      // trailing garbage such as "4x" or truncate "1.5" to 1.
      const n = /^\d+$/.test(v) ? Number.parseInt(v, 10) : Number.NaN;
      if (!Number.isSafeInteger(n) || n <= 0) {
        process.stderr.write(`--concurrency must be a positive integer (got ${v})\n`);
        process.exit(1);
      }
      out.concurrency = n;
    } else if (arg === "--all-open") {
      out.allOpen = true;
    } else if (arg === "--summary-only") {
      out.summaryOnly = true;
    } else if (arg === "--help" || arg === "-h") {
      process.stdout.write(
        "Usage: poll-pr-gate-batch.ts <PR> [<PR> ...]\n" +
          "       poll-pr-gate-batch.ts --all-open [--owner X] [--repo Y]\n" +
          "       poll-pr-gate-batch.ts --concurrency N <PR> [<PR> ...]\n" +
          "       poll-pr-gate-batch.ts --summary-only --all-open\n",
      );
      process.exit(0);
    } else if (/^\d+$/.test(arg)) {
      const n = Number.parseInt(arg, 10);
      if (n <= 0) {
        process.stderr.write(`PR number must be a positive integer (got ${arg})\n`);
        process.exit(1);
      }
      out.prs.push(n);
    } else {
      process.stderr.write(`unknown arg: ${arg}\n`);
      process.exit(1);
    }
  }
  if (!out.allOpen && out.prs.length === 0) {
    process.stderr.write(
      "must provide PR numbers or --all-open (try --help)\n",
    );
    process.exit(1);
  }
  return out;
}

/**
 * Lists the numbers of all open PRs in `owner/repo` via the `gh` CLI.
 *
 * Exits the process with code 2 if `gh` cannot be launched or returns
 * a non-zero status.
 */
function listOpenPRs(owner: string, repo: string): number[] {
  // Enumerate all open PRs via `gh api --paginate` so repos with
  // more than 1000 open PRs don't get silently truncated (Codex P2
  // on PR #1153, 2026-05-01). The paginated REST API follows Link
  // headers automatically.
  //
  // Use `--jq '.[].number'` to project each page to one PR number
  // per line (line-oriented primitive output) instead of trying to
  // parse the page bodies as JSON. `gh api` may pretty-print page
  // bodies across multiple lines for REST endpoints, breaking any
  // line-split-then-JSON-parse approach (Codex P2 + Copilot P1 on
  // PR #1153 2026-05-01). The jq filter sidesteps the issue
  // entirely — stdout becomes a stream of integers, one per line.
  const result = spawnSync(
    "gh",
    [
      "api",
      "--paginate",
      `/repos/${owner}/${repo}/pulls?state=open&per_page=100`,
      "--jq",
      ".[].number",
    ],
    { encoding: "utf8", maxBuffer: 32 * 1024 * 1024 },
  );
  if (result.error) {
    process.stderr.write(`failed to launch gh: ${result.error.message}\n`);
    process.exit(2);
  }
  if (result.status !== 0) {
    process.stderr.write(
      `gh api --paginate exited ${result.status}: ${result.stderr || result.stdout}\n`,
    );
    process.exit(2);
  }
  // Each non-empty line is one PR number — guaranteed by the jq
  // filter. Parse each as an integer; skip malformed lines defensively.
  const all: number[] = [];
  for (const line of result.stdout.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    const n = Number.parseInt(trimmed, 10);
    if (Number.isFinite(n) && n > 0) all.push(n);
  }
  return all;
}

/** Result of polling one PR: exactly one of `report` / `error` is expected to be set. */
export interface PollOutcome {
  number: number;
  report?: GateReport;
  error?: PollError;
}

/**
 * Polls a single PR by spawning `bun poll-pr-gate.ts <pr> --owner … --repo …`.
 *
 * Never rejects: spawn failures (exitCode -1), non-zero child exits
 * (the child's exit code) and unparseable / non-object stdout
 * (exitCode 3) are all returned as `PollOutcome.error`.
 */
function pollOne(
  prNumber: number,
  owner: string,
  repo: string,
): Promise<PollOutcome> {
  return new Promise<PollOutcome>((resolveOutcome) => {
    // Spawn the existing single-PR script. Async spawn (not spawnSync)
    // so Promise.all-style fan-out actually overlaps — gh CLI is the
    // dominant cost; bun startup is ~50ms each but doesn't serialise.
    // This is the literal "calls this one" pattern: child invocation
    // via the same on-disk script the maintainer reaches for manually.
    // Default stdio (omitted) gives ['pipe','pipe','pipe'] without
    // the explicit-stdio TS narrowing problem: when `stdio` is
    // explicitly specified, TypeScript types `child.stdout`/`stderr`
    // as nullable, breaking the repo's strict tsc gate. Default
    // pipes give non-null streams. Explicitly close stdin since we
    // never write to it. (Copilot review on PR #1153 2026-05-01.)
    const child = spawn("bun", [
      POLL_SCRIPT,
      String(prNumber),
      "--owner",
      owner,
      "--repo",
      repo,
    ]);
    child.stdin.end();
    const stdoutChunks: Buffer[] = [];
    const stderrChunks: Buffer[] = [];
    child.stdout.on("data", (c: Buffer) => stdoutChunks.push(c));
    child.stderr.on("data", (c: Buffer) => stderrChunks.push(c));
    // 'error' and 'close' may both fire for one child; only the first
    // resolveOutcome call has any effect, later ones are no-ops.
    child.on("error", (err) => {
      resolveOutcome({
        number: prNumber,
        error: {
          number: prNumber,
          exitCode: -1,
          stderr: `spawn error: ${err.message}`,
        },
      });
    });
    child.on("close", (code) => {
      // `code` is null when the child was terminated by a signal.
      const exitCode = code ?? -1;
      const stdout = Buffer.concat(stdoutChunks).toString("utf8");
      const stderr = Buffer.concat(stderrChunks).toString("utf8");
      if (exitCode !== 0) {
        resolveOutcome({
          number: prNumber,
          error: { number: prNumber, exitCode, stderr },
        });
        return;
      }
      try {
        const parsed: unknown = JSON.parse(stdout);
        // `null`, a bare number, or an array is valid JSON but would
        // crash summarize() on property access; surface it as an
        // error outcome instead of casting it to GateReport.
        if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
          throw new Error("expected a JSON object");
        }
        resolveOutcome({ number: prNumber, report: parsed as GateReport });
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        resolveOutcome({
          number: prNumber,
          error: {
            number: prNumber,
            exitCode: 3,
            stderr: `JSON parse error: ${msg}\nfirst 200 bytes: ${stdout.slice(0, 200)}`,
          },
        });
      }
    });
  });
}

type PollFn = (
  prNumber: number,
  owner: string,
  repo: string,
) => Promise<PollOutcome>;

/**
 * Polls every PR in `prs` with at most `concurrency` polls in flight.
 *
 * @param prs PR numbers to poll; `outcomes[i]` corresponds to `prs[i]`.
 * @param owner Repository owner, passed through to `pollFn`.
 * @param repo Repository name, passed through to `pollFn`.
 * @param concurrency Maximum simultaneous polls. Values below 1 (or NaN)
 *   are treated as 1 so that every PR is still polled; the effective
 *   worker count never exceeds `prs.length`.
 * @param pollFn Per-PR poller; injectable so tests can run the
 *   orchestration without spawning `gh`. Defaults to the real spawner.
 * @returns One outcome per input PR, in input order. Never rejects: a
 *   throwing or rejecting `pollFn` becomes an error outcome (exitCode -1).
 */
export async function pollAllBounded(
  prs: number[],
  owner: string,
  repo: string,
  concurrency: number,
  pollFn: PollFn = pollOne,
): Promise<PollOutcome[]> {
  // Bounded concurrency to avoid hammering GitHub's rate limit. Each
  // poll fans out to 2-3 gh calls internally; cap parallel polls so
  // total in-flight gh calls stay well below the 5000/hr limit even
  // on a packed queue. Order in `outcomes` matches input order.
  const outcomes: PollOutcome[] = new Array<PollOutcome>(prs.length);
  let cursor = 0;
  const workers: Promise<void>[] = [];
  // Clamp to >= 1: with zero workers nothing would ever be polled and
  // `outcomes` would be returned as an array of holes.
  const requested = Number.isNaN(concurrency) ? 1 : Math.floor(concurrency);
  const workerCount = Math.min(Math.max(1, requested), prs.length);
  for (let w = 0; w < workerCount; w++) {
    workers.push(
      (async () => {
        while (true) {
          // Claiming the index is synchronous, so two workers can
          // never take the same slot.
          const idx = cursor++;
          if (idx >= prs.length) return;
          const pr = prs[idx];
          if (pr === undefined) continue;
          // Wrap pollFn in try/catch so a throw or rejected promise
          // from a single PR doesn't abort the whole batch
          // (Copilot P0 review on PR #1153 2026-05-01). Convert any
          // rejection into a PollOutcome.error entry; Promise.all
          // on the workers always resolves.
          try {
            outcomes[idx] = await pollFn(pr, owner, repo);
          } catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            outcomes[idx] = {
              number: pr,
              error: {
                number: pr,
                exitCode: -1,
                stderr: `pollFn rejected: ${msg}`,
              },
            };
          }
        }
      })(),
    );
  }
  await Promise.all(workers);
  return outcomes;
}

/**
 * Aggregates per-PR reports into counts by gate / next action / state,
 * the list of actionable PR numbers (input order; `nextAction` other
 * than "none" or "verify-merge"), and all warnings prefixed `#<PR>: `.
 */
export function summarize(reports: GateReport[]): BatchSummary {
  const byGate: Record<string, number> = {};
  const byNextAction: Record<string, number> = {};
  const byState: Record<string, number> = {};
  const actionable: number[] = [];
  const warnings: string[] = [];
  for (const r of reports) {
    byGate[r.gate] = (byGate[r.gate] ?? 0) + 1;
    byNextAction[r.nextAction] = (byNextAction[r.nextAction] ?? 0) + 1;
    byState[r.state] = (byState[r.state] ?? 0) + 1;
    if (r.nextAction !== "none" && r.nextAction !== "verify-merge") {
      actionable.push(r.number);
    }
    for (const w of r.warnings) {
      warnings.push(`#${r.number}: ${w}`);
    }
  }
  return { byGate, byNextAction, byState, actionable, warnings };
}

/**
 * CLI entry point: parses args, polls the selected PRs, and writes one
 * pretty-printed `BatchReport` JSON object to stdout.
 *
 * @returns The intended process exit code: 0 when every poll succeeded
 *   (or there was nothing to poll), 2 when at least one poll failed.
 *   Argument errors exit the process from inside `parseArgs`.
 */
export async function main(argv: string[]): Promise<number> {
  const args = parseArgs(argv);
  const prs = args.allOpen ? listOpenPRs(args.owner, args.repo) : args.prs;
  if (prs.length === 0) {
    // --all-open with no open PRs is a valid empty result, not an error.
    const empty: BatchReport = {
      owner: args.owner,
      repo: args.repo,
      queriedAt: new Date().toISOString(),
      count: 0,
      summary: {
        byGate: {},
        byNextAction: {},
        byState: {},
        actionable: [],
        warnings: [],
      },
      reports: [],
      errors: [],
    };
    process.stdout.write(`${JSON.stringify(empty, null, 2)}\n`);
    return 0;
  }
  const outcomes = await pollAllBounded(prs, args.owner, args.repo, args.concurrency);
  const reports: GateReport[] = [];
  const errors: PollError[] = [];
  for (const o of outcomes) {
    if (o.report) reports.push(o.report);
    if (o.error) errors.push(o.error);
  }
  const batch: BatchReport = {
    owner: args.owner,
    repo: args.repo,
    queriedAt: new Date().toISOString(),
    count: prs.length,
    // The summary is always computed from the full report set; only
    // the per-PR `reports` array is dropped under --summary-only.
    summary: summarize(reports),
    reports: args.summaryOnly ? [] : reports,
    errors,
  };
  process.stdout.write(`${JSON.stringify(batch, null, 2)}\n`);
  return errors.length > 0 ? 2 : 0;
}

// Run only when executed directly, not when imported (e.g. by the tests).
if (import.meta.main) {
  main(process.argv.slice(2)).then(
    (code) => process.exit(code),
    (err) => {
      const msg = err instanceof Error ? err.message : String(err);
      process.stderr.write(`unhandled error: ${msg}\n`);
      process.exit(1);
    },
  );
}