thejustinwalsh · thejustinwalsh · May 25, 2026
diff --git a/packages/dispatcher/src/poller-gateway.ts b/packages/dispatcher/src/poller-gateway.ts
@@ -1,4 +1,5 @@
 import type {
+  CiStatus,
   EpicPrLifecycle,
   GitHubPollGateway,
   IssueComment,
@@ -7,6 +8,47 @@ import type {
   RateLimitStatus,
 } from "./poller.ts";
 
+/**
+ * One `statusCheckRollup` entry as `gh pr view` returns it. A **CheckRun**
+ * (GitHub Actions / most apps) reports `status` + `conclusion`; a **StatusContext**
+ * (legacy commit statuses) reports a single `state`. We read whichever is present.
+ */
+export type CheckRollupEntry = {
+  status?: string; // CheckRun: QUEUED | IN_PROGRESS | COMPLETED
+  conclusion?: string; // CheckRun: SUCCESS | FAILURE | NEUTRAL | CANCELLED | TIMED_OUT | ...
+  state?: string; // StatusContext: SUCCESS | FAILURE | ERROR | PENDING
+};
+
+const CONCLUSION_OK = new Set(["SUCCESS", "NEUTRAL", "SKIPPED"]);
+const STATE_OK = new Set(["SUCCESS", "EXPECTED"]);
+
+/**
+ * Collapse a `statusCheckRollup` array into a single {@link CiStatus}. Any
+ * failing check ⇒ `failing` (a red build can't be reviewed); else any still-
+ * running check ⇒ `pending`; else `passing`. An empty/absent rollup ⇒ `none`
+ * (no checks configured — nothing to gate on). Pure, so it's unit-tested directly.
+ */
+export function deriveCiStatus(rollup: CheckRollupEntry[] | null | undefined): CiStatus {
+  if (!rollup || rollup.length === 0) return "none";
+  let pending = false;
+  for (const c of rollup) {
+    if (c.status !== undefined && c.status !== "COMPLETED") {
+      pending = true; // a CheckRun that hasn't finished
+      continue;
+    }
+    if (c.conclusion !== undefined) {
+      if (!CONCLUSION_OK.has(c.conclusion)) return "failing";
+      continue;
+    }
+    // StatusContext (no status/conclusion) — read `state`.
+    if (c.state !== undefined) {
+      if (c.state === "PENDING") pending = true;
+      else if (!STATE_OK.has(c.state)) return "failing"; // FAILURE / ERROR
+    }
+  }
+  return pending ? "pending" : "passing";
+}
+
 /**
  * The production {@link GitHubPollGateway} — reads issue comments and PR review
  * state through the `gh` CLI. The poller's logic is unit-tested against an
@@ -88,11 +130,12 @@ export const ghPollGateway: GitHubPollGateway = {
       "--repo",
       repo,
       "--json",
-      "reviewDecision,labels",
+      "reviewDecision,labels,statusCheckRollup",
     ]);
     const view = JSON.parse(viewOut) as {
       reviewDecision: string | null;
       labels: Array<{ name: string }>;
+      statusCheckRollup: CheckRollupEntry[] | null;
     };
 
     const reviewsOut = await gh([
@@ -125,6 +168,7 @@ export const ghPollGateway: GitHubPollGateway = {
       reviewDecision: view.reviewDecision ?? null,
       reviews,
       labels: view.labels.map((l) => l.name),
+      ci: deriveCiStatus(view.statusCheckRollup),
     };
   },
 

diff --git a/packages/dispatcher/src/poller.ts b/packages/dispatcher/src/poller.ts
@@ -47,14 +47,28 @@ export type PrReview = {
   body: string;
 };
 
+/**
+ * A PR's CI standing, collapsed from `statusCheckRollup`:
+ * - `passing` — every required check succeeded (or was neutral/skipped).
+ * - `failing` — at least one check failed/errored/was cancelled.
+ * - `pending` — nothing failed yet, but a check is still running/queued.
+ * - `none` — no checks are configured on the PR (nothing to gate on).
+ */
+export type CiStatus = "passing" | "failing" | "pending" | "none";
+
 /** A PR's review-relevant snapshot. */
 export type PrSnapshot = {
   number: number;
   reviewDecision: string | null; // 'APPROVED' | 'CHANGES_REQUESTED' | 'REVIEW_REQUIRED' | null
   reviews: PrReview[];
   labels: string[];
+  /** CI rollup. Absent (legacy/test fixtures) is treated as `none` — non-blocking. */
+  ci?: CiStatus;
 };
 
+/** The synthetic `decision` a CI-failure resume carries, distinct from a review `CHANGES_REQUESTED`. */
+export const CI_FAILED_DECISION = "CI_FAILED";
+
 /** GitHub's remaining REST budget and when it resets (epoch ms). */
 export type RateLimitStatus = { remaining: number; resetAt: number };
 
@@ -159,17 +173,44 @@ export function classifyNewHumanReply(
   return fresh[0] ?? null;
 }
 
+/** The resume verdict a classification resolves to (or null = nothing actionable). */
+type ReviewVerdict = { outcome: ReviewOutcome; reviewId: number | null; decision: string | null };
+
 /**
- * Classify the PR's review state into a resume verdict, or null when nothing
- * actionable has changed since the wait armed. The newest review submitted this
+ * Classify the PR into a resume verdict, **CI-gated**: a PR isn't reviewable
+ * until it builds, so CI standing participates in the verdict alongside reviews.
+ *
+ * Precedence:
+ *  1. **Explicit review feedback wins** — a `CHANGES_REQUESTED` (review or label)
+ *     resumes for the feedback; addressing it should also green CI, so don't
+ *     pre-empt it with a CI nudge.
+ *  2. **Red CI is its own resume trigger** — failing checks with no outstanding
+ *     review feedback resume the agent to fix CI ({@link CI_FAILED_DECISION}).
+ *  3. **A resolve is gated on green** — an `APPROVED`/0-actionable verdict while
+ *     checks are still `pending` is held (null) until they finish, so the loop
+ *     never ends on a PR whose CI hasn't reported. `passing`/`none` resolves.
+ *
+ * Absent CI (`undefined` → `none`) is non-blocking, so the pre-CI review loop is
+ * unchanged.
+ */
+export function classifyReviewOutcome(snapshot: PrSnapshot, sinceMs: number): ReviewVerdict | null {
+  const review = classifyReviewVerdict(snapshot, sinceMs);
+  const ci = snapshot.ci ?? "none";
+  if (review?.outcome === "changes-requested") return review;
+  if (ci === "failing") {
+    return { outcome: "changes-requested", reviewId: null, decision: CI_FAILED_DECISION };
+  }
+  if (review?.outcome === "resolved") return ci === "pending" ? null : review;
+  return review;
+}
+
+/**
+ * The review-only verdict (no CI gating) — the newest review submitted this
  * round is authoritative; a 0-actionable re-review counts as **resolved** even
  * while the PR's `reviewDecision` still reads `CHANGES_REQUESTED`. Falls back to
  * the standing decision / `changes-requested` label when no fresh review exists.
  */
-export function classifyReviewOutcome(
-  snapshot: PrSnapshot,
-  sinceMs: number,
-): { outcome: ReviewOutcome; reviewId: number | null; decision: string | null } | null {
+function classifyReviewVerdict(snapshot: PrSnapshot, sinceMs: number): ReviewVerdict | null {
   const fresh = snapshot.reviews
     .filter((r) => r.submittedAt > sinceMs)
     .sort((a, b) => b.submittedAt - a.submittedAt);

diff --git a/packages/dispatcher/src/workflows/implementation.ts b/packages/dispatcher/src/workflows/implementation.ts
@@ -6,7 +6,7 @@ import { Workflow } from "bunqueue/workflow";
 import type { StepContext } from "bunqueue/workflow";
 import { type PlanCommentReader, verifyPlanComment } from "../gates/plan-comment.ts";
 import type { SessionGate } from "../hook-server.ts";
-import type { ResumeSignalPayload } from "../poller.ts";
+import { CI_FAILED_DECISION, type ResumeSignalPayload } from "../poller.ts";
 import { markAvailableOnSuccess, parseResetAt, setRateLimited } from "../rate-limits.ts";
 import type { CreateWorktreeOpts, WorktreeHandle } from "../worktree.ts";
 import {
@@ -326,6 +326,34 @@ ${operatingRules}`,
 
   // review-changes
   const decision = resume.payload.reason === "review-changes" ? resume.payload.decision : null;
+
+  // CI-failure resume (#CI gate): a red build, not review feedback. The agent
+  // pulls the failing checks itself and fixes them — a distinct brief from the
+  // address-review one (there are no review threads to work, just broken CI).
+  if (decision === CI_FAILED_DECISION) {
+    writeFileSync(
+      promptPath,
+      `# middle dispatch brief — Epic #${epicNumber} (resumed: CI is failing — round ${resume.round} of ${reviewRoundCap})
+
+The PR's CI is **red** — a PR can't be reviewed until it builds. Investigate and
+fix the failing checks now:
+
+1. Pull the failing checks yourself: \`gh pr checks\` for the rollup, then
+   \`gh run view <run-id> --log-failed\` (or the check's details URL) to read the
+   actual failure — don't guess from the check name.
+2. Reproduce locally where you can (run the failing gate/test), fix the **cause**,
+   and add a regression test if the failure was a real defect, not flake.
+3. **Push once** for the whole fix, then stop. The workflow re-parks and re-checks
+   CI on the next poll; a green build (plus review) is what ends the loop.
+
+This is round ${resume.round} of ${reviewRoundCap}. After ${reviewRoundCap} rounds without resolution the
+workflow parks for a human and stops auto-resuming.
+
+${operatingRules}`,
+    );
+    return;
+  }
+
   writeFileSync(
     promptPath,
     `# middle dispatch brief — Epic #${epicNumber} (resumed: address review — round ${resume.round} of ${reviewRoundCap})

diff --git a/packages/dispatcher/test/implementation-workflow.test.ts b/packages/dispatcher/test/implementation-workflow.test.ts
@@ -192,6 +192,12 @@ const APPROVED = {
   reviewId: 2,
   decision: "APPROVED",
 };
+const CI_FAILED = {
+  reason: "review-changes" as const,
+  outcome: "changes-requested" as const,
+  reviewId: null,
+  decision: "CI_FAILED",
+};
 
 /** No session leak: every tmux session that was created was also killed. */
 function expectNoSessionLeak(tmux: { created: string[]; killed: string[] }): void {
@@ -582,6 +588,30 @@ describe("implementation workflow — done park → review-changes → resume (e
     expectNoSessionLeak(tmux);
   });
 
+  test("a CI_FAILED verdict resumes a continuation with the fix-CI brief (not the address-review one)", async () => {
+    const prompts: string[] = [];
+    const adapter = makeAdapterStub({ kind: "done" }, prompts);
+    const { deps, continuationIds } = withContinuations({ getAdapter: () => adapter });
+    const id0 = await start(deps);
+    await awaitParked(id0);
+
+    // Red CI (no review feedback) resumes the agent to fix the build.
+    await engine.signal(id0, RESUME_EVENT, CI_FAILED);
+    expect(await awaitSettled(id0)).toBe("completed");
+
+    const id1 = await awaitContinuation(continuationIds, 0);
+    await awaitParked(id1);
+    const brief = readPromptBrief(id1);
+    expect(brief).toContain("CI is failing — round 1 of 5");
+    expect(brief).toContain("gh pr checks");
+    expect(brief).toContain("Push once");
+    // It is NOT the address-review brief — there are no review threads to work.
+    expect(brief).not.toContain("Addressing review feedback");
+
+    await engine.signal(id1, RESUME_EVENT, APPROVED);
+    expect(await awaitSettled(id1)).toBe("completed");
+  });
+
   test("a resolved review reverts a previously RATE_LIMITED adapter to AVAILABLE", async () => {
     setRateLimited(db, {
       adapter: "stub",

diff --git a/packages/dispatcher/test/poller-gateway.test.ts b/packages/dispatcher/test/poller-gateway.test.ts
@@ -0,0 +1,41 @@
+import { describe, expect, test } from "bun:test";
+import { type CheckRollupEntry, deriveCiStatus } from "../src/poller-gateway.ts";
+
+/** A finished GitHub Actions check run. */
+function run(conclusion: string): CheckRollupEntry {
+  return { status: "COMPLETED", conclusion };
+}
+
+describe("deriveCiStatus", () => {
+  test("no checks configured → none (nothing to gate on)", () => {
+    expect(deriveCiStatus(null)).toBe("none");
+    expect(deriveCiStatus([])).toBe("none");
+  });
+
+  test("all check runs succeeded (incl. neutral/skipped) → passing", () => {
+    expect(deriveCiStatus([run("SUCCESS"), run("NEUTRAL"), run("SKIPPED")])).toBe("passing");
+  });
+
+  test("any failed/errored/cancelled/timed-out check → failing", () => {
+    expect(deriveCiStatus([run("SUCCESS"), run("FAILURE")])).toBe("failing");
+    expect(deriveCiStatus([run("TIMED_OUT")])).toBe("failing");
+    expect(deriveCiStatus([run("CANCELLED")])).toBe("failing");
+    expect(deriveCiStatus([run("ACTION_REQUIRED")])).toBe("failing");
+  });
+
+  test("an unfinished check run (not COMPLETED) → pending", () => {
+    expect(deriveCiStatus([run("SUCCESS"), { status: "IN_PROGRESS" }])).toBe("pending");
+    expect(deriveCiStatus([{ status: "QUEUED" }])).toBe("pending");
+  });
+
+  test("a failure outranks a still-running check → failing", () => {
+    expect(deriveCiStatus([{ status: "IN_PROGRESS" }, run("FAILURE")])).toBe("failing");
+  });
+
+  test("legacy StatusContext entries (state) are read too", () => {
+    expect(deriveCiStatus([{ state: "SUCCESS" }])).toBe("passing");
+    expect(deriveCiStatus([{ state: "PENDING" }])).toBe("pending");
+    expect(deriveCiStatus([{ state: "FAILURE" }])).toBe("failing");
+    expect(deriveCiStatus([{ state: "ERROR" }])).toBe("failing");
+  });
+});
diff --git a/packages/dispatcher/test/poller.test.ts b/packages/dispatcher/test/poller.test.ts
@@ -246,6 +246,51 @@ describe("classifyReviewOutcome", () => {
   });
 });
 
+describe("classifyReviewOutcome — CI gate", () => {
+  test("failing CI with no review feedback → resume to fix CI (CI_FAILED)", () => {
+    const v = classifyReviewOutcome(prSnapshot({ ci: "failing" }), ARMED_AT);
+    expect(v).toEqual({ outcome: "changes-requested", reviewId: null, decision: "CI_FAILED" });
+  });
+
+  test("an APPROVED review while CI is still pending is held (null) — don't end on un-built CI", () => {
+    const v = classifyReviewOutcome(
+      prSnapshot({ reviewDecision: "APPROVED", ci: "pending" }),
+      ARMED_AT,
+    );
+    expect(v).toBeNull();
+  });
+
+  test("an APPROVED review with passing CI resolves", () => {
+    const v = classifyReviewOutcome(
+      prSnapshot({ reviewDecision: "APPROVED", ci: "passing" }),
+      ARMED_AT,
+    );
+    expect(v).toEqual({ outcome: "resolved", reviewId: null, decision: "APPROVED" });
+  });
+
+  test("explicit review feedback wins over red CI (address the review, which greens CI)", () => {
+    const v = classifyReviewOutcome(
+      prSnapshot({ labels: ["changes-requested"], ci: "failing" }),
+      ARMED_AT,
+    );
+    expect(v).toEqual({
+      outcome: "changes-requested",
+      reviewId: null,
+      decision: "CHANGES_REQUESTED",
+    });
+  });
+
+  test("absent CI (`none`) is non-blocking — the pre-CI review loop is unchanged", () => {
+    const v = classifyReviewOutcome(prSnapshot({ reviewDecision: "APPROVED" }), ARMED_AT);
+    expect(v).toEqual({ outcome: "resolved", reviewId: null, decision: "APPROVED" });
+  });
+
+  test("failing CI but no PR change and no review → still CI_FAILED (red build is actionable)", () => {
+    const v = classifyReviewOutcome(prSnapshot({ ci: "failing", reviewDecision: null }), ARMED_AT);
+    expect(v?.decision).toBe("CI_FAILED");
+  });
+});
+
 describe("runPoller — answered-question", () => {
   test("a new human reply fires epic-<n>-answered exactly once (idempotent across passes)", async () => {
     const id = seedParked("answered-question");