diff --git a/packages/dispatcher/src/poller-gateway.ts b/packages/dispatcher/src/poller-gateway.ts index ebbf85da..00dfa359 100644 --- a/packages/dispatcher/src/poller-gateway.ts +++ b/packages/dispatcher/src/poller-gateway.ts @@ -1,4 +1,5 @@ import type { + CiStatus, EpicPrLifecycle, GitHubPollGateway, IssueComment, @@ -7,6 +8,47 @@ import type { RateLimitStatus, } from "./poller.ts"; +/** + * One `statusCheckRollup` entry as `gh pr view` returns it. A **CheckRun** + * (GitHub Actions / most apps) reports `status` + `conclusion`; a **StatusContext** + * (legacy commit statuses) reports a single `state`. We read whichever is present. + */ +export type CheckRollupEntry = { + status?: string; // CheckRun: QUEUED | IN_PROGRESS | COMPLETED + conclusion?: string; // CheckRun: SUCCESS | FAILURE | NEUTRAL | CANCELLED | TIMED_OUT | ... + state?: string; // StatusContext: SUCCESS | FAILURE | ERROR | PENDING +}; + +const CONCLUSION_OK = new Set(["SUCCESS", "NEUTRAL", "SKIPPED"]); +const STATE_OK = new Set(["SUCCESS", "EXPECTED"]); + +/** + * Collapse a `statusCheckRollup` array into a single {@link CiStatus}. Any + * failing check ⇒ `failing` (a red build can't be reviewed); else any still- + * running check ⇒ `pending`; else `passing`. An empty/absent rollup ⇒ `none` + * (no checks configured — nothing to gate on). Pure, so it's unit-tested directly. + */ +export function deriveCiStatus(rollup: CheckRollupEntry[] | null | undefined): CiStatus { + if (!rollup || rollup.length === 0) return "none"; + let pending = false; + for (const c of rollup) { + if (c.status !== undefined && c.status !== "COMPLETED") { + pending = true; // a CheckRun that hasn't finished + continue; + } + if (c.conclusion !== undefined) { + if (!CONCLUSION_OK.has(c.conclusion)) return "failing"; + continue; + } + // StatusContext (no status/conclusion) — read `state`. + if (c.state !== undefined) { + if (c.state === "PENDING") pending = true; + else if (!STATE_OK.has(c.state)) return "failing"; // FAILURE / ERROR + } + } + return pending ? "pending" : "passing"; +} + /** * The production {@link GitHubPollGateway} — reads issue comments and PR review * state through the `gh` CLI. The poller's logic is unit-tested against an @@ -88,11 +130,12 @@ export const ghPollGateway: GitHubPollGateway = { "--repo", repo, "--json", - "reviewDecision,labels", + "reviewDecision,labels,statusCheckRollup", ]); const view = JSON.parse(viewOut) as { reviewDecision: string | null; labels: Array<{ name: string }>; + statusCheckRollup: CheckRollupEntry[] | null; }; const reviewsOut = await gh([ @@ -125,6 +168,7 @@ export const ghPollGateway: GitHubPollGateway = { reviewDecision: view.reviewDecision ?? null, reviews, labels: view.labels.map((l) => l.name), + ci: deriveCiStatus(view.statusCheckRollup), }; }, diff --git a/packages/dispatcher/src/poller.ts b/packages/dispatcher/src/poller.ts index 7b54e56e..c5d710ab 100644 --- a/packages/dispatcher/src/poller.ts +++ b/packages/dispatcher/src/poller.ts @@ -47,14 +47,28 @@ export type PrReview = { body: string; }; +/** + * A PR's CI standing, collapsed from `statusCheckRollup`: + * - `passing` — every required check succeeded (or was neutral/skipped). + * - `failing` — at least one check failed/errored/was cancelled. + * - `pending` — nothing failed yet, but a check is still running/queued. + * - `none` — no checks are configured on the PR (nothing to gate on). + */ +export type CiStatus = "passing" | "failing" | "pending" | "none"; + /** A PR's review-relevant snapshot. */ export type PrSnapshot = { number: number; reviewDecision: string | null; // 'APPROVED' | 'CHANGES_REQUESTED' | 'REVIEW_REQUIRED' | null reviews: PrReview[]; labels: string[]; + /** CI rollup. Absent (legacy/test fixtures) is treated as `none` — non-blocking. */ + ci?: CiStatus; }; +/** The synthetic `decision` a CI-failure resume carries, distinct from a review `CHANGES_REQUESTED`. */ +export const CI_FAILED_DECISION = "CI_FAILED"; + /** GitHub's remaining REST budget and when it resets (epoch ms). */ export type RateLimitStatus = { remaining: number; resetAt: number }; @@ -159,17 +173,44 @@ export function classifyNewHumanReply( return fresh[0] ?? null; } +/** The resume verdict a classification resolves to (or null = nothing actionable). */ +type ReviewVerdict = { outcome: ReviewOutcome; reviewId: number | null; decision: string | null }; + /** - * Classify the PR's review state into a resume verdict, or null when nothing - * actionable has changed since the wait armed. The newest review submitted this + * Classify the PR into a resume verdict, **CI-gated**: a PR isn't reviewable + * until it builds, so CI standing participates in the verdict alongside reviews. + * + * Precedence: + * 1. **Explicit review feedback wins** — a `CHANGES_REQUESTED` (review or label) + * resumes for the feedback; addressing it should also green CI, so don't + * pre-empt it with a CI nudge. + * 2. **Red CI is its own resume trigger** — failing checks with no outstanding + * review feedback resume the agent to fix CI ({@link CI_FAILED_DECISION}). + * 3. **A resolve is gated on green** — an `APPROVED`/0-actionable verdict while + * checks are still `pending` is held (null) until they finish, so the loop + * never ends on a PR whose CI hasn't reported. `passing`/`none` resolves. + * + * Absent CI (`undefined` → `none`) is non-blocking, so the pre-CI review loop is + * unchanged. + */ +export function classifyReviewOutcome(snapshot: PrSnapshot, sinceMs: number): ReviewVerdict | null { + const review = classifyReviewVerdict(snapshot, sinceMs); + const ci = snapshot.ci ?? "none"; + if (review?.outcome === "changes-requested") return review; + if (ci === "failing") { + return { outcome: "changes-requested", reviewId: null, decision: CI_FAILED_DECISION }; + } + if (review?.outcome === "resolved") return ci === "pending" ? null : review; + return review; +} + +/** + * The review-only verdict (no CI gating) — the newest review submitted this * round is authoritative; a 0-actionable re-review counts as **resolved** even * while the PR's `reviewDecision` still reads `CHANGES_REQUESTED`. Falls back to * the standing decision / `changes-requested` label when no fresh review exists. */ -export function classifyReviewOutcome( - snapshot: PrSnapshot, - sinceMs: number, -): { outcome: ReviewOutcome; reviewId: number | null; decision: string | null } | null { +function classifyReviewVerdict(snapshot: PrSnapshot, sinceMs: number): ReviewVerdict | null { const fresh = snapshot.reviews .filter((r) => r.submittedAt > sinceMs) .sort((a, b) => b.submittedAt - a.submittedAt); diff --git a/packages/dispatcher/src/workflows/implementation.ts b/packages/dispatcher/src/workflows/implementation.ts index fa379a8e..84a60ecd 100644 --- a/packages/dispatcher/src/workflows/implementation.ts +++ b/packages/dispatcher/src/workflows/implementation.ts @@ -6,7 +6,7 @@ import { Workflow } from "bunqueue/workflow"; import type { StepContext } from "bunqueue/workflow"; import { type PlanCommentReader, verifyPlanComment } from "../gates/plan-comment.ts"; import type { SessionGate } from "../hook-server.ts"; -import type { ResumeSignalPayload } from "../poller.ts"; +import { CI_FAILED_DECISION, type ResumeSignalPayload } from "../poller.ts"; import { markAvailableOnSuccess, parseResetAt, setRateLimited } from "../rate-limits.ts"; import type { CreateWorktreeOpts, WorktreeHandle } from "../worktree.ts"; import { @@ -326,6 +326,34 @@ ${operatingRules}`, // review-changes const decision = resume.payload.reason === "review-changes" ? resume.payload.decision : null; + + // CI-failure resume (#CI gate): a red build, not review feedback. The agent + // pulls the failing checks itself and fixes them — a distinct brief from the + // address-review one (there are no review threads to work, just broken CI). + if (decision === CI_FAILED_DECISION) { + writeFileSync( + promptPath, + `# middle dispatch brief — Epic #${epicNumber} (resumed: CI is failing — round ${resume.round} of ${reviewRoundCap}) + +The PR's CI is **red** — a PR can't be reviewed until it builds. Investigate and +fix the failing checks now: + +1. Pull the failing checks yourself: \`gh pr checks\` for the rollup, then + \`gh run view --log-failed\` (or the check's details URL) to read the + actual failure — don't guess from the check name. +2. Reproduce locally where you can (run the failing gate/test), fix the **cause**, + and add a regression test if the failure was a real defect, not flake. +3. **Push once** for the whole fix, then stop. The workflow re-parks and re-checks + CI on the next poll; a green build (plus review) is what ends the loop. + +This is round ${resume.round} of ${reviewRoundCap}. After ${reviewRoundCap} rounds without resolution the +workflow parks for a human and stops auto-resuming. + +${operatingRules}`, + ); + return; + } + writeFileSync( promptPath, `# middle dispatch brief — Epic #${epicNumber} (resumed: address review — round ${resume.round} of ${reviewRoundCap}) diff --git a/packages/dispatcher/test/implementation-workflow.test.ts b/packages/dispatcher/test/implementation-workflow.test.ts index 6434ba93..4bbba9ef 100644 --- a/packages/dispatcher/test/implementation-workflow.test.ts +++ b/packages/dispatcher/test/implementation-workflow.test.ts @@ -192,6 +192,12 @@ const APPROVED = { reviewId: 2, decision: "APPROVED", }; +const CI_FAILED = { + reason: "review-changes" as const, + outcome: "changes-requested" as const, + reviewId: null, + decision: "CI_FAILED", +}; /** No session leak: every tmux session that was created was also killed. */ function expectNoSessionLeak(tmux: { created: string[]; killed: string[] }): void { @@ -582,6 +588,30 @@ describe("implementation workflow — done park → review-changes → resume (e expectNoSessionLeak(tmux); }); + test("a CI_FAILED verdict resumes a continuation with the fix-CI brief (not the address-review one)", async () => { + const prompts: string[] = []; + const adapter = makeAdapterStub({ kind: "done" }, prompts); + const { deps, continuationIds } = withContinuations({ getAdapter: () => adapter }); + const id0 = await start(deps); + await awaitParked(id0); + + // Red CI (no review feedback) resumes the agent to fix the build. + await engine.signal(id0, RESUME_EVENT, CI_FAILED); + expect(await awaitSettled(id0)).toBe("completed"); + + const id1 = await awaitContinuation(continuationIds, 0); + await awaitParked(id1); + const brief = readPromptBrief(id1); + expect(brief).toContain("CI is failing — round 1 of 5"); + expect(brief).toContain("gh pr checks"); + expect(brief).toContain("Push once"); + // It is NOT the address-review brief — there are no review threads to work. + expect(brief).not.toContain("Addressing review feedback"); + + await engine.signal(id1, RESUME_EVENT, APPROVED); + expect(await awaitSettled(id1)).toBe("completed"); + }); + test("a resolved review reverts a previously RATE_LIMITED adapter to AVAILABLE", async () => { setRateLimited(db, { adapter: "stub", diff --git a/packages/dispatcher/test/poller-gateway.test.ts b/packages/dispatcher/test/poller-gateway.test.ts new file mode 100644 index 00000000..e3e8cb6b --- /dev/null +++ b/packages/dispatcher/test/poller-gateway.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, test } from "bun:test"; +import { type CheckRollupEntry, deriveCiStatus } from "../src/poller-gateway.ts"; + +/** A finished GitHub Actions check run. */ +function run(conclusion: string): CheckRollupEntry { + return { status: "COMPLETED", conclusion }; +} + +describe("deriveCiStatus", () => { + test("no checks configured → none (nothing to gate on)", () => { + expect(deriveCiStatus(null)).toBe("none"); + expect(deriveCiStatus([])).toBe("none"); + }); + + test("all check runs succeeded (incl. neutral/skipped) → passing", () => { + expect(deriveCiStatus([run("SUCCESS"), run("NEUTRAL"), run("SKIPPED")])).toBe("passing"); + }); + + test("any failed/errored/cancelled/timed-out check → failing", () => { + expect(deriveCiStatus([run("SUCCESS"), run("FAILURE")])).toBe("failing"); + expect(deriveCiStatus([run("TIMED_OUT")])).toBe("failing"); + expect(deriveCiStatus([run("CANCELLED")])).toBe("failing"); + expect(deriveCiStatus([run("ACTION_REQUIRED")])).toBe("failing"); + }); + + test("an unfinished check run (not COMPLETED) → pending", () => { + expect(deriveCiStatus([run("SUCCESS"), { status: "IN_PROGRESS" }])).toBe("pending"); + expect(deriveCiStatus([{ status: "QUEUED" }])).toBe("pending"); + }); + + test("a failure outranks a still-running check → failing", () => { + expect(deriveCiStatus([{ status: "IN_PROGRESS" }, run("FAILURE")])).toBe("failing"); + }); + + test("legacy StatusContext entries (state) are read too", () => { + expect(deriveCiStatus([{ state: "SUCCESS" }])).toBe("passing"); + expect(deriveCiStatus([{ state: "PENDING" }])).toBe("pending"); + expect(deriveCiStatus([{ state: "FAILURE" }])).toBe("failing"); + expect(deriveCiStatus([{ state: "ERROR" }])).toBe("failing"); + }); +}); diff --git a/packages/dispatcher/test/poller.test.ts b/packages/dispatcher/test/poller.test.ts index 45114ee2..f7f2bf0b 100644 --- a/packages/dispatcher/test/poller.test.ts +++ b/packages/dispatcher/test/poller.test.ts @@ -246,6 +246,51 @@ describe("classifyReviewOutcome", () => { }); }); +describe("classifyReviewOutcome — CI gate", () => { + test("failing CI with no review feedback → resume to fix CI (CI_FAILED)", () => { + const v = classifyReviewOutcome(prSnapshot({ ci: "failing" }), ARMED_AT); + expect(v).toEqual({ outcome: "changes-requested", reviewId: null, decision: "CI_FAILED" }); + }); + + test("an APPROVED review while CI is still pending is held (null) — don't end on un-built CI", () => { + const v = classifyReviewOutcome( + prSnapshot({ reviewDecision: "APPROVED", ci: "pending" }), + ARMED_AT, + ); + expect(v).toBeNull(); + }); + + test("an APPROVED review with passing CI resolves", () => { + const v = classifyReviewOutcome( + prSnapshot({ reviewDecision: "APPROVED", ci: "passing" }), + ARMED_AT, + ); + expect(v).toEqual({ outcome: "resolved", reviewId: null, decision: "APPROVED" }); + }); + + test("explicit review feedback wins over red CI (address the review, which greens CI)", () => { + const v = classifyReviewOutcome( + prSnapshot({ labels: ["changes-requested"], ci: "failing" }), + ARMED_AT, + ); + expect(v).toEqual({ + outcome: "changes-requested", + reviewId: null, + decision: "CHANGES_REQUESTED", + }); + }); + + test("absent CI (`none`) is non-blocking — the pre-CI review loop is unchanged", () => { + const v = classifyReviewOutcome(prSnapshot({ reviewDecision: "APPROVED" }), ARMED_AT); + expect(v).toEqual({ outcome: "resolved", reviewId: null, decision: "APPROVED" }); + }); + + test("failing CI but no PR change and no review → still CI_FAILED (red build is actionable)", () => { + const v = classifyReviewOutcome(prSnapshot({ ci: "failing", reviewDecision: null }), ARMED_AT); + expect(v?.decision).toBe("CI_FAILED"); + }); +}); + describe("runPoller — answered-question", () => { test("a new human reply fires epic--answered exactly once (idempotent across passes)", async () => { const id = seedParked("answered-question");