Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion packages/dispatcher/src/poller-gateway.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type {
CiStatus,
EpicPrLifecycle,
GitHubPollGateway,
IssueComment,
Expand All @@ -7,6 +8,47 @@ import type {
RateLimitStatus,
} from "./poller.ts";

/**
* One `statusCheckRollup` entry as `gh pr view` returns it. A **CheckRun**
* (GitHub Actions / most apps) reports `status` + `conclusion`; a **StatusContext**
* (legacy commit statuses) reports a single `state`. We read whichever is present.
*/
export type CheckRollupEntry = {
status?: string; // CheckRun: QUEUED | IN_PROGRESS | COMPLETED
conclusion?: string; // CheckRun: SUCCESS | FAILURE | NEUTRAL | CANCELLED | TIMED_OUT | ...
state?: string; // StatusContext: SUCCESS | FAILURE | ERROR | PENDING
};

const CONCLUSION_OK = new Set(["SUCCESS", "NEUTRAL", "SKIPPED"]);
const STATE_OK = new Set(["SUCCESS", "EXPECTED"]);

/**
* Collapse a `statusCheckRollup` array into a single {@link CiStatus}. Any
* failing check ⇒ `failing` (a red build can't be reviewed); else any still-
* running check ⇒ `pending`; else `passing`. An empty/absent rollup ⇒ `none`
* (no checks configured — nothing to gate on). Pure, so it's unit-tested directly.
*/
export function deriveCiStatus(rollup: CheckRollupEntry[] | null | undefined): CiStatus {
if (!rollup || rollup.length === 0) return "none";
let pending = false;
for (const c of rollup) {
if (c.status !== undefined && c.status !== "COMPLETED") {
pending = true; // a CheckRun that hasn't finished
continue;
}
if (c.conclusion !== undefined) {
if (!CONCLUSION_OK.has(c.conclusion)) return "failing";
continue;
}
// StatusContext (no status/conclusion) — read `state`.
if (c.state !== undefined) {
if (c.state === "PENDING") pending = true;
else if (!STATE_OK.has(c.state)) return "failing"; // FAILURE / ERROR
}
}
return pending ? "pending" : "passing";
}

/**
* The production {@link GitHubPollGateway} — reads issue comments and PR review
* state through the `gh` CLI. The poller's logic is unit-tested against an
Expand Down Expand Up @@ -88,11 +130,12 @@ export const ghPollGateway: GitHubPollGateway = {
"--repo",
repo,
"--json",
"reviewDecision,labels",
"reviewDecision,labels,statusCheckRollup",
]);
const view = JSON.parse(viewOut) as {
reviewDecision: string | null;
labels: Array<{ name: string }>;
statusCheckRollup: CheckRollupEntry[] | null;
};

const reviewsOut = await gh([
Expand Down Expand Up @@ -125,6 +168,7 @@ export const ghPollGateway: GitHubPollGateway = {
reviewDecision: view.reviewDecision ?? null,
reviews,
labels: view.labels.map((l) => l.name),
ci: deriveCiStatus(view.statusCheckRollup),
};
},

Expand Down
53 changes: 47 additions & 6 deletions packages/dispatcher/src/poller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,28 @@ export type PrReview = {
body: string;
};

/**
* A PR's CI standing, collapsed from `statusCheckRollup`:
* - `passing` — every required check succeeded (or was neutral/skipped).
* - `failing` — at least one check failed/errored/was cancelled.
* - `pending` — nothing failed yet, but a check is still running/queued.
* - `none` — no checks are configured on the PR (nothing to gate on).
*/
export type CiStatus = "passing" | "failing" | "pending" | "none";

/** A PR's review-relevant snapshot. */
export type PrSnapshot = {
number: number;
reviewDecision: string | null; // 'APPROVED' | 'CHANGES_REQUESTED' | 'REVIEW_REQUIRED' | null
reviews: PrReview[];
labels: string[];
/** CI rollup. Absent (legacy/test fixtures) is treated as `none` — non-blocking. */
ci?: CiStatus;
};

/** The synthetic `decision` a CI-failure resume carries, distinct from a review `CHANGES_REQUESTED`. */
export const CI_FAILED_DECISION = "CI_FAILED";

/** GitHub's remaining REST budget and when it resets (epoch ms). */
export type RateLimitStatus = { remaining: number; resetAt: number };

Expand Down Expand Up @@ -159,17 +173,44 @@ export function classifyNewHumanReply(
return fresh[0] ?? null;
}

/** The resume verdict a classification resolves to (or null = nothing actionable). */
type ReviewVerdict = { outcome: ReviewOutcome; reviewId: number | null; decision: string | null };

/**
* Classify the PR's review state into a resume verdict, or null when nothing
* actionable has changed since the wait armed. The newest review submitted this
* Classify the PR into a resume verdict, **CI-gated**: a PR isn't reviewable
* until it builds, so CI standing participates in the verdict alongside reviews.
*
* Precedence:
* 1. **Explicit review feedback wins** — a `CHANGES_REQUESTED` (review or label)
* resumes for the feedback; addressing it should also green CI, so don't
* pre-empt it with a CI nudge.
* 2. **Red CI is its own resume trigger** — failing checks with no outstanding
* review feedback resume the agent to fix CI ({@link CI_FAILED_DECISION}).
* 3. **A resolve is gated on green** — an `APPROVED`/0-actionable verdict while
* checks are still `pending` is held (null) until they finish, so the loop
* never ends on a PR whose CI hasn't reported. `passing`/`none` resolves.
*
* Absent CI (`undefined` → `none`) is non-blocking, so the pre-CI review loop is
* unchanged.
*/
export function classifyReviewOutcome(snapshot: PrSnapshot, sinceMs: number): ReviewVerdict | null {
const review = classifyReviewVerdict(snapshot, sinceMs);
const ci = snapshot.ci ?? "none";
if (review?.outcome === "changes-requested") return review;
if (ci === "failing") {
return { outcome: "changes-requested", reviewId: null, decision: CI_FAILED_DECISION };
}
if (review?.outcome === "resolved") return ci === "pending" ? null : review;
return review;
}

/**
* The review-only verdict (no CI gating) — the newest review submitted this
* round is authoritative; a 0-actionable re-review counts as **resolved** even
* while the PR's `reviewDecision` still reads `CHANGES_REQUESTED`. Falls back to
* the standing decision / `changes-requested` label when no fresh review exists.
*/
export function classifyReviewOutcome(
snapshot: PrSnapshot,
sinceMs: number,
): { outcome: ReviewOutcome; reviewId: number | null; decision: string | null } | null {
function classifyReviewVerdict(snapshot: PrSnapshot, sinceMs: number): ReviewVerdict | null {
const fresh = snapshot.reviews
.filter((r) => r.submittedAt > sinceMs)
.sort((a, b) => b.submittedAt - a.submittedAt);
Expand Down
30 changes: 29 additions & 1 deletion packages/dispatcher/src/workflows/implementation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { Workflow } from "bunqueue/workflow";
import type { StepContext } from "bunqueue/workflow";
import { type PlanCommentReader, verifyPlanComment } from "../gates/plan-comment.ts";
import type { SessionGate } from "../hook-server.ts";
import type { ResumeSignalPayload } from "../poller.ts";
import { CI_FAILED_DECISION, type ResumeSignalPayload } from "../poller.ts";
import { markAvailableOnSuccess, parseResetAt, setRateLimited } from "../rate-limits.ts";
import type { CreateWorktreeOpts, WorktreeHandle } from "../worktree.ts";
import {
Expand Down Expand Up @@ -326,6 +326,34 @@ ${operatingRules}`,

// review-changes
const decision = resume.payload.reason === "review-changes" ? resume.payload.decision : null;

// CI-failure resume (#CI gate): a red build, not review feedback. The agent
// pulls the failing checks itself and fixes them — a distinct brief from the
// address-review one (there are no review threads to work, just broken CI).
if (decision === CI_FAILED_DECISION) {
writeFileSync(
promptPath,
`# middle dispatch brief — Epic #${epicNumber} (resumed: CI is failing — round ${resume.round} of ${reviewRoundCap})

The PR's CI is **red** — a PR can't be reviewed until it builds. Investigate and
fix the failing checks now:

1. Pull the failing checks yourself: \`gh pr checks\` for the rollup, then
\`gh run view <run-id> --log-failed\` (or the check's details URL) to read the
actual failure — don't guess from the check name.
2. Reproduce locally where you can (run the failing gate/test), fix the **cause**,
and add a regression test if the failure was a real defect, not flake.
3. **Push once** for the whole fix, then stop. The workflow re-parks and re-checks
CI on the next poll; a green build (plus review) is what ends the loop.

This is round ${resume.round} of ${reviewRoundCap}. After ${reviewRoundCap} rounds without resolution the
workflow parks for a human and stops auto-resuming.

${operatingRules}`,
);
return;
}

writeFileSync(
promptPath,
`# middle dispatch brief — Epic #${epicNumber} (resumed: address review — round ${resume.round} of ${reviewRoundCap})
Expand Down
30 changes: 30 additions & 0 deletions packages/dispatcher/test/implementation-workflow.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,12 @@ const APPROVED = {
reviewId: 2,
decision: "APPROVED",
};
const CI_FAILED = {
reason: "review-changes" as const,
outcome: "changes-requested" as const,
reviewId: null,
decision: "CI_FAILED",
};

/** No session leak: every tmux session that was created was also killed. */
function expectNoSessionLeak(tmux: { created: string[]; killed: string[] }): void {
Expand Down Expand Up @@ -582,6 +588,30 @@ describe("implementation workflow — done park → review-changes → resume (e
expectNoSessionLeak(tmux);
});

test("a CI_FAILED verdict resumes a continuation with the fix-CI brief (not the address-review one)", async () => {
const prompts: string[] = [];
const adapter = makeAdapterStub({ kind: "done" }, prompts);
const { deps, continuationIds } = withContinuations({ getAdapter: () => adapter });
const id0 = await start(deps);
await awaitParked(id0);

// Red CI (no review feedback) resumes the agent to fix the build.
await engine.signal(id0, RESUME_EVENT, CI_FAILED);
expect(await awaitSettled(id0)).toBe("completed");

const id1 = await awaitContinuation(continuationIds, 0);
await awaitParked(id1);
const brief = readPromptBrief(id1);
expect(brief).toContain("CI is failing — round 1 of 5");
expect(brief).toContain("gh pr checks");
expect(brief).toContain("Push once");
// It is NOT the address-review brief — there are no review threads to work.
expect(brief).not.toContain("Addressing review feedback");

await engine.signal(id1, RESUME_EVENT, APPROVED);
expect(await awaitSettled(id1)).toBe("completed");
});

test("a resolved review reverts a previously RATE_LIMITED adapter to AVAILABLE", async () => {
setRateLimited(db, {
adapter: "stub",
Expand Down
41 changes: 41 additions & 0 deletions packages/dispatcher/test/poller-gateway.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { describe, expect, test } from "bun:test";
import { type CheckRollupEntry, deriveCiStatus } from "../src/poller-gateway.ts";

/** A finished GitHub Actions check run. */
function run(conclusion: string): CheckRollupEntry {
return { status: "COMPLETED", conclusion };
}

describe("deriveCiStatus", () => {
test("no checks configured → none (nothing to gate on)", () => {
expect(deriveCiStatus(null)).toBe("none");
expect(deriveCiStatus([])).toBe("none");
});

test("all check runs succeeded (incl. neutral/skipped) → passing", () => {
expect(deriveCiStatus([run("SUCCESS"), run("NEUTRAL"), run("SKIPPED")])).toBe("passing");
});

test("any failed/errored/cancelled/timed-out check → failing", () => {
expect(deriveCiStatus([run("SUCCESS"), run("FAILURE")])).toBe("failing");
expect(deriveCiStatus([run("TIMED_OUT")])).toBe("failing");
expect(deriveCiStatus([run("CANCELLED")])).toBe("failing");
expect(deriveCiStatus([run("ACTION_REQUIRED")])).toBe("failing");
});

test("an unfinished check run (not COMPLETED) → pending", () => {
expect(deriveCiStatus([run("SUCCESS"), { status: "IN_PROGRESS" }])).toBe("pending");
expect(deriveCiStatus([{ status: "QUEUED" }])).toBe("pending");
});

test("a failure outranks a still-running check → failing", () => {
expect(deriveCiStatus([{ status: "IN_PROGRESS" }, run("FAILURE")])).toBe("failing");
});

test("legacy StatusContext entries (state) are read too", () => {
expect(deriveCiStatus([{ state: "SUCCESS" }])).toBe("passing");
expect(deriveCiStatus([{ state: "PENDING" }])).toBe("pending");
expect(deriveCiStatus([{ state: "FAILURE" }])).toBe("failing");
expect(deriveCiStatus([{ state: "ERROR" }])).toBe("failing");
});
});
45 changes: 45 additions & 0 deletions packages/dispatcher/test/poller.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,51 @@ describe("classifyReviewOutcome", () => {
});
});

describe("classifyReviewOutcome — CI gate", () => {
test("failing CI with no review feedback → resume to fix CI (CI_FAILED)", () => {
const v = classifyReviewOutcome(prSnapshot({ ci: "failing" }), ARMED_AT);
expect(v).toEqual({ outcome: "changes-requested", reviewId: null, decision: "CI_FAILED" });
});

test("an APPROVED review while CI is still pending is held (null) — don't end on un-built CI", () => {
const v = classifyReviewOutcome(
prSnapshot({ reviewDecision: "APPROVED", ci: "pending" }),
ARMED_AT,
);
expect(v).toBeNull();
});

test("an APPROVED review with passing CI resolves", () => {
const v = classifyReviewOutcome(
prSnapshot({ reviewDecision: "APPROVED", ci: "passing" }),
ARMED_AT,
);
expect(v).toEqual({ outcome: "resolved", reviewId: null, decision: "APPROVED" });
});

test("explicit review feedback wins over red CI (address the review, which greens CI)", () => {
const v = classifyReviewOutcome(
prSnapshot({ labels: ["changes-requested"], ci: "failing" }),
ARMED_AT,
);
expect(v).toEqual({
outcome: "changes-requested",
reviewId: null,
decision: "CHANGES_REQUESTED",
});
});

test("absent CI (`none`) is non-blocking — the pre-CI review loop is unchanged", () => {
const v = classifyReviewOutcome(prSnapshot({ reviewDecision: "APPROVED" }), ARMED_AT);
expect(v).toEqual({ outcome: "resolved", reviewId: null, decision: "APPROVED" });
});

test("failing CI but no PR change and no review → still CI_FAILED (red build is actionable)", () => {
const v = classifyReviewOutcome(prSnapshot({ ci: "failing", reviewDecision: null }), ARMED_AT);
expect(v?.decision).toBe("CI_FAILED");
});
});

describe("runPoller — answered-question", () => {
test("a new human reply fires epic-<n>-answered exactly once (idempotent across passes)", async () => {
const id = seedParked("answered-question");
Expand Down