Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 74 additions & 59 deletions tools/bg/standing-by-detector.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@ type FakeNudgeCall = {
function fakeAdapters(
nowIso: string,
lastCommitIso: string | null,
lastPrActivityIso: string | null = null,
capturedCalls: FakeNudgeCall[] = [],
publishImpl?: (from: SenderAgentId, to: AgentId, idleMinutes: number, rationale: string) => MessageEnvelope,
): Adapters {
return {
now: () => new Date(nowIso),
lastCommitIso: () => lastCommitIso,
publishNudge: (from, to, idleMinutes, rationale): MessageEnvelope => {
lastPrActivityIso: () => lastPrActivityIso,
publishNudge: publishImpl ?? ((from, to, idleMinutes, rationale): MessageEnvelope => {
capturedCalls.push({ from, to, idleMinutes, rationale });
return {
id: "test-envelope-id",
Expand All @@ -34,100 +37,123 @@ function fakeAdapters(
topic: "infinite-backlog-nudge",
payload: { idleMinutes, rationale },
};
},
}),
};
}

describe("standing-by-detector slice 4", () => {
describe("standing-by-detector slice 3 (P0 fix: PR-activity poll)", () => {
test("default config has sensible thresholds + bus defaults", () => {
expect(DEFAULT_CONFIG.pollIntervalMin).toBe(5);
expect(DEFAULT_CONFIG.idleThresholdMin).toBe(15);
expect(DEFAULT_CONFIG.once).toBe(false);
expect(DEFAULT_CONFIG.noPublish).toBe(false);
expect(DEFAULT_CONFIG.fromAgent).toBe("otto");
expect(DEFAULT_CONFIG.toAgent).toBe("*");
});

describe("pollOnce with injected adapters — detection", () => {
test("flags idle when last commit is older than threshold", () => {
describe("P0 fix — idle uses MAX(commit, pr) so PR-only agents are NOT flagged", () => {
test("recent COMMIT only (no PR activity) → NOT idle", () => {
const result = pollOnce(
{ ...DEFAULT_CONFIG, idleThresholdMin: 15, noPublish: true },
fakeAdapters("2026-05-13T18:00:00Z", "2026-05-13T17:40:00Z"),
fakeAdapters("2026-05-13T18:00:00Z", "2026-05-13T17:58:00Z", null),
);
expect(result.idleDetected).toBe(true);
expect(result.idleMinutes).toBe(20);
expect(result.note).toContain("Standing-by candidate");
expect(result.idleDetected).toBe(false);
expect(result.idleMinutes).toBe(2);
expect(result.lastCommitAt).toBe("2026-05-13T17:58:00.000Z");
expect(result.lastPrActivityAt).toBeNull();
});

test("does NOT flag idle when last commit is recent", () => {
test("recent PR activity only (no commit) → NOT idle (was Riven's P0 false-negative)", () => {
const result = pollOnce(
{ ...DEFAULT_CONFIG, idleThresholdMin: 15 },
fakeAdapters("2026-05-13T18:00:00Z", "2026-05-13T17:55:00Z"),
{ ...DEFAULT_CONFIG, idleThresholdMin: 15, noPublish: true },
fakeAdapters("2026-05-13T18:00:00Z", null, "2026-05-13T17:58:00Z"),
);
expect(result.idleDetected).toBe(false);
expect(result.idleMinutes).toBe(2);
expect(result.lastCommitAt).toBeNull();
expect(result.lastPrActivityAt).toBe("2026-05-13T17:58:00.000Z");
});

test("OLD commit + recent PR activity → NOT idle (PR work counts)", () => {
const result = pollOnce(
{ ...DEFAULT_CONFIG, idleThresholdMin: 15, noPublish: true },
fakeAdapters("2026-05-13T18:00:00Z", "2026-05-13T17:00:00Z", "2026-05-13T17:55:00Z"),
);
expect(result.idleDetected).toBe(false);
expect(result.idleMinutes).toBe(5);
});

test("recent commit + OLD PR activity → NOT idle (commit work counts)", () => {
const result = pollOnce(
{ ...DEFAULT_CONFIG, idleThresholdMin: 15, noPublish: true },
fakeAdapters("2026-05-13T18:00:00Z", "2026-05-13T17:55:00Z", "2026-05-13T17:00:00Z"),
);
expect(result.idleDetected).toBe(false);
expect(result.idleMinutes).toBe(5);
expect(result.publishedEnvelopeId).toBeNull();
});

test("handles null lastCommit gracefully (no publish)", () => {
test("BOTH old → idle flagged", () => {
const result = pollOnce(
{ ...DEFAULT_CONFIG, idleThresholdMin: 15, noPublish: true },
fakeAdapters("2026-05-13T18:00:00Z", "2026-05-13T17:30:00Z", "2026-05-13T17:35:00Z"),
);
expect(result.idleDetected).toBe(true);
expect(result.idleMinutes).toBe(25);
});

test("BOTH null → no detection (no false positive)", () => {
const result = pollOnce(
DEFAULT_CONFIG,
fakeAdapters("2026-05-13T18:00:00Z", null),
fakeAdapters("2026-05-13T18:00:00Z", null, null),
);
expect(result.idleDetected).toBe(false);
expect(result.publishedEnvelopeId).toBeNull();
expect(result.note).toContain("no commit found");
expect(result.lastCommitAt).toBeNull();
expect(result.lastPrActivityAt).toBeNull();
expect(result.note).toContain("no commit AND no PR activity");
});
});

describe("pollOnce with injected adapters — bus publish", () => {
test("publishes nudge envelope when idle detected", () => {
describe("bus publish — slice 4 behavior preserved", () => {
test("publishes nudge when idle detected", () => {
const captured: FakeNudgeCall[] = [];
const result = pollOnce(
{ ...DEFAULT_CONFIG, idleThresholdMin: 15 },
fakeAdapters("2026-05-13T18:00:00Z", "2026-05-13T17:40:00Z", captured),
fakeAdapters("2026-05-13T18:00:00Z", "2026-05-13T17:30:00Z", "2026-05-13T17:35:00Z", captured),
);
expect(result.publishedEnvelopeId).toBe("test-envelope-id");
expect(result.lastPublishError).toBeNull();
expect(result.note).toContain("nudge published");
expect(captured).toHaveLength(1);
expect(captured[0]!.from).toBe("otto");
expect(captured[0]!.to).toBe("*");
expect(captured[0]!.idleMinutes).toBe(20);
expect(captured[0]!.rationale).toContain("Standing-by detected");
expect(captured[0]!.rationale).toContain("infinite-backlog metabolism");
expect(captured[0]!.rationale).toContain("commit or PR");
});

test("does NOT publish when noPublish is true", () => {
test("does NOT publish when noPublish=true", () => {
const captured: FakeNudgeCall[] = [];
const result = pollOnce(
{ ...DEFAULT_CONFIG, idleThresholdMin: 15, noPublish: true },
fakeAdapters("2026-05-13T18:00:00Z", "2026-05-13T17:40:00Z", captured),
fakeAdapters("2026-05-13T18:00:00Z", "2026-05-13T17:30:00Z", null, captured),
);
expect(result.idleDetected).toBe(true);
expect(result.publishedEnvelopeId).toBeNull();
expect(captured).toHaveLength(0);
expect(result.note).toContain("publish skipped");
});

test("does NOT publish when not idle", () => {
test("P1 fix — publish failure surfaces in structured lastPublishError field", () => {
const captured: FakeNudgeCall[] = [];
const result = pollOnce(
{ ...DEFAULT_CONFIG, idleThresholdMin: 15 },
fakeAdapters("2026-05-13T18:00:00Z", "2026-05-13T17:55:00Z", captured),
);
expect(result.idleDetected).toBe(false);
expect(captured).toHaveLength(0);
});

test("respects --agent and --to flags for publish identity", () => {
const captured: FakeNudgeCall[] = [];
const result = pollOnce(
{ ...DEFAULT_CONFIG, idleThresholdMin: 15, fromAgent: "vera", toAgent: "lior" },
fakeAdapters("2026-05-13T18:00:00Z", "2026-05-13T17:40:00Z", captured),
fakeAdapters(
"2026-05-13T18:00:00Z",
"2026-05-13T17:30:00Z",
null,
captured,
() => { throw new Error("bus IO failure: disk full"); },
),
);
expect(result.idleDetected).toBe(true);
expect(captured[0]!.from).toBe("vera");
expect(captured[0]!.to).toBe("lior");
expect(result.publishedEnvelopeId).toBeNull();
expect(result.lastPublishError).toBe("bus IO failure: disk full");
expect(result.note).toContain("publish failed");
});
});

Expand All @@ -148,30 +174,19 @@ describe("standing-by-detector slice 4", () => {
expect(parseArgs([])).toEqual(DEFAULT_CONFIG);
});

test("--once flag", () => {
expect(parseArgs(["--once"]).once).toBe(true);
});

test("--no-publish flag", () => {
expect(parseArgs(["--no-publish"]).noPublish).toBe(true);
});

test("--agent + --to flags", () => {
const config = parseArgs(["--agent", "vera", "--to", "lior"]);
test("--once / --no-publish / --agent / --to flags", () => {
const config = parseArgs(["--once", "--no-publish", "--agent", "vera", "--to", "lior"]);
expect(config.once).toBe(true);
expect(config.noPublish).toBe(true);
expect(config.fromAgent).toBe("vera");
expect(config.toAgent).toBe("lior");
});

test("rejects invalid --agent values", () => {
expect(() => parseArgs(["--agent", "invalid"])).toThrow(/must be one of/);
test("rejects invalid --agent (no broadcast as sender)", () => {
expect(() => parseArgs(["--agent", "*"])).toThrow(/must be one of/);
});

test("rejects invalid --to values", () => {
expect(() => parseArgs(["--to", "invalid"])).toThrow(/must be one of/);
});

test("rejects unknown flags fail-fast", () => {
test("rejects unknown flags", () => {
expect(() => parseArgs(["--unknown"])).toThrow(/unknown flag/);
});
});
Expand Down
92 changes: 75 additions & 17 deletions tools/bg/standing-by-detector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,32 @@ export const DEFAULT_CONFIG: DetectorConfig = {
export type PollResult = {
pollAt: string; // ISO-8601
idleDetected: boolean;
/** Most-recent commit on HEAD; null if no commit or git unavailable. */
lastCommitAt: string | null;
/** Most-recent PR activity (opened OR closed OR reviewed) authored by `agentForActivity`; null if no PRs or gh unavailable. */
lastPrActivityAt: string | null;
/** Idle gap in minutes — computed from MAX of (lastCommitAt, lastPrActivityAt). */
idleMinutes: number | null;
/** Envelope ID if a nudge was published, null otherwise. */
publishedEnvelopeId: string | null;
/** Structured publish-failure reason; null on success or skip. (Riven P1) */
lastPublishError: string | null;
note: string;
};

/** Adapter abstraction so tests can inject deterministic time + git + bus. */
/** Adapter abstraction so tests can inject deterministic time + git + gh + bus. */
export type Adapters = {
now: () => Date;
/** ISO-8601 of the most recent commit on HEAD, or null. */
lastCommitIso: () => string | null;
/**
* ISO-8601 of the most-recently-updated PR in the repo (any author, any
* state). Per B-0440 AC: "no PRs opened/closed" is repo-level, not
* author-filtered — multiple factory agents share the AceHack account
* so author-filtering would miss most activity. Returns null when gh
* is unavailable OR there are no PRs.
*/
lastPrActivityIso: () => string | null;
publishNudge: (
from: SenderAgentId,
to: AgentId,
Expand All @@ -75,6 +90,32 @@ const REAL_ADAPTERS: Adapters = {
const trimmed = result.stdout.trim();
return trimmed.length > 0 ? trimmed : null;
},
lastPrActivityIso: () => {
// gh pr list --state all --json updatedAt --limit 1
// Repo-level: any PR activity counts (factory agents share AceHack account).
// eslint-disable-next-line sonarjs/no-os-command-from-path -- gh invoked as explicit args array; no shell, no injection risk.
const result = spawnSync(
"gh",
[
"pr", "list",
"--state", "all",
"--json", "updatedAt",
"--limit", "1",
Comment on lines +100 to +103
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Query PR activity by updated timestamp, not creation order

Using gh pr list --state all --json updatedAt --limit 1 does not guarantee you get the most recently updated PR; gh pr list is ordered by PR creation time (GitHub CLI’s PullRequestList query uses orderBy: {field: CREATED_AT, direction: DESC}), so activity on older PRs can be missed. In a repo where agents review/comment on an older PR while no new PRs are created, lastPrActivityIso can return a stale updatedAt, causing false idle detection and incorrect nudges.

Useful? React with 👍 / 👎.

],
{ encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] },
);
if (result.status !== 0 || !result.stdout) return null;
try {
const parsed = JSON.parse(result.stdout);
if (!Array.isArray(parsed) || parsed.length === 0) return null;
const first = parsed[0];
if (typeof first !== "object" || first === null) return null;
const updatedAt = (first as { updatedAt?: unknown }).updatedAt;
return typeof updatedAt === "string" ? updatedAt : null;
} catch {
return null;
}
},
publishNudge: (from, to, idleMinutes, rationale) =>
publish(from, to, {
topic: "infinite-backlog-nudge",
Expand All @@ -83,64 +124,81 @@ const REAL_ADAPTERS: Adapters = {
};

/**
* Single poll iteration. Reads the most recent commit on HEAD, compares
* against the idle threshold, and publishes a bus nudge when idle is
* detected (unless noPublish is set).
* Single poll iteration. Per B-0440 AC: idle iff NO new commits AND
* NO PR activity within idleThresholdMin. The detector reads both signals
* and computes idle from MAX (most-recent) of the two timestamps. Either
* signal being recent means the agent is NOT standing by — even an
* agent doing PR-review-only / bus-coordination / claim-work with no
* commits is correctly NOT flagged.
*
* Resolves Riven's P0 finding (envelope 6c689634-...): the prior slice-2
* version only checked commit-history and produced false negatives on
* non-commit agent activity.
*/
export function pollOnce(
config: DetectorConfig,
adapters: Adapters = REAL_ADAPTERS,
): PollResult {
const pollAt = adapters.now();
const lastCommitIso = adapters.lastCommitIso();
const lastPrActivityIso = adapters.lastPrActivityIso();

if (lastCommitIso === null) {
// If BOTH signals are null we have no data to evaluate idle threshold.
if (lastCommitIso === null && lastPrActivityIso === null) {
return {
pollAt: pollAt.toISOString(),
idleDetected: false,
lastCommitAt: null,
lastPrActivityAt: null,
idleMinutes: null,
publishedEnvelopeId: null,
note: "no commit found on HEAD (fresh repo or git unavailable); cannot evaluate idle threshold",
lastPublishError: null,
note: "no commit AND no PR activity found (fresh repo / git or gh unavailable); cannot evaluate idle threshold",
};
}

const lastCommit = new Date(lastCommitIso);
const idleMs = pollAt.getTime() - lastCommit.getTime();
// Compute idle from MAX of available signals (most-recent activity).
const commitMs = lastCommitIso !== null ? new Date(lastCommitIso).getTime() : 0;
const prMs = lastPrActivityIso !== null ? new Date(lastPrActivityIso).getTime() : 0;
const mostRecentMs = Math.max(commitMs, prMs);
const idleMs = pollAt.getTime() - mostRecentMs;
const idleMinutes = Math.max(0, idleMs / 60_000);
const idleDetected = idleMinutes >= config.idleThresholdMin;

let publishedEnvelopeId: string | null = null;
let publishError: string | null = null;
let lastPublishError: string | null = null;
if (idleDetected && !config.noPublish) {
const rationale = `Standing-by detected: ${idleMinutes.toFixed(1)}min since last commit on HEAD (threshold ${config.idleThresholdMin}min). Pick decomposition work per infinite-backlog metabolism.`;
const rationale = `Standing-by detected: ${idleMinutes.toFixed(1)}min since last activity (commit or PR; threshold ${config.idleThresholdMin}min). Pick decomposition work per infinite-backlog metabolism.`;
try {
const envelope = adapters.publishNudge(config.fromAgent, config.toAgent, idleMinutes, rationale);
publishedEnvelopeId = envelope.id;
} catch (e) {
// Bus publish failure must NOT kill the poll loop; the daemon needs to
// survive transient bus IO errors. Capture the reason in the result.
publishError = e instanceof Error ? e.message : String(e);
// Bus publish failure must NOT kill the poll loop; the daemon needs
// to survive transient bus IO. Captured both in lastPublishError
// (structured; machine-readable per Riven's P1 finding) and in note.
lastPublishError = e instanceof Error ? e.message : String(e);
}
}

return {
pollAt: pollAt.toISOString(),
idleDetected,
lastCommitAt: lastCommit.toISOString(),
lastCommitAt: lastCommitIso !== null ? new Date(lastCommitIso).toISOString() : null,
lastPrActivityAt: lastPrActivityIso !== null ? new Date(lastPrActivityIso).toISOString() : null,
idleMinutes,
publishedEnvelopeId,
lastPublishError,
note: idleDetected
? `idle ${idleMinutes.toFixed(1)}min >= threshold ${config.idleThresholdMin}min — Standing-by candidate${
publishError
? ` (publish failed: ${publishError})`
lastPublishError
? ` (publish failed: ${lastPublishError})`
: publishedEnvelopeId
? ` (nudge published; envelope=${publishedEnvelopeId})`
: config.noPublish
? " (publish skipped per --no-publish)"
: ""
}`
: `last commit ${idleMinutes.toFixed(1)}min ago; under threshold ${config.idleThresholdMin}min`,
: `last activity ${idleMinutes.toFixed(1)}min ago (commit=${lastCommitIso ?? "n/a"}, pr=${lastPrActivityIso ?? "n/a"}); under threshold ${config.idleThresholdMin}min`,
};
}

Expand Down
Loading