diff --git a/packages/core/src/config.ts b/packages/core/src/config.ts index 78aa32f7..d9b46470 100644 --- a/packages/core/src/config.ts +++ b/packages/core/src/config.ts @@ -57,6 +57,13 @@ export type RecommenderSettings = { intervalMinutes: number; adapter: string; autoDispatch: boolean; + /** + * Hard cap on the recommender agent run, in milliseconds (from + * `agent_timeout_minutes`). Undefined when unset — the workflow then applies + * its own default. Operators bump this for repos large enough that ranking + + * rewriting the state issue doesn't finish inside the default window. + */ + agentTimeoutMs?: number; }; export type StateIssueSettings = { @@ -236,6 +243,8 @@ function mapRecommender(raw: RawTable): RecommenderSettings | undefined { intervalMinutes: r.interval_minutes as number, adapter: r.adapter as string, autoDispatch: r.auto_dispatch as boolean, + agentTimeoutMs: + typeof r.agent_timeout_minutes === "number" ? r.agent_timeout_minutes * 60_000 : undefined, }; } diff --git a/packages/core/test/config.test.ts b/packages/core/test/config.test.ts index 36b9103d..ef6e2cf6 100644 --- a/packages/core/test/config.test.ts +++ b/packages/core/test/config.test.ts @@ -64,6 +64,7 @@ enabled = true interval_minutes = 15 adapter = "claude" auto_dispatch = false +agent_timeout_minutes = 20 [state_issue] number = 142 @@ -165,6 +166,7 @@ describe("loadConfig — per-repo merge", () => { expect(config.limits!.complexityCeiling).toBe(3); expect(config.recommender!.intervalMinutes).toBe(15); expect(config.recommender!.autoDispatch).toBe(false); + expect(config.recommender!.agentTimeoutMs).toBe(20 * 60_000); expect(config.stateIssue!.number).toBe(142); expect(config.bootstrap!.version).toBe(1); }); diff --git a/packages/dispatcher/src/poller-cron.ts b/packages/dispatcher/src/poller-cron.ts index 4c3b7dd7..5d802fbc 100644 --- a/packages/dispatcher/src/poller-cron.ts +++ b/packages/dispatcher/src/poller-cron.ts @@ -2,21 +2,27 @@ import { Bunqueue } from "bunqueue/client"; import { runPoller, type PollerDeps } from "./poller.ts"; /** - * How often the poller checks GitHub for resume triggers. Slower than the - * watchdog (30s) — a human reply or a review verdict is not latency-sensitive, - * and a gentler cadence is kinder to GitHub rate limits. + * Default cadence for the GitHub poller. Much slower than the watchdog (30s) — + * a human reply or a review verdict is not latency-sensitive, and a gentler + * cadence is kinder to GitHub rate limits. The poller spends ~1 `gh` call per + * parked workflow per tick and has no backoff yet (see #122), so a conservative + * default keeps a many-parked-workflow / multi-repo deployment well clear of + * the 5000/hr ceiling and of secondary (burst) limits. Override via `startPoller`. */ -export const POLLER_INTERVAL_MS = 60_000; +export const POLLER_INTERVAL_MS = 120_000; /** - * Stand up the GitHub poller as a bunqueue cron: every {@link POLLER_INTERVAL_MS} - * it runs one {@link runPoller} pass over parked workflows with an armed wait, - * firing the resume signal when the unblocking event appears. Returns a stop - * function that tears the cron down. The pass is resilient on its own (per- - * workflow failures are isolated); this wrapper guards the whole pass too so a - * thrown pass never crashes the cron worker. + * Stand up the GitHub poller as a bunqueue cron: every `intervalMs` (default + * {@link POLLER_INTERVAL_MS}) it runs one {@link runPoller} pass over parked + * workflows with an armed wait, firing the resume signal when the unblocking + * event appears. Returns a stop function that tears the cron down. The pass is + * resilient on its own (per-workflow failures are isolated); this wrapper guards + * the whole pass too so a thrown pass never crashes the cron worker. */ -export async function startPoller(deps: PollerDeps): Promise<() => Promise> { +export async function startPoller( + deps: PollerDeps, + intervalMs: number = POLLER_INTERVAL_MS, +): Promise<() => Promise> { const queue = new Bunqueue("middle-poller", { embedded: true, processor: async () => { @@ -27,7 +33,7 @@ export async function startPoller(deps: PollerDeps): Promise<() => Promise } }, }); - await queue.every("poller-tick", POLLER_INTERVAL_MS); + await queue.every("poller-tick", intervalMs); return async () => { await queue.close(true); }; diff --git a/packages/dispatcher/src/recommender-run.ts b/packages/dispatcher/src/recommender-run.ts index 267170c8..1a40567e 100644 --- a/packages/dispatcher/src/recommender-run.ts +++ b/packages/dispatcher/src/recommender-run.ts @@ -59,6 +59,8 @@ export type DispatchRecommenderOptions = { slots: RecommenderSlotLimits; /** The `config` block reported to the recommender. */ runConfig: RecommenderRunConfig; + /** Hard cap on the agent run (from `[recommender] agent_timeout_minutes`); undefined → workflow default. */ + agentTimeoutMs?: number; /** Test seams; production passes none. */ overrides?: RecommenderRunOverrides; }; @@ -141,6 +143,7 @@ export async function resolveRecommenderOptions( autoDispatch: config.recommender?.autoDispatch ?? false, prMode: config.repo?.prMode ?? "worktree", }, + agentTimeoutMs: config.recommender?.agentTimeoutMs, }, }; } @@ -255,6 +258,7 @@ export async function dispatchRecommender( stateIssue: ov.stateIssue ?? ghStateIssueGateway, repoConfig: { adapters: opts.slots.adapters }, config: opts.runConfig, + agentTimeoutMs: opts.agentTimeoutMs, gatherContext, surfaceProblem: ov.surfaceProblem ?? ghSurfaceProblem, // Phase 7 read-only: triggerAutoDispatch intentionally UNWIRED. diff --git a/packages/dispatcher/src/workflows/recommender.ts b/packages/dispatcher/src/workflows/recommender.ts index b335adc9..3f4788c8 100644 --- a/packages/dispatcher/src/workflows/recommender.ts +++ b/packages/dispatcher/src/workflows/recommender.ts @@ -109,7 +109,11 @@ export type RecommenderDeps = { }; const DEFAULT_LAUNCH_TIMEOUT_MS = 90_000; -const DEFAULT_AGENT_TIMEOUT_MS = 5 * 60 * 1000; // the spec's 5-minute hard cap +// 15-minute hard cap. The spec's original 5 minutes proved too tight against a +// real repo — the agent ran the full window ranking ~4 epics + ~15 issues and +// rewriting the schema-strict state issue without finishing. Operators tune it +// per repo via `[recommender] agent_timeout_minutes`. +const DEFAULT_AGENT_TIMEOUT_MS = 15 * 60 * 1000; /** * Deterministic, repo-namespaced session name for the recommender's dedicated diff --git a/packages/dispatcher/test/recommender-workflow.test.ts b/packages/dispatcher/test/recommender-workflow.test.ts index f7f40552..24b62a78 100644 --- a/packages/dispatcher/test/recommender-workflow.test.ts +++ b/packages/dispatcher/test/recommender-workflow.test.ts @@ -275,16 +275,17 @@ describe("recommender workflow — #43 shell: step order + dedicated slot", () = expect(countActiveImplementationSlots(db)).toEqual({ total: 0, perAdapter: {} }); }); - test("spawn-recommender-agent has the spec's 5-minute hard cap", () => { + test("spawn-recommender-agent uses the default 15-minute hard cap", () => { // The step `timeout` is the hard cap; assert it via the built workflow's - // step config rather than wall-clock. Defaults: 90s launch + 5min agent. + // step config rather than wall-clock. Defaults: 90s launch + 15min agent + // (bumped from 5min, which was too tight against a real repo). const h = makeHarness(); delete (h.deps as { agentTimeoutMs?: number }).agentTimeoutMs; delete (h.deps as { launchTimeoutMs?: number }).launchTimeoutMs; const def = stepDef(h.deps, "spawn-recommender-agent"); expect(def).toBeDefined(); - // launch (90s) + agent (5min) + 30s backstop, per the factory. - expect(def!.timeout).toBe(90_000 + 5 * 60 * 1000 + 30_000); + // launch (90s) + agent (15min) + 30s backstop, per the factory. + expect(def!.timeout).toBe(90_000 + 15 * 60 * 1000 + 30_000); }); test("prepare-shallow-worktree registers a compensation handler", () => {