diff --git a/.github/scripts/keepalive_loop.js b/.github/scripts/keepalive_loop.js index ae5403a87..478c6a12b 100644 --- a/.github/scripts/keepalive_loop.js +++ b/.github/scripts/keepalive_loop.js @@ -2967,6 +2967,9 @@ async function updateKeepaliveLoopSummary({ github: rawGithub, context, core, in // tasks off. Re-derive the counter here with the authoritative counts. // When force_retry is active, honour the evaluate-step's reset to 0 and // do not overwrite it — the human explicitly wants a fresh start. + // After a review action, reset to 0 so the next evaluate triggers a run + // instead of another review (the review already provided course-correction + // feedback — the agent needs a chance to act on it). if (isForceRetry) { if (roundsWithoutTaskCompletion !== 0) { core?.info?.( @@ -2975,6 +2978,12 @@ async function updateKeepaliveLoopSummary({ github: rawGithub, context, core, in ); roundsWithoutTaskCompletion = 0; } + } else if (action === 'review') { + core?.info?.( + `[summary] review action completed — resetting rounds_without_task_completion ` + + `from ${roundsWithoutTaskCompletion} to 0 so next iteration runs the agent`, + ); + roundsWithoutTaskCompletion = 0; } else { const prevTasks = previousState?.tasks || {}; const prevUncheckedForCounter = toNumber(prevTasks.unchecked, tasksUnchecked); diff --git a/.github/workflows/agents-auto-pilot.yml b/.github/workflows/agents-auto-pilot.yml index f39f35518..86218a2e6 100644 --- a/.github/workflows/agents-auto-pilot.yml +++ b/.github/workflows/agents-auto-pilot.yml @@ -1916,23 +1916,90 @@ jobs: return; } - // Force-dispatch Codex belt dispatcher to create the branch - try { - await withRetry((client) => client.rest.actions.createWorkflowDispatch({ - owner: context.repo.owner, - repo: context.repo.repo, - workflow_id: 'agents-71-codex-belt-dispatcher.yml', - ref: baseBranch, - inputs: { - agent_key: agentKey, - force_issue: issueNumber.toString(), - dry_run: 'false' + // Force-dispatch Codex belt dispatcher to create the branch. + // Retry up to 3 times because GitHub Actions can silently cancel + // queued runs before they receive a runner (observed on issue #34). + const maxDispatchAttempts = 3; + let dispatchSucceeded = false; + for (let attempt = 1; attempt <= maxDispatchAttempts; attempt++) { + const dispatchedAt = new Date(); + try { + await withRetry((client) => client.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + ref: baseBranch, + inputs: { + agent_key: agentKey, + force_issue: issueNumber.toString(), + dry_run: 'false' + } + })); + core.info( + `Dispatched belt dispatcher (agent: ${agentKey}) ` + + `for issue #${issueNumber} (attempt ${attempt}/${maxDispatchAttempts})` + ); + } catch (dispatchError) { + core.warning( + `Belt dispatch attempt ${attempt} failed: ${dispatchError?.message}` + ); + if (attempt < maxDispatchAttempts) { + await new Promise(r => setTimeout(r, attempt * 5000)); } - })); - const prefix = `Dispatched belt dispatcher (agent: ${agentKey}) for issue`; - core.info(`${prefix} #${issueNumber}`); - } catch (dispatchError) { - core.warning(`Could not dispatch belt dispatcher: ${dispatchError?.message}`); + continue; + } + + // Wait briefly then verify the dispatched run is queued/in_progress + // (not cancelled before receiving a runner). Only consider runs + // created after the dispatch timestamp to avoid matching stale runs + // for other issues. + await new Promise(r => setTimeout(r, 15000)); + try { + const { data: runs } = await withRetry((client) => + client.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + per_page: 5, + }) + ); + const recentRuns = runs.workflow_runs.filter( + r => new Date(r.created_at) >= dispatchedAt + ); + const alive = recentRuns.find( + r => r.status === 'queued' || r.status === 'in_progress' + ); + if (alive) { + core.info(`Belt dispatcher run ${alive.id} is ${alive.status}`); + dispatchSucceeded = true; + break; + } + const succeeded = recentRuns.find( + r => r.conclusion === 'success' + ); + if (succeeded) { + core.info(`Belt dispatcher run ${succeeded.id} already succeeded`); + dispatchSucceeded = true; + break; + } + const latest = recentRuns[0] || runs.workflow_runs[0]; + core.warning( + `Belt dispatcher run not alive after attempt ${attempt}` + + ` (latest: ${latest?.id} ${latest?.conclusion}); ` + + (attempt < maxDispatchAttempts ? 'retrying…' : 'no more attempts.') + ); + } catch (checkError) { + core.warning( + `Could not verify dispatcher run after attempt ${attempt}: ` + + `${checkError?.message}; status unknown, will retry.` + ); + } + } + if (!dispatchSucceeded) { + core.warning( + `Belt dispatcher could not be confirmed after ${maxDispatchAttempts} attempts ` + + `for issue #${issueNumber}. The branch-check loop will re-dispatch if needed.` + ); } - name: Metrics - End capability check timer @@ -2380,6 +2447,65 @@ jobs: const actualBackoffMs = Math.min(backoffMs, maxBackoffMs); const actualMinutes = Math.round(actualBackoffMs / 60000); + // Re-dispatch the belt if no recent dispatcher run is active + // for this issue. Only consider runs created in the last 30 + // minutes to avoid matching stale runs for other issues. + let redispatched = false; + try { + const { data: runs } = await withRetry((client) => + client.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + per_page: 10, + }) + ); + const cutoff = new Date(Date.now() - 30 * 60 * 1000); + const recentRuns = runs.workflow_runs.filter( + r => new Date(r.created_at) >= cutoff + ); + const alive = recentRuns.find( + r => r.status === 'queued' || r.status === 'in_progress' + ); + if (!alive) { + core.info( + `No active belt dispatcher run in last 30m ` + + `(${recentRuns.length} recent runs checked); re-dispatching` + ); + const { data: repoInfo } = await withRetry((client) => + client.rest.repos.get({ + owner: context.repo.owner, + repo: context.repo.repo, + }) + ); + const dispatchRef = repoInfo.default_branch || 'main'; + await withRetry((client) => + client.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + ref: dispatchRef, + inputs: { + agent_key: agentKey, + force_issue: String(issueNumber), + dry_run: 'false', + }, + }) + ); + redispatched = true; + core.info(`Re-dispatched belt for issue #${issueNumber}`); + } else { + core.info( + `Belt dispatcher run ${alive.id} still ${alive.status}; skipping re-dispatch` + ); + } + } catch (redispatchErr) { + core.warning(`Belt re-dispatch check failed: ${redispatchErr?.message}`); + } + + const redispatchNote = redispatched + ? ' Re-dispatched belt dispatcher.' + : ''; core.info(`Applying branch-creation backoff: waiting ${actualMinutes} minutes`); await withRetry((client) => client.rest.issues.createComment({ owner: context.repo.owner, @@ -2387,7 +2513,7 @@ jobs: issue_number: issueNumber, body: `🤖 **Auto-pilot**: Backoff delay (${actualMinutes}m) - Branch not created yet. Attempt ${stallCount + 1}/${maxStallRetries}. + Branch not created yet. Attempt ${stallCount + 1}/${maxStallRetries}.${redispatchNote} Waiting before retry...` })); diff --git a/.github/workflows/agents-keepalive-loop.yml b/.github/workflows/agents-keepalive-loop.yml index 629e598f0..b7cdae91f 100644 --- a/.github/workflows/agents-keepalive-loop.yml +++ b/.github/workflows/agents-keepalive-loop.yml @@ -859,6 +859,7 @@ jobs: - evaluate - run-codex - run-claude + - progress-review # Run if PR exists, handle skipped/failed agent jobs gracefully # run-codex will be skipped when action != run/fix/conflict, which is expected if: | diff --git a/.github/workflows/maint-69-sync-labels.yml b/.github/workflows/maint-69-sync-labels.yml index 8f73338eb..21e732266 100644 --- a/.github/workflows/maint-69-sync-labels.yml +++ b/.github/workflows/maint-69-sync-labels.yml @@ -40,7 +40,7 @@ jobs: github_token: ${{ github.token }} - name: Install js-yaml - run: npm install js-yaml + run: npm install --no-save --no-package-lock js-yaml - name: Parse labels-core.yml id: parse diff --git a/templates/consumer-repo/.github/scripts/keepalive_loop.js b/templates/consumer-repo/.github/scripts/keepalive_loop.js index ae5403a87..478c6a12b 100644 --- a/templates/consumer-repo/.github/scripts/keepalive_loop.js +++ b/templates/consumer-repo/.github/scripts/keepalive_loop.js @@ -2967,6 +2967,9 @@ async function updateKeepaliveLoopSummary({ github: rawGithub, context, core, in // tasks off. Re-derive the counter here with the authoritative counts. // When force_retry is active, honour the evaluate-step's reset to 0 and // do not overwrite it — the human explicitly wants a fresh start. + // After a review action, reset to 0 so the next evaluate triggers a run + // instead of another review (the review already provided course-correction + // feedback — the agent needs a chance to act on it). if (isForceRetry) { if (roundsWithoutTaskCompletion !== 0) { core?.info?.( @@ -2975,6 +2978,12 @@ async function updateKeepaliveLoopSummary({ github: rawGithub, context, core, in ); roundsWithoutTaskCompletion = 0; } + } else if (action === 'review') { + core?.info?.( + `[summary] review action completed — resetting rounds_without_task_completion ` + + `from ${roundsWithoutTaskCompletion} to 0 so next iteration runs the agent`, + ); + roundsWithoutTaskCompletion = 0; } else { const prevTasks = previousState?.tasks || {}; const prevUncheckedForCounter = toNumber(prevTasks.unchecked, tasksUnchecked); diff --git a/templates/consumer-repo/.github/workflows/agents-auto-pilot.yml b/templates/consumer-repo/.github/workflows/agents-auto-pilot.yml index d29bc9958..7c82d20cb 100644 --- a/templates/consumer-repo/.github/workflows/agents-auto-pilot.yml +++ b/templates/consumer-repo/.github/workflows/agents-auto-pilot.yml @@ -1886,23 +1886,90 @@ jobs: return; } - // Force-dispatch Codex belt dispatcher to create the branch - try { - await withRetry((client) => client.rest.actions.createWorkflowDispatch({ - owner: context.repo.owner, - repo: context.repo.repo, - workflow_id: 'agents-71-codex-belt-dispatcher.yml', - ref: baseBranch, - inputs: { - agent_key: agentKey, - force_issue: issueNumber.toString(), - dry_run: 'false' + // Force-dispatch Codex belt dispatcher to create the branch. + // Retry up to 3 times because GitHub Actions can silently cancel + // queued runs before they receive a runner (observed on issue #34). + const maxDispatchAttempts = 3; + let dispatchSucceeded = false; + for (let attempt = 1; attempt <= maxDispatchAttempts; attempt++) { + const dispatchedAt = new Date(); + try { + await withRetry((client) => client.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + ref: baseBranch, + inputs: { + agent_key: agentKey, + force_issue: issueNumber.toString(), + dry_run: 'false' + } + })); + core.info( + `Dispatched belt dispatcher (agent: ${agentKey}) ` + + `for issue #${issueNumber} (attempt ${attempt}/${maxDispatchAttempts})` + ); + } catch (dispatchError) { + core.warning( + `Belt dispatch attempt ${attempt} failed: ${dispatchError?.message}` + ); + if (attempt < maxDispatchAttempts) { + await new Promise(r => setTimeout(r, attempt * 5000)); } - })); - const prefix = `Dispatched belt dispatcher (agent: ${agentKey}) for issue`; - core.info(`${prefix} #${issueNumber}`); - } catch (dispatchError) { - core.warning(`Could not dispatch belt dispatcher: ${dispatchError?.message}`); + continue; + } + + // Wait briefly then verify the dispatched run is queued/in_progress + // (not cancelled before receiving a runner). Only consider runs + // created after the dispatch timestamp to avoid matching stale runs + // for other issues. + await new Promise(r => setTimeout(r, 15000)); + try { + const { data: runs } = await withRetry((client) => + client.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + per_page: 5, + }) + ); + const recentRuns = runs.workflow_runs.filter( + r => new Date(r.created_at) >= dispatchedAt + ); + const alive = recentRuns.find( + r => r.status === 'queued' || r.status === 'in_progress' + ); + if (alive) { + core.info(`Belt dispatcher run ${alive.id} is ${alive.status}`); + dispatchSucceeded = true; + break; + } + const succeeded = recentRuns.find( + r => r.conclusion === 'success' + ); + if (succeeded) { + core.info(`Belt dispatcher run ${succeeded.id} already succeeded`); + dispatchSucceeded = true; + break; + } + const latest = recentRuns[0] || runs.workflow_runs[0]; + core.warning( + `Belt dispatcher run not alive after attempt ${attempt}` + + ` (latest: ${latest?.id} ${latest?.conclusion}); ` + + (attempt < maxDispatchAttempts ? 'retrying…' : 'no more attempts.') + ); + } catch (checkError) { + core.warning( + `Could not verify dispatcher run after attempt ${attempt}: ` + + `${checkError?.message}; status unknown, will retry.` + ); + } + } + if (!dispatchSucceeded) { + core.warning( + `Belt dispatcher could not be confirmed after ${maxDispatchAttempts} attempts ` + + `for issue #${issueNumber}. The branch-check loop will re-dispatch if needed.` + ); } - name: Metrics - End capability check timer @@ -2304,6 +2371,65 @@ jobs: const actualBackoffMs = Math.min(backoffMs, maxBackoffMs); const actualMinutes = Math.round(actualBackoffMs / 60000); + // Re-dispatch the belt if no recent dispatcher run is active + // for this issue. Only consider runs created in the last 30 + // minutes to avoid matching stale runs for other issues. + let redispatched = false; + try { + const { data: runs } = await withRetry((client) => + client.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + per_page: 10, + }) + ); + const cutoff = new Date(Date.now() - 30 * 60 * 1000); + const recentRuns = runs.workflow_runs.filter( + r => new Date(r.created_at) >= cutoff + ); + const alive = recentRuns.find( + r => r.status === 'queued' || r.status === 'in_progress' + ); + if (!alive) { + core.info( + `No active belt dispatcher run in last 30m ` + + `(${recentRuns.length} recent runs checked); re-dispatching` + ); + const { data: repoInfo } = await withRetry((client) => + client.rest.repos.get({ + owner: context.repo.owner, + repo: context.repo.repo, + }) + ); + const dispatchRef = repoInfo.default_branch || 'main'; + await withRetry((client) => + client.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + ref: dispatchRef, + inputs: { + agent_key: agentKey, + force_issue: String(issueNumber), + dry_run: 'false', + }, + }) + ); + redispatched = true; + core.info(`Re-dispatched belt for issue #${issueNumber}`); + } else { + core.info( + `Belt dispatcher run ${alive.id} still ${alive.status}; skipping re-dispatch` + ); + } + } catch (redispatchErr) { + core.warning(`Belt re-dispatch check failed: ${redispatchErr?.message}`); + } + + const redispatchNote = redispatched + ? ' Re-dispatched belt dispatcher.' + : ''; core.info(`Applying branch-creation backoff: waiting ${actualMinutes} minutes`); await withRetry((client) => client.rest.issues.createComment({ owner: context.repo.owner, @@ -2311,7 +2437,7 @@ jobs: issue_number: issueNumber, body: `🤖 **Auto-pilot**: Backoff delay (${actualMinutes}m) - Branch not created yet. Attempt ${stallCount + 1}/${maxStallRetries}. + Branch not created yet. Attempt ${stallCount + 1}/${maxStallRetries}.${redispatchNote} Waiting before retry...` })); diff --git a/templates/consumer-repo/.github/workflows/agents-keepalive-loop.yml b/templates/consumer-repo/.github/workflows/agents-keepalive-loop.yml index 629e598f0..b7cdae91f 100644 --- a/templates/consumer-repo/.github/workflows/agents-keepalive-loop.yml +++ b/templates/consumer-repo/.github/workflows/agents-keepalive-loop.yml @@ -859,6 +859,7 @@ jobs: - evaluate - run-codex - run-claude + - progress-review # Run if PR exists, handle skipped/failed agent jobs gracefully # run-codex will be skipped when action != run/fix/conflict, which is expected if: |