Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 120 additions & 20 deletions .github/scripts/keepalive_loop.js
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ function buildAttemptEntry({
gateConclusion,
errorCategory,
errorType,
tasksTotal,
tasksUnchecked,
tasksCompletedDelta,
allComplete,
}) {
const actionValue = normalise(action) || 'unknown';
const reasonValue = normalise(reason) || actionValue;
Expand Down Expand Up @@ -147,6 +151,18 @@ function buildAttemptEntry({
if (errorType) {
entry.error_type = normalise(errorType);
}
if (Number.isFinite(tasksTotal)) {
entry.tasks_total = Math.max(0, Math.floor(tasksTotal));
}
if (Number.isFinite(tasksUnchecked)) {
entry.tasks_unchecked = Math.max(0, Math.floor(tasksUnchecked));
}
if (Number.isFinite(tasksCompletedDelta)) {
entry.tasks_completed_delta = Math.max(0, Math.floor(tasksCompletedDelta));
}
if (typeof allComplete === 'boolean') {
entry.all_complete = allComplete;
}

return entry;
}
Expand Down Expand Up @@ -1763,6 +1779,10 @@ async function evaluateKeepaliveLoop({ github, context, core, payload: overrideP
const maxIterations = toNumber(config.max_iterations ?? state.max_iterations, 5);
const failureThreshold = toNumber(config.failure_threshold ?? state.failure_threshold, 3);
const progressReviewThreshold = toNumber(config.progress_review_threshold ?? state.progress_review_threshold, 4);
const completeGateFailureMax = Math.max(
1,
toNumber(config.complete_gate_failure_rounds ?? state.complete_gate_failure_rounds_max, 2),
);

// Evidence-based productivity tracking
// Uses multiple signals to determine if work is being done:
Expand All @@ -1789,11 +1809,16 @@ async function evaluateKeepaliveLoop({ github, context, core, payload: overrideP
? 0
: prevRoundsWithoutCompletion + (iteration > 0 ? 1 : 0);

const prevCompleteGateFailureRounds = toNumber(state.complete_gate_failure_rounds, 0);
const completeGateFailureRounds = allComplete && gateNormalized !== 'success'
? prevCompleteGateFailureRounds + 1
: 0;

// Progress review threshold: trigger after N rounds of activity without task completion
// This catches "productive but unfocused" patterns where agent makes changes but doesn't advance criteria
// Default is 4 rounds - enough leeway for prep work but early enough for course correction
const needsProgressReview = roundsWithoutTaskCompletion >= progressReviewThreshold
&& !allComplete; // Don't review if all tasks are done
&& (!allComplete || gateNormalized !== 'success');

// Calculate productivity score (0-100)
// This is evidence-based: higher score = more confidence work is happening
Expand Down Expand Up @@ -1854,7 +1879,10 @@ async function evaluateKeepaliveLoop({ github, context, core, payload: overrideP
action = 'stop';
reason = 'no-checklists';
} else if (gateNormalized !== 'success') {
if (gateNormalized === 'cancelled') {
if (allComplete && completeGateFailureRounds >= completeGateFailureMax) {
action = 'stop';
reason = 'complete-gate-failure-max';
} else if (gateNormalized === 'cancelled') {
gateRateLimit = await detectRateLimitCancellation({
github,
context,
Expand Down Expand Up @@ -2023,20 +2051,73 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) {
const gateConclusion = normalise(inputs.gateConclusion || inputs.gate_conclusion);
const action = normalise(inputs.action);
const reason = normalise(inputs.reason);
const tasksTotal = toNumber(inputs.tasksTotal ?? inputs.tasks_total, 0);
const tasksUnchecked = toNumber(inputs.tasksUnchecked ?? inputs.tasks_unchecked, 0);
const keepaliveEnabled = toBool(inputs.keepaliveEnabled ?? inputs.keepalive_enabled, false);
const autofixEnabled = toBool(inputs.autofixEnabled ?? inputs.autofix_enabled, false);
const tasksTotalInput = inputs.tasksTotal ?? inputs.tasks_total;
const tasksUncheckedInput = inputs.tasksUnchecked ?? inputs.tasks_unchecked;
const keepaliveEnabledInput = inputs.keepaliveEnabled ?? inputs.keepalive_enabled;
const autofixEnabledInput = inputs.autofixEnabled ?? inputs.autofix_enabled;
const iterationInput = inputs.iteration;
const maxIterationsInput = inputs.maxIterations ?? inputs.max_iterations;
const failureThresholdInput = inputs.failureThreshold ?? inputs.failure_threshold;
const roundsWithoutTaskCompletionInput =
inputs.roundsWithoutTaskCompletion ?? inputs.rounds_without_task_completion;
const agentType = normalise(inputs.agent_type ?? inputs.agentType) || 'codex';
const iteration = toNumber(inputs.iteration, 0);
const maxIterations = toNumber(inputs.maxIterations ?? inputs.max_iterations, 0);
const failureThreshold = Math.max(1, toNumber(inputs.failureThreshold ?? inputs.failure_threshold, 3));
const runResult = normalise(inputs.runResult || inputs.run_result);
const stateTrace = normalise(inputs.trace || inputs.keepalive_trace || '');
const roundsWithoutTaskCompletion = toNumber(
inputs.roundsWithoutTaskCompletion ?? inputs.rounds_without_task_completion,
0,

const { state: previousState, commentId } = await loadKeepaliveState({
github,
context,
prNumber,
trace: stateTrace,
});

const hasTasksTotalInput = tasksTotalInput !== undefined && tasksTotalInput !== '';
const hasTasksUncheckedInput = tasksUncheckedInput !== undefined && tasksUncheckedInput !== '';
const hasIterationInput = iterationInput !== undefined && iterationInput !== '';
const hasMaxIterationsInput = maxIterationsInput !== undefined && maxIterationsInput !== '';
const hasFailureThresholdInput = failureThresholdInput !== undefined && failureThresholdInput !== '';
const hasRoundsWithoutTaskCompletionInput =
roundsWithoutTaskCompletionInput !== undefined && roundsWithoutTaskCompletionInput !== '';
const hasKeepaliveEnabledInput = keepaliveEnabledInput !== undefined && keepaliveEnabledInput !== '';
const hasAutofixEnabledInput = autofixEnabledInput !== undefined && autofixEnabledInput !== '';

const tasksTotal = hasTasksTotalInput
? toNumber(tasksTotalInput, 0)
: toNumber(previousState?.tasks?.total, 0);
const tasksUnchecked = hasTasksUncheckedInput
? toNumber(tasksUncheckedInput, 0)
: toNumber(previousState?.tasks?.unchecked, 0);
const keepaliveEnabledFallback = toBool(
previousState?.keepalive_enabled ??
previousState?.keepaliveEnabled ??
previousState?.keepalive,
Boolean(previousState?.running),
);
const keepaliveEnabled = hasKeepaliveEnabledInput
? toBool(keepaliveEnabledInput, keepaliveEnabledFallback)
: keepaliveEnabledFallback;
const autofixEnabledFallback = toBool(
previousState?.autofix_enabled ?? previousState?.autofixEnabled ?? previousState?.autofix,
false,
);
const autofixEnabled = hasAutofixEnabledInput
? toBool(autofixEnabledInput, autofixEnabledFallback)
: autofixEnabledFallback;
const iteration = hasIterationInput
? toNumber(iterationInput, 0)
: toNumber(previousState?.iteration, 0);
const maxIterations = hasMaxIterationsInput
? toNumber(maxIterationsInput, 0)
: toNumber(previousState?.max_iterations, 0);
const failureThreshold = Math.max(
1,
hasFailureThresholdInput
? toNumber(failureThresholdInput, 3)
: toNumber(previousState?.failure_threshold, 3),
);
const roundsWithoutTaskCompletion = hasRoundsWithoutTaskCompletionInput
? toNumber(roundsWithoutTaskCompletionInput, 0)
: toNumber(previousState?.rounds_without_task_completion, 0);

// Agent output details (agent-agnostic, with fallback to old codex_ names)
const agentExitCode = normalise(inputs.agent_exit_code ?? inputs.agentExitCode ?? inputs.codex_exit_code ?? inputs.codexExitCode);
Expand Down Expand Up @@ -2096,12 +2177,6 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) {
...timeoutWarningConfig,
});

const { state: previousState, commentId } = await loadKeepaliveState({
github,
context,
prNumber,
trace: stateTrace,
});
const previousFailure = previousState?.failure || {};
const prBody = await fetchPrBody({ github, context, prNumber, core });
const focusSections = prBody ? normaliseChecklistSections(parseScopeTasksAcceptanceSections(prBody)) : {};
Expand Down Expand Up @@ -2238,6 +2313,20 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) {
const errorRecovery = failureDetails.recovery;
const tasksComplete = Math.max(0, tasksTotal - tasksUnchecked);
const allTasksComplete = tasksUnchecked === 0 && tasksTotal > 0;
const previousCompleteGateFailureRounds = toNumber(previousState?.complete_gate_failure_rounds, 0);
const completeGateFailureMax = Math.max(
1,
toNumber(
inputs.completeGateFailureRoundsMax ??
inputs.complete_gate_failure_rounds_max ??
previousState?.complete_gate_failure_rounds_max,
2,
),
);
const completeGateFailureRounds =
allTasksComplete && gateConclusion && gateConclusion !== 'success'
? previousCompleteGateFailureRounds + 1
: 0;
const metricsIteration = action === 'run' ? currentIteration + 1 : currentIteration;
const durationMs = resolveDurationMs({
durationMs: toOptionalNumber(inputs.duration_ms ?? inputs.durationMs),
Expand Down Expand Up @@ -2565,8 +2654,11 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) {
const focusTask = currentFocus || fallbackFocus;
const shouldRecordAttempt = action === 'run' && reason !== 'verify-acceptance';
let attemptedTasks = normaliseAttemptedTasks(previousState?.attempted_tasks);
if (shouldRecordAttempt && focusTask) {
attemptedTasks = updateAttemptedTasks(attemptedTasks, focusTask, metricsIteration);
if (shouldRecordAttempt) {
const attemptLabel = focusTask || (tasksCompletedThisRound > 0 ? 'checkbox-progress' : 'no-focus');
if (attemptLabel) {
attemptedTasks = updateAttemptedTasks(attemptedTasks, attemptLabel, metricsIteration);
}
}

let verification = previousState?.verification && typeof previousState.verification === 'object'
Expand Down Expand Up @@ -2605,12 +2697,16 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) {
prev_files_changed: toNumber(previousState?.last_files_changed, 0),
// Track consecutive rounds without task completion for progress review
rounds_without_task_completion: roundsWithoutTaskCompletion,
complete_gate_failure_rounds: completeGateFailureRounds,
complete_gate_failure_rounds_max: completeGateFailureMax,
// Quality metrics for analysis validation
last_effort_score: sessionEffortScore,
last_data_quality: sessionDataQuality,
attempted_tasks: attemptedTasks,
last_focus: focusTask || '',
verification,
keepalive_enabled: keepaliveEnabled,
autofix_enabled: autofixEnabled,
timeout: {
resolved_minutes: timeoutStatus.resolvedMinutes,
default_minutes: timeoutStatus.defaultMinutes,
Expand All @@ -2634,6 +2730,10 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) {
gateConclusion,
errorCategory,
errorType,
tasksTotal,
tasksUnchecked,
tasksCompletedDelta: tasksCompletedThisRound,
allComplete: allTasksComplete,
});
newState.attempts = updateAttemptHistory(previousState?.attempts, attemptEntry);

Expand Down
3 changes: 3 additions & 0 deletions .github/sync-manifest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ workflows:
- source: .github/workflows/agents-keepalive-loop.yml
description: "Keepalive loop - continues agent work until tasks complete (deprecated; replaced by agents-81-gate-followups.yml, removal no earlier than 2026-02-15)"

- source: .github/workflows/agents-keepalive-loop-reporter.yml
description: "Keepalive reporter - posts summary when keepalive run fails or cancels"

- source: .github/workflows/agents-71-codex-belt-dispatcher.yml
description: "Codex belt dispatcher - selects issues and creates codex/issue-N branches for agent work"

Expand Down
83 changes: 83 additions & 0 deletions .github/workflows/agents-keepalive-loop-reporter.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Keepalive Loop Reporter
#
# Runs after every completed "Agents Keepalive Loop" workflow run and, when
# that run did not conclude successfully, calls updateKeepaliveLoopSummary
# with the values stored in the keepalive state so the summary is still
# updated for the PR.
name: Keepalive Loop Reporter

on:
  workflow_run:
    workflows: ["Agents Keepalive Loop"]
    types: [completed]

permissions:
  contents: read
  pull-requests: write
  issues: write
  actions: read

# One reporter at a time per PR (falls back to the run id when the triggering
# run carries no PR); queued reporters are not cancelled.
concurrency:
  group: >-
    keepalive-loop-reporter-${{ github.event.workflow_run.pull_requests[0].number ||
    github.run_id }}
  cancel-in-progress: false

jobs:
  report:
    name: Report keepalive completion
    # Skip successful keepalive runs at the job level so no runner is started
    # just to exit early; the script below keeps its own conclusion check as a
    # second line of defence.
    if: >-
      vars.USE_CONSOLIDATED_WORKFLOWS != 'true' &&
      github.event.workflow_run.conclusion != 'success'
    runs-on: ubuntu-latest
    steps:
      # Only .github/scripts is needed: the script step requires
      # keepalive_state.js and keepalive_loop.js from there.
      - name: Checkout keepalive scripts
        uses: actions/checkout@v6
        with:
          sparse-checkout: |
            .github/scripts
          sparse-checkout-cone-mode: false
          fetch-depth: 1

      - name: Update summary for cancelled/failed runs
        uses: actions/github-script@v8
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            // Payload of the keepalive workflow run that just completed.
            const run = context.payload?.workflow_run || {};
            const prNumber = Number(run.pull_requests?.[0]?.number || 0);
            if (!prNumber) {
              // No PR number on the run means there is no PR to report on.
              core.info('No PR context found; skipping keepalive post summary.');
              return;
            }

            // Prefer the run conclusion; fall back to its status when absent.
            const conclusion = String(run.conclusion || run.status || '').toLowerCase();
            if (!conclusion || conclusion === 'success') {
              core.info(
                `Keepalive run conclusion=${conclusion || 'unknown'}; ` +
                  'no post summary needed.'
              );
              return;
            }

            const { loadKeepaliveState } = require('./.github/scripts/keepalive_state.js');
            const { updateKeepaliveLoopSummary } = require('./.github/scripts/keepalive_loop.js');

            // Report only while the stored state is still flagged as running.
            // NOTE(review): presumably this means the keepalive run ended
            // before its own summary job could update the state - confirm
            // against keepalive_state.js.
            const { state } = await loadKeepaliveState({ github, context, prNumber, trace: '' });
            if (!state || state.running !== true) {
              core.info('Keepalive state not marked as running; skipping post summary.');
              return;
            }

            // Replay the last stored values; only run_result and run_url are
            // taken from the completed workflow run itself.
            const inputs = {
              pr_number: prNumber,
              action: 'run',
              reason: '',
              gate_conclusion: state.gate_conclusion || '',
              iteration: state.iteration || 0,
              max_iterations: state.max_iterations || 0,
              failure_threshold: state.failure_threshold || 3,
              tasks_total: state.tasks?.total ?? 0,
              tasks_unchecked: state.tasks?.unchecked ?? 0,
              keepalive_enabled: state.keepalive_enabled ?? 'true',
              autofix_enabled: state.autofix_enabled ?? '',
              agent_type: state.agent_type || 'codex',
              trace: state.trace || '',
              run_result: conclusion,
              run_url: run.html_url || '',
              rounds_without_task_completion: state.rounds_without_task_completion || 0,
            };

            await updateKeepaliveLoopSummary({ github, context, core, inputs });
10 changes: 2 additions & 8 deletions .github/workflows/agents-keepalive-loop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -590,16 +590,10 @@ jobs:
needs:
- evaluate
- run-codex
# Run always if PR exists, handle skipped agent jobs gracefully
# Run if PR exists, handle skipped/failed agent jobs gracefully
# run-codex will be skipped when action != run/fix/conflict, which is expected
# Using !cancelled() instead of always() to work around GitHub Actions skipping behavior
# We check that neither job was cancelled AND run-codex didn't fail (skipped is OK)
if: |
!cancelled() &&
needs.evaluate.result != 'failure' &&
needs.evaluate.result != 'cancelled' &&
needs.run-codex.result != 'failure' &&
needs.run-codex.result != 'cancelled' &&
always() &&
needs.evaluate.outputs.pr_number != '' &&
needs.evaluate.outputs.pr_number != '0'
runs-on: ubuntu-latest
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/reusable-codex-run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ permissions:
models: read

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
group: codex-${{ inputs.mode }}-${{ inputs.pr_number || github.ref }}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Serialize runs across modes for the same PR branch

The new concurrency group includes inputs.mode, so keepalive and autofix runs for the same PR no longer share a group. Both .github/workflows/agents-keepalive-loop.yml and .github/workflows/agents-autofix-loop.yml call this reusable workflow with the same pr_number/pr_ref but different mode values, which now allows simultaneous runs that both push to the same branch. That increases the chance of push rejections or conflicting rebases in the commit step, causing failed runs or interleaved commits. Consider keying the group only by PR/target branch (or pr_ref) so different modes still serialize on the same branch.

Useful? React with 👍 / 👎.

cancel-in-progress: false

jobs:
codex:
Expand Down
1 change: 1 addition & 0 deletions docs/ci/WORKFLOWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ The agent workflows coordinate Codex and chat orchestration across topics:
* [`agents-70-orchestrator.yml`](../../.github/workflows/agents-70-orchestrator.yml) is the thin dispatcher that triggers the orchestrator init and main phases. It calls [`reusable-70-orchestrator-init.yml`](../../.github/workflows/reusable-70-orchestrator-init.yml) for initialization (rate limit checks, token preflight, parameter resolution) and [`reusable-70-orchestrator-main.yml`](../../.github/workflows/reusable-70-orchestrator-main.yml) for the main keepalive and belt operations.
* Required permissions: `actions: write`, `contents: write`, and `pull-requests: write` at the workflow root so nested branch-sync and keepalive post-work steps can request their scopes without startup failure.
* [`agents-keepalive-loop.yml`](../../.github/workflows/agents-keepalive-loop.yml) listens for Gate completion (and the optional `agent:codex` label event) to continue keepalive work in a GitHub-native loop: it inspects PR checklists/config, gates on Gate success, dispatches `reusable-codex-run` with the keepalive prompt, updates a single summary comment, and pauses with a `needs-human` label when tasks complete, limits are reached, or repeated failures occur.
* [`agents-keepalive-loop-reporter.yml`](../../.github/workflows/agents-keepalive-loop-reporter.yml) posts the keepalive summary comment when the keepalive run is cancelled or fails before the summary job can execute, preserving the final status for triage.
* [`agents-73-codex-belt-conveyor.yml`](../../.github/workflows/agents-73-codex-belt-conveyor.yml) manages task distribution. The orchestrator summary now logs "keepalive skipped" when the pause label is present and surfaces `keepalive_pause_label`/`keepalive_paused_label` outputs for downstream consumers.
* [`agents-autofix-loop.yml`](../../.github/workflows/agents-autofix-loop.yml) triggers on Gate failure (for PRs with `agent:codex` label or `autofix: true` in body) and calls Codex to attempt bounded autofix iterations.
* [`agents-keepalive-branch-sync.yml`](../../.github/workflows/agents-keepalive-branch-sync.yml) issues short-lived sync branches, merges the reconciliation PR automatically, and tears down the branch once the update lands so keepalive can clear branch drift without human intervention.
Expand Down
Loading
Loading