From 02064f00341616080881da9a3cb81b279bfdee37 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Feb 2026 21:05:25 +0000 Subject: [PATCH 01/15] fix: avoid multi-line stderr in workflow annotations GitHub Actions ::warning:: commands truncate/mangle multi-line content. Emit a short annotation message and print full npm stderr in a collapsible ::group:: instead, so logs stay readable. https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/actions/setup-api-client/action.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/actions/setup-api-client/action.yml b/.github/actions/setup-api-client/action.yml index 3e43bc863..5fead358b 100644 --- a/.github/actions/setup-api-client/action.yml +++ b/.github/actions/setup-api-client/action.yml @@ -266,7 +266,10 @@ runs: npm_err=$(cat "$npm_output") rm -f "$npm_output" - echo "::warning::npm install attempt $attempt/$NPM_MAX_RETRIES failed: $npm_err" + echo "::warning::npm install attempt $attempt/$NPM_MAX_RETRIES failed (see logs)" + echo "::group::npm stderr (attempt $attempt)" + echo "$npm_err" + echo "::endgroup::" # On first failure, also try --legacy-peer-deps in case it's a peer dep conflict if [ "$attempt" -eq 1 ]; then @@ -279,7 +282,10 @@ runs: fi npm_err_legacy=$(cat "$npm_output") rm -f "$npm_output" - echo "::warning::npm install with --legacy-peer-deps failed: $npm_err_legacy" + echo "::warning::npm install with --legacy-peer-deps failed (see logs)" + echo "::group::npm stderr (--legacy-peer-deps)" + echo "$npm_err_legacy" + echo "::endgroup::" fi if [ "$attempt" -lt "$NPM_MAX_RETRIES" ]; then From 4d84c1f97f3b7c231f11bb9b296c1d6a3427e7fc Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Feb 2026 21:22:43 +0000 Subject: [PATCH 02/15] fix: sync template setup-api-client with retry-with-backoff and annotation fixes Mirror the main setup-api-client changes into the consumer-repo template to prevent template drift: - Exponential backoff retry (3 attempts, 5s/10s) for 
transient npm errors - --legacy-peer-deps fallback on first failure - Short ::warning:: annotations with full stderr in collapsible ::group:: - Pin lru-cache@10.4.3 (was ^10.0.0) https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .../actions/setup-api-client/action.yml | 80 ++++++++++++++----- 1 file changed, 58 insertions(+), 22 deletions(-) diff --git a/templates/consumer-repo/.github/actions/setup-api-client/action.yml b/templates/consumer-repo/.github/actions/setup-api-client/action.yml index b912fe4ad..24736497c 100644 --- a/templates/consumer-repo/.github/actions/setup-api-client/action.yml +++ b/templates/consumer-repo/.github/actions/setup-api-client/action.yml @@ -239,29 +239,65 @@ runs: # Install with pinned versions for consistency. # lru-cache is an explicit transitive dep of @octokit/auth-app required for - # GitHub App token minting; pin it here so npm always hoists it to the top - # level even if a prior cached node_modules state is missing it. - # Capture stderr for debugging if the command fails - npm_output=$(mktemp) - npm_cmd=(npm install --no-save --location=project \ - @octokit/rest@20.0.2 \ - @octokit/plugin-retry@6.0.1 \ - @octokit/plugin-paginate-rest@9.1.5 \ - @octokit/auth-app@6.0.3 \ - lru-cache@^10.0.0) - if "${npm_cmd[@]}" 2>"$npm_output"; then - rm -f "$npm_output" - else - echo "::warning::npm install failed with: $(cat "$npm_output")" - echo "::warning::Retrying with --legacy-peer-deps" + # GitHub App token minting; pin it here so npm always hoists a specific version + # even if a prior cached node_modules state is missing it. + # + # Retry with exponential backoff to survive transient npm registry errors + # (e.g. 403 Forbidden from CDN/rate-limit on safe-buffer, undici, etc.). 
+ NPM_PACKAGES=( + @octokit/rest@20.0.2 + @octokit/plugin-retry@6.0.1 + @octokit/plugin-paginate-rest@9.1.5 + @octokit/auth-app@6.0.3 + lru-cache@10.4.3 + ) + NPM_MAX_RETRIES=3 + NPM_BACKOFF=5 # seconds; doubles each retry (5, 10) + npm_installed=false + + for (( attempt=1; attempt<=NPM_MAX_RETRIES; attempt++ )); do + npm_output=$(mktemp) + + if npm install --no-save --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then + rm -f "$npm_output" + npm_installed=true + break + fi + + npm_err=$(cat "$npm_output") rm -f "$npm_output" - npm_cmd=(npm install --no-save --legacy-peer-deps --location=project \ - @octokit/rest@20.0.2 \ - @octokit/plugin-retry@6.0.1 \ - @octokit/plugin-paginate-rest@9.1.5 \ - @octokit/auth-app@6.0.3 \ - lru-cache@^10.0.0) - "${npm_cmd[@]}" + echo "::warning::npm install attempt $attempt/$NPM_MAX_RETRIES failed (see logs)" + echo "::group::npm stderr (attempt $attempt)" + echo "$npm_err" + echo "::endgroup::" + + # On first failure, also try --legacy-peer-deps in case it's a peer dep conflict + if [ "$attempt" -eq 1 ]; then + echo "::warning::Retrying with --legacy-peer-deps" + npm_output=$(mktemp) + if npm install --no-save --legacy-peer-deps --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then + rm -f "$npm_output" + npm_installed=true + break + fi + npm_err_legacy=$(cat "$npm_output") + rm -f "$npm_output" + echo "::warning::npm install with --legacy-peer-deps failed (see logs)" + echo "::group::npm stderr (--legacy-peer-deps)" + echo "$npm_err_legacy" + echo "::endgroup::" + fi + + if [ "$attempt" -lt "$NPM_MAX_RETRIES" ]; then + echo "::notice::Waiting ${NPM_BACKOFF}s before retry..." 
+ sleep "$NPM_BACKOFF" + NPM_BACKOFF=$((NPM_BACKOFF * 2)) + fi + done + + if [ "$npm_installed" != "true" ]; then + echo "::error::npm install failed after $NPM_MAX_RETRIES attempts" + exit 1 fi # Restore vendored package metadata that npm may have overwritten From c36adc17812f9a51f94ae474bde59b3458e212d8 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Feb 2026 21:50:29 +0000 Subject: [PATCH 03/15] fix: pre-timeout watchdog, robust parser import, and always-commit safeguard Three changes to reusable-codex-run.yml to prevent work loss on timeout: 1. Pre-timeout watchdog: A background timer fires 5 minutes before max_runtime_minutes, committing and pushing any uncommitted work so it survives the job cancellation. Killed automatically if Codex finishes before the timer fires. 2. Robust parser import: Replace sys.path-based import of codex_jsonl_parser with importlib.util.spec_from_file_location. Consumer repos (e.g. Counter_Risk) have their own tools/ package with __init__.py that shadows the Workflows tools/ on sys.path, causing "No module named 'tools.codex_jsonl_parser'". 3. Commit step always runs: Add if: always() to the "Commit and push changes" step so uncommitted work is captured even on non-zero exit codes (the watchdog handles timeout, this handles failures). 
https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/workflows/reusable-codex-run.yml | 69 ++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/.github/workflows/reusable-codex-run.yml b/.github/workflows/reusable-codex-run.yml index 11b6cad0c..63325e3e6 100644 --- a/.github/workflows/reusable-codex-run.yml +++ b/.github/workflows/reusable-codex-run.yml @@ -188,6 +188,7 @@ jobs: error-type: ${{ steps.classify_failure.outputs.error_type }} error-recovery: ${{ steps.classify_failure.outputs.error_recovery }} error-summary: ${{ steps.classify_failure.outputs.error_summary }} + watchdog-saved: ${{ steps.run_codex.outputs.watchdog-saved }} # LLM analysis outputs llm-analysis-run: ${{ steps.llm_analysis.outputs.llm-analysis-run }} llm-completed-tasks: ${{ steps.llm_analysis.outputs.completed-tasks }} @@ -938,6 +939,47 @@ jobs: echo "Extra args: provided (${#EXTRA_ARGS[@]} arg(s))" fi + # --- Pre-timeout watchdog --- + # When the job approaches the timeout limit, this background process + # commits and pushes any uncommitted work so it isn't lost to the + # job cancellation. It fires once, 5 minutes before max_runtime. 
+ MAX_RUNTIME_MIN=${{ inputs.max_runtime_minutes }} + GRACE_MIN=5 + WATCHDOG_DELAY=$(( (MAX_RUNTIME_MIN - GRACE_MIN) * 60 )) + if [ "$WATCHDOG_DELAY" -gt 60 ]; then + ( + sleep "$WATCHDOG_DELAY" + echo "::warning::Pre-timeout watchdog fired (${GRACE_MIN}m before ${MAX_RUNTIME_MIN}m limit)" + + CHANGED=$(git status --porcelain | wc -l) + UNPUSHED=0 + if git rev-parse FETCH_HEAD >/dev/null 2>&1; then + UNPUSHED=$(git rev-list FETCH_HEAD..HEAD --count 2>/dev/null || echo 0) + fi + + if [ "$CHANGED" -gt 0 ] || [ "$UNPUSHED" -gt 0 ]; then + echo "::notice::Watchdog saving ${CHANGED} uncommitted file(s) and ${UNPUSHED} unpushed commit(s)" + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + if [ "$CHANGED" -gt 0 ]; then + git add -A + git commit -m "chore(codex-keepalive): pre-timeout checkpoint (PR #${PR_NUM:-})" --no-verify || true + fi + TARGET_BRANCH="${{ inputs.pr_ref }}" + TARGET_BRANCH="${TARGET_BRANCH#refs/heads/}" + PUSH_TOKEN="${{ steps.auth_token.outputs.push_token }}" + REMOTE_URL="https://x-access-token:${PUSH_TOKEN}@github.com/${{ github.repository }}" + git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" || \ + echo "::warning::Watchdog push failed" + echo "watchdog-saved=true" >> "$GITHUB_OUTPUT" + else + echo "::notice::Watchdog: no uncommitted or unpushed work to save" + fi + ) & + WATCHDOG_PID=$! + echo "::notice::Pre-timeout watchdog started (PID ${WATCHDOG_PID}, fires in $((WATCHDOG_DELAY/60))m)" + fi + # Run codex exec with --json to capture rich session data # JSONL events stream to stdout, final message still goes to OUTPUT_FILE CODEX_EXIT=0 @@ -954,6 +996,12 @@ jobs: prompt_content="$(cat "$PROMPT_FILE")" "${cmd[@]}" "$prompt_content" > "$SESSION_JSONL" 2>&1 || CODEX_EXIT=$? 
+ # Kill watchdog if Codex finished before the timer fired + if [ -n "${WATCHDOG_PID:-}" ]; then + kill "$WATCHDOG_PID" 2>/dev/null || true + wait "$WATCHDOG_PID" 2>/dev/null || true + fi + echo "exit-code=${CODEX_EXIT}" >> "$GITHUB_OUTPUT" if [ "$CODEX_EXIT" -ne 0 ]; then @@ -1008,17 +1056,29 @@ jobs: # Basic parsing (always available) python3 << 'PYEOF' + import importlib.util import os import sys - # Add .workflows-lib to path for tools imports - sys.path.insert(0, '.workflows-lib') - sys.path.insert(0, '.') session_file = os.environ.get("SESSION_JSONL", "codex-session.jsonl") github_output = os.environ.get("GITHUB_OUTPUT", "/dev/null") try: - from tools.codex_jsonl_parser import parse_codex_jsonl_file + # Load codex_jsonl_parser from the Workflows checkout by exact path. + # Consumer repos (e.g. Counter_Risk) have their own tools/ package with + # __init__.py, which shadows the Workflows tools/ on sys.path. Using + # importlib.util.spec_from_file_location bypasses sys.path entirely. 
+ _parser_path = os.path.join( + ".workflows-lib", "tools", "codex_jsonl_parser.py" + ) + _spec = importlib.util.spec_from_file_location( + "codex_jsonl_parser", _parser_path + ) + if _spec is None or _spec.loader is None: + raise ImportError(f"Cannot load spec from {_parser_path}") + _mod = importlib.util.module_from_spec(_spec) + _spec.loader.exec_module(_mod) + parse_codex_jsonl_file = _mod.parse_codex_jsonl_file session = parse_codex_jsonl_file(session_file) @@ -1150,6 +1210,7 @@ jobs: - name: Commit and push changes id: commit + if: always() env: MODE: ${{ inputs.mode }} PR_NUMBER: ${{ inputs.pr_number }} From 7924d1ea650b6919aa2e222c164f98d12ea495a6 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 01:35:02 +0000 Subject: [PATCH 04/15] fix: preserve indented checkbox states in PR Meta body sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parseCheckboxStates() and mergeCheckboxStates() only matched top-level checkboxes (^- \[), ignoring indented sub-tasks ( - \[). When PR Meta regenerated the PR body from the issue, auto-reconciled sub-task checkboxes were silently reverted to unchecked. This caused the keepalive loop to stall with rounds_without_task_completion: 8 despite the agent completing real work — PR #256 had 5 tasks auto-checked then immediately un-checked on every push. 
https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/scripts/agents_pr_meta_update_body.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/scripts/agents_pr_meta_update_body.js b/.github/scripts/agents_pr_meta_update_body.js index ec9a585aa..3e987c10c 100644 --- a/.github/scripts/agents_pr_meta_update_body.js +++ b/.github/scripts/agents_pr_meta_update_body.js @@ -404,7 +404,7 @@ function parseCheckboxStates(block) { if (inCodeBlock) { continue; } - const match = line.match(/^- \[(x| )\]\s*(.+)$/i); + const match = line.match(/^\s*- \[(x| )\]\s*(.+)$/i); if (match) { const checked = match[1].toLowerCase() === 'x'; const text = match[2].trim(); @@ -461,12 +461,13 @@ function mergeCheckboxStates(newContent, existingStates) { updated.push(line); continue; } - const match = line.match(/^- \[( )\]\s*(.+)$/); + const match = line.match(/^(\s*)- \[( )\]\s*(.+)$/); if (match) { - const text = match[2].trim(); + const indent = match[1]; + const text = match[3].trim(); const normalized = text.replace(/^-\s*/, '').trim().toLowerCase(); if (existingStates.has(normalized)) { - updated.push(`- [x] ${text}`); + updated.push(`${indent}- [x] ${text}`); continue; } } From 6145c5e40686053b5a1d0e27322fa548d2be1d6a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 01:35:36 +0000 Subject: [PATCH 05/15] chore: sync template scripts --- .../.github/scripts/agents_pr_meta_update_body.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/templates/consumer-repo/.github/scripts/agents_pr_meta_update_body.js b/templates/consumer-repo/.github/scripts/agents_pr_meta_update_body.js index ec9a585aa..3e987c10c 100644 --- a/templates/consumer-repo/.github/scripts/agents_pr_meta_update_body.js +++ b/templates/consumer-repo/.github/scripts/agents_pr_meta_update_body.js @@ -404,7 +404,7 @@ function parseCheckboxStates(block) { if (inCodeBlock) { 
continue; } - const match = line.match(/^- \[(x| )\]\s*(.+)$/i); + const match = line.match(/^\s*- \[(x| )\]\s*(.+)$/i); if (match) { const checked = match[1].toLowerCase() === 'x'; const text = match[2].trim(); @@ -461,12 +461,13 @@ function mergeCheckboxStates(newContent, existingStates) { updated.push(line); continue; } - const match = line.match(/^- \[( )\]\s*(.+)$/); + const match = line.match(/^(\s*)- \[( )\]\s*(.+)$/); if (match) { - const text = match[2].trim(); + const indent = match[1]; + const text = match[3].trim(); const normalized = text.replace(/^-\s*/, '').trim().toLowerCase(); if (existingStates.has(normalized)) { - updated.push(`- [x] ${text}`); + updated.push(`${indent}- [x] ${text}`); continue; } } From 68111870655048c0e0d1619356b90aceeafbbabf Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 01:41:53 +0000 Subject: [PATCH 06/15] fix: address review comments on watchdog pre-timeout mechanism - P1: Add fetch/rebase before watchdog push to avoid non-fast-forward rejection when another workflow updates the branch during the run. Includes one retry with re-fetch/rebase and merge fallback. - P2: Export watchdog-saved in on.workflow_call.outputs so callers of the reusable workflow can observe the signal. - Copilot: Add git fetch before checking FETCH_HEAD to ensure it exists and is current (actions/checkout doesn't set FETCH_HEAD). - Copilot: Initialize watchdog-saved=false before background subshell so downstream consumers always get a defined value. 
https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/workflows/reusable-codex-run.yml | 42 ++++++++++++++++++++---- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/.github/workflows/reusable-codex-run.yml b/.github/workflows/reusable-codex-run.yml index 63325e3e6..f5dfa67be 100644 --- a/.github/workflows/reusable-codex-run.yml +++ b/.github/workflows/reusable-codex-run.yml @@ -124,6 +124,9 @@ on: error-recovery: description: 'Suggested recovery action if failure occurred' value: ${{ jobs.codex.outputs.error-recovery }} + watchdog-saved: + description: 'Whether the pre-timeout watchdog saved uncommitted work (true/false)' + value: ${{ jobs.codex.outputs.watchdog-saved }} # LLM task analysis outputs llm-analysis-run: description: 'Whether LLM analysis was performed' @@ -946,11 +949,20 @@ jobs: MAX_RUNTIME_MIN=${{ inputs.max_runtime_minutes }} GRACE_MIN=5 WATCHDOG_DELAY=$(( (MAX_RUNTIME_MIN - GRACE_MIN) * 60 )) + echo "watchdog-saved=false" >> "$GITHUB_OUTPUT" if [ "$WATCHDOG_DELAY" -gt 60 ]; then ( sleep "$WATCHDOG_DELAY" echo "::warning::Pre-timeout watchdog fired (${GRACE_MIN}m before ${MAX_RUNTIME_MIN}m limit)" + TARGET_BRANCH="${{ inputs.pr_ref }}" + TARGET_BRANCH="${TARGET_BRANCH#refs/heads/}" + PUSH_TOKEN="${{ steps.auth_token.outputs.push_token }}" + REMOTE_URL="https://x-access-token:${PUSH_TOKEN}@github.com/${{ github.repository }}" + + # Fetch to get current FETCH_HEAD before checking for unpushed work + git fetch "${REMOTE_URL}" "${TARGET_BRANCH}" 2>/dev/null || true + CHANGED=$(git status --porcelain | wc -l) UNPUSHED=0 if git rev-parse FETCH_HEAD >/dev/null 2>&1; then @@ -965,12 +977,30 @@ jobs: git add -A git commit -m "chore(codex-keepalive): pre-timeout checkpoint (PR #${PR_NUM:-})" --no-verify || true fi - TARGET_BRANCH="${{ inputs.pr_ref }}" - TARGET_BRANCH="${TARGET_BRANCH#refs/heads/}" - PUSH_TOKEN="${{ steps.auth_token.outputs.push_token }}" - REMOTE_URL="https://x-access-token:${PUSH_TOKEN}@github.com/${{ 
github.repository }}" - git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" || \ - echo "::warning::Watchdog push failed" + # Rebase onto remote before pushing to avoid non-fast-forward rejection + if git rev-parse FETCH_HEAD >/dev/null 2>&1; then + if ! git rebase FETCH_HEAD 2>/dev/null; then + echo "::warning::Watchdog rebase failed; attempting merge fallback." + git rebase --abort 2>/dev/null || true + git pull --no-rebase "${REMOTE_URL}" "${TARGET_BRANCH}" \ + --allow-unrelated-histories 2>/dev/null || true + fi + fi + # Push with one retry + if ! git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null; then + echo "::warning::Watchdog push failed (attempt 1), retrying after fetch/rebase..." + sleep 3 + git fetch "${REMOTE_URL}" "${TARGET_BRANCH}" 2>/dev/null || true + if git rev-parse FETCH_HEAD >/dev/null 2>&1; then + git rebase FETCH_HEAD 2>/dev/null || { + git rebase --abort 2>/dev/null || true + git pull --no-rebase "${REMOTE_URL}" "${TARGET_BRANCH}" \ + --allow-unrelated-histories 2>/dev/null || true + } + fi + git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null || \ + echo "::warning::Watchdog push failed after retry" + fi echo "watchdog-saved=true" >> "$GITHUB_OUTPUT" else echo "::notice::Watchdog: no uncommitted or unpushed work to save" From 0b9a46cf508ad24c6b855cf7da13b5ddc41413fb Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 01:45:32 +0000 Subject: [PATCH 07/15] docs: add watchdog-saved to workflow outputs reference Update WORKFLOW_OUTPUTS.md to include the new watchdog-saved output from reusable-codex-run.yml, fixing the test_reusable_workflow_outputs_documented test. 
https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- docs/ci/WORKFLOW_OUTPUTS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ci/WORKFLOW_OUTPUTS.md b/docs/ci/WORKFLOW_OUTPUTS.md index 543d15099..7e971cf5b 100644 --- a/docs/ci/WORKFLOW_OUTPUTS.md +++ b/docs/ci/WORKFLOW_OUTPUTS.md @@ -78,6 +78,7 @@ that only emit artifacts, see the "Workflows without workflow_call outputs" sect | `reusable-codex-run.yml` | `error-category` | string | Error category if failure occurred (transient/auth/resource/logic/unknown) | `needs.codex.outputs.error-category` | | `reusable-codex-run.yml` | `error-type` | string | Error type if failure occurred (codex/infrastructure/auth/unknown) | `needs.codex.outputs.error-type` | | `reusable-codex-run.yml` | `error-recovery` | string | Suggested recovery action if failure occurred | `needs.codex.outputs.error-recovery` | +| `reusable-codex-run.yml` | `watchdog-saved` | string (boolean-like) | Whether the pre-timeout watchdog saved uncommitted work (true/false) | `needs.codex.outputs.watchdog-saved` | | `reusable-codex-run.yml` | `llm-analysis-run` | string (boolean-like) | Whether LLM analysis was performed | `needs.codex.outputs.llm-analysis-run` | | `reusable-codex-run.yml` | `llm-provider` | string | LLM provider used for analysis (github-models, openai, regex-fallback) | `needs.codex.outputs.llm-provider` | | `reusable-codex-run.yml` | `llm-model` | string | Specific model used for analysis (e.g., gpt-4o, claude-3-5-sonnet) | `needs.codex.outputs.llm-model` | From 97056c028445b147ab35ce5d091fbd9dbcaec2b2 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 02:03:54 +0000 Subject: [PATCH 08/15] fix: skip non-issue refs like "Run #NNN" in extractIssueNumberFromPull The body scan in extractIssueNumberFromPull was treating patterns like "Run #2615" as issue references, causing the Upsert PR body sections check to fail with a 404 when trying to fetch non-existent issues. 
Add a preceding-word filter to skip #NNN when preceded by common non-issue words (run, attempt, step, job, check, task, version, v). Add 12 unit tests covering the extraction logic. https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .../agents-pr-meta-keepalive.test.js | 63 ++++++++++++++++++- .github/scripts/agents_pr_meta_keepalive.js | 5 ++ .../scripts/agents_pr_meta_keepalive.js | 5 ++ 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js b/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js index 673a5d13c..64cec070d 100644 --- a/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js +++ b/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js @@ -3,7 +3,7 @@ const test = require('node:test'); const assert = require('node:assert/strict'); -const { detectKeepalive } = require('../agents_pr_meta_keepalive.js'); +const { detectKeepalive, extractIssueNumberFromPull } = require('../agents_pr_meta_keepalive.js'); function createCore(outputs) { return { @@ -543,3 +543,64 @@ test('detectKeepalive does not cache empty pull responses', async () => { assert.equal(outputsFirst.reason, 'pull-fetch-failed'); assert.equal(outputsSecond.reason, 'pull-fetch-failed'); }); + +// --- extractIssueNumberFromPull tests --- + +test('extractIssueNumberFromPull returns null for null input', () => { + assert.equal(extractIssueNumberFromPull(null), null); +}); + +test('extractIssueNumberFromPull extracts from meta comment', () => { + const pull = { body: 'Some text more text', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), 42); +}); + +test('extractIssueNumberFromPull extracts from branch name', () => { + const pull = { body: '', head: { ref: 'codex/issue-99' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), 99); +}); + +test('extractIssueNumberFromPull extracts from title', () => { + const pull = { body: '', head: { ref: 'feature' 
}, title: 'fix: resolve #55' }; + assert.equal(extractIssueNumberFromPull(pull), 55); +}); + +test('extractIssueNumberFromPull extracts from body hash ref', () => { + const pull = { body: 'Fixes #123', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), 123); +}); + +test('extractIssueNumberFromPull skips "Run #NNN" in body', () => { + const pull = { body: 'Run #2615 timed out after 45 minutes', head: { ref: 'claude/fix-something' }, title: 'fix: pre-timeout watchdog' }; + assert.equal(extractIssueNumberFromPull(pull), null); +}); + +test('extractIssueNumberFromPull skips "run #NNN" case-insensitive', () => { + const pull = { body: 'The run #500 failed', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), null); +}); + +test('extractIssueNumberFromPull skips "attempt #N" in body', () => { + const pull = { body: 'attempt #3 was successful', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), null); +}); + +test('extractIssueNumberFromPull skips "step #N" in body', () => { + const pull = { body: 'step #2 completed', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), null); +}); + +test('extractIssueNumberFromPull skips "version #N" in body', () => { + const pull = { body: 'Upgraded to version #4', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), null); +}); + +test('extractIssueNumberFromPull prefers meta comment over "Run #NNN"', () => { + const pull = { body: ' Run #2615 timed out', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), 77); +}); + +test('extractIssueNumberFromPull finds real issue after skipping Run ref', () => { + const pull = { body: 'Run #2615 timed out. 
Relates to #88', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), 88); +}); diff --git a/.github/scripts/agents_pr_meta_keepalive.js b/.github/scripts/agents_pr_meta_keepalive.js index 7a2197536..10eab2c81 100644 --- a/.github/scripts/agents_pr_meta_keepalive.js +++ b/.github/scripts/agents_pr_meta_keepalive.js @@ -240,6 +240,11 @@ function extractIssueNumberFromPull(pull) { if (match.index > 0 && /\w/.test(bodyText[match.index - 1])) { continue; } + // Skip non-issue refs like "Run #123", "run #123", "attempt #2" + const preceding = bodyText.slice(Math.max(0, match.index - 20), match.index); + if (/\b(?:run|attempt|step|job|check|task|version|v)\s*$/i.test(preceding)) { + continue; + } candidates.push(match[1]); } diff --git a/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js b/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js index 7a2197536..10eab2c81 100644 --- a/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js +++ b/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js @@ -240,6 +240,11 @@ function extractIssueNumberFromPull(pull) { if (match.index > 0 && /\w/.test(bodyText[match.index - 1])) { continue; } + // Skip non-issue refs like "Run #123", "run #123", "attempt #2" + const preceding = bodyText.slice(Math.max(0, match.index - 20), match.index); + if (/\b(?:run|attempt|step|job|check|task|version|v)\s*$/i.test(preceding)) { + continue; + } candidates.push(match[1]); } From 96e5f6ec2ec143c047674e2caaf4c35ec763d40e Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 02:29:53 +0000 Subject: [PATCH 09/15] feat: add session analysis, completion comment, and error diagnostics to Claude runner Closes the three remaining feature gaps between the Claude and Codex runners identified in issue #1646: 1. 
**Session analysis (LLM-powered)**: Reuses analyze_codex_session.py which auto-detects Claude's plain-text session log (data_source=summary) and feeds it through the same LLM analysis pipeline for structured task completion assessment. Outputs feed into the keepalive loop. 2. **Completion checkpoint comment**: Posts a PR comment summarizing completed tasks and acceptance criteria using the shared post_completion_comment.js script. Supports both claude-prompt*.md and codex-prompt*.md file names. 3. **Error diagnostics**: Adds GITHUB_STEP_SUMMARY with error table, creates a diagnostics artifact (JSON + agent output), and posts a structured PR comment on non-transient failures with recovery guidance and log links. Uses a distinct marker. https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/workflows/reusable-claude-run.yml | 409 +++++++++++++++++++++- 1 file changed, 400 insertions(+), 9 deletions(-) diff --git a/.github/workflows/reusable-claude-run.yml b/.github/workflows/reusable-claude-run.yml index 490eac122..9827d6819 100644 --- a/.github/workflows/reusable-claude-run.yml +++ b/.github/workflows/reusable-claude-run.yml @@ -1207,18 +1207,209 @@ jobs: claude-session*.jsonl if-no-files-found: ignore - - name: Compatibility outputs (LLM analysis placeholders) + - name: Analyze Claude session + id: analyze_session + if: always() + env: + PYTHONPATH: ${{ github.workspace }}/.workflows-lib:${{ github.workspace }} + PR_NUM: ${{ inputs.pr_number }} + run: | + set -euo pipefail + + if [ -n "${PR_NUM}" ]; then + SESSION_LOG="claude-session-${PR_NUM}.log" + else + SESSION_LOG="claude-session.log" + fi + + # Check if session file exists and has content + if [ ! -f "$SESSION_LOG" ] || [ ! 
-s "$SESSION_LOG" ]; then + echo "No Claude session log found or file is empty" + echo "session-available=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "Session log captured: $(wc -l < "$SESSION_LOG") lines, $(wc -c < "$SESSION_LOG") bytes" + echo "session-available=true" >> "$GITHUB_OUTPUT" + echo "session-file=$SESSION_LOG" >> "$GITHUB_OUTPUT" + + - name: Analyze task completion with LLM + id: llm_analysis + if: >- + always() && + steps.analyze_session.outputs.session-available == 'true' && + inputs.pr_number != '' + env: + PYTHONPATH: ${{ github.workspace }}/.workflows-lib:${{ github.workspace }} + PR_NUM: ${{ inputs.pr_number }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + CLAUDE_API_STRANSKE: ${{ secrets.CLAUDE_API_STRANSKE }} + run: | + set -euo pipefail + + SESSION_LOG="${{ steps.analyze_session.outputs.session-file }}" + ANALYSIS_FILE="claude-analysis-${PR_NUM}.json" + + # Fetch PR body to extract tasks + echo "Fetching PR #${PR_NUM} body..." + set +e + PR_BODY=$(gh pr view "${PR_NUM}" --json body --jq '.body' 2>&1) + fetch_exit=$? + set -e + + if [ $fetch_exit -ne 0 ] || [ -z "$PR_BODY" ]; then + echo "::warning::Could not fetch PR body for #${PR_NUM} (exit code: $fetch_exit)" + echo "::warning::LLM task completion analysis will be skipped" + if [ $fetch_exit -ne 0 ]; then + echo "Error output: $PR_BODY" + fi + echo "llm-analysis-run=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Save PR body to temp file + echo "$PR_BODY" > pr_body.md + + # Run full LLM analysis and save JSON output + # The analyze_codex_session.py script auto-detects text vs JSONL input, + # so it works with Claude's plain-text session logs (data_source=summary). + echo "Running LLM-powered task completion analysis..." + if [ ! -f .workflows-lib/scripts/analyze_codex_session.py ]; then + echo "::error::Analysis script not found." 
+ echo "::error::Missing: .workflows-lib/scripts/analyze_codex_session.py" + echo "llm-analysis-run=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + python3 .workflows-lib/scripts/analyze_codex_session.py \ + --session-file "$SESSION_LOG" \ + --pr-body-file pr_body.md \ + --output json > "$ANALYSIS_FILE" || { + echo "::warning::LLM analysis failed, continuing without it" + cat "$ANALYSIS_FILE" 2>/dev/null || true + echo "llm-analysis-run=false" >> "$GITHUB_OUTPUT" + rm -f "$ANALYSIS_FILE" + exit 0 + } + + # Also output to GitHub Actions for visibility + python3 .workflows-lib/scripts/analyze_codex_session.py \ + --session-file "$SESSION_LOG" \ + --pr-body-file pr_body.md \ + --output github-actions || true + + echo "llm-analysis-run=true" >> "$GITHUB_OUTPUT" + echo "analysis-file=$ANALYSIS_FILE" >> "$GITHUB_OUTPUT" + + # Extract key fields for downstream use + if [ -f "$ANALYSIS_FILE" ]; then + python3 - "$ANALYSIS_FILE" >> "$GITHUB_OUTPUT" <<'PY' + import json + import sys + + analysis_path = sys.argv[1] + with open(analysis_path, encoding='utf-8') as handle: + data = json.load(handle) + + completed_tasks = json.dumps(data.get('completed_tasks', [])) + quality_warnings = json.dumps(data.get('quality_warnings', [])) + + print(f"completed-tasks={completed_tasks}") + print(f"provider={data.get('provider', 'unknown')}") + print(f"model={data.get('model', 'unknown')}") + print(f"confidence={data.get('confidence', 0)}") + print(f"raw-confidence={data.get('raw_confidence', data.get('confidence', 0))}") + print(f"effort-score={data.get('effort_score', 0)}") + print(f"data-quality={data.get('data_quality', 'unknown')}") + print(f"analysis-text-length={data.get('analysis_text_length', 0)}") + print(f"quality-warnings={quality_warnings}") + PY + fi + + - name: Compatibility outputs (LLM analysis) id: compat if: always() run: | - { - echo "llm-analysis-run=false" - echo "llm-provider=" - echo "llm-model=" - echo "llm-confidence=" - echo "llm-completed-tasks=[]" - echo 
"llm-has-completions=false" - } >> "$GITHUB_OUTPUT" + # If LLM analysis ran, forward its outputs; otherwise emit placeholders. + if [ "${{ steps.llm_analysis.outputs.llm-analysis-run }}" = "true" ]; then + { + echo "llm-analysis-run=true" + echo "llm-provider=${{ steps.llm_analysis.outputs.provider }}" + echo "llm-model=${{ steps.llm_analysis.outputs.model }}" + echo "llm-confidence=${{ steps.llm_analysis.outputs.confidence }}" + echo "llm-completed-tasks=${{ steps.llm_analysis.outputs.completed-tasks }}" + has_completions="false" + tasks='${{ steps.llm_analysis.outputs.completed-tasks }}' + if [ -n "$tasks" ] && [ "$tasks" != "[]" ]; then + has_completions="true" + fi + echo "llm-has-completions=${has_completions}" + } >> "$GITHUB_OUTPUT" + else + { + echo "llm-analysis-run=false" + echo "llm-provider=" + echo "llm-model=" + echo "llm-confidence=" + echo "llm-completed-tasks=[]" + echo "llm-has-completions=false" + } >> "$GITHUB_OUTPUT" + fi + + - name: Post completion checkpoint comment + id: completion_comment + if: steps.commit.outputs.changes-made == 'true' && inputs.pr_number != '' + uses: actions/github-script@v8 + env: + PR_NUMBER: ${{ inputs.pr_number }} + COMMIT_SHA: ${{ steps.commit.outputs.commit-sha }} + ITERATION: ${{ inputs.iteration || '' }} + with: + script: | + // Try .workflows-lib first (consumer repos), fall back to local copy + const fs = require('fs'); + const modulePath = fs.existsSync('./.workflows-lib/.github/scripts/post_completion_comment.js') + ? 
'./.workflows-lib/.github/scripts/post_completion_comment.js' + : './.github/scripts/post_completion_comment.js'; + const { postCompletionComment } = require(modulePath); + + // Determine prompt file — Claude uses claude-prompt*.md + const prNumber = process.env.PR_NUMBER || ''; + let promptFile = 'claude-prompt.md'; + if (prNumber) { + const prSpecific = `claude-prompt-${prNumber}.md`; + if (fs.existsSync(prSpecific)) { + promptFile = prSpecific; + } + } + // Fall back to codex-prompt*.md if claude variant not found + if (!fs.existsSync(promptFile)) { + const codexPrompt = prNumber ? `codex-prompt-${prNumber}.md` : 'codex-prompt.md'; + if (fs.existsSync(codexPrompt)) { + promptFile = codexPrompt; + } + } + + const result = await postCompletionComment({ + github, context, core, + inputs: { + pr_number: process.env.PR_NUMBER, + commit_sha: process.env.COMMIT_SHA, + iteration: process.env.ITERATION, + prompt_file: promptFile, + }, + }); + core.setOutput('posted', result.posted ? 'true' : 'false'); + core.setOutput('tasks', String(result.tasks || 0)); + core.setOutput('acceptance', String(result.acceptance || 0)); + if (result.posted) { + core.info( + `Posted completion checkpoint: ${result.tasks} tasks, ` + + `${result.acceptance} acceptance criteria`, + ); + } - name: Classify failure id: classify_failure @@ -1275,7 +1466,207 @@ jobs: errorInfo.category === ERROR_CATEGORIES.transient ? 
'true' : 'false'; core.setOutput('is_transient', isTransient); + core.setOutput('error_summary', summary || ''); + console.log(`Error Classification:`); console.log(` Category: ${errorInfo.category}`); console.log(` Type: ${errorType}`); console.log(` Recovery: ${errorInfo.recovery}`); + + - name: Write error summary to GITHUB_STEP_SUMMARY + if: always() && steps.run_claude.outputs.exit-code != '0' + env: + EXIT_CODE: ${{ steps.run_claude.outputs.exit-code }} + OUTPUT_SUMMARY: ${{ steps.run_claude.outputs.final-message-summary }} + ERROR_CATEGORY: ${{ steps.classify_failure.outputs.error_category }} + ERROR_TYPE: ${{ steps.classify_failure.outputs.error_type }} + ERROR_RECOVERY: ${{ steps.classify_failure.outputs.error_recovery }} + MODE: ${{ inputs.mode }} + PR_NUMBER: ${{ inputs.pr_number }} + run: | + set -euo pipefail + { + echo "## Claude Run Failed" + echo "" + echo "| Field | Value |" + echo "|-------|-------|" + echo "| Mode | ${MODE:-unknown} |" + echo "| Exit Code | ${EXIT_CODE:-unknown} |" + echo "| Error Category | ${ERROR_CATEGORY:-unknown} |" + echo "| Error Type | ${ERROR_TYPE:-unknown} |" + if [ -n "${PR_NUMBER:-}" ]; then + echo "| PR | #${PR_NUMBER} |" + fi + echo "" + echo "### Recovery Guidance" + echo "" + echo "${ERROR_RECOVERY:-Check logs for more details.}" + echo "" + if [ -n "${OUTPUT_SUMMARY:-}" ]; then + echo "### Output Summary" + echo "" + echo '```' + echo "${OUTPUT_SUMMARY}" + echo '```' + fi + } >> "$GITHUB_STEP_SUMMARY" + + - name: Create error diagnostics artifact + if: always() && steps.run_claude.outputs.exit-code != '0' + env: + EXIT_CODE: ${{ steps.run_claude.outputs.exit-code }} + OUTPUT_SUMMARY: ${{ steps.run_claude.outputs.final-message-summary }} + ERROR_CATEGORY: ${{ steps.classify_failure.outputs.error_category }} + ERROR_TYPE: ${{ steps.classify_failure.outputs.error_type }} + ERROR_RECOVERY: ${{ steps.classify_failure.outputs.error_recovery }} + IS_TRANSIENT: ${{ steps.classify_failure.outputs.is_transient }} + MODE: 
${{ inputs.mode }} + PR_NUMBER: ${{ inputs.pr_number }} + RUN_URL: >- + ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + set -euo pipefail + mkdir -p error-diagnostics + + # Create JSON diagnostics file + cat > error-diagnostics/diagnostics.json << JSONEOF + { + "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)", + "run_id": "${{ github.run_id }}", + "run_url": "${RUN_URL}", + "agent": "claude", + "mode": "${MODE:-unknown}", + "pr_number": "${PR_NUMBER:-}", + "exit_code": "${EXIT_CODE:-unknown}", + "error_category": "${ERROR_CATEGORY:-unknown}", + "error_type": "${ERROR_TYPE:-unknown}", + "is_transient": ${IS_TRANSIENT:-false}, + "recovery_guidance": "${ERROR_RECOVERY:-unknown}" + } + JSONEOF + + # Copy claude output if available + for f in claude-output*.md; do + [ -f "$f" ] && cp "$f" error-diagnostics/ && break + done 2>/dev/null || true + + echo "Created error diagnostics in error-diagnostics/" + + - name: Upload error diagnostics + if: always() && steps.run_claude.outputs.exit-code != '0' + uses: actions/upload-artifact@v6 + with: + name: error-diagnostics-${{ inputs.mode }}-${{ github.run_id }} + path: error-diagnostics/ + retention-days: 30 + + - name: Post PR comment on non-transient failure + if: >- + always() && steps.run_claude.outputs.exit-code != '0' && + steps.classify_failure.outputs.is_transient != 'true' && inputs.pr_number != '' + uses: actions/github-script@v8 + env: + PR_NUMBER: ${{ inputs.pr_number }} + EXIT_CODE: ${{ steps.run_claude.outputs.exit-code }} + ERROR_CATEGORY: ${{ steps.classify_failure.outputs.error_category }} + ERROR_TYPE: ${{ steps.classify_failure.outputs.error_type }} + ERROR_RECOVERY: ${{ steps.classify_failure.outputs.error_recovery }} + OUTPUT_SUMMARY: ${{ steps.run_claude.outputs.final-message-summary }} + MODE: ${{ inputs.mode }} + RUN_URL: >- + ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + with: + script: | + const fs = require('fs'); + let 
withRetry; + const retryPath = './.workflows-lib/.github/scripts/github-api-with-retry.js'; + const localRetryPath = './.github/scripts/github-api-with-retry.js'; + if (fs.existsSync(retryPath)) { + ({ withRetry } = require(retryPath)); + } else if (fs.existsSync(localRetryPath)) { + ({ withRetry } = require(localRetryPath)); + } else { + // Inline fallback: single attempt, no retry + withRetry = (fn) => fn(); + } + + const prNumber = parseInt(process.env.PR_NUMBER, 10); + if (!prNumber || prNumber <= 0) { + console.log('No valid PR number, skipping comment'); + return; + } + + const exitCode = process.env.EXIT_CODE || 'unknown'; + const category = process.env.ERROR_CATEGORY || 'unknown'; + const errorType = process.env.ERROR_TYPE || 'unknown'; + const recovery = process.env.ERROR_RECOVERY || 'Check logs for details.'; + const summary = process.env.OUTPUT_SUMMARY || 'No output captured'; + const mode = process.env.MODE || 'unknown'; + const runUrl = process.env.RUN_URL || ''; + + const marker = ''; + + const body = `${marker} + ## Claude ${mode} run failed + + | Field | Value | + |-------|-------| + | Exit Code | \`${exitCode}\` | + | Error Category | \`${category}\` | + | Error Type | \`${errorType}\` | + | Run | [View logs](${runUrl}) | + + ### Suggested Recovery + + ${recovery} + + ### What to do + + 1. Check the [workflow logs](${runUrl}) for detailed error output + 2. If this is a configuration issue, update the relevant settings + 3. If the error persists, consider adding the \`needs-human\` label for manual review + 4. Re-run the workflow once the issue is resolved + +
<details> + <summary>Output summary</summary> + + \`\`\` + ${summary.slice(0, 500)} + \`\`\` + + </details>
+ `.trim().split('\n').map(l => l.trim()).join('\n'); + + // Check if we already have a failure comment + const { data: comments } = await withRetry(() => + github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + per_page: 100, + }) + ); + + const existingComment = comments.find(c => c.body && c.body.includes(marker)); + + if (existingComment) { + await withRetry(() => + github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existingComment.id, + body, + }) + ); + console.log(`Updated existing failure comment: ${existingComment.html_url}`); + } else { + const { data: newComment } = await withRetry(() => + github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body, + }) + ); + console.log(`Created failure comment: ${newComment.html_url}`); + } From 92809586dc13721dd7fcf4c259ecd2c9720c3c9f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 03:21:00 +0000 Subject: [PATCH 10/15] fix: address code review feedback from Codex and Copilot Claude runner (reusable-claude-run.yml): - Fix shell quoting of completed-tasks JSON by using env vars instead of inline ${{ }} expansion which breaks on apostrophes in task names - Declare OPENAI_API_KEY and CLAUDE_API_STRANSKE in workflow_call.secrets so callers can pass them (matches Codex runner) - Use printf instead of echo when writing PR body to disk to avoid mangling of -n/-e prefixes or backslashes - Add info log when falling back to codex-prompt file Codex runner (reusable-codex-run.yml): - Gate watchdog-saved=true on actual push success instead of emitting it unconditionally after push attempts that may have both failed - Use a fired-flag file so the watchdog kill only terminates the background process if it's still sleeping (hasn't started its commit/push work yet) https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- 
.github/workflows/reusable-claude-run.yml | 42 ++++++++++++++++------- .github/workflows/reusable-codex-run.yml | 26 +++++++++++--- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/.github/workflows/reusable-claude-run.yml b/.github/workflows/reusable-claude-run.yml index 9827d6819..9e1517c26 100644 --- a/.github/workflows/reusable-claude-run.yml +++ b/.github/workflows/reusable-claude-run.yml @@ -105,6 +105,13 @@ on: required: false WORKFLOWS_APP_PRIVATE_KEY: required: false + OPENAI_API_KEY: + required: false + description: >- + OpenAI API key for LLM analysis (enables model selection beyond GitHub Models) + CLAUDE_API_STRANSKE: + required: false + description: 'Anthropic API key for LLM analysis (enables Claude slot)' outputs: final-message: description: 'Full Claude output message (base64 encoded)' @@ -1270,7 +1277,7 @@ jobs: fi # Save PR body to temp file - echo "$PR_BODY" > pr_body.md + printf '%s' "$PR_BODY" > pr_body.md # Run full LLM analysis and save JSON output # The analyze_codex_session.py script auto-detects text vs JSONL input, @@ -1331,18 +1338,23 @@ jobs: - name: Compatibility outputs (LLM analysis) id: compat if: always() + env: + LLM_RAN: ${{ steps.llm_analysis.outputs.llm-analysis-run }} + LLM_PROVIDER: ${{ steps.llm_analysis.outputs.provider }} + LLM_MODEL: ${{ steps.llm_analysis.outputs.model }} + LLM_CONFIDENCE: ${{ steps.llm_analysis.outputs.confidence }} + LLM_COMPLETED_TASKS: ${{ steps.llm_analysis.outputs.completed-tasks }} run: | # If LLM analysis ran, forward its outputs; otherwise emit placeholders. 
- if [ "${{ steps.llm_analysis.outputs.llm-analysis-run }}" = "true" ]; then + if [ "${LLM_RAN}" = "true" ]; then { echo "llm-analysis-run=true" - echo "llm-provider=${{ steps.llm_analysis.outputs.provider }}" - echo "llm-model=${{ steps.llm_analysis.outputs.model }}" - echo "llm-confidence=${{ steps.llm_analysis.outputs.confidence }}" - echo "llm-completed-tasks=${{ steps.llm_analysis.outputs.completed-tasks }}" + echo "llm-provider=${LLM_PROVIDER}" + echo "llm-model=${LLM_MODEL}" + echo "llm-confidence=${LLM_CONFIDENCE}" + echo "llm-completed-tasks=${LLM_COMPLETED_TASKS}" has_completions="false" - tasks='${{ steps.llm_analysis.outputs.completed-tasks }}' - if [ -n "$tasks" ] && [ "$tasks" != "[]" ]; then + if [ -n "${LLM_COMPLETED_TASKS}" ] && [ "${LLM_COMPLETED_TASKS}" != "[]" ]; then has_completions="true" fi echo "llm-has-completions=${has_completions}" @@ -1375,7 +1387,9 @@ jobs: : './.github/scripts/post_completion_comment.js'; const { postCompletionComment } = require(modulePath); - // Determine prompt file — Claude uses claude-prompt*.md + // Determine prompt file — prefer PR-specific variant, then generic. + // The prompt file name is passed to postCompletionComment which uses + // it as the base name; it also checks for PR-specific variants internally. const prNumber = process.env.PR_NUMBER || ''; let promptFile = 'claude-prompt.md'; if (prNumber) { @@ -1384,11 +1398,13 @@ jobs: promptFile = prSpecific; } } - // Fall back to codex-prompt*.md if claude variant not found + // Also check codex-prompt as shared belt PRs use that naming. + // Only fall back when no claude-prompt variant exists at all. if (!fs.existsSync(promptFile)) { - const codexPrompt = prNumber ? `codex-prompt-${prNumber}.md` : 'codex-prompt.md'; - if (fs.existsSync(codexPrompt)) { - promptFile = codexPrompt; + const codexFallback = prNumber ? 
`codex-prompt-${prNumber}.md` : 'codex-prompt.md'; + if (fs.existsSync(codexFallback)) { + core.info(`No claude-prompt file found; using ${codexFallback}`); + promptFile = codexFallback; } } diff --git a/.github/workflows/reusable-codex-run.yml b/.github/workflows/reusable-codex-run.yml index f5dfa67be..c69303a34 100644 --- a/.github/workflows/reusable-codex-run.yml +++ b/.github/workflows/reusable-codex-run.yml @@ -951,8 +951,10 @@ jobs: WATCHDOG_DELAY=$(( (MAX_RUNTIME_MIN - GRACE_MIN) * 60 )) echo "watchdog-saved=false" >> "$GITHUB_OUTPUT" if [ "$WATCHDOG_DELAY" -gt 60 ]; then + WATCHDOG_FIRED_FLAG="/tmp/.watchdog-fired-$$" ( sleep "$WATCHDOG_DELAY" + touch "$WATCHDOG_FIRED_FLAG" echo "::warning::Pre-timeout watchdog fired (${GRACE_MIN}m before ${MAX_RUNTIME_MIN}m limit)" TARGET_BRANCH="${{ inputs.pr_ref }}" @@ -987,7 +989,10 @@ jobs: fi fi # Push with one retry - if ! git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null; then + watchdog_push_ok=false + if git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null; then + watchdog_push_ok=true + else echo "::warning::Watchdog push failed (attempt 1), retrying after fetch/rebase..." 
sleep 3 git fetch "${REMOTE_URL}" "${TARGET_BRANCH}" 2>/dev/null || true @@ -998,10 +1003,18 @@ jobs: --allow-unrelated-histories 2>/dev/null || true } fi - git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null || \ + if git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null; then + watchdog_push_ok=true + else echo "::warning::Watchdog push failed after retry" + fi + fi + if [ "$watchdog_push_ok" = "true" ]; then + echo "watchdog-saved=true" >> "$GITHUB_OUTPUT" + else + echo "::error::Watchdog: committed locally but failed to push" + echo "watchdog-saved=false" >> "$GITHUB_OUTPUT" fi - echo "watchdog-saved=true" >> "$GITHUB_OUTPUT" else echo "::notice::Watchdog: no uncommitted or unpushed work to save" fi @@ -1026,9 +1039,12 @@ jobs: prompt_content="$(cat "$PROMPT_FILE")" "${cmd[@]}" "$prompt_content" > "$SESSION_JSONL" 2>&1 || CODEX_EXIT=$? - # Kill watchdog if Codex finished before the timer fired + # Kill watchdog only if it hasn't fired yet. If it has already + # fired (flag file exists), it may be committing/pushing — let it finish. if [ -n "${WATCHDOG_PID:-}" ]; then - kill "$WATCHDOG_PID" 2>/dev/null || true + if [ ! 
-f "${WATCHDOG_FIRED_FLAG:-/tmp/.no-such-flag}" ]; then + kill "$WATCHDOG_PID" 2>/dev/null || true + fi wait "$WATCHDOG_PID" 2>/dev/null || true fi From 65e89c51a3b1c1003daf8596e319df12844fcd44 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 03:53:16 +0000 Subject: [PATCH 11/15] fix: address sync PR review feedback from coding agents - Remove "task" from the non-issue prefix filter in extractIssueNumberFromPull so "Task #123" is correctly treated as an issue reference (flagged by Codex on PAEM sync PR) - Make --legacy-peer-deps retry conditional on ERESOLVE/peer-dep errors instead of only firing on the first attempt (flagged by Copilot on TMP sync PR) - Add test for "Task #N" being treated as a valid issue ref https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/actions/setup-api-client/action.yml | 6 +++--- .github/scripts/__tests__/agents-pr-meta-keepalive.test.js | 5 +++++ .github/scripts/agents_pr_meta_keepalive.js | 2 +- .../.github/actions/setup-api-client/action.yml | 6 +++--- .../.github/scripts/agents_pr_meta_keepalive.js | 2 +- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/.github/actions/setup-api-client/action.yml b/.github/actions/setup-api-client/action.yml index 5fead358b..b9539f4a9 100644 --- a/.github/actions/setup-api-client/action.yml +++ b/.github/actions/setup-api-client/action.yml @@ -271,9 +271,9 @@ runs: echo "$npm_err" echo "::endgroup::" - # On first failure, also try --legacy-peer-deps in case it's a peer dep conflict - if [ "$attempt" -eq 1 ]; then - echo "::warning::Retrying with --legacy-peer-deps" + # On peer-dep / ERESOLVE failures, also try --legacy-peer-deps + if echo "$npm_err" | grep -qiE 'ERESOLVE|peer dep|Could not resolve dependency'; then + echo "::warning::Detected peer dependency conflict, retrying with --legacy-peer-deps" npm_output=$(mktemp) if npm install --no-save --legacy-peer-deps --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then rm -f "$npm_output" diff --git 
a/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js b/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js index 64cec070d..e653529bb 100644 --- a/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js +++ b/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js @@ -590,6 +590,11 @@ test('extractIssueNumberFromPull skips "step #N" in body', () => { assert.equal(extractIssueNumberFromPull(pull), null); }); +test('extractIssueNumberFromPull treats "Task #N" as a valid issue ref', () => { + const pull = { body: 'Task #42 is ready for review', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), 42); +}); + test('extractIssueNumberFromPull skips "version #N" in body', () => { const pull = { body: 'Upgraded to version #4', head: { ref: 'feature' }, title: 'stuff' }; assert.equal(extractIssueNumberFromPull(pull), null); diff --git a/.github/scripts/agents_pr_meta_keepalive.js b/.github/scripts/agents_pr_meta_keepalive.js index 10eab2c81..32cfa95c8 100644 --- a/.github/scripts/agents_pr_meta_keepalive.js +++ b/.github/scripts/agents_pr_meta_keepalive.js @@ -242,7 +242,7 @@ function extractIssueNumberFromPull(pull) { } // Skip non-issue refs like "Run #123", "run #123", "attempt #2" const preceding = bodyText.slice(Math.max(0, match.index - 20), match.index); - if (/\b(?:run|attempt|step|job|check|task|version|v)\s*$/i.test(preceding)) { + if (/\b(?:run|attempt|step|job|check|version|v)\s*$/i.test(preceding)) { continue; } candidates.push(match[1]); diff --git a/templates/consumer-repo/.github/actions/setup-api-client/action.yml b/templates/consumer-repo/.github/actions/setup-api-client/action.yml index 24736497c..b6ed888ad 100644 --- a/templates/consumer-repo/.github/actions/setup-api-client/action.yml +++ b/templates/consumer-repo/.github/actions/setup-api-client/action.yml @@ -271,9 +271,9 @@ runs: echo "$npm_err" echo "::endgroup::" - # On first failure, also try --legacy-peer-deps in case it's a peer dep 
conflict - if [ "$attempt" -eq 1 ]; then - echo "::warning::Retrying with --legacy-peer-deps" + # On peer-dep / ERESOLVE failures, also try --legacy-peer-deps + if echo "$npm_err" | grep -qiE 'ERESOLVE|peer dep|Could not resolve dependency'; then + echo "::warning::Detected peer dependency conflict, retrying with --legacy-peer-deps" npm_output=$(mktemp) if npm install --no-save --legacy-peer-deps --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then rm -f "$npm_output" diff --git a/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js b/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js index 10eab2c81..32cfa95c8 100644 --- a/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js +++ b/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js @@ -242,7 +242,7 @@ function extractIssueNumberFromPull(pull) { } // Skip non-issue refs like "Run #123", "run #123", "attempt #2" const preceding = bodyText.slice(Math.max(0, match.index - 20), match.index); - if (/\b(?:run|attempt|step|job|check|task|version|v)\s*$/i.test(preceding)) { + if (/\b(?:run|attempt|step|job|check|version|v)\s*$/i.test(preceding)) { continue; } candidates.push(match[1]); From 9e89707e896fde87d63bf2b99ad46a9e52162a4b Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 05:13:51 +0000 Subject: [PATCH 12/15] fix: install js-yaml locally instead of globally in label sync workflow The label sync workflow (maint-69-sync-labels.yml) has been failing since Feb 2 because npm install -g js-yaml installs to the global prefix which actions/github-script can't resolve. Install locally so Node's module resolution finds it in node_modules/. 
https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/workflows/maint-69-sync-labels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maint-69-sync-labels.yml b/.github/workflows/maint-69-sync-labels.yml index 965c4898f..8f73338eb 100644 --- a/.github/workflows/maint-69-sync-labels.yml +++ b/.github/workflows/maint-69-sync-labels.yml @@ -40,7 +40,7 @@ jobs: github_token: ${{ github.token }} - name: Install js-yaml - run: npm install -g js-yaml + run: npm install js-yaml - name: Parse labels-core.yml id: parse From 7a11836372e9e58e9bf8faf98770c68bad4c5133 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 15:09:48 +0000 Subject: [PATCH 13/15] Fix auto-pilot stall when belt dispatcher run is cancelled Two changes to prevent the issue stranske/Counter_Risk#34 scenario where a single cancelled belt dispatcher run strands an issue: 1. Capability-check step: dispatch the belt up to 3 times with a 15s verification window after each attempt. If the dispatched run is not queued/in_progress, retry. This catches silent cancellations before the auto-pilot moves on. 2. Branch-check loop: on the 2nd+ backoff iteration, check whether any belt dispatcher run is still active. If not, re-dispatch the belt before sleeping. This makes the loop self-healing instead of passively waiting for a run that was already cancelled. 
https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .../.github/workflows/agents-auto-pilot.yml | 149 ++++++++++++++++-- 1 file changed, 132 insertions(+), 17 deletions(-) diff --git a/templates/consumer-repo/.github/workflows/agents-auto-pilot.yml b/templates/consumer-repo/.github/workflows/agents-auto-pilot.yml index d29bc9958..070c3e02c 100644 --- a/templates/consumer-repo/.github/workflows/agents-auto-pilot.yml +++ b/templates/consumer-repo/.github/workflows/agents-auto-pilot.yml @@ -1886,23 +1886,86 @@ jobs: return; } - // Force-dispatch Codex belt dispatcher to create the branch - try { - await withRetry((client) => client.rest.actions.createWorkflowDispatch({ - owner: context.repo.owner, - repo: context.repo.repo, - workflow_id: 'agents-71-codex-belt-dispatcher.yml', - ref: baseBranch, - inputs: { - agent_key: agentKey, - force_issue: issueNumber.toString(), - dry_run: 'false' + // Force-dispatch Codex belt dispatcher to create the branch. + // Retry up to 3 times because GitHub Actions can silently cancel + // queued runs before they receive a runner (observed on issue #34). 
+ const maxDispatchAttempts = 3; + let dispatchSucceeded = false; + for (let attempt = 1; attempt <= maxDispatchAttempts; attempt++) { + try { + await withRetry((client) => client.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + ref: baseBranch, + inputs: { + agent_key: agentKey, + force_issue: issueNumber.toString(), + dry_run: 'false' + } + })); + core.info( + `Dispatched belt dispatcher (agent: ${agentKey}) ` + + `for issue #${issueNumber} (attempt ${attempt}/${maxDispatchAttempts})` + ); + } catch (dispatchError) { + core.warning( + `Belt dispatch attempt ${attempt} failed: ${dispatchError?.message}` + ); + if (attempt < maxDispatchAttempts) { + await new Promise(r => setTimeout(r, attempt * 5000)); } - })); - const prefix = `Dispatched belt dispatcher (agent: ${agentKey}) for issue`; - core.info(`${prefix} #${issueNumber}`); - } catch (dispatchError) { - core.warning(`Could not dispatch belt dispatcher: ${dispatchError?.message}`); + continue; + } + + // Wait briefly then verify the dispatched run is queued/in_progress + // (not cancelled before receiving a runner). 
+ if (attempt < maxDispatchAttempts) { + await new Promise(r => setTimeout(r, 15000)); + try { + const { data: runs } = await withRetry((client) => + client.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + per_page: 5, + }) + ); + const alive = runs.workflow_runs.find( + r => r.status === 'queued' || r.status === 'in_progress' + ); + if (alive) { + core.info(`Belt dispatcher run ${alive.id} is ${alive.status}`); + dispatchSucceeded = true; + break; + } + // All recent runs completed — check if the latest one succeeded + const latest = runs.workflow_runs[0]; + if (latest && latest.conclusion === 'success') { + core.info(`Belt dispatcher run ${latest.id} already succeeded`); + dispatchSucceeded = true; + break; + } + core.warning( + `Belt dispatcher run not alive after attempt ${attempt}` + + ` (latest: ${latest?.id} ${latest?.conclusion}); retrying…` + ); + } catch (checkError) { + core.warning(`Could not verify dispatcher run: ${checkError?.message}`); + // Optimistically assume it's running + dispatchSucceeded = true; + break; + } + } else { + // Last attempt — no verification, assume success + dispatchSucceeded = true; + } + } + if (!dispatchSucceeded) { + core.warning( + `Belt dispatcher could not be confirmed after ${maxDispatchAttempts} attempts ` + + `for issue #${issueNumber}. The branch-check loop will re-dispatch if needed.` + ); } - name: Metrics - End capability check timer @@ -2304,6 +2367,58 @@ jobs: const actualBackoffMs = Math.min(backoffMs, maxBackoffMs); const actualMinutes = Math.round(actualBackoffMs / 60000); + // Re-dispatch the belt if no dispatcher run is active. + // This recovers from cases where the original dispatch was + // silently cancelled before receiving a runner (see issue #34). 
+ let redispatched = false; + try { + const { data: runs } = await withRetry((client) => + client.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + per_page: 5, + }) + ); + const alive = runs.workflow_runs.find( + r => r.status === 'queued' || r.status === 'in_progress' + ); + if (!alive) { + core.info('No active belt dispatcher run found; re-dispatching'); + const { data: repoInfo } = await withRetry((client) => + client.rest.repos.get({ + owner: context.repo.owner, + repo: context.repo.repo, + }) + ); + const dispatchRef = repoInfo.default_branch || 'main'; + await withRetry((client) => + client.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + ref: dispatchRef, + inputs: { + agent_key: agentKey, + force_issue: String(issueNumber), + dry_run: 'false', + }, + }) + ); + redispatched = true; + core.info(`Re-dispatched belt for issue #${issueNumber}`); + } else { + core.info( + `Belt dispatcher run ${alive.id} still ${alive.status}; skipping re-dispatch` + ); + } + } catch (redispatchErr) { + core.warning(`Belt re-dispatch check failed: ${redispatchErr?.message}`); + } + + const redispatchNote = redispatched + ? ' Re-dispatched belt dispatcher.' + : ''; core.info(`Applying branch-creation backoff: waiting ${actualMinutes} minutes`); await withRetry((client) => client.rest.issues.createComment({ owner: context.repo.owner, @@ -2311,7 +2426,7 @@ jobs: issue_number: issueNumber, body: `🤖 **Auto-pilot**: Backoff delay (${actualMinutes}m) - Branch not created yet. Attempt ${stallCount + 1}/${maxStallRetries}. + Branch not created yet. 
Attempt ${stallCount + 1}/${maxStallRetries}.${redispatchNote} Waiting before retry...` })); From 5d4a9ad57e8ecb8defb66d54a879db8cb39f626d Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 16:27:46 +0000 Subject: [PATCH 14/15] Fix keepalive loop stuck in perpetual review after progress stall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a progress-review action, rounds_without_task_completion was never reset. The next keepalive trigger would re-evaluate, find the counter still above threshold, enter review again, increment the counter, and repeat — permanently trapping the loop in review mode with no agent work ever running again. This affected all 4 agent PRs (#266, #267, #268, #269) which each stalled at progress-review-N with uncompleted tasks. Fix: 1. keepalive_loop.js summary function: reset rounds_without_task_completion to 0 after a review action, so the next evaluate triggers a run instead of another review. The review already provided course-correction feedback — the agent needs a chance to act on it. 2. agents-keepalive-loop.yml: add progress-review as a dependency of the summary job so the state update waits for the review to complete before persisting the reset counter. https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/scripts/keepalive_loop.js | 9 +++++++++ .github/workflows/agents-keepalive-loop.yml | 1 + .../consumer-repo/.github/scripts/keepalive_loop.js | 9 +++++++++ .../.github/workflows/agents-keepalive-loop.yml | 1 + 4 files changed, 20 insertions(+) diff --git a/.github/scripts/keepalive_loop.js b/.github/scripts/keepalive_loop.js index ae5403a87..478c6a12b 100644 --- a/.github/scripts/keepalive_loop.js +++ b/.github/scripts/keepalive_loop.js @@ -2967,6 +2967,9 @@ async function updateKeepaliveLoopSummary({ github: rawGithub, context, core, in // tasks off. Re-derive the counter here with the authoritative counts. 
// When force_retry is active, honour the evaluate-step's reset to 0 and // do not overwrite it — the human explicitly wants a fresh start. + // After a review action, reset to 0 so the next evaluate triggers a run + // instead of another review (the review already provided course-correction + // feedback — the agent needs a chance to act on it). if (isForceRetry) { if (roundsWithoutTaskCompletion !== 0) { core?.info?.( @@ -2975,6 +2978,12 @@ async function updateKeepaliveLoopSummary({ github: rawGithub, context, core, in ); roundsWithoutTaskCompletion = 0; } + } else if (action === 'review') { + core?.info?.( + `[summary] review action completed — resetting rounds_without_task_completion ` + + `from ${roundsWithoutTaskCompletion} to 0 so next iteration runs the agent`, + ); + roundsWithoutTaskCompletion = 0; } else { const prevTasks = previousState?.tasks || {}; const prevUncheckedForCounter = toNumber(prevTasks.unchecked, tasksUnchecked); diff --git a/.github/workflows/agents-keepalive-loop.yml b/.github/workflows/agents-keepalive-loop.yml index 629e598f0..b7cdae91f 100644 --- a/.github/workflows/agents-keepalive-loop.yml +++ b/.github/workflows/agents-keepalive-loop.yml @@ -859,6 +859,7 @@ jobs: - evaluate - run-codex - run-claude + - progress-review # Run if PR exists, handle skipped/failed agent jobs gracefully # run-codex will be skipped when action != run/fix/conflict, which is expected if: | diff --git a/templates/consumer-repo/.github/scripts/keepalive_loop.js b/templates/consumer-repo/.github/scripts/keepalive_loop.js index ae5403a87..478c6a12b 100644 --- a/templates/consumer-repo/.github/scripts/keepalive_loop.js +++ b/templates/consumer-repo/.github/scripts/keepalive_loop.js @@ -2967,6 +2967,9 @@ async function updateKeepaliveLoopSummary({ github: rawGithub, context, core, in // tasks off. Re-derive the counter here with the authoritative counts. 
// When force_retry is active, honour the evaluate-step's reset to 0 and // do not overwrite it — the human explicitly wants a fresh start. + // After a review action, reset to 0 so the next evaluate triggers a run + // instead of another review (the review already provided course-correction + // feedback — the agent needs a chance to act on it). if (isForceRetry) { if (roundsWithoutTaskCompletion !== 0) { core?.info?.( @@ -2975,6 +2978,12 @@ async function updateKeepaliveLoopSummary({ github: rawGithub, context, core, in ); roundsWithoutTaskCompletion = 0; } + } else if (action === 'review') { + core?.info?.( + `[summary] review action completed — resetting rounds_without_task_completion ` + + `from ${roundsWithoutTaskCompletion} to 0 so next iteration runs the agent`, + ); + roundsWithoutTaskCompletion = 0; } else { const prevTasks = previousState?.tasks || {}; const prevUncheckedForCounter = toNumber(prevTasks.unchecked, tasksUnchecked); diff --git a/templates/consumer-repo/.github/workflows/agents-keepalive-loop.yml b/templates/consumer-repo/.github/workflows/agents-keepalive-loop.yml index 629e598f0..b7cdae91f 100644 --- a/templates/consumer-repo/.github/workflows/agents-keepalive-loop.yml +++ b/templates/consumer-repo/.github/workflows/agents-keepalive-loop.yml @@ -859,6 +859,7 @@ jobs: - evaluate - run-codex - run-claude + - progress-review # Run if PR exists, handle skipped/failed agent jobs gracefully # run-codex will be skipped when action != run/fix/conflict, which is expected if: | From 26eb35e912274dc4f020a966ceb75d5f32471c31 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 16:48:57 +0000 Subject: [PATCH 15/15] Address review feedback on belt dispatch verification Fixes from inline code review on PR #1665: 1. Scope dispatcher run verification to the current dispatch by filtering runs created after the dispatch timestamp (dispatchedAt). 
Previously an old successful run for a different issue would falsely satisfy the check, causing retries to stop early. 2. Verify all dispatch attempts including the final one. Previously the last attempt assumed success without checking, creating a false-positive path when the last run was also cancelled. 3. On verification errors (catch block), continue to the next retry attempt instead of optimistically breaking out of the loop. Transient API errors no longer mask failed dispatches. 4. Scope the branch-check re-dispatch to recent runs (last 30 minutes) instead of any active run. An unrelated dispatcher run for a different issue no longer suppresses re-dispatch. 5. Apply all auto-pilot changes to both .github/workflows/ and templates/consumer-repo/.github/workflows/ per sync conventions. 6. Use --no-save --no-package-lock for npm install in maint-69-sync-labels.yml per repo conventions. https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/workflows/agents-auto-pilot.yml | 160 ++++++++++++++++-- .github/workflows/maint-69-sync-labels.yml | 2 +- .../.github/workflows/agents-auto-pilot.yml | 97 ++++++----- 3 files changed, 198 insertions(+), 61 deletions(-) diff --git a/.github/workflows/agents-auto-pilot.yml b/.github/workflows/agents-auto-pilot.yml index f39f35518..86218a2e6 100644 --- a/.github/workflows/agents-auto-pilot.yml +++ b/.github/workflows/agents-auto-pilot.yml @@ -1916,23 +1916,90 @@ jobs: return; } - // Force-dispatch Codex belt dispatcher to create the branch - try { - await withRetry((client) => client.rest.actions.createWorkflowDispatch({ - owner: context.repo.owner, - repo: context.repo.repo, - workflow_id: 'agents-71-codex-belt-dispatcher.yml', - ref: baseBranch, - inputs: { - agent_key: agentKey, - force_issue: issueNumber.toString(), - dry_run: 'false' + // Force-dispatch Codex belt dispatcher to create the branch. 
+ // Retry up to 3 times because GitHub Actions can silently cancel + // queued runs before they receive a runner (observed on issue #34). + const maxDispatchAttempts = 3; + let dispatchSucceeded = false; + for (let attempt = 1; attempt <= maxDispatchAttempts; attempt++) { + const dispatchedAt = new Date(); + try { + await withRetry((client) => client.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + ref: baseBranch, + inputs: { + agent_key: agentKey, + force_issue: issueNumber.toString(), + dry_run: 'false' + } + })); + core.info( + `Dispatched belt dispatcher (agent: ${agentKey}) ` + + `for issue #${issueNumber} (attempt ${attempt}/${maxDispatchAttempts})` + ); + } catch (dispatchError) { + core.warning( + `Belt dispatch attempt ${attempt} failed: ${dispatchError?.message}` + ); + if (attempt < maxDispatchAttempts) { + await new Promise(r => setTimeout(r, attempt * 5000)); } - })); - const prefix = `Dispatched belt dispatcher (agent: ${agentKey}) for issue`; - core.info(`${prefix} #${issueNumber}`); - } catch (dispatchError) { - core.warning(`Could not dispatch belt dispatcher: ${dispatchError?.message}`); + continue; + } + + // Wait briefly then verify the dispatched run is queued/in_progress + // (not cancelled before receiving a runner). Only consider runs + // created after the dispatch timestamp to avoid matching stale runs + // for other issues. 
+ await new Promise(r => setTimeout(r, 15000)); + try { + const { data: runs } = await withRetry((client) => + client.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + per_page: 5, + }) + ); + const recentRuns = runs.workflow_runs.filter( + r => new Date(r.created_at) >= dispatchedAt + ); + const alive = recentRuns.find( + r => r.status === 'queued' || r.status === 'in_progress' + ); + if (alive) { + core.info(`Belt dispatcher run ${alive.id} is ${alive.status}`); + dispatchSucceeded = true; + break; + } + const succeeded = recentRuns.find( + r => r.conclusion === 'success' + ); + if (succeeded) { + core.info(`Belt dispatcher run ${succeeded.id} already succeeded`); + dispatchSucceeded = true; + break; + } + const latest = recentRuns[0] || runs.workflow_runs[0]; + core.warning( + `Belt dispatcher run not alive after attempt ${attempt}` + + ` (latest: ${latest?.id} ${latest?.conclusion}); ` + + (attempt < maxDispatchAttempts ? 'retrying…' : 'no more attempts.') + ); + } catch (checkError) { + core.warning( + `Could not verify dispatcher run after attempt ${attempt}: ` + + `${checkError?.message}; status unknown, will retry.` + ); + } + } + if (!dispatchSucceeded) { + core.warning( + `Belt dispatcher could not be confirmed after ${maxDispatchAttempts} attempts ` + + `for issue #${issueNumber}. The branch-check loop will re-dispatch if needed.` + ); } - name: Metrics - End capability check timer @@ -2380,6 +2447,65 @@ jobs: const actualBackoffMs = Math.min(backoffMs, maxBackoffMs); const actualMinutes = Math.round(actualBackoffMs / 60000); + // Re-dispatch the belt if no recent dispatcher run is active + // for this issue. Only consider runs created in the last 30 + // minutes to avoid matching stale runs for other issues. 
+ let redispatched = false; + try { + const { data: runs } = await withRetry((client) => + client.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + per_page: 10, + }) + ); + const cutoff = new Date(Date.now() - 30 * 60 * 1000); + const recentRuns = runs.workflow_runs.filter( + r => new Date(r.created_at) >= cutoff + ); + const alive = recentRuns.find( + r => r.status === 'queued' || r.status === 'in_progress' + ); + if (!alive) { + core.info( + `No active belt dispatcher run in last 30m ` + + `(${recentRuns.length} recent runs checked); re-dispatching` + ); + const { data: repoInfo } = await withRetry((client) => + client.rest.repos.get({ + owner: context.repo.owner, + repo: context.repo.repo, + }) + ); + const dispatchRef = repoInfo.default_branch || 'main'; + await withRetry((client) => + client.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + ref: dispatchRef, + inputs: { + agent_key: agentKey, + force_issue: String(issueNumber), + dry_run: 'false', + }, + }) + ); + redispatched = true; + core.info(`Re-dispatched belt for issue #${issueNumber}`); + } else { + core.info( + `Belt dispatcher run ${alive.id} still ${alive.status}; skipping re-dispatch` + ); + } + } catch (redispatchErr) { + core.warning(`Belt re-dispatch check failed: ${redispatchErr?.message}`); + } + + const redispatchNote = redispatched + ? ' Re-dispatched belt dispatcher.' + : ''; core.info(`Applying branch-creation backoff: waiting ${actualMinutes} minutes`); await withRetry((client) => client.rest.issues.createComment({ owner: context.repo.owner, @@ -2387,7 +2513,7 @@ jobs: issue_number: issueNumber, body: `🤖 **Auto-pilot**: Backoff delay (${actualMinutes}m) - Branch not created yet. Attempt ${stallCount + 1}/${maxStallRetries}. + Branch not created yet. 
Attempt ${stallCount + 1}/${maxStallRetries}.${redispatchNote} Waiting before retry...` })); diff --git a/.github/workflows/maint-69-sync-labels.yml b/.github/workflows/maint-69-sync-labels.yml index 8f73338eb..21e732266 100644 --- a/.github/workflows/maint-69-sync-labels.yml +++ b/.github/workflows/maint-69-sync-labels.yml @@ -40,7 +40,7 @@ jobs: github_token: ${{ github.token }} - name: Install js-yaml - run: npm install js-yaml + run: npm install --no-save --no-package-lock js-yaml - name: Parse labels-core.yml id: parse diff --git a/templates/consumer-repo/.github/workflows/agents-auto-pilot.yml b/templates/consumer-repo/.github/workflows/agents-auto-pilot.yml index 070c3e02c..7c82d20cb 100644 --- a/templates/consumer-repo/.github/workflows/agents-auto-pilot.yml +++ b/templates/consumer-repo/.github/workflows/agents-auto-pilot.yml @@ -1892,6 +1892,7 @@ jobs: const maxDispatchAttempts = 3; let dispatchSucceeded = false; for (let attempt = 1; attempt <= maxDispatchAttempts; attempt++) { + const dispatchedAt = new Date(); try { await withRetry((client) => client.rest.actions.createWorkflowDispatch({ owner: context.repo.owner, @@ -1919,46 +1920,49 @@ jobs: } // Wait briefly then verify the dispatched run is queued/in_progress - // (not cancelled before receiving a runner). 
- if (attempt < maxDispatchAttempts) { - await new Promise(r => setTimeout(r, 15000)); - try { - const { data: runs } = await withRetry((client) => - client.rest.actions.listWorkflowRuns({ - owner: context.repo.owner, - repo: context.repo.repo, - workflow_id: 'agents-71-codex-belt-dispatcher.yml', - per_page: 5, - }) - ); - const alive = runs.workflow_runs.find( - r => r.status === 'queued' || r.status === 'in_progress' - ); - if (alive) { - core.info(`Belt dispatcher run ${alive.id} is ${alive.status}`); - dispatchSucceeded = true; - break; - } - // All recent runs completed — check if the latest one succeeded - const latest = runs.workflow_runs[0]; - if (latest && latest.conclusion === 'success') { - core.info(`Belt dispatcher run ${latest.id} already succeeded`); - dispatchSucceeded = true; - break; - } - core.warning( - `Belt dispatcher run not alive after attempt ${attempt}` + - ` (latest: ${latest?.id} ${latest?.conclusion}); retrying…` - ); - } catch (checkError) { - core.warning(`Could not verify dispatcher run: ${checkError?.message}`); - // Optimistically assume it's running + // (not cancelled before receiving a runner). Only consider runs + // created after the dispatch timestamp to avoid matching stale runs + // for other issues. 
+ await new Promise(r => setTimeout(r, 15000)); + try { + const { data: runs } = await withRetry((client) => + client.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'agents-71-codex-belt-dispatcher.yml', + per_page: 5, + }) + ); + const recentRuns = runs.workflow_runs.filter( + r => new Date(r.created_at) >= dispatchedAt + ); + const alive = recentRuns.find( + r => r.status === 'queued' || r.status === 'in_progress' + ); + if (alive) { + core.info(`Belt dispatcher run ${alive.id} is ${alive.status}`); dispatchSucceeded = true; break; } - } else { - // Last attempt — no verification, assume success - dispatchSucceeded = true; + const succeeded = recentRuns.find( + r => r.conclusion === 'success' + ); + if (succeeded) { + core.info(`Belt dispatcher run ${succeeded.id} already succeeded`); + dispatchSucceeded = true; + break; + } + const latest = recentRuns[0] || runs.workflow_runs[0]; + core.warning( + `Belt dispatcher run not alive after attempt ${attempt}` + + ` (latest: ${latest?.id} ${latest?.conclusion}); ` + + (attempt < maxDispatchAttempts ? 'retrying…' : 'no more attempts.') + ); + } catch (checkError) { + core.warning( + `Could not verify dispatcher run after attempt ${attempt}: ` + + `${checkError?.message}; status unknown, will retry.` + ); } } if (!dispatchSucceeded) { @@ -2367,9 +2371,9 @@ jobs: const actualBackoffMs = Math.min(backoffMs, maxBackoffMs); const actualMinutes = Math.round(actualBackoffMs / 60000); - // Re-dispatch the belt if no dispatcher run is active. - // This recovers from cases where the original dispatch was - // silently cancelled before receiving a runner (see issue #34). + // Re-dispatch the belt if no recent dispatcher run is active + // for this issue. Only consider runs created in the last 30 + // minutes to avoid matching stale runs for other issues. 
let redispatched = false; try { const { data: runs } = await withRetry((client) => @@ -2377,14 +2381,21 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, workflow_id: 'agents-71-codex-belt-dispatcher.yml', - per_page: 5, + per_page: 10, }) ); - const alive = runs.workflow_runs.find( + const cutoff = new Date(Date.now() - 30 * 60 * 1000); + const recentRuns = runs.workflow_runs.filter( + r => new Date(r.created_at) >= cutoff + ); + const alive = recentRuns.find( r => r.status === 'queued' || r.status === 'in_progress' ); if (!alive) { - core.info('No active belt dispatcher run found; re-dispatching'); + core.info( + `No active belt dispatcher run in last 30m ` + + `(${recentRuns.length} recent runs checked); re-dispatching` + ); const { data: repoInfo } = await withRetry((client) => client.rest.repos.get({ owner: context.repo.owner,