From 02064f00341616080881da9a3cb81b279bfdee37 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Feb 2026 21:05:25 +0000 Subject: [PATCH 01/12] fix: avoid multi-line stderr in workflow annotations GitHub Actions ::warning:: commands truncate/mangle multi-line content. Emit a short annotation message and print full npm stderr in a collapsible ::group:: instead, so logs stay readable. https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/actions/setup-api-client/action.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/actions/setup-api-client/action.yml b/.github/actions/setup-api-client/action.yml index 3e43bc863..5fead358b 100644 --- a/.github/actions/setup-api-client/action.yml +++ b/.github/actions/setup-api-client/action.yml @@ -266,7 +266,10 @@ runs: npm_err=$(cat "$npm_output") rm -f "$npm_output" - echo "::warning::npm install attempt $attempt/$NPM_MAX_RETRIES failed: $npm_err" + echo "::warning::npm install attempt $attempt/$NPM_MAX_RETRIES failed (see logs)" + echo "::group::npm stderr (attempt $attempt)" + echo "$npm_err" + echo "::endgroup::" # On first failure, also try --legacy-peer-deps in case it's a peer dep conflict if [ "$attempt" -eq 1 ]; then @@ -279,7 +282,10 @@ runs: fi npm_err_legacy=$(cat "$npm_output") rm -f "$npm_output" - echo "::warning::npm install with --legacy-peer-deps failed: $npm_err_legacy" + echo "::warning::npm install with --legacy-peer-deps failed (see logs)" + echo "::group::npm stderr (--legacy-peer-deps)" + echo "$npm_err_legacy" + echo "::endgroup::" fi if [ "$attempt" -lt "$NPM_MAX_RETRIES" ]; then From 4d84c1f97f3b7c231f11bb9b296c1d6a3427e7fc Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Feb 2026 21:22:43 +0000 Subject: [PATCH 02/12] fix: sync template setup-api-client with retry-with-backoff and annotation fixes Mirror the main setup-api-client changes into the consumer-repo template to prevent template drift: - Exponential backoff retry (3 attempts, 5s/10s) for 
transient npm errors - --legacy-peer-deps fallback on first failure - Short ::warning:: annotations with full stderr in collapsible ::group:: - Pin lru-cache@10.4.3 (was ^10.0.0) https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .../actions/setup-api-client/action.yml | 80 ++++++++++++++----- 1 file changed, 58 insertions(+), 22 deletions(-) diff --git a/templates/consumer-repo/.github/actions/setup-api-client/action.yml b/templates/consumer-repo/.github/actions/setup-api-client/action.yml index b912fe4ad..24736497c 100644 --- a/templates/consumer-repo/.github/actions/setup-api-client/action.yml +++ b/templates/consumer-repo/.github/actions/setup-api-client/action.yml @@ -239,29 +239,65 @@ runs: # Install with pinned versions for consistency. # lru-cache is an explicit transitive dep of @octokit/auth-app required for - # GitHub App token minting; pin it here so npm always hoists it to the top - # level even if a prior cached node_modules state is missing it. - # Capture stderr for debugging if the command fails - npm_output=$(mktemp) - npm_cmd=(npm install --no-save --location=project \ - @octokit/rest@20.0.2 \ - @octokit/plugin-retry@6.0.1 \ - @octokit/plugin-paginate-rest@9.1.5 \ - @octokit/auth-app@6.0.3 \ - lru-cache@^10.0.0) - if "${npm_cmd[@]}" 2>"$npm_output"; then - rm -f "$npm_output" - else - echo "::warning::npm install failed with: $(cat "$npm_output")" - echo "::warning::Retrying with --legacy-peer-deps" + # GitHub App token minting; pin it here so npm always hoists a specific version + # even if a prior cached node_modules state is missing it. + # + # Retry with exponential backoff to survive transient npm registry errors + # (e.g. 403 Forbidden from CDN/rate-limit on safe-buffer, undici, etc.). 
+ NPM_PACKAGES=( + @octokit/rest@20.0.2 + @octokit/plugin-retry@6.0.1 + @octokit/plugin-paginate-rest@9.1.5 + @octokit/auth-app@6.0.3 + lru-cache@10.4.3 + ) + NPM_MAX_RETRIES=3 + NPM_BACKOFF=5 # seconds; doubles each retry (5, 10) + npm_installed=false + + for (( attempt=1; attempt<=NPM_MAX_RETRIES; attempt++ )); do + npm_output=$(mktemp) + + if npm install --no-save --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then + rm -f "$npm_output" + npm_installed=true + break + fi + + npm_err=$(cat "$npm_output") rm -f "$npm_output" - npm_cmd=(npm install --no-save --legacy-peer-deps --location=project \ - @octokit/rest@20.0.2 \ - @octokit/plugin-retry@6.0.1 \ - @octokit/plugin-paginate-rest@9.1.5 \ - @octokit/auth-app@6.0.3 \ - lru-cache@^10.0.0) - "${npm_cmd[@]}" + echo "::warning::npm install attempt $attempt/$NPM_MAX_RETRIES failed (see logs)" + echo "::group::npm stderr (attempt $attempt)" + echo "$npm_err" + echo "::endgroup::" + + # On first failure, also try --legacy-peer-deps in case it's a peer dep conflict + if [ "$attempt" -eq 1 ]; then + echo "::warning::Retrying with --legacy-peer-deps" + npm_output=$(mktemp) + if npm install --no-save --legacy-peer-deps --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then + rm -f "$npm_output" + npm_installed=true + break + fi + npm_err_legacy=$(cat "$npm_output") + rm -f "$npm_output" + echo "::warning::npm install with --legacy-peer-deps failed (see logs)" + echo "::group::npm stderr (--legacy-peer-deps)" + echo "$npm_err_legacy" + echo "::endgroup::" + fi + + if [ "$attempt" -lt "$NPM_MAX_RETRIES" ]; then + echo "::notice::Waiting ${NPM_BACKOFF}s before retry..." 
+ sleep "$NPM_BACKOFF" + NPM_BACKOFF=$((NPM_BACKOFF * 2)) + fi + done + + if [ "$npm_installed" != "true" ]; then + echo "::error::npm install failed after $NPM_MAX_RETRIES attempts" + exit 1 fi # Restore vendored package metadata that npm may have overwritten From c36adc17812f9a51f94ae474bde59b3458e212d8 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Feb 2026 21:50:29 +0000 Subject: [PATCH 03/12] fix: pre-timeout watchdog, robust parser import, and always-commit safeguard Three changes to reusable-codex-run.yml to prevent work loss on timeout: 1. Pre-timeout watchdog: A background timer fires 5 minutes before max_runtime_minutes, committing and pushing any uncommitted work so it survives the job cancellation. Killed automatically if Codex finishes before the timer fires. 2. Robust parser import: Replace sys.path-based import of codex_jsonl_parser with importlib.util.spec_from_file_location. Consumer repos (e.g. Counter_Risk) have their own tools/ package with __init__.py that shadows the Workflows tools/ on sys.path, causing "No module named 'tools.codex_jsonl_parser'". 3. Commit step always runs: Add if: always() to the "Commit and push changes" step so uncommitted work is captured even on non-zero exit codes (the watchdog handles timeout, this handles failures). 
https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/workflows/reusable-codex-run.yml | 69 ++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/.github/workflows/reusable-codex-run.yml b/.github/workflows/reusable-codex-run.yml index 11b6cad0c..63325e3e6 100644 --- a/.github/workflows/reusable-codex-run.yml +++ b/.github/workflows/reusable-codex-run.yml @@ -188,6 +188,7 @@ jobs: error-type: ${{ steps.classify_failure.outputs.error_type }} error-recovery: ${{ steps.classify_failure.outputs.error_recovery }} error-summary: ${{ steps.classify_failure.outputs.error_summary }} + watchdog-saved: ${{ steps.run_codex.outputs.watchdog-saved }} # LLM analysis outputs llm-analysis-run: ${{ steps.llm_analysis.outputs.llm-analysis-run }} llm-completed-tasks: ${{ steps.llm_analysis.outputs.completed-tasks }} @@ -938,6 +939,47 @@ jobs: echo "Extra args: provided (${#EXTRA_ARGS[@]} arg(s))" fi + # --- Pre-timeout watchdog --- + # When the job approaches the timeout limit, this background process + # commits and pushes any uncommitted work so it isn't lost to the + # job cancellation. It fires once, 5 minutes before max_runtime. 
+ MAX_RUNTIME_MIN=${{ inputs.max_runtime_minutes }} + GRACE_MIN=5 + WATCHDOG_DELAY=$(( (MAX_RUNTIME_MIN - GRACE_MIN) * 60 )) + if [ "$WATCHDOG_DELAY" -gt 60 ]; then + ( + sleep "$WATCHDOG_DELAY" + echo "::warning::Pre-timeout watchdog fired (${GRACE_MIN}m before ${MAX_RUNTIME_MIN}m limit)" + + CHANGED=$(git status --porcelain | wc -l) + UNPUSHED=0 + if git rev-parse FETCH_HEAD >/dev/null 2>&1; then + UNPUSHED=$(git rev-list FETCH_HEAD..HEAD --count 2>/dev/null || echo 0) + fi + + if [ "$CHANGED" -gt 0 ] || [ "$UNPUSHED" -gt 0 ]; then + echo "::notice::Watchdog saving ${CHANGED} uncommitted file(s) and ${UNPUSHED} unpushed commit(s)" + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + if [ "$CHANGED" -gt 0 ]; then + git add -A + git commit -m "chore(codex-keepalive): pre-timeout checkpoint (PR #${PR_NUM:-})" --no-verify || true + fi + TARGET_BRANCH="${{ inputs.pr_ref }}" + TARGET_BRANCH="${TARGET_BRANCH#refs/heads/}" + PUSH_TOKEN="${{ steps.auth_token.outputs.push_token }}" + REMOTE_URL="https://x-access-token:${PUSH_TOKEN}@github.com/${{ github.repository }}" + git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" || \ + echo "::warning::Watchdog push failed" + echo "watchdog-saved=true" >> "$GITHUB_OUTPUT" + else + echo "::notice::Watchdog: no uncommitted or unpushed work to save" + fi + ) & + WATCHDOG_PID=$! + echo "::notice::Pre-timeout watchdog started (PID ${WATCHDOG_PID}, fires in $((WATCHDOG_DELAY/60))m)" + fi + # Run codex exec with --json to capture rich session data # JSONL events stream to stdout, final message still goes to OUTPUT_FILE CODEX_EXIT=0 @@ -954,6 +996,12 @@ jobs: prompt_content="$(cat "$PROMPT_FILE")" "${cmd[@]}" "$prompt_content" > "$SESSION_JSONL" 2>&1 || CODEX_EXIT=$? 
+ # Kill watchdog if Codex finished before the timer fired + if [ -n "${WATCHDOG_PID:-}" ]; then + kill "$WATCHDOG_PID" 2>/dev/null || true + wait "$WATCHDOG_PID" 2>/dev/null || true + fi + echo "exit-code=${CODEX_EXIT}" >> "$GITHUB_OUTPUT" if [ "$CODEX_EXIT" -ne 0 ]; then @@ -1008,17 +1056,29 @@ jobs: # Basic parsing (always available) python3 << 'PYEOF' + import importlib.util import os import sys - # Add .workflows-lib to path for tools imports - sys.path.insert(0, '.workflows-lib') - sys.path.insert(0, '.') session_file = os.environ.get("SESSION_JSONL", "codex-session.jsonl") github_output = os.environ.get("GITHUB_OUTPUT", "/dev/null") try: - from tools.codex_jsonl_parser import parse_codex_jsonl_file + # Load codex_jsonl_parser from the Workflows checkout by exact path. + # Consumer repos (e.g. Counter_Risk) have their own tools/ package with + # __init__.py, which shadows the Workflows tools/ on sys.path. Using + # importlib.util.spec_from_file_location bypasses sys.path entirely. 
+ _parser_path = os.path.join( + ".workflows-lib", "tools", "codex_jsonl_parser.py" + ) + _spec = importlib.util.spec_from_file_location( + "codex_jsonl_parser", _parser_path + ) + if _spec is None or _spec.loader is None: + raise ImportError(f"Cannot load spec from {_parser_path}") + _mod = importlib.util.module_from_spec(_spec) + _spec.loader.exec_module(_mod) + parse_codex_jsonl_file = _mod.parse_codex_jsonl_file session = parse_codex_jsonl_file(session_file) @@ -1150,6 +1210,7 @@ jobs: - name: Commit and push changes id: commit + if: always() env: MODE: ${{ inputs.mode }} PR_NUMBER: ${{ inputs.pr_number }} From 7924d1ea650b6919aa2e222c164f98d12ea495a6 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 01:35:02 +0000 Subject: [PATCH 04/12] fix: preserve indented checkbox states in PR Meta body sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parseCheckboxStates() and mergeCheckboxStates() only matched top-level checkboxes (^- \[), ignoring indented sub-tasks ( - \[). When PR Meta regenerated the PR body from the issue, auto-reconciled sub-task checkboxes were silently reverted to unchecked. This caused the keepalive loop to stall with rounds_without_task_completion: 8 despite the agent completing real work — PR #256 had 5 tasks auto-checked then immediately un-checked on every push. 
https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/scripts/agents_pr_meta_update_body.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/scripts/agents_pr_meta_update_body.js b/.github/scripts/agents_pr_meta_update_body.js index ec9a585aa..3e987c10c 100644 --- a/.github/scripts/agents_pr_meta_update_body.js +++ b/.github/scripts/agents_pr_meta_update_body.js @@ -404,7 +404,7 @@ function parseCheckboxStates(block) { if (inCodeBlock) { continue; } - const match = line.match(/^- \[(x| )\]\s*(.+)$/i); + const match = line.match(/^\s*- \[(x| )\]\s*(.+)$/i); if (match) { const checked = match[1].toLowerCase() === 'x'; const text = match[2].trim(); @@ -461,12 +461,13 @@ function mergeCheckboxStates(newContent, existingStates) { updated.push(line); continue; } - const match = line.match(/^- \[( )\]\s*(.+)$/); + const match = line.match(/^(\s*)- \[( )\]\s*(.+)$/); if (match) { - const text = match[2].trim(); + const indent = match[1]; + const text = match[3].trim(); const normalized = text.replace(/^-\s*/, '').trim().toLowerCase(); if (existingStates.has(normalized)) { - updated.push(`- [x] ${text}`); + updated.push(`${indent}- [x] ${text}`); continue; } } From 6145c5e40686053b5a1d0e27322fa548d2be1d6a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 01:35:36 +0000 Subject: [PATCH 05/12] chore: sync template scripts --- .../.github/scripts/agents_pr_meta_update_body.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/templates/consumer-repo/.github/scripts/agents_pr_meta_update_body.js b/templates/consumer-repo/.github/scripts/agents_pr_meta_update_body.js index ec9a585aa..3e987c10c 100644 --- a/templates/consumer-repo/.github/scripts/agents_pr_meta_update_body.js +++ b/templates/consumer-repo/.github/scripts/agents_pr_meta_update_body.js @@ -404,7 +404,7 @@ function parseCheckboxStates(block) { if (inCodeBlock) { 
continue; } - const match = line.match(/^- \[(x| )\]\s*(.+)$/i); + const match = line.match(/^\s*- \[(x| )\]\s*(.+)$/i); if (match) { const checked = match[1].toLowerCase() === 'x'; const text = match[2].trim(); @@ -461,12 +461,13 @@ function mergeCheckboxStates(newContent, existingStates) { updated.push(line); continue; } - const match = line.match(/^- \[( )\]\s*(.+)$/); + const match = line.match(/^(\s*)- \[( )\]\s*(.+)$/); if (match) { - const text = match[2].trim(); + const indent = match[1]; + const text = match[3].trim(); const normalized = text.replace(/^-\s*/, '').trim().toLowerCase(); if (existingStates.has(normalized)) { - updated.push(`- [x] ${text}`); + updated.push(`${indent}- [x] ${text}`); continue; } } From 68111870655048c0e0d1619356b90aceeafbbabf Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 01:41:53 +0000 Subject: [PATCH 06/12] fix: address review comments on watchdog pre-timeout mechanism - P1: Add fetch/rebase before watchdog push to avoid non-fast-forward rejection when another workflow updates the branch during the run. Includes one retry with re-fetch/rebase and merge fallback. - P2: Export watchdog-saved in on.workflow_call.outputs so callers of the reusable workflow can observe the signal. - Copilot: Add git fetch before checking FETCH_HEAD to ensure it exists and is current (actions/checkout doesn't set FETCH_HEAD). - Copilot: Initialize watchdog-saved=false before background subshell so downstream consumers always get a defined value. 
https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/workflows/reusable-codex-run.yml | 42 ++++++++++++++++++++---- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/.github/workflows/reusable-codex-run.yml b/.github/workflows/reusable-codex-run.yml index 63325e3e6..f5dfa67be 100644 --- a/.github/workflows/reusable-codex-run.yml +++ b/.github/workflows/reusable-codex-run.yml @@ -124,6 +124,9 @@ on: error-recovery: description: 'Suggested recovery action if failure occurred' value: ${{ jobs.codex.outputs.error-recovery }} + watchdog-saved: + description: 'Whether the pre-timeout watchdog saved uncommitted work (true/false)' + value: ${{ jobs.codex.outputs.watchdog-saved }} # LLM task analysis outputs llm-analysis-run: description: 'Whether LLM analysis was performed' @@ -946,11 +949,20 @@ jobs: MAX_RUNTIME_MIN=${{ inputs.max_runtime_minutes }} GRACE_MIN=5 WATCHDOG_DELAY=$(( (MAX_RUNTIME_MIN - GRACE_MIN) * 60 )) + echo "watchdog-saved=false" >> "$GITHUB_OUTPUT" if [ "$WATCHDOG_DELAY" -gt 60 ]; then ( sleep "$WATCHDOG_DELAY" echo "::warning::Pre-timeout watchdog fired (${GRACE_MIN}m before ${MAX_RUNTIME_MIN}m limit)" + TARGET_BRANCH="${{ inputs.pr_ref }}" + TARGET_BRANCH="${TARGET_BRANCH#refs/heads/}" + PUSH_TOKEN="${{ steps.auth_token.outputs.push_token }}" + REMOTE_URL="https://x-access-token:${PUSH_TOKEN}@github.com/${{ github.repository }}" + + # Fetch to get current FETCH_HEAD before checking for unpushed work + git fetch "${REMOTE_URL}" "${TARGET_BRANCH}" 2>/dev/null || true + CHANGED=$(git status --porcelain | wc -l) UNPUSHED=0 if git rev-parse FETCH_HEAD >/dev/null 2>&1; then @@ -965,12 +977,30 @@ jobs: git add -A git commit -m "chore(codex-keepalive): pre-timeout checkpoint (PR #${PR_NUM:-})" --no-verify || true fi - TARGET_BRANCH="${{ inputs.pr_ref }}" - TARGET_BRANCH="${TARGET_BRANCH#refs/heads/}" - PUSH_TOKEN="${{ steps.auth_token.outputs.push_token }}" - REMOTE_URL="https://x-access-token:${PUSH_TOKEN}@github.com/${{ 
github.repository }}" - git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" || \ - echo "::warning::Watchdog push failed" + # Rebase onto remote before pushing to avoid non-fast-forward rejection + if git rev-parse FETCH_HEAD >/dev/null 2>&1; then + if ! git rebase FETCH_HEAD 2>/dev/null; then + echo "::warning::Watchdog rebase failed; attempting merge fallback." + git rebase --abort 2>/dev/null || true + git pull --no-rebase "${REMOTE_URL}" "${TARGET_BRANCH}" \ + --allow-unrelated-histories 2>/dev/null || true + fi + fi + # Push with one retry + if ! git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null; then + echo "::warning::Watchdog push failed (attempt 1), retrying after fetch/rebase..." + sleep 3 + git fetch "${REMOTE_URL}" "${TARGET_BRANCH}" 2>/dev/null || true + if git rev-parse FETCH_HEAD >/dev/null 2>&1; then + git rebase FETCH_HEAD 2>/dev/null || { + git rebase --abort 2>/dev/null || true + git pull --no-rebase "${REMOTE_URL}" "${TARGET_BRANCH}" \ + --allow-unrelated-histories 2>/dev/null || true + } + fi + git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null || \ + echo "::warning::Watchdog push failed after retry" + fi echo "watchdog-saved=true" >> "$GITHUB_OUTPUT" else echo "::notice::Watchdog: no uncommitted or unpushed work to save" From 0b9a46cf508ad24c6b855cf7da13b5ddc41413fb Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 01:45:32 +0000 Subject: [PATCH 07/12] docs: add watchdog-saved to workflow outputs reference Update WORKFLOW_OUTPUTS.md to include the new watchdog-saved output from reusable-codex-run.yml, fixing the test_reusable_workflow_outputs_documented test. 
https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- docs/ci/WORKFLOW_OUTPUTS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ci/WORKFLOW_OUTPUTS.md b/docs/ci/WORKFLOW_OUTPUTS.md index 543d15099..7e971cf5b 100644 --- a/docs/ci/WORKFLOW_OUTPUTS.md +++ b/docs/ci/WORKFLOW_OUTPUTS.md @@ -78,6 +78,7 @@ that only emit artifacts, see the "Workflows without workflow_call outputs" sect | `reusable-codex-run.yml` | `error-category` | string | Error category if failure occurred (transient/auth/resource/logic/unknown) | `needs.codex.outputs.error-category` | | `reusable-codex-run.yml` | `error-type` | string | Error type if failure occurred (codex/infrastructure/auth/unknown) | `needs.codex.outputs.error-type` | | `reusable-codex-run.yml` | `error-recovery` | string | Suggested recovery action if failure occurred | `needs.codex.outputs.error-recovery` | +| `reusable-codex-run.yml` | `watchdog-saved` | string (boolean-like) | Whether the pre-timeout watchdog saved uncommitted work (true/false) | `needs.codex.outputs.watchdog-saved` | | `reusable-codex-run.yml` | `llm-analysis-run` | string (boolean-like) | Whether LLM analysis was performed | `needs.codex.outputs.llm-analysis-run` | | `reusable-codex-run.yml` | `llm-provider` | string | LLM provider used for analysis (github-models, openai, regex-fallback) | `needs.codex.outputs.llm-provider` | | `reusable-codex-run.yml` | `llm-model` | string | Specific model used for analysis (e.g., gpt-4o, claude-3-5-sonnet) | `needs.codex.outputs.llm-model` | From 97056c028445b147ab35ce5d091fbd9dbcaec2b2 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 02:03:54 +0000 Subject: [PATCH 08/12] fix: skip non-issue refs like "Run #NNN" in extractIssueNumberFromPull The body scan in extractIssueNumberFromPull was treating patterns like "Run #2615" as issue references, causing the Upsert PR body sections check to fail with a 404 when trying to fetch non-existent issues. 
Add a preceding-word filter to skip #NNN when preceded by common non-issue words (run, attempt, step, job, check, task, version, v). Add 12 unit tests covering the extraction logic. https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .../agents-pr-meta-keepalive.test.js | 63 ++++++++++++++++++- .github/scripts/agents_pr_meta_keepalive.js | 5 ++ .../scripts/agents_pr_meta_keepalive.js | 5 ++ 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js b/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js index 673a5d13c..64cec070d 100644 --- a/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js +++ b/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js @@ -3,7 +3,7 @@ const test = require('node:test'); const assert = require('node:assert/strict'); -const { detectKeepalive } = require('../agents_pr_meta_keepalive.js'); +const { detectKeepalive, extractIssueNumberFromPull } = require('../agents_pr_meta_keepalive.js'); function createCore(outputs) { return { @@ -543,3 +543,64 @@ test('detectKeepalive does not cache empty pull responses', async () => { assert.equal(outputsFirst.reason, 'pull-fetch-failed'); assert.equal(outputsSecond.reason, 'pull-fetch-failed'); }); + +// --- extractIssueNumberFromPull tests --- + +test('extractIssueNumberFromPull returns null for null input', () => { + assert.equal(extractIssueNumberFromPull(null), null); +}); + +test('extractIssueNumberFromPull extracts from meta comment', () => { + const pull = { body: 'Some text more text', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), 42); +}); + +test('extractIssueNumberFromPull extracts from branch name', () => { + const pull = { body: '', head: { ref: 'codex/issue-99' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), 99); +}); + +test('extractIssueNumberFromPull extracts from title', () => { + const pull = { body: '', head: { ref: 'feature' 
}, title: 'fix: resolve #55' }; + assert.equal(extractIssueNumberFromPull(pull), 55); +}); + +test('extractIssueNumberFromPull extracts from body hash ref', () => { + const pull = { body: 'Fixes #123', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), 123); +}); + +test('extractIssueNumberFromPull skips "Run #NNN" in body', () => { + const pull = { body: 'Run #2615 timed out after 45 minutes', head: { ref: 'claude/fix-something' }, title: 'fix: pre-timeout watchdog' }; + assert.equal(extractIssueNumberFromPull(pull), null); +}); + +test('extractIssueNumberFromPull skips "run #NNN" case-insensitive', () => { + const pull = { body: 'The run #500 failed', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), null); +}); + +test('extractIssueNumberFromPull skips "attempt #N" in body', () => { + const pull = { body: 'attempt #3 was successful', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), null); +}); + +test('extractIssueNumberFromPull skips "step #N" in body', () => { + const pull = { body: 'step #2 completed', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), null); +}); + +test('extractIssueNumberFromPull skips "version #N" in body', () => { + const pull = { body: 'Upgraded to version #4', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), null); +}); + +test('extractIssueNumberFromPull prefers meta comment over "Run #NNN"', () => { + const pull = { body: ' Run #2615 timed out', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), 77); +}); + +test('extractIssueNumberFromPull finds real issue after skipping Run ref', () => { + const pull = { body: 'Run #2615 timed out. 
Relates to #88', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), 88); +}); diff --git a/.github/scripts/agents_pr_meta_keepalive.js b/.github/scripts/agents_pr_meta_keepalive.js index 7a2197536..10eab2c81 100644 --- a/.github/scripts/agents_pr_meta_keepalive.js +++ b/.github/scripts/agents_pr_meta_keepalive.js @@ -240,6 +240,11 @@ function extractIssueNumberFromPull(pull) { if (match.index > 0 && /\w/.test(bodyText[match.index - 1])) { continue; } + // Skip non-issue refs like "Run #123", "run #123", "attempt #2" + const preceding = bodyText.slice(Math.max(0, match.index - 20), match.index); + if (/\b(?:run|attempt|step|job|check|task|version|v)\s*$/i.test(preceding)) { + continue; + } candidates.push(match[1]); } diff --git a/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js b/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js index 7a2197536..10eab2c81 100644 --- a/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js +++ b/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js @@ -240,6 +240,11 @@ function extractIssueNumberFromPull(pull) { if (match.index > 0 && /\w/.test(bodyText[match.index - 1])) { continue; } + // Skip non-issue refs like "Run #123", "run #123", "attempt #2" + const preceding = bodyText.slice(Math.max(0, match.index - 20), match.index); + if (/\b(?:run|attempt|step|job|check|task|version|v)\s*$/i.test(preceding)) { + continue; + } candidates.push(match[1]); } From 96e5f6ec2ec143c047674e2caaf4c35ec763d40e Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 02:29:53 +0000 Subject: [PATCH 09/12] feat: add session analysis, completion comment, and error diagnostics to Claude runner Closes the three remaining feature gaps between the Claude and Codex runners identified in issue #1646: 1. 
**Session analysis (LLM-powered)**: Reuses analyze_codex_session.py which auto-detects Claude's plain-text session log (data_source=summary) and feeds it through the same LLM analysis pipeline for structured task completion assessment. Outputs feed into the keepalive loop. 2. **Completion checkpoint comment**: Posts a PR comment summarizing completed tasks and acceptance criteria using the shared post_completion_comment.js script. Supports both claude-prompt*.md and codex-prompt*.md file names. 3. **Error diagnostics**: Adds GITHUB_STEP_SUMMARY with error table, creates a diagnostics artifact (JSON + agent output), and posts a structured PR comment on non-transient failures with recovery guidance and log links. Uses a distinct marker. https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/workflows/reusable-claude-run.yml | 409 +++++++++++++++++++++- 1 file changed, 400 insertions(+), 9 deletions(-) diff --git a/.github/workflows/reusable-claude-run.yml b/.github/workflows/reusable-claude-run.yml index 490eac122..9827d6819 100644 --- a/.github/workflows/reusable-claude-run.yml +++ b/.github/workflows/reusable-claude-run.yml @@ -1207,18 +1207,209 @@ jobs: claude-session*.jsonl if-no-files-found: ignore - - name: Compatibility outputs (LLM analysis placeholders) + - name: Analyze Claude session + id: analyze_session + if: always() + env: + PYTHONPATH: ${{ github.workspace }}/.workflows-lib:${{ github.workspace }} + PR_NUM: ${{ inputs.pr_number }} + run: | + set -euo pipefail + + if [ -n "${PR_NUM}" ]; then + SESSION_LOG="claude-session-${PR_NUM}.log" + else + SESSION_LOG="claude-session.log" + fi + + # Check if session file exists and has content + if [ ! -f "$SESSION_LOG" ] || [ ! 
-s "$SESSION_LOG" ]; then + echo "No Claude session log found or file is empty" + echo "session-available=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "Session log captured: $(wc -l < "$SESSION_LOG") lines, $(wc -c < "$SESSION_LOG") bytes" + echo "session-available=true" >> "$GITHUB_OUTPUT" + echo "session-file=$SESSION_LOG" >> "$GITHUB_OUTPUT" + + - name: Analyze task completion with LLM + id: llm_analysis + if: >- + always() && + steps.analyze_session.outputs.session-available == 'true' && + inputs.pr_number != '' + env: + PYTHONPATH: ${{ github.workspace }}/.workflows-lib:${{ github.workspace }} + PR_NUM: ${{ inputs.pr_number }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + CLAUDE_API_STRANSKE: ${{ secrets.CLAUDE_API_STRANSKE }} + run: | + set -euo pipefail + + SESSION_LOG="${{ steps.analyze_session.outputs.session-file }}" + ANALYSIS_FILE="claude-analysis-${PR_NUM}.json" + + # Fetch PR body to extract tasks + echo "Fetching PR #${PR_NUM} body..." + set +e + PR_BODY=$(gh pr view "${PR_NUM}" --json body --jq '.body' 2>&1) + fetch_exit=$? + set -e + + if [ $fetch_exit -ne 0 ] || [ -z "$PR_BODY" ]; then + echo "::warning::Could not fetch PR body for #${PR_NUM} (exit code: $fetch_exit)" + echo "::warning::LLM task completion analysis will be skipped" + if [ $fetch_exit -ne 0 ]; then + echo "Error output: $PR_BODY" + fi + echo "llm-analysis-run=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Save PR body to temp file + echo "$PR_BODY" > pr_body.md + + # Run full LLM analysis and save JSON output + # The analyze_codex_session.py script auto-detects text vs JSONL input, + # so it works with Claude's plain-text session logs (data_source=summary). + echo "Running LLM-powered task completion analysis..." + if [ ! -f .workflows-lib/scripts/analyze_codex_session.py ]; then + echo "::error::Analysis script not found." 
+ echo "::error::Missing: .workflows-lib/scripts/analyze_codex_session.py" + echo "llm-analysis-run=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + python3 .workflows-lib/scripts/analyze_codex_session.py \ + --session-file "$SESSION_LOG" \ + --pr-body-file pr_body.md \ + --output json > "$ANALYSIS_FILE" || { + echo "::warning::LLM analysis failed, continuing without it" + cat "$ANALYSIS_FILE" 2>/dev/null || true + echo "llm-analysis-run=false" >> "$GITHUB_OUTPUT" + rm -f "$ANALYSIS_FILE" + exit 0 + } + + # Also output to GitHub Actions for visibility + python3 .workflows-lib/scripts/analyze_codex_session.py \ + --session-file "$SESSION_LOG" \ + --pr-body-file pr_body.md \ + --output github-actions || true + + echo "llm-analysis-run=true" >> "$GITHUB_OUTPUT" + echo "analysis-file=$ANALYSIS_FILE" >> "$GITHUB_OUTPUT" + + # Extract key fields for downstream use + if [ -f "$ANALYSIS_FILE" ]; then + python3 - "$ANALYSIS_FILE" >> "$GITHUB_OUTPUT" <<'PY' + import json + import sys + + analysis_path = sys.argv[1] + with open(analysis_path, encoding='utf-8') as handle: + data = json.load(handle) + + completed_tasks = json.dumps(data.get('completed_tasks', [])) + quality_warnings = json.dumps(data.get('quality_warnings', [])) + + print(f"completed-tasks={completed_tasks}") + print(f"provider={data.get('provider', 'unknown')}") + print(f"model={data.get('model', 'unknown')}") + print(f"confidence={data.get('confidence', 0)}") + print(f"raw-confidence={data.get('raw_confidence', data.get('confidence', 0))}") + print(f"effort-score={data.get('effort_score', 0)}") + print(f"data-quality={data.get('data_quality', 'unknown')}") + print(f"analysis-text-length={data.get('analysis_text_length', 0)}") + print(f"quality-warnings={quality_warnings}") + PY + fi + + - name: Compatibility outputs (LLM analysis) id: compat if: always() run: | - { - echo "llm-analysis-run=false" - echo "llm-provider=" - echo "llm-model=" - echo "llm-confidence=" - echo "llm-completed-tasks=[]" - echo 
"llm-has-completions=false" - } >> "$GITHUB_OUTPUT" + # If LLM analysis ran, forward its outputs; otherwise emit placeholders. + if [ "${{ steps.llm_analysis.outputs.llm-analysis-run }}" = "true" ]; then + { + echo "llm-analysis-run=true" + echo "llm-provider=${{ steps.llm_analysis.outputs.provider }}" + echo "llm-model=${{ steps.llm_analysis.outputs.model }}" + echo "llm-confidence=${{ steps.llm_analysis.outputs.confidence }}" + echo "llm-completed-tasks=${{ steps.llm_analysis.outputs.completed-tasks }}" + has_completions="false" + tasks='${{ steps.llm_analysis.outputs.completed-tasks }}' + if [ -n "$tasks" ] && [ "$tasks" != "[]" ]; then + has_completions="true" + fi + echo "llm-has-completions=${has_completions}" + } >> "$GITHUB_OUTPUT" + else + { + echo "llm-analysis-run=false" + echo "llm-provider=" + echo "llm-model=" + echo "llm-confidence=" + echo "llm-completed-tasks=[]" + echo "llm-has-completions=false" + } >> "$GITHUB_OUTPUT" + fi + + - name: Post completion checkpoint comment + id: completion_comment + if: steps.commit.outputs.changes-made == 'true' && inputs.pr_number != '' + uses: actions/github-script@v8 + env: + PR_NUMBER: ${{ inputs.pr_number }} + COMMIT_SHA: ${{ steps.commit.outputs.commit-sha }} + ITERATION: ${{ inputs.iteration || '' }} + with: + script: | + // Try .workflows-lib first (consumer repos), fall back to local copy + const fs = require('fs'); + const modulePath = fs.existsSync('./.workflows-lib/.github/scripts/post_completion_comment.js') + ? 
'./.workflows-lib/.github/scripts/post_completion_comment.js' + : './.github/scripts/post_completion_comment.js'; + const { postCompletionComment } = require(modulePath); + + // Determine prompt file — Claude uses claude-prompt*.md + const prNumber = process.env.PR_NUMBER || ''; + let promptFile = 'claude-prompt.md'; + if (prNumber) { + const prSpecific = `claude-prompt-${prNumber}.md`; + if (fs.existsSync(prSpecific)) { + promptFile = prSpecific; + } + } + // Fall back to codex-prompt*.md if claude variant not found + if (!fs.existsSync(promptFile)) { + const codexPrompt = prNumber ? `codex-prompt-${prNumber}.md` : 'codex-prompt.md'; + if (fs.existsSync(codexPrompt)) { + promptFile = codexPrompt; + } + } + + const result = await postCompletionComment({ + github, context, core, + inputs: { + pr_number: process.env.PR_NUMBER, + commit_sha: process.env.COMMIT_SHA, + iteration: process.env.ITERATION, + prompt_file: promptFile, + }, + }); + core.setOutput('posted', result.posted ? 'true' : 'false'); + core.setOutput('tasks', String(result.tasks || 0)); + core.setOutput('acceptance', String(result.acceptance || 0)); + if (result.posted) { + core.info( + `Posted completion checkpoint: ${result.tasks} tasks, ` + + `${result.acceptance} acceptance criteria`, + ); + } - name: Classify failure id: classify_failure @@ -1275,7 +1466,207 @@ jobs: errorInfo.category === ERROR_CATEGORIES.transient ? 
'true' : 'false'; core.setOutput('is_transient', isTransient); + core.setOutput('error_summary', summary || ''); + console.log(`Error Classification:`); console.log(` Category: ${errorInfo.category}`); console.log(` Type: ${errorType}`); console.log(` Recovery: ${errorInfo.recovery}`); + + - name: Write error summary to GITHUB_STEP_SUMMARY + if: always() && steps.run_claude.outputs.exit-code != '0' + env: + EXIT_CODE: ${{ steps.run_claude.outputs.exit-code }} + OUTPUT_SUMMARY: ${{ steps.run_claude.outputs.final-message-summary }} + ERROR_CATEGORY: ${{ steps.classify_failure.outputs.error_category }} + ERROR_TYPE: ${{ steps.classify_failure.outputs.error_type }} + ERROR_RECOVERY: ${{ steps.classify_failure.outputs.error_recovery }} + MODE: ${{ inputs.mode }} + PR_NUMBER: ${{ inputs.pr_number }} + run: | + set -euo pipefail + { + echo "## Claude Run Failed" + echo "" + echo "| Field | Value |" + echo "|-------|-------|" + echo "| Mode | ${MODE:-unknown} |" + echo "| Exit Code | ${EXIT_CODE:-unknown} |" + echo "| Error Category | ${ERROR_CATEGORY:-unknown} |" + echo "| Error Type | ${ERROR_TYPE:-unknown} |" + if [ -n "${PR_NUMBER:-}" ]; then + echo "| PR | #${PR_NUMBER} |" + fi + echo "" + echo "### Recovery Guidance" + echo "" + echo "${ERROR_RECOVERY:-Check logs for more details.}" + echo "" + if [ -n "${OUTPUT_SUMMARY:-}" ]; then + echo "### Output Summary" + echo "" + echo '```' + echo "${OUTPUT_SUMMARY}" + echo '```' + fi + } >> "$GITHUB_STEP_SUMMARY" + + - name: Create error diagnostics artifact + if: always() && steps.run_claude.outputs.exit-code != '0' + env: + EXIT_CODE: ${{ steps.run_claude.outputs.exit-code }} + OUTPUT_SUMMARY: ${{ steps.run_claude.outputs.final-message-summary }} + ERROR_CATEGORY: ${{ steps.classify_failure.outputs.error_category }} + ERROR_TYPE: ${{ steps.classify_failure.outputs.error_type }} + ERROR_RECOVERY: ${{ steps.classify_failure.outputs.error_recovery }} + IS_TRANSIENT: ${{ steps.classify_failure.outputs.is_transient }} + MODE: 
${{ inputs.mode }} + PR_NUMBER: ${{ inputs.pr_number }} + RUN_URL: >- + ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: | + set -euo pipefail + mkdir -p error-diagnostics + + # Create JSON diagnostics file + cat > error-diagnostics/diagnostics.json << JSONEOF + { + "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)", + "run_id": "${{ github.run_id }}", + "run_url": "${RUN_URL}", + "agent": "claude", + "mode": "${MODE:-unknown}", + "pr_number": "${PR_NUMBER:-}", + "exit_code": "${EXIT_CODE:-unknown}", + "error_category": "${ERROR_CATEGORY:-unknown}", + "error_type": "${ERROR_TYPE:-unknown}", + "is_transient": ${IS_TRANSIENT:-false}, + "recovery_guidance": "${ERROR_RECOVERY:-unknown}" + } + JSONEOF + + # Copy claude output if available + for f in claude-output*.md; do + [ -f "$f" ] && cp "$f" error-diagnostics/ && break + done 2>/dev/null || true + + echo "Created error diagnostics in error-diagnostics/" + + - name: Upload error diagnostics + if: always() && steps.run_claude.outputs.exit-code != '0' + uses: actions/upload-artifact@v6 + with: + name: error-diagnostics-${{ inputs.mode }}-${{ github.run_id }} + path: error-diagnostics/ + retention-days: 30 + + - name: Post PR comment on non-transient failure + if: >- + always() && steps.run_claude.outputs.exit-code != '0' && + steps.classify_failure.outputs.is_transient != 'true' && inputs.pr_number != '' + uses: actions/github-script@v8 + env: + PR_NUMBER: ${{ inputs.pr_number }} + EXIT_CODE: ${{ steps.run_claude.outputs.exit-code }} + ERROR_CATEGORY: ${{ steps.classify_failure.outputs.error_category }} + ERROR_TYPE: ${{ steps.classify_failure.outputs.error_type }} + ERROR_RECOVERY: ${{ steps.classify_failure.outputs.error_recovery }} + OUTPUT_SUMMARY: ${{ steps.run_claude.outputs.final-message-summary }} + MODE: ${{ inputs.mode }} + RUN_URL: >- + ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + with: + script: | + const fs = require('fs'); + let 
withRetry; + const retryPath = './.workflows-lib/.github/scripts/github-api-with-retry.js'; + const localRetryPath = './.github/scripts/github-api-with-retry.js'; + if (fs.existsSync(retryPath)) { + ({ withRetry } = require(retryPath)); + } else if (fs.existsSync(localRetryPath)) { + ({ withRetry } = require(localRetryPath)); + } else { + // Inline fallback: single attempt, no retry + withRetry = (fn) => fn(); + } + + const prNumber = parseInt(process.env.PR_NUMBER, 10); + if (!prNumber || prNumber <= 0) { + console.log('No valid PR number, skipping comment'); + return; + } + + const exitCode = process.env.EXIT_CODE || 'unknown'; + const category = process.env.ERROR_CATEGORY || 'unknown'; + const errorType = process.env.ERROR_TYPE || 'unknown'; + const recovery = process.env.ERROR_RECOVERY || 'Check logs for details.'; + const summary = process.env.OUTPUT_SUMMARY || 'No output captured'; + const mode = process.env.MODE || 'unknown'; + const runUrl = process.env.RUN_URL || ''; + + const marker = ''; + + const body = `${marker} + ## Claude ${mode} run failed + + | Field | Value | + |-------|-------| + | Exit Code | \`${exitCode}\` | + | Error Category | \`${category}\` | + | Error Type | \`${errorType}\` | + | Run | [View logs](${runUrl}) | + + ### Suggested Recovery + + ${recovery} + + ### What to do + + 1. Check the [workflow logs](${runUrl}) for detailed error output + 2. If this is a configuration issue, update the relevant settings + 3. If the error persists, consider adding the \`needs-human\` label for manual review + 4. Re-run the workflow once the issue is resolved + +
<details> + <summary>Output summary</summary> + + \`\`\` + ${summary.slice(0, 500)} + \`\`\` + + </details>
+ `.trim().split('\n').map(l => l.trim()).join('\n'); + + // Check if we already have a failure comment + const { data: comments } = await withRetry(() => + github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + per_page: 100, + }) + ); + + const existingComment = comments.find(c => c.body && c.body.includes(marker)); + + if (existingComment) { + await withRetry(() => + github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existingComment.id, + body, + }) + ); + console.log(`Updated existing failure comment: ${existingComment.html_url}`); + } else { + const { data: newComment } = await withRetry(() => + github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body, + }) + ); + console.log(`Created failure comment: ${newComment.html_url}`); + } From 92809586dc13721dd7fcf4c259ecd2c9720c3c9f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 03:21:00 +0000 Subject: [PATCH 10/12] fix: address code review feedback from Codex and Copilot Claude runner (reusable-claude-run.yml): - Fix shell quoting of completed-tasks JSON by using env vars instead of inline ${{ }} expansion which breaks on apostrophes in task names - Declare OPENAI_API_KEY and CLAUDE_API_STRANSKE in workflow_call.secrets so callers can pass them (matches Codex runner) - Use printf instead of echo when writing PR body to disk to avoid mangling of -n/-e prefixes or backslashes - Add info log when falling back to codex-prompt file Codex runner (reusable-codex-run.yml): - Gate watchdog-saved=true on actual push success instead of emitting it unconditionally after push attempts that may have both failed - Use a fired-flag file so the watchdog kill only terminates the background process if it's still sleeping (hasn't started its commit/push work yet) https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- 
.github/workflows/reusable-claude-run.yml | 42 ++++++++++++++++------- .github/workflows/reusable-codex-run.yml | 26 +++++++++++--- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/.github/workflows/reusable-claude-run.yml b/.github/workflows/reusable-claude-run.yml index 9827d6819..9e1517c26 100644 --- a/.github/workflows/reusable-claude-run.yml +++ b/.github/workflows/reusable-claude-run.yml @@ -105,6 +105,13 @@ on: required: false WORKFLOWS_APP_PRIVATE_KEY: required: false + OPENAI_API_KEY: + required: false + description: >- + OpenAI API key for LLM analysis (enables model selection beyond GitHub Models) + CLAUDE_API_STRANSKE: + required: false + description: 'Anthropic API key for LLM analysis (enables Claude slot)' outputs: final-message: description: 'Full Claude output message (base64 encoded)' @@ -1270,7 +1277,7 @@ jobs: fi # Save PR body to temp file - echo "$PR_BODY" > pr_body.md + printf '%s' "$PR_BODY" > pr_body.md # Run full LLM analysis and save JSON output # The analyze_codex_session.py script auto-detects text vs JSONL input, @@ -1331,18 +1338,23 @@ jobs: - name: Compatibility outputs (LLM analysis) id: compat if: always() + env: + LLM_RAN: ${{ steps.llm_analysis.outputs.llm-analysis-run }} + LLM_PROVIDER: ${{ steps.llm_analysis.outputs.provider }} + LLM_MODEL: ${{ steps.llm_analysis.outputs.model }} + LLM_CONFIDENCE: ${{ steps.llm_analysis.outputs.confidence }} + LLM_COMPLETED_TASKS: ${{ steps.llm_analysis.outputs.completed-tasks }} run: | # If LLM analysis ran, forward its outputs; otherwise emit placeholders. 
- if [ "${{ steps.llm_analysis.outputs.llm-analysis-run }}" = "true" ]; then + if [ "${LLM_RAN}" = "true" ]; then { echo "llm-analysis-run=true" - echo "llm-provider=${{ steps.llm_analysis.outputs.provider }}" - echo "llm-model=${{ steps.llm_analysis.outputs.model }}" - echo "llm-confidence=${{ steps.llm_analysis.outputs.confidence }}" - echo "llm-completed-tasks=${{ steps.llm_analysis.outputs.completed-tasks }}" + echo "llm-provider=${LLM_PROVIDER}" + echo "llm-model=${LLM_MODEL}" + echo "llm-confidence=${LLM_CONFIDENCE}" + echo "llm-completed-tasks=${LLM_COMPLETED_TASKS}" has_completions="false" - tasks='${{ steps.llm_analysis.outputs.completed-tasks }}' - if [ -n "$tasks" ] && [ "$tasks" != "[]" ]; then + if [ -n "${LLM_COMPLETED_TASKS}" ] && [ "${LLM_COMPLETED_TASKS}" != "[]" ]; then has_completions="true" fi echo "llm-has-completions=${has_completions}" @@ -1375,7 +1387,9 @@ jobs: : './.github/scripts/post_completion_comment.js'; const { postCompletionComment } = require(modulePath); - // Determine prompt file — Claude uses claude-prompt*.md + // Determine prompt file — prefer PR-specific variant, then generic. + // The prompt file name is passed to postCompletionComment which uses + // it as the base name; it also checks for PR-specific variants internally. const prNumber = process.env.PR_NUMBER || ''; let promptFile = 'claude-prompt.md'; if (prNumber) { @@ -1384,11 +1398,13 @@ jobs: promptFile = prSpecific; } } - // Fall back to codex-prompt*.md if claude variant not found + // Also check codex-prompt as shared belt PRs use that naming. + // Only fall back when no claude-prompt variant exists at all. if (!fs.existsSync(promptFile)) { - const codexPrompt = prNumber ? `codex-prompt-${prNumber}.md` : 'codex-prompt.md'; - if (fs.existsSync(codexPrompt)) { - promptFile = codexPrompt; + const codexFallback = prNumber ? 
`codex-prompt-${prNumber}.md` : 'codex-prompt.md'; + if (fs.existsSync(codexFallback)) { + core.info(`No claude-prompt file found; using ${codexFallback}`); + promptFile = codexFallback; } } diff --git a/.github/workflows/reusable-codex-run.yml b/.github/workflows/reusable-codex-run.yml index f5dfa67be..c69303a34 100644 --- a/.github/workflows/reusable-codex-run.yml +++ b/.github/workflows/reusable-codex-run.yml @@ -951,8 +951,10 @@ jobs: WATCHDOG_DELAY=$(( (MAX_RUNTIME_MIN - GRACE_MIN) * 60 )) echo "watchdog-saved=false" >> "$GITHUB_OUTPUT" if [ "$WATCHDOG_DELAY" -gt 60 ]; then + WATCHDOG_FIRED_FLAG="/tmp/.watchdog-fired-$$" ( sleep "$WATCHDOG_DELAY" + touch "$WATCHDOG_FIRED_FLAG" echo "::warning::Pre-timeout watchdog fired (${GRACE_MIN}m before ${MAX_RUNTIME_MIN}m limit)" TARGET_BRANCH="${{ inputs.pr_ref }}" @@ -987,7 +989,10 @@ jobs: fi fi # Push with one retry - if ! git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null; then + watchdog_push_ok=false + if git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null; then + watchdog_push_ok=true + else echo "::warning::Watchdog push failed (attempt 1), retrying after fetch/rebase..." 
sleep 3 git fetch "${REMOTE_URL}" "${TARGET_BRANCH}" 2>/dev/null || true @@ -998,10 +1003,18 @@ jobs: --allow-unrelated-histories 2>/dev/null || true } fi - git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null || \ + if git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null; then + watchdog_push_ok=true + else echo "::warning::Watchdog push failed after retry" + fi + fi + if [ "$watchdog_push_ok" = "true" ]; then + echo "watchdog-saved=true" >> "$GITHUB_OUTPUT" + else + echo "::error::Watchdog: committed locally but failed to push" + echo "watchdog-saved=false" >> "$GITHUB_OUTPUT" fi - echo "watchdog-saved=true" >> "$GITHUB_OUTPUT" else echo "::notice::Watchdog: no uncommitted or unpushed work to save" fi @@ -1026,9 +1039,12 @@ jobs: prompt_content="$(cat "$PROMPT_FILE")" "${cmd[@]}" "$prompt_content" > "$SESSION_JSONL" 2>&1 || CODEX_EXIT=$? - # Kill watchdog if Codex finished before the timer fired + # Kill watchdog only if it hasn't fired yet. If it has already + # fired (flag file exists), it may be committing/pushing — let it finish. if [ -n "${WATCHDOG_PID:-}" ]; then - kill "$WATCHDOG_PID" 2>/dev/null || true + if [ ! 
-f "${WATCHDOG_FIRED_FLAG:-/tmp/.no-such-flag}" ]; then + kill "$WATCHDOG_PID" 2>/dev/null || true + fi wait "$WATCHDOG_PID" 2>/dev/null || true fi From 65e89c51a3b1c1003daf8596e319df12844fcd44 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 03:53:16 +0000 Subject: [PATCH 11/12] fix: address sync PR review feedback from coding agents - Remove "task" from the non-issue prefix filter in extractIssueNumberFromPull so "Task #123" is correctly treated as an issue reference (flagged by Codex on PAEM sync PR) - Make --legacy-peer-deps retry conditional on ERESOLVE/peer-dep errors instead of only firing on the first attempt (flagged by Copilot on TMP sync PR) - Add test for "Task #N" being treated as a valid issue ref https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/actions/setup-api-client/action.yml | 6 +++--- .github/scripts/__tests__/agents-pr-meta-keepalive.test.js | 5 +++++ .github/scripts/agents_pr_meta_keepalive.js | 2 +- .../.github/actions/setup-api-client/action.yml | 6 +++--- .../.github/scripts/agents_pr_meta_keepalive.js | 2 +- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/.github/actions/setup-api-client/action.yml b/.github/actions/setup-api-client/action.yml index 5fead358b..b9539f4a9 100644 --- a/.github/actions/setup-api-client/action.yml +++ b/.github/actions/setup-api-client/action.yml @@ -271,9 +271,9 @@ runs: echo "$npm_err" echo "::endgroup::" - # On first failure, also try --legacy-peer-deps in case it's a peer dep conflict - if [ "$attempt" -eq 1 ]; then - echo "::warning::Retrying with --legacy-peer-deps" + # On peer-dep / ERESOLVE failures, also try --legacy-peer-deps + if echo "$npm_err" | grep -qiE 'ERESOLVE|peer dep|Could not resolve dependency'; then + echo "::warning::Detected peer dependency conflict, retrying with --legacy-peer-deps" npm_output=$(mktemp) if npm install --no-save --legacy-peer-deps --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then rm -f "$npm_output" diff --git 
a/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js b/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js index 64cec070d..e653529bb 100644 --- a/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js +++ b/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js @@ -590,6 +590,11 @@ test('extractIssueNumberFromPull skips "step #N" in body', () => { assert.equal(extractIssueNumberFromPull(pull), null); }); +test('extractIssueNumberFromPull treats "Task #N" as a valid issue ref', () => { + const pull = { body: 'Task #42 is ready for review', head: { ref: 'feature' }, title: 'stuff' }; + assert.equal(extractIssueNumberFromPull(pull), 42); +}); + test('extractIssueNumberFromPull skips "version #N" in body', () => { const pull = { body: 'Upgraded to version #4', head: { ref: 'feature' }, title: 'stuff' }; assert.equal(extractIssueNumberFromPull(pull), null); diff --git a/.github/scripts/agents_pr_meta_keepalive.js b/.github/scripts/agents_pr_meta_keepalive.js index 10eab2c81..32cfa95c8 100644 --- a/.github/scripts/agents_pr_meta_keepalive.js +++ b/.github/scripts/agents_pr_meta_keepalive.js @@ -242,7 +242,7 @@ function extractIssueNumberFromPull(pull) { } // Skip non-issue refs like "Run #123", "run #123", "attempt #2" const preceding = bodyText.slice(Math.max(0, match.index - 20), match.index); - if (/\b(?:run|attempt|step|job|check|task|version|v)\s*$/i.test(preceding)) { + if (/\b(?:run|attempt|step|job|check|version|v)\s*$/i.test(preceding)) { continue; } candidates.push(match[1]); diff --git a/templates/consumer-repo/.github/actions/setup-api-client/action.yml b/templates/consumer-repo/.github/actions/setup-api-client/action.yml index 24736497c..b6ed888ad 100644 --- a/templates/consumer-repo/.github/actions/setup-api-client/action.yml +++ b/templates/consumer-repo/.github/actions/setup-api-client/action.yml @@ -271,9 +271,9 @@ runs: echo "$npm_err" echo "::endgroup::" - # On first failure, also try --legacy-peer-deps in case it's a peer dep 
conflict - if [ "$attempt" -eq 1 ]; then - echo "::warning::Retrying with --legacy-peer-deps" + # On peer-dep / ERESOLVE failures, also try --legacy-peer-deps + if echo "$npm_err" | grep -qiE 'ERESOLVE|peer dep|Could not resolve dependency'; then + echo "::warning::Detected peer dependency conflict, retrying with --legacy-peer-deps" npm_output=$(mktemp) if npm install --no-save --legacy-peer-deps --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then rm -f "$npm_output" diff --git a/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js b/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js index 10eab2c81..32cfa95c8 100644 --- a/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js +++ b/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js @@ -242,7 +242,7 @@ function extractIssueNumberFromPull(pull) { } // Skip non-issue refs like "Run #123", "run #123", "attempt #2" const preceding = bodyText.slice(Math.max(0, match.index - 20), match.index); - if (/\b(?:run|attempt|step|job|check|task|version|v)\s*$/i.test(preceding)) { + if (/\b(?:run|attempt|step|job|check|version|v)\s*$/i.test(preceding)) { continue; } candidates.push(match[1]); From 9e89707e896fde87d63bf2b99ad46a9e52162a4b Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Feb 2026 05:13:51 +0000 Subject: [PATCH 12/12] fix: install js-yaml locally instead of globally in label sync workflow The label sync workflow (maint-69-sync-labels.yml) has been failing since Feb 2 because npm install -g js-yaml installs to the global prefix which actions/github-script can't resolve. Install locally so Node's module resolution finds it in node_modules/. 
https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6 --- .github/workflows/maint-69-sync-labels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maint-69-sync-labels.yml b/.github/workflows/maint-69-sync-labels.yml index 965c4898f..8f73338eb 100644 --- a/.github/workflows/maint-69-sync-labels.yml +++ b/.github/workflows/maint-69-sync-labels.yml @@ -40,7 +40,7 @@ jobs: github_token: ${{ github.token }} - name: Install js-yaml - run: npm install -g js-yaml + run: npm install js-yaml - name: Parse labels-core.yml id: parse