From 02064f00341616080881da9a3cb81b279bfdee37 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 25 Feb 2026 21:05:25 +0000
Subject: [PATCH 01/12] fix: avoid multi-line stderr in workflow annotations

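GitHub Actions workflow commands such as ::warning:: are parsed one line
at a time, so a multi-line message is cut off after its first line and
the rest spills into the log as plain text. Emit a short annotation
message and print the full npm stderr in a collapsible ::group:: instead,
so logs stay readable.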

https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6
---
 .github/actions/setup-api-client/action.yml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.github/actions/setup-api-client/action.yml b/.github/actions/setup-api-client/action.yml
index 3e43bc863..5fead358b 100644
--- a/.github/actions/setup-api-client/action.yml
+++ b/.github/actions/setup-api-client/action.yml
@@ -266,7 +266,10 @@ runs:
 
             npm_err=$(cat "$npm_output")
             rm -f "$npm_output"
-            echo "::warning::npm install attempt $attempt/$NPM_MAX_RETRIES failed: $npm_err"
+            echo "::warning::npm install attempt $attempt/$NPM_MAX_RETRIES failed (see logs)"
+            echo "::group::npm stderr (attempt $attempt)"
+            echo "$npm_err"
+            echo "::endgroup::"
 
             # On first failure, also try --legacy-peer-deps in case it's a peer dep conflict
             if [ "$attempt" -eq 1 ]; then
@@ -279,7 +282,10 @@ runs:
               fi
               npm_err_legacy=$(cat "$npm_output")
               rm -f "$npm_output"
-              echo "::warning::npm install with --legacy-peer-deps failed: $npm_err_legacy"
+              echo "::warning::npm install with --legacy-peer-deps failed (see logs)"
+              echo "::group::npm stderr (--legacy-peer-deps)"
+              echo "$npm_err_legacy"
+              echo "::endgroup::"
             fi
 
             if [ "$attempt" -lt "$NPM_MAX_RETRIES" ]; then

From 4d84c1f97f3b7c231f11bb9b296c1d6a3427e7fc Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 25 Feb 2026 21:22:43 +0000
Subject: [PATCH 02/12] fix: sync template setup-api-client with
 retry-with-backoff and annotation fixes

Mirror the main setup-api-client changes into the consumer-repo template
to prevent template drift:
- Exponential backoff retry (3 attempts, 5s/10s) for transient npm errors
- --legacy-peer-deps fallback on first failure
- Short ::warning:: annotations with full stderr in collapsible ::group::
- Pin lru-cache@10.4.3 (was ^10.0.0)

https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6
---
 .../actions/setup-api-client/action.yml       | 80 ++++++++++++++-----
 1 file changed, 58 insertions(+), 22 deletions(-)

diff --git a/templates/consumer-repo/.github/actions/setup-api-client/action.yml b/templates/consumer-repo/.github/actions/setup-api-client/action.yml
index b912fe4ad..24736497c 100644
--- a/templates/consumer-repo/.github/actions/setup-api-client/action.yml
+++ b/templates/consumer-repo/.github/actions/setup-api-client/action.yml
@@ -239,29 +239,65 @@ runs:
 
           # Install with pinned versions for consistency.
           # lru-cache is an explicit transitive dep of @octokit/auth-app required for
-          # GitHub App token minting; pin it here so npm always hoists it to the top
-          # level even if a prior cached node_modules state is missing it.
-          # Capture stderr for debugging if the command fails
-          npm_output=$(mktemp)
-          npm_cmd=(npm install --no-save --location=project \
-            @octokit/rest@20.0.2 \
-            @octokit/plugin-retry@6.0.1 \
-            @octokit/plugin-paginate-rest@9.1.5 \
-            @octokit/auth-app@6.0.3 \
-            lru-cache@^10.0.0)
-          if "${npm_cmd[@]}" 2>"$npm_output"; then
-            rm -f "$npm_output"
-          else
-            echo "::warning::npm install failed with: $(cat "$npm_output")"
-            echo "::warning::Retrying with --legacy-peer-deps"
+          # GitHub App token minting; pin it here so npm always hoists a specific version
+          # even if a prior cached node_modules state is missing it.
+          #
+          # Retry with exponential backoff to survive transient npm registry errors
+          # (e.g. 403 Forbidden from CDN/rate-limit on safe-buffer, undici, etc.).
+          NPM_PACKAGES=(
+            @octokit/rest@20.0.2
+            @octokit/plugin-retry@6.0.1
+            @octokit/plugin-paginate-rest@9.1.5
+            @octokit/auth-app@6.0.3
+            lru-cache@10.4.3
+          )
+          NPM_MAX_RETRIES=3
+          NPM_BACKOFF=5  # seconds; doubles each retry (5, 10)
+          npm_installed=false
+
+          for (( attempt=1; attempt<=NPM_MAX_RETRIES; attempt++ )); do
+            npm_output=$(mktemp)
+
+            if npm install --no-save --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then
+              rm -f "$npm_output"
+              npm_installed=true
+              break
+            fi
+
+            npm_err=$(cat "$npm_output")
             rm -f "$npm_output"
-            npm_cmd=(npm install --no-save --legacy-peer-deps --location=project \
-              @octokit/rest@20.0.2 \
-              @octokit/plugin-retry@6.0.1 \
-              @octokit/plugin-paginate-rest@9.1.5 \
-              @octokit/auth-app@6.0.3 \
-              lru-cache@^10.0.0)
-            "${npm_cmd[@]}"
+            echo "::warning::npm install attempt $attempt/$NPM_MAX_RETRIES failed (see logs)"
+            echo "::group::npm stderr (attempt $attempt)"
+            echo "$npm_err"
+            echo "::endgroup::"
+
+            # On first failure, also try --legacy-peer-deps in case it's a peer dep conflict
+            if [ "$attempt" -eq 1 ]; then
+              echo "::warning::Retrying with --legacy-peer-deps"
+              npm_output=$(mktemp)
+              if npm install --no-save --legacy-peer-deps --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then
+                rm -f "$npm_output"
+                npm_installed=true
+                break
+              fi
+              npm_err_legacy=$(cat "$npm_output")
+              rm -f "$npm_output"
+              echo "::warning::npm install with --legacy-peer-deps failed (see logs)"
+              echo "::group::npm stderr (--legacy-peer-deps)"
+              echo "$npm_err_legacy"
+              echo "::endgroup::"
+            fi
+
+            if [ "$attempt" -lt "$NPM_MAX_RETRIES" ]; then
+              echo "::notice::Waiting ${NPM_BACKOFF}s before retry..."
+              sleep "$NPM_BACKOFF"
+              NPM_BACKOFF=$((NPM_BACKOFF * 2))
+            fi
+          done
+
+          if [ "$npm_installed" != "true" ]; then
+            echo "::error::npm install failed after $NPM_MAX_RETRIES attempts"
+            exit 1
           fi
 
           # Restore vendored package metadata that npm may have overwritten

From c36adc17812f9a51f94ae474bde59b3458e212d8 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 25 Feb 2026 21:50:29 +0000
Subject: [PATCH 03/12] fix: pre-timeout watchdog, robust parser import, and
 always-commit safeguard

Three changes to reusable-codex-run.yml to prevent work loss on timeout:

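1. Pre-timeout watchdog: A background timer fires 5 minutes before
   max_runtime_minutes, committing any uncommitted changes and pushing
   them (together with any unpushed commits) so the work survives the
   job cancellation. The watchdog is only armed when that leaves a
   delay of more than 60 seconds, and it is killed automatically if
   Codex finishes before the timer fires.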

2. Robust parser import: Replace sys.path-based import of
   codex_jsonl_parser with importlib.util.spec_from_file_location.
   Consumer repos (e.g. Counter_Risk) have their own tools/ package
   with __init__.py that shadows the Workflows tools/ on sys.path,
   causing "No module named 'tools.codex_jsonl_parser'".

3. Commit step always runs: Add if: always() to the "Commit and push
   changes" step so uncommitted work is captured even on non-zero
   exit codes (the watchdog handles timeout, this handles failures).

https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6
---
 .github/workflows/reusable-codex-run.yml | 69 ++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/reusable-codex-run.yml b/.github/workflows/reusable-codex-run.yml
index 11b6cad0c..63325e3e6 100644
--- a/.github/workflows/reusable-codex-run.yml
+++ b/.github/workflows/reusable-codex-run.yml
@@ -188,6 +188,7 @@ jobs:
       error-type: ${{ steps.classify_failure.outputs.error_type }}
       error-recovery: ${{ steps.classify_failure.outputs.error_recovery }}
       error-summary: ${{ steps.classify_failure.outputs.error_summary }}
+      watchdog-saved: ${{ steps.run_codex.outputs.watchdog-saved }}
       # LLM analysis outputs
       llm-analysis-run: ${{ steps.llm_analysis.outputs.llm-analysis-run }}
       llm-completed-tasks: ${{ steps.llm_analysis.outputs.completed-tasks }}
@@ -938,6 +939,47 @@ jobs:
             echo "Extra args: provided (${#EXTRA_ARGS[@]} arg(s))"
           fi
 
+          # --- Pre-timeout watchdog ---
+          # When the job approaches the timeout limit, this background process
+          # commits and pushes any uncommitted work so it isn't lost to the
+          # job cancellation.  It fires once, 5 minutes before max_runtime.
+          MAX_RUNTIME_MIN=${{ inputs.max_runtime_minutes }}
+          GRACE_MIN=5
+          WATCHDOG_DELAY=$(( (MAX_RUNTIME_MIN - GRACE_MIN) * 60 ))
+          if [ "$WATCHDOG_DELAY" -gt 60 ]; then
+            (
+              sleep "$WATCHDOG_DELAY"
+              echo "::warning::Pre-timeout watchdog fired (${GRACE_MIN}m before ${MAX_RUNTIME_MIN}m limit)"
+
+              CHANGED=$(git status --porcelain | wc -l)
+              UNPUSHED=0
+              if git rev-parse FETCH_HEAD >/dev/null 2>&1; then
+                UNPUSHED=$(git rev-list FETCH_HEAD..HEAD --count 2>/dev/null || echo 0)
+              fi
+
+              if [ "$CHANGED" -gt 0 ] || [ "$UNPUSHED" -gt 0 ]; then
+                echo "::notice::Watchdog saving ${CHANGED} uncommitted file(s) and ${UNPUSHED} unpushed commit(s)"
+                git config user.name  "github-actions[bot]"
+                git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+                if [ "$CHANGED" -gt 0 ]; then
+                  git add -A
+                  git commit -m "chore(codex-keepalive): pre-timeout checkpoint (PR #${PR_NUM:-})" --no-verify || true
+                fi
+                TARGET_BRANCH="${{ inputs.pr_ref }}"
+                TARGET_BRANCH="${TARGET_BRANCH#refs/heads/}"
+                PUSH_TOKEN="${{ steps.auth_token.outputs.push_token }}"
+                REMOTE_URL="https://x-access-token:${PUSH_TOKEN}@github.com/${{ github.repository }}"
+                git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" || \
+                  echo "::warning::Watchdog push failed"
+                echo "watchdog-saved=true" >> "$GITHUB_OUTPUT"
+              else
+                echo "::notice::Watchdog: no uncommitted or unpushed work to save"
+              fi
+            ) &
+            WATCHDOG_PID=$!
+            echo "::notice::Pre-timeout watchdog started (PID ${WATCHDOG_PID}, fires in $((WATCHDOG_DELAY/60))m)"
+          fi
+
           # Run codex exec with --json to capture rich session data
           # JSONL events stream to stdout, final message still goes to OUTPUT_FILE
           CODEX_EXIT=0
@@ -954,6 +996,12 @@ jobs:
           prompt_content="$(cat "$PROMPT_FILE")"
           "${cmd[@]}" "$prompt_content" > "$SESSION_JSONL" 2>&1 || CODEX_EXIT=$?
 
+          # Kill watchdog if Codex finished before the timer fired
+          if [ -n "${WATCHDOG_PID:-}" ]; then
+            kill "$WATCHDOG_PID" 2>/dev/null || true
+            wait "$WATCHDOG_PID" 2>/dev/null || true
+          fi
+
           echo "exit-code=${CODEX_EXIT}" >> "$GITHUB_OUTPUT"
 
           if [ "$CODEX_EXIT" -ne 0 ]; then
@@ -1008,17 +1056,29 @@ jobs:
 
           # Basic parsing (always available)
           python3 << 'PYEOF'
+          import importlib.util
           import os
           import sys
-          # Add .workflows-lib to path for tools imports
-          sys.path.insert(0, '.workflows-lib')
-          sys.path.insert(0, '.')
 
           session_file = os.environ.get("SESSION_JSONL", "codex-session.jsonl")
           github_output = os.environ.get("GITHUB_OUTPUT", "/dev/null")
 
           try:
-              from tools.codex_jsonl_parser import parse_codex_jsonl_file
+              # Load codex_jsonl_parser from the Workflows checkout by exact path.
+              # Consumer repos (e.g. Counter_Risk) have their own tools/ package with
+              # __init__.py, which shadows the Workflows tools/ on sys.path.  Using
+              # importlib.util.spec_from_file_location bypasses sys.path entirely.
+              _parser_path = os.path.join(
+                  ".workflows-lib", "tools", "codex_jsonl_parser.py"
+              )
+              _spec = importlib.util.spec_from_file_location(
+                  "codex_jsonl_parser", _parser_path
+              )
+              if _spec is None or _spec.loader is None:
+                  raise ImportError(f"Cannot load spec from {_parser_path}")
+              _mod = importlib.util.module_from_spec(_spec)
+              _spec.loader.exec_module(_mod)
+              parse_codex_jsonl_file = _mod.parse_codex_jsonl_file
 
               session = parse_codex_jsonl_file(session_file)
 
@@ -1150,6 +1210,7 @@ jobs:
 
       - name: Commit and push changes
         id: commit
+        if: always()
         env:
           MODE: ${{ inputs.mode }}
           PR_NUMBER: ${{ inputs.pr_number }}

From 7924d1ea650b6919aa2e222c164f98d12ea495a6 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 26 Feb 2026 01:35:02 +0000
Subject: [PATCH 04/12] fix: preserve indented checkbox states in PR Meta body
 sync
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

parseCheckboxStates() and mergeCheckboxStates() only matched top-level
checkboxes (^- \[), ignoring indented sub-tasks (  - \[). When PR Meta
regenerated the PR body from the issue, auto-reconciled sub-task
checkboxes were silently reverted to unchecked. This caused the keepalive
loop to stall with rounds_without_task_completion: 8 despite the agent
completing real work — PR #256 had 5 tasks auto-checked then immediately
un-checked on every push.

https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6
---
 .github/scripts/agents_pr_meta_update_body.js | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/.github/scripts/agents_pr_meta_update_body.js b/.github/scripts/agents_pr_meta_update_body.js
index ec9a585aa..3e987c10c 100644
--- a/.github/scripts/agents_pr_meta_update_body.js
+++ b/.github/scripts/agents_pr_meta_update_body.js
@@ -404,7 +404,7 @@ function parseCheckboxStates(block) {
     if (inCodeBlock) {
       continue;
     }
-    const match = line.match(/^- \[(x| )\]\s*(.+)$/i);
+    const match = line.match(/^\s*- \[(x| )\]\s*(.+)$/i);
     if (match) {
       const checked = match[1].toLowerCase() === 'x';
       const text = match[2].trim();
@@ -461,12 +461,13 @@ function mergeCheckboxStates(newContent, existingStates) {
       updated.push(line);
       continue;
     }
-    const match = line.match(/^- \[( )\]\s*(.+)$/);
+    const match = line.match(/^(\s*)- \[( )\]\s*(.+)$/);
     if (match) {
-      const text = match[2].trim();
+      const indent = match[1];
+      const text = match[3].trim();
       const normalized = text.replace(/^-\s*/, '').trim().toLowerCase();
       if (existingStates.has(normalized)) {
-        updated.push(`- [x] ${text}`);
+        updated.push(`${indent}- [x] ${text}`);
         continue;
       }
     }

From 6145c5e40686053b5a1d0e27322fa548d2be1d6a Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 26 Feb 2026 01:35:36 +0000
Subject: [PATCH 05/12] chore: sync template scripts

---
 .../.github/scripts/agents_pr_meta_update_body.js        | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/templates/consumer-repo/.github/scripts/agents_pr_meta_update_body.js b/templates/consumer-repo/.github/scripts/agents_pr_meta_update_body.js
index ec9a585aa..3e987c10c 100644
--- a/templates/consumer-repo/.github/scripts/agents_pr_meta_update_body.js
+++ b/templates/consumer-repo/.github/scripts/agents_pr_meta_update_body.js
@@ -404,7 +404,7 @@ function parseCheckboxStates(block) {
     if (inCodeBlock) {
       continue;
     }
-    const match = line.match(/^- \[(x| )\]\s*(.+)$/i);
+    const match = line.match(/^\s*- \[(x| )\]\s*(.+)$/i);
     if (match) {
       const checked = match[1].toLowerCase() === 'x';
       const text = match[2].trim();
@@ -461,12 +461,13 @@ function mergeCheckboxStates(newContent, existingStates) {
       updated.push(line);
       continue;
     }
-    const match = line.match(/^- \[( )\]\s*(.+)$/);
+    const match = line.match(/^(\s*)- \[( )\]\s*(.+)$/);
     if (match) {
-      const text = match[2].trim();
+      const indent = match[1];
+      const text = match[3].trim();
       const normalized = text.replace(/^-\s*/, '').trim().toLowerCase();
       if (existingStates.has(normalized)) {
-        updated.push(`- [x] ${text}`);
+        updated.push(`${indent}- [x] ${text}`);
         continue;
       }
     }

From 68111870655048c0e0d1619356b90aceeafbbabf Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 26 Feb 2026 01:41:53 +0000
Subject: [PATCH 06/12] fix: address review comments on watchdog pre-timeout
 mechanism

- P1: Add fetch/rebase before watchdog push to avoid non-fast-forward
  rejection when another workflow updates the branch during the run.
  Includes one retry with re-fetch/rebase and merge fallback.
- P2: Export watchdog-saved in on.workflow_call.outputs so callers
  of the reusable workflow can observe the signal.
- Copilot: Add git fetch before checking FETCH_HEAD to ensure it
  exists and is current (actions/checkout doesn't set FETCH_HEAD).
- Copilot: Initialize watchdog-saved=false before background subshell
  so downstream consumers always get a defined value.

https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6
---
 .github/workflows/reusable-codex-run.yml | 42 ++++++++++++++++++++----
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/reusable-codex-run.yml b/.github/workflows/reusable-codex-run.yml
index 63325e3e6..f5dfa67be 100644
--- a/.github/workflows/reusable-codex-run.yml
+++ b/.github/workflows/reusable-codex-run.yml
@@ -124,6 +124,9 @@ on:
       error-recovery:
         description: 'Suggested recovery action if failure occurred'
         value: ${{ jobs.codex.outputs.error-recovery }}
+      watchdog-saved:
+        description: 'Whether the pre-timeout watchdog saved uncommitted work (true/false)'
+        value: ${{ jobs.codex.outputs.watchdog-saved }}
       # LLM task analysis outputs
       llm-analysis-run:
         description: 'Whether LLM analysis was performed'
@@ -946,11 +949,20 @@ jobs:
           MAX_RUNTIME_MIN=${{ inputs.max_runtime_minutes }}
           GRACE_MIN=5
           WATCHDOG_DELAY=$(( (MAX_RUNTIME_MIN - GRACE_MIN) * 60 ))
+          echo "watchdog-saved=false" >> "$GITHUB_OUTPUT"
           if [ "$WATCHDOG_DELAY" -gt 60 ]; then
             (
               sleep "$WATCHDOG_DELAY"
               echo "::warning::Pre-timeout watchdog fired (${GRACE_MIN}m before ${MAX_RUNTIME_MIN}m limit)"
 
+              TARGET_BRANCH="${{ inputs.pr_ref }}"
+              TARGET_BRANCH="${TARGET_BRANCH#refs/heads/}"
+              PUSH_TOKEN="${{ steps.auth_token.outputs.push_token }}"
+              REMOTE_URL="https://x-access-token:${PUSH_TOKEN}@github.com/${{ github.repository }}"
+
+              # Fetch to get current FETCH_HEAD before checking for unpushed work
+              git fetch "${REMOTE_URL}" "${TARGET_BRANCH}" 2>/dev/null || true
+
               CHANGED=$(git status --porcelain | wc -l)
               UNPUSHED=0
               if git rev-parse FETCH_HEAD >/dev/null 2>&1; then
@@ -965,12 +977,30 @@ jobs:
                   git add -A
                   git commit -m "chore(codex-keepalive): pre-timeout checkpoint (PR #${PR_NUM:-})" --no-verify || true
                 fi
-                TARGET_BRANCH="${{ inputs.pr_ref }}"
-                TARGET_BRANCH="${TARGET_BRANCH#refs/heads/}"
-                PUSH_TOKEN="${{ steps.auth_token.outputs.push_token }}"
-                REMOTE_URL="https://x-access-token:${PUSH_TOKEN}@github.com/${{ github.repository }}"
-                git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" || \
-                  echo "::warning::Watchdog push failed"
+                # Rebase onto remote before pushing to avoid non-fast-forward rejection
+                if git rev-parse FETCH_HEAD >/dev/null 2>&1; then
+                  if ! git rebase FETCH_HEAD 2>/dev/null; then
+                    echo "::warning::Watchdog rebase failed; attempting merge fallback."
+                    git rebase --abort 2>/dev/null || true
+                    git pull --no-rebase "${REMOTE_URL}" "${TARGET_BRANCH}" \
+                      --allow-unrelated-histories 2>/dev/null || true
+                  fi
+                fi
+                # Push with one retry
+                if ! git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null; then
+                  echo "::warning::Watchdog push failed (attempt 1), retrying after fetch/rebase..."
+                  sleep 3
+                  git fetch "${REMOTE_URL}" "${TARGET_BRANCH}" 2>/dev/null || true
+                  if git rev-parse FETCH_HEAD >/dev/null 2>&1; then
+                    git rebase FETCH_HEAD 2>/dev/null || {
+                      git rebase --abort 2>/dev/null || true
+                      git pull --no-rebase "${REMOTE_URL}" "${TARGET_BRANCH}" \
+                        --allow-unrelated-histories 2>/dev/null || true
+                    }
+                  fi
+                  git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null || \
+                    echo "::warning::Watchdog push failed after retry"
+                fi
                 echo "watchdog-saved=true" >> "$GITHUB_OUTPUT"
               else
                 echo "::notice::Watchdog: no uncommitted or unpushed work to save"

From 0b9a46cf508ad24c6b855cf7da13b5ddc41413fb Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 26 Feb 2026 01:45:32 +0000
Subject: [PATCH 07/12] docs: add watchdog-saved to workflow outputs reference

Update WORKFLOW_OUTPUTS.md to include the new watchdog-saved output
from reusable-codex-run.yml, fixing the test_reusable_workflow_outputs_documented
test.

https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6
---
 docs/ci/WORKFLOW_OUTPUTS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/ci/WORKFLOW_OUTPUTS.md b/docs/ci/WORKFLOW_OUTPUTS.md
index 543d15099..7e971cf5b 100644
--- a/docs/ci/WORKFLOW_OUTPUTS.md
+++ b/docs/ci/WORKFLOW_OUTPUTS.md
@@ -78,6 +78,7 @@ that only emit artifacts, see the "Workflows without workflow_call outputs" sect
 | `reusable-codex-run.yml` | `error-category` | string | Error category if failure occurred (transient/auth/resource/logic/unknown) | `needs.codex.outputs.error-category` |
 | `reusable-codex-run.yml` | `error-type` | string | Error type if failure occurred (codex/infrastructure/auth/unknown) | `needs.codex.outputs.error-type` |
 | `reusable-codex-run.yml` | `error-recovery` | string | Suggested recovery action if failure occurred | `needs.codex.outputs.error-recovery` |
+| `reusable-codex-run.yml` | `watchdog-saved` | string (boolean-like) | Whether the pre-timeout watchdog saved uncommitted work (true/false) | `needs.codex.outputs.watchdog-saved` |
 | `reusable-codex-run.yml` | `llm-analysis-run` | string (boolean-like) | Whether LLM analysis was performed | `needs.codex.outputs.llm-analysis-run` |
 | `reusable-codex-run.yml` | `llm-provider` | string | LLM provider used for analysis (github-models, openai, regex-fallback) | `needs.codex.outputs.llm-provider` |
 | `reusable-codex-run.yml` | `llm-model` | string | Specific model used for analysis (e.g., gpt-4o, claude-3-5-sonnet) | `needs.codex.outputs.llm-model` |

From 97056c028445b147ab35ce5d091fbd9dbcaec2b2 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 26 Feb 2026 02:03:54 +0000
Subject: [PATCH 08/12] fix: skip non-issue refs like "Run #NNN" in
 extractIssueNumberFromPull

The body scan in extractIssueNumberFromPull was treating patterns like
"Run #2615" as issue references, causing the Upsert PR body sections
check to fail with a 404 when trying to fetch non-existent issues.

Add a preceding-word filter to skip #NNN when preceded by common
non-issue words (run, attempt, step, job, check, task, version, v).
Add 12 unit tests covering the extraction logic.

https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6
---
 .../agents-pr-meta-keepalive.test.js          | 63 ++++++++++++++++++-
 .github/scripts/agents_pr_meta_keepalive.js   |  5 ++
 .../scripts/agents_pr_meta_keepalive.js       |  5 ++
 3 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js b/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js
index 673a5d13c..64cec070d 100644
--- a/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js
+++ b/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js
@@ -3,7 +3,7 @@
 const test = require('node:test');
 const assert = require('node:assert/strict');
 
-const { detectKeepalive } = require('../agents_pr_meta_keepalive.js');
+const { detectKeepalive, extractIssueNumberFromPull } = require('../agents_pr_meta_keepalive.js');
 
 function createCore(outputs) {
   return {
@@ -543,3 +543,64 @@ test('detectKeepalive does not cache empty pull responses', async () => {
   assert.equal(outputsFirst.reason, 'pull-fetch-failed');
   assert.equal(outputsSecond.reason, 'pull-fetch-failed');
 });
+
+// --- extractIssueNumberFromPull tests ---
+
+test('extractIssueNumberFromPull returns null for null input', () => {
+  assert.equal(extractIssueNumberFromPull(null), null);
+});
+
+test('extractIssueNumberFromPull extracts from meta comment', () => {
+  const pull = { body: 'Some text <!-- meta:issue:42 --> more text', head: { ref: 'feature' }, title: 'stuff' };
+  assert.equal(extractIssueNumberFromPull(pull), 42);
+});
+
+test('extractIssueNumberFromPull extracts from branch name', () => {
+  const pull = { body: '', head: { ref: 'codex/issue-99' }, title: 'stuff' };
+  assert.equal(extractIssueNumberFromPull(pull), 99);
+});
+
+test('extractIssueNumberFromPull extracts from title', () => {
+  const pull = { body: '', head: { ref: 'feature' }, title: 'fix: resolve #55' };
+  assert.equal(extractIssueNumberFromPull(pull), 55);
+});
+
+test('extractIssueNumberFromPull extracts from body hash ref', () => {
+  const pull = { body: 'Fixes #123', head: { ref: 'feature' }, title: 'stuff' };
+  assert.equal(extractIssueNumberFromPull(pull), 123);
+});
+
+test('extractIssueNumberFromPull skips "Run #NNN" in body', () => {
+  const pull = { body: 'Run #2615 timed out after 45 minutes', head: { ref: 'claude/fix-something' }, title: 'fix: pre-timeout watchdog' };
+  assert.equal(extractIssueNumberFromPull(pull), null);
+});
+
+test('extractIssueNumberFromPull skips "run #NNN" case-insensitive', () => {
+  const pull = { body: 'The run #500 failed', head: { ref: 'feature' }, title: 'stuff' };
+  assert.equal(extractIssueNumberFromPull(pull), null);
+});
+
+test('extractIssueNumberFromPull skips "attempt #N" in body', () => {
+  const pull = { body: 'attempt #3 was successful', head: { ref: 'feature' }, title: 'stuff' };
+  assert.equal(extractIssueNumberFromPull(pull), null);
+});
+
+test('extractIssueNumberFromPull skips "step #N" in body', () => {
+  const pull = { body: 'step #2 completed', head: { ref: 'feature' }, title: 'stuff' };
+  assert.equal(extractIssueNumberFromPull(pull), null);
+});
+
+test('extractIssueNumberFromPull skips "version #N" in body', () => {
+  const pull = { body: 'Upgraded to version #4', head: { ref: 'feature' }, title: 'stuff' };
+  assert.equal(extractIssueNumberFromPull(pull), null);
+});
+
+test('extractIssueNumberFromPull prefers meta comment over "Run #NNN"', () => {
+  const pull = { body: '<!-- meta:issue:77 --> Run #2615 timed out', head: { ref: 'feature' }, title: 'stuff' };
+  assert.equal(extractIssueNumberFromPull(pull), 77);
+});
+
+test('extractIssueNumberFromPull finds real issue after skipping Run ref', () => {
+  const pull = { body: 'Run #2615 timed out. Relates to #88', head: { ref: 'feature' }, title: 'stuff' };
+  assert.equal(extractIssueNumberFromPull(pull), 88);
+});
diff --git a/.github/scripts/agents_pr_meta_keepalive.js b/.github/scripts/agents_pr_meta_keepalive.js
index 7a2197536..10eab2c81 100644
--- a/.github/scripts/agents_pr_meta_keepalive.js
+++ b/.github/scripts/agents_pr_meta_keepalive.js
@@ -240,6 +240,11 @@ function extractIssueNumberFromPull(pull) {
     if (match.index > 0 && /\w/.test(bodyText[match.index - 1])) {
       continue;
     }
+    // Skip non-issue refs like "Run #123", "run #123", "attempt #2"
+    const preceding = bodyText.slice(Math.max(0, match.index - 20), match.index);
+    if (/\b(?:run|attempt|step|job|check|task|version|v)\s*$/i.test(preceding)) {
+      continue;
+    }
     candidates.push(match[1]);
   }
 
diff --git a/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js b/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js
index 7a2197536..10eab2c81 100644
--- a/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js
+++ b/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js
@@ -240,6 +240,11 @@ function extractIssueNumberFromPull(pull) {
     if (match.index > 0 && /\w/.test(bodyText[match.index - 1])) {
       continue;
     }
+    // Skip non-issue refs like "Run #123", "run #123", "attempt #2"
+    const preceding = bodyText.slice(Math.max(0, match.index - 20), match.index);
+    if (/\b(?:run|attempt|step|job|check|task|version|v)\s*$/i.test(preceding)) {
+      continue;
+    }
     candidates.push(match[1]);
   }
 

From 96e5f6ec2ec143c047674e2caaf4c35ec763d40e Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 26 Feb 2026 02:29:53 +0000
Subject: [PATCH 09/12] feat: add session analysis, completion comment, and
 error diagnostics to Claude runner

Closes the three remaining feature gaps between the Claude and Codex runners
identified in issue #1646:

1. **Session analysis (LLM-powered)**: Reuses analyze_codex_session.py which
   auto-detects Claude's plain-text session log (data_source=summary) and
   feeds it through the same LLM analysis pipeline for structured task
   completion assessment. Outputs feed into the keepalive loop.

2. **Completion checkpoint comment**: Posts a PR comment summarizing completed
   tasks and acceptance criteria using the shared post_completion_comment.js
   script. Supports both claude-prompt*.md and codex-prompt*.md file names.

3. **Error diagnostics**: Adds GITHUB_STEP_SUMMARY with error table, creates
   a diagnostics artifact (JSON + agent output), and posts a structured PR
   comment on non-transient failures with recovery guidance and log links.
   Uses a distinct <!-- claude-failure-notification --> marker.

https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6
---
 .github/workflows/reusable-claude-run.yml | 409 +++++++++++++++++++++-
 1 file changed, 400 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/reusable-claude-run.yml b/.github/workflows/reusable-claude-run.yml
index 490eac122..9827d6819 100644
--- a/.github/workflows/reusable-claude-run.yml
+++ b/.github/workflows/reusable-claude-run.yml
@@ -1207,18 +1207,209 @@ jobs:
             claude-session*.jsonl
           if-no-files-found: ignore
 
-      - name: Compatibility outputs (LLM analysis placeholders)
+      - name: Analyze Claude session
+        id: analyze_session
+        if: always()
+        env:
+          PYTHONPATH: ${{ github.workspace }}/.workflows-lib:${{ github.workspace }}
+          PR_NUM: ${{ inputs.pr_number }}
+        run: |
+          set -euo pipefail
+
+          if [ -n "${PR_NUM}" ]; then
+            SESSION_LOG="claude-session-${PR_NUM}.log"
+          else
+            SESSION_LOG="claude-session.log"
+          fi
+
+          # Check if session file exists and has content
+          if [ ! -f "$SESSION_LOG" ] || [ ! -s "$SESSION_LOG" ]; then
+            echo "No Claude session log found or file is empty"
+            echo "session-available=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          echo "Session log captured: $(wc -l < "$SESSION_LOG") lines, $(wc -c < "$SESSION_LOG") bytes"
+          echo "session-available=true" >> "$GITHUB_OUTPUT"
+          echo "session-file=$SESSION_LOG" >> "$GITHUB_OUTPUT"
+
+      - name: Analyze task completion with LLM
+        id: llm_analysis
+        if: >-
+          always() &&
+          steps.analyze_session.outputs.session-available == 'true' &&
+          inputs.pr_number != ''
+        env:
+          PYTHONPATH: ${{ github.workspace }}/.workflows-lib:${{ github.workspace }}
+          PR_NUM: ${{ inputs.pr_number }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          CLAUDE_API_STRANSKE: ${{ secrets.CLAUDE_API_STRANSKE }}
+        run: |
+          set -euo pipefail
+
+          SESSION_LOG="${{ steps.analyze_session.outputs.session-file }}"
+          ANALYSIS_FILE="claude-analysis-${PR_NUM}.json"
+
+          # Fetch PR body to extract tasks
+          echo "Fetching PR #${PR_NUM} body..."
+          set +e
+          PR_BODY=$(gh pr view "${PR_NUM}" --json body --jq '.body' 2>&1)
+          fetch_exit=$?
+          set -e
+
+          if [ $fetch_exit -ne 0 ] || [ -z "$PR_BODY" ]; then
+            echo "::warning::Could not fetch PR body for #${PR_NUM} (exit code: $fetch_exit)"
+            echo "::warning::LLM task completion analysis will be skipped"
+            if [ $fetch_exit -ne 0 ]; then
+              echo "Error output: $PR_BODY"
+            fi
+            echo "llm-analysis-run=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Save PR body to temp file
+          echo "$PR_BODY" > pr_body.md
+
+          # Run full LLM analysis and save JSON output
+          # The analyze_codex_session.py script auto-detects text vs JSONL input,
+          # so it works with Claude's plain-text session logs (data_source=summary).
+          echo "Running LLM-powered task completion analysis..."
+          if [ ! -f .workflows-lib/scripts/analyze_codex_session.py ]; then
+            echo "::error::Analysis script not found."
+            echo "::error::Missing: .workflows-lib/scripts/analyze_codex_session.py"
+            echo "llm-analysis-run=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          python3 .workflows-lib/scripts/analyze_codex_session.py \
+            --session-file "$SESSION_LOG" \
+            --pr-body-file pr_body.md \
+            --output json > "$ANALYSIS_FILE" || {
+              echo "::warning::LLM analysis failed, continuing without it"
+              cat "$ANALYSIS_FILE" 2>/dev/null || true
+              echo "llm-analysis-run=false" >> "$GITHUB_OUTPUT"
+              rm -f "$ANALYSIS_FILE"
+              exit 0
+            }
+
+          # Also output to GitHub Actions for visibility
+          python3 .workflows-lib/scripts/analyze_codex_session.py \
+            --session-file "$SESSION_LOG" \
+            --pr-body-file pr_body.md \
+            --output github-actions || true
+
+          echo "llm-analysis-run=true" >> "$GITHUB_OUTPUT"
+          echo "analysis-file=$ANALYSIS_FILE" >> "$GITHUB_OUTPUT"
+
+          # Extract key fields for downstream use
+          if [ -f "$ANALYSIS_FILE" ]; then
+            python3 - "$ANALYSIS_FILE" >> "$GITHUB_OUTPUT" <<'PY'
+          import json
+          import sys
+
+          analysis_path = sys.argv[1]
+          with open(analysis_path, encoding='utf-8') as handle:
+              data = json.load(handle)
+
+          completed_tasks = json.dumps(data.get('completed_tasks', []))
+          quality_warnings = json.dumps(data.get('quality_warnings', []))
+
+          print(f"completed-tasks={completed_tasks}")
+          print(f"provider={data.get('provider', 'unknown')}")
+          print(f"model={data.get('model', 'unknown')}")
+          print(f"confidence={data.get('confidence', 0)}")
+          print(f"raw-confidence={data.get('raw_confidence', data.get('confidence', 0))}")
+          print(f"effort-score={data.get('effort_score', 0)}")
+          print(f"data-quality={data.get('data_quality', 'unknown')}")
+          print(f"analysis-text-length={data.get('analysis_text_length', 0)}")
+          print(f"quality-warnings={quality_warnings}")
+          PY
+          fi
+
+      - name: Compatibility outputs (LLM analysis)
         id: compat
         if: always()
         run: |
-          {
-            echo "llm-analysis-run=false"
-            echo "llm-provider="
-            echo "llm-model="
-            echo "llm-confidence="
-            echo "llm-completed-tasks=[]"
-            echo "llm-has-completions=false"
-          } >> "$GITHUB_OUTPUT"
+          # If LLM analysis ran, forward its outputs; otherwise emit placeholders.
+          if [ "${{ steps.llm_analysis.outputs.llm-analysis-run }}" = "true" ]; then
+            {
+              echo "llm-analysis-run=true"
+              echo "llm-provider=${{ steps.llm_analysis.outputs.provider }}"
+              echo "llm-model=${{ steps.llm_analysis.outputs.model }}"
+              echo "llm-confidence=${{ steps.llm_analysis.outputs.confidence }}"
+              echo "llm-completed-tasks=${{ steps.llm_analysis.outputs.completed-tasks }}"
+              has_completions="false"
+              tasks='${{ steps.llm_analysis.outputs.completed-tasks }}'
+              if [ -n "$tasks" ] && [ "$tasks" != "[]" ]; then
+                has_completions="true"
+              fi
+              echo "llm-has-completions=${has_completions}"
+            } >> "$GITHUB_OUTPUT"
+          else
+            {
+              echo "llm-analysis-run=false"
+              echo "llm-provider="
+              echo "llm-model="
+              echo "llm-confidence="
+              echo "llm-completed-tasks=[]"
+              echo "llm-has-completions=false"
+            } >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Post completion checkpoint comment
+        id: completion_comment
+        if: steps.commit.outputs.changes-made == 'true' && inputs.pr_number != ''
+        uses: actions/github-script@v8
+        env:
+          PR_NUMBER: ${{ inputs.pr_number }}
+          COMMIT_SHA: ${{ steps.commit.outputs.commit-sha }}
+          ITERATION: ${{ inputs.iteration || '' }}
+        with:
+          script: |
+            // Try .workflows-lib first (consumer repos), fall back to local copy
+            const fs = require('fs');
+            const modulePath = fs.existsSync('./.workflows-lib/.github/scripts/post_completion_comment.js')
+              ? './.workflows-lib/.github/scripts/post_completion_comment.js'
+              : './.github/scripts/post_completion_comment.js';
+            const { postCompletionComment } = require(modulePath);
+
+            // Determine prompt file — Claude uses claude-prompt*.md
+            const prNumber = process.env.PR_NUMBER || '';
+            let promptFile = 'claude-prompt.md';
+            if (prNumber) {
+              const prSpecific = `claude-prompt-${prNumber}.md`;
+              if (fs.existsSync(prSpecific)) {
+                promptFile = prSpecific;
+              }
+            }
+            // Fall back to codex-prompt*.md if claude variant not found
+            if (!fs.existsSync(promptFile)) {
+              const codexPrompt = prNumber ? `codex-prompt-${prNumber}.md` : 'codex-prompt.md';
+              if (fs.existsSync(codexPrompt)) {
+                promptFile = codexPrompt;
+              }
+            }
+
+            const result = await postCompletionComment({
+              github, context, core,
+              inputs: {
+                pr_number: process.env.PR_NUMBER,
+                commit_sha: process.env.COMMIT_SHA,
+                iteration: process.env.ITERATION,
+                prompt_file: promptFile,
+              },
+            });
+            core.setOutput('posted', result.posted ? 'true' : 'false');
+            core.setOutput('tasks', String(result.tasks || 0));
+            core.setOutput('acceptance', String(result.acceptance || 0));
+            if (result.posted) {
+              core.info(
+                `Posted completion checkpoint: ${result.tasks} tasks, ` +
+                  `${result.acceptance} acceptance criteria`,
+              );
+            }
 
       - name: Classify failure
         id: classify_failure
@@ -1275,7 +1466,207 @@ jobs:
               errorInfo.category === ERROR_CATEGORIES.transient ? 'true' : 'false';
             core.setOutput('is_transient', isTransient);
 
+            core.setOutput('error_summary', summary || '');
+
             console.log(`Error Classification:`);
             console.log(`  Category: ${errorInfo.category}`);
             console.log(`  Type: ${errorType}`);
             console.log(`  Recovery: ${errorInfo.recovery}`);
+
+      - name: Write error summary to GITHUB_STEP_SUMMARY
+        if: always() && steps.run_claude.outputs.exit-code != '0'
+        env:
+          EXIT_CODE: ${{ steps.run_claude.outputs.exit-code }}
+          OUTPUT_SUMMARY: ${{ steps.run_claude.outputs.final-message-summary }}
+          ERROR_CATEGORY: ${{ steps.classify_failure.outputs.error_category }}
+          ERROR_TYPE: ${{ steps.classify_failure.outputs.error_type }}
+          ERROR_RECOVERY: ${{ steps.classify_failure.outputs.error_recovery }}
+          MODE: ${{ inputs.mode }}
+          PR_NUMBER: ${{ inputs.pr_number }}
+        run: |
+          set -euo pipefail
+          {
+            echo "## Claude Run Failed"
+            echo ""
+            echo "| Field | Value |"
+            echo "|-------|-------|"
+            echo "| Mode | ${MODE:-unknown} |"
+            echo "| Exit Code | ${EXIT_CODE:-unknown} |"
+            echo "| Error Category | ${ERROR_CATEGORY:-unknown} |"
+            echo "| Error Type | ${ERROR_TYPE:-unknown} |"
+            if [ -n "${PR_NUMBER:-}" ]; then
+              echo "| PR | #${PR_NUMBER} |"
+            fi
+            echo ""
+            echo "### Recovery Guidance"
+            echo ""
+            echo "${ERROR_RECOVERY:-Check logs for more details.}"
+            echo ""
+            if [ -n "${OUTPUT_SUMMARY:-}" ]; then
+              echo "### Output Summary"
+              echo ""
+              echo '```'
+              echo "${OUTPUT_SUMMARY}"
+              echo '```'
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Create error diagnostics artifact
+        if: always() && steps.run_claude.outputs.exit-code != '0'
+        env:
+          EXIT_CODE: ${{ steps.run_claude.outputs.exit-code }}
+          OUTPUT_SUMMARY: ${{ steps.run_claude.outputs.final-message-summary }}
+          ERROR_CATEGORY: ${{ steps.classify_failure.outputs.error_category }}
+          ERROR_TYPE: ${{ steps.classify_failure.outputs.error_type }}
+          ERROR_RECOVERY: ${{ steps.classify_failure.outputs.error_recovery }}
+          IS_TRANSIENT: ${{ steps.classify_failure.outputs.is_transient }}
+          MODE: ${{ inputs.mode }}
+          PR_NUMBER: ${{ inputs.pr_number }}
+          RUN_URL: >-
+            ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          set -euo pipefail
+          mkdir -p error-diagnostics
+
+          # Serialize with json.dump so quotes, backslashes and newlines in the
+          # env-provided values are escaped instead of producing invalid JSON.
+          RUN_ID="${{ github.run_id }}" python3 - <<'PY'
+          import datetime, json, os
+          env = lambda key, default="": os.environ.get(key) or default  # ${VAR:-default}
+          now = datetime.datetime.now(datetime.timezone.utc)
+          doc = {"timestamp": now.strftime("%Y-%m-%dT%H:%M:%SZ"), "run_id": env("RUN_ID"),
+                 "run_url": env("RUN_URL"), "agent": "claude", "mode": env("MODE", "unknown"),
+                 "pr_number": env("PR_NUMBER"), "exit_code": env("EXIT_CODE", "unknown"),
+                 "error_category": env("ERROR_CATEGORY", "unknown"),
+                 "error_type": env("ERROR_TYPE", "unknown"),
+                 "is_transient": env("IS_TRANSIENT", "false") == "true",
+                 "recovery_guidance": env("ERROR_RECOVERY", "unknown")}
+          with open("error-diagnostics/diagnostics.json", "w", encoding="utf-8") as out:
+              json.dump(doc, out, indent=2)
+          PY
+
+          # Copy the first claude-output*.md if one exists (best effort, never fails the step)
+          for f in claude-output*.md; do
+            [ -f "$f" ] && cp "$f" error-diagnostics/ && break
+          done 2>/dev/null || true
+
+          echo "Created error diagnostics in error-diagnostics/"
+
+      - name: Upload error diagnostics
+        if: always() && steps.run_claude.outputs.exit-code != '0'
+        uses: actions/upload-artifact@v6
+        with:
+          name: error-diagnostics-${{ inputs.mode }}-${{ github.run_id }}
+          path: error-diagnostics/
+          retention-days: 30
+
+      - name: Post PR comment on non-transient failure
+        if: >-
+          always() && steps.run_claude.outputs.exit-code != '0' &&
+          steps.classify_failure.outputs.is_transient != 'true' && inputs.pr_number != ''
+        uses: actions/github-script@v8
+        env:
+          PR_NUMBER: ${{ inputs.pr_number }}
+          EXIT_CODE: ${{ steps.run_claude.outputs.exit-code }}
+          ERROR_CATEGORY: ${{ steps.classify_failure.outputs.error_category }}
+          ERROR_TYPE: ${{ steps.classify_failure.outputs.error_type }}
+          ERROR_RECOVERY: ${{ steps.classify_failure.outputs.error_recovery }}
+          OUTPUT_SUMMARY: ${{ steps.run_claude.outputs.final-message-summary }}
+          MODE: ${{ inputs.mode }}
+          RUN_URL: >-
+            ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        with:
+          script: |
+            const fs = require('fs');
+            let withRetry;
+            const retryPath = './.workflows-lib/.github/scripts/github-api-with-retry.js';
+            const localRetryPath = './.github/scripts/github-api-with-retry.js';
+            if (fs.existsSync(retryPath)) {
+              ({ withRetry } = require(retryPath));
+            } else if (fs.existsSync(localRetryPath)) {
+              ({ withRetry } = require(localRetryPath));
+            } else {
+              // Inline fallback: single attempt, no retry
+              withRetry = (fn) => fn();
+            }
+
+            const prNumber = parseInt(process.env.PR_NUMBER, 10);
+            if (!prNumber || prNumber <= 0) {
+              console.log('No valid PR number, skipping comment');
+              return;
+            }
+
+            const exitCode = process.env.EXIT_CODE || 'unknown';
+            const category = process.env.ERROR_CATEGORY || 'unknown';
+            const errorType = process.env.ERROR_TYPE || 'unknown';
+            const recovery = process.env.ERROR_RECOVERY || 'Check logs for details.';
+            const summary = process.env.OUTPUT_SUMMARY || 'No output captured';
+            const mode = process.env.MODE || 'unknown';
+            const runUrl = process.env.RUN_URL || '';
+
+            const marker = '<!-- claude-failure-notification -->';
+
+            const body = `${marker}
+            ## Claude ${mode} run failed
+
+            | Field | Value |
+            |-------|-------|
+            | Exit Code | \`${exitCode}\` |
+            | Error Category | \`${category}\` |
+            | Error Type | \`${errorType}\` |
+            | Run | [View logs](${runUrl}) |
+
+            ### Suggested Recovery
+
+            ${recovery}
+
+            ### What to do
+
+            1. Check the [workflow logs](${runUrl}) for detailed error output
+            2. If this is a configuration issue, update the relevant settings
+            3. If the error persists, consider adding the \`needs-human\` label for manual review
+            4. Re-run the workflow once the issue is resolved
+
+            <details>
+            <summary>Output summary</summary>
+
+            \`\`\`
+            ${summary.slice(0, 500)}
+            \`\`\`
+
+            </details>
+            `.trim().split('\n').map(l => l.trim()).join('\n');
+
+            // Check if we already have a failure comment
+            const { data: comments } = await withRetry(() =>
+              github.rest.issues.listComments({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                per_page: 100,
+              })
+            );
+
+            const existingComment = comments.find(c => c.body && c.body.includes(marker));
+
+            if (existingComment) {
+              await withRetry(() =>
+                github.rest.issues.updateComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  comment_id: existingComment.id,
+                  body,
+                })
+              );
+              console.log(`Updated existing failure comment: ${existingComment.html_url}`);
+            } else {
+              const { data: newComment } = await withRetry(() =>
+                github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: prNumber,
+                  body,
+                })
+              );
+              console.log(`Created failure comment: ${newComment.html_url}`);
+            }

From 92809586dc13721dd7fcf4c259ecd2c9720c3c9f Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 26 Feb 2026 03:21:00 +0000
Subject: [PATCH 10/12] fix: address code review feedback from Codex and
 Copilot

Claude runner (reusable-claude-run.yml):
- Fix shell quoting of completed-tasks JSON by using env vars instead
  of inline ${{ }} expansion which breaks on apostrophes in task names
- Declare OPENAI_API_KEY and CLAUDE_API_STRANSKE in workflow_call.secrets
  so callers can pass them (matches Codex runner)
- Use printf instead of echo when writing PR body to disk to avoid
  mangling of -n/-e prefixes or backslashes
- Add info log when falling back to codex-prompt file

Codex runner (reusable-codex-run.yml):
- Gate watchdog-saved=true on actual push success instead of emitting
  it unconditionally after push attempts that may have both failed
- Use a fired-flag file so the watchdog kill only terminates the
  background process if it's still sleeping (hasn't started its
  commit/push work yet)

https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6
---
 .github/workflows/reusable-claude-run.yml | 42 ++++++++++++++++-------
 .github/workflows/reusable-codex-run.yml  | 26 +++++++++++---
 2 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/reusable-claude-run.yml b/.github/workflows/reusable-claude-run.yml
index 9827d6819..9e1517c26 100644
--- a/.github/workflows/reusable-claude-run.yml
+++ b/.github/workflows/reusable-claude-run.yml
@@ -105,6 +105,13 @@ on:
         required: false
       WORKFLOWS_APP_PRIVATE_KEY:
         required: false
+      OPENAI_API_KEY:
+        required: false
+        description: >-
+          OpenAI API key for LLM analysis (enables model selection beyond GitHub Models)
+      CLAUDE_API_STRANSKE:
+        required: false
+        description: 'Anthropic API key for LLM analysis (enables Claude slot)'
     outputs:
       final-message:
         description: 'Full Claude output message (base64 encoded)'
@@ -1270,7 +1277,7 @@ jobs:
           fi
 
           # Save PR body to temp file
-          echo "$PR_BODY" > pr_body.md
+          printf '%s' "$PR_BODY" > pr_body.md
 
           # Run full LLM analysis and save JSON output
           # The analyze_codex_session.py script auto-detects text vs JSONL input,
@@ -1331,18 +1338,23 @@ jobs:
       - name: Compatibility outputs (LLM analysis)
         id: compat
         if: always()
+        env:
+          LLM_RAN: ${{ steps.llm_analysis.outputs.llm-analysis-run }}
+          LLM_PROVIDER: ${{ steps.llm_analysis.outputs.provider }}
+          LLM_MODEL: ${{ steps.llm_analysis.outputs.model }}
+          LLM_CONFIDENCE: ${{ steps.llm_analysis.outputs.confidence }}
+          LLM_COMPLETED_TASKS: ${{ steps.llm_analysis.outputs.completed-tasks }}
         run: |
           # If LLM analysis ran, forward its outputs; otherwise emit placeholders.
-          if [ "${{ steps.llm_analysis.outputs.llm-analysis-run }}" = "true" ]; then
+          if [ "${LLM_RAN}" = "true" ]; then
             {
               echo "llm-analysis-run=true"
-              echo "llm-provider=${{ steps.llm_analysis.outputs.provider }}"
-              echo "llm-model=${{ steps.llm_analysis.outputs.model }}"
-              echo "llm-confidence=${{ steps.llm_analysis.outputs.confidence }}"
-              echo "llm-completed-tasks=${{ steps.llm_analysis.outputs.completed-tasks }}"
+              echo "llm-provider=${LLM_PROVIDER}"
+              echo "llm-model=${LLM_MODEL}"
+              echo "llm-confidence=${LLM_CONFIDENCE}"
+              echo "llm-completed-tasks=${LLM_COMPLETED_TASKS}"
               has_completions="false"
-              tasks='${{ steps.llm_analysis.outputs.completed-tasks }}'
-              if [ -n "$tasks" ] && [ "$tasks" != "[]" ]; then
+              if [ -n "${LLM_COMPLETED_TASKS}" ] && [ "${LLM_COMPLETED_TASKS}" != "[]" ]; then
                 has_completions="true"
               fi
               echo "llm-has-completions=${has_completions}"
@@ -1375,7 +1387,9 @@ jobs:
               : './.github/scripts/post_completion_comment.js';
             const { postCompletionComment } = require(modulePath);
 
-            // Determine prompt file — Claude uses claude-prompt*.md
+            // Determine prompt file — prefer PR-specific variant, then generic.
+            // The prompt file name is passed to postCompletionComment which uses
+            // it as the base name; it also checks for PR-specific variants internally.
             const prNumber = process.env.PR_NUMBER || '';
             let promptFile = 'claude-prompt.md';
             if (prNumber) {
@@ -1384,11 +1398,13 @@ jobs:
                 promptFile = prSpecific;
               }
             }
-            // Fall back to codex-prompt*.md if claude variant not found
+            // Also check codex-prompt as shared belt PRs use that naming.
+            // Only fall back when no claude-prompt variant exists at all.
             if (!fs.existsSync(promptFile)) {
-              const codexPrompt = prNumber ? `codex-prompt-${prNumber}.md` : 'codex-prompt.md';
-              if (fs.existsSync(codexPrompt)) {
-                promptFile = codexPrompt;
+              const codexFallback = prNumber ? `codex-prompt-${prNumber}.md` : 'codex-prompt.md';
+              if (fs.existsSync(codexFallback)) {
+                core.info(`No claude-prompt file found; using ${codexFallback}`);
+                promptFile = codexFallback;
               }
             }
 
diff --git a/.github/workflows/reusable-codex-run.yml b/.github/workflows/reusable-codex-run.yml
index f5dfa67be..c69303a34 100644
--- a/.github/workflows/reusable-codex-run.yml
+++ b/.github/workflows/reusable-codex-run.yml
@@ -951,8 +951,10 @@ jobs:
           WATCHDOG_DELAY=$(( (MAX_RUNTIME_MIN - GRACE_MIN) * 60 ))
           echo "watchdog-saved=false" >> "$GITHUB_OUTPUT"
           if [ "$WATCHDOG_DELAY" -gt 60 ]; then
+            WATCHDOG_FIRED_FLAG="/tmp/.watchdog-fired-$$"
             (
               sleep "$WATCHDOG_DELAY"
+              touch "$WATCHDOG_FIRED_FLAG"
               echo "::warning::Pre-timeout watchdog fired (${GRACE_MIN}m before ${MAX_RUNTIME_MIN}m limit)"
 
               TARGET_BRANCH="${{ inputs.pr_ref }}"
@@ -987,7 +989,10 @@ jobs:
                   fi
                 fi
                 # Push with one retry
-                if ! git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null; then
+                watchdog_push_ok=false
+                if git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null; then
+                  watchdog_push_ok=true
+                else
                   echo "::warning::Watchdog push failed (attempt 1), retrying after fetch/rebase..."
                   sleep 3
                   git fetch "${REMOTE_URL}" "${TARGET_BRANCH}" 2>/dev/null || true
@@ -998,10 +1003,18 @@ jobs:
                         --allow-unrelated-histories 2>/dev/null || true
                     }
                   fi
-                  git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null || \
+                  if git push "${REMOTE_URL}" "HEAD:${TARGET_BRANCH}" 2>/dev/null; then
+                    watchdog_push_ok=true
+                  else
                     echo "::warning::Watchdog push failed after retry"
+                  fi
+                fi
+                if [ "$watchdog_push_ok" = "true" ]; then
+                  echo "watchdog-saved=true" >> "$GITHUB_OUTPUT"
+                else
+                  echo "::error::Watchdog: committed locally but failed to push"
+                  echo "watchdog-saved=false" >> "$GITHUB_OUTPUT"
                 fi
-                echo "watchdog-saved=true" >> "$GITHUB_OUTPUT"
               else
                 echo "::notice::Watchdog: no uncommitted or unpushed work to save"
               fi
@@ -1026,9 +1039,12 @@ jobs:
           prompt_content="$(cat "$PROMPT_FILE")"
           "${cmd[@]}" "$prompt_content" > "$SESSION_JSONL" 2>&1 || CODEX_EXIT=$?
 
-          # Kill watchdog if Codex finished before the timer fired
+          # Kill watchdog only if it hasn't fired yet. If it has already
+          # fired (flag file exists), it may be committing/pushing — let it finish.
           if [ -n "${WATCHDOG_PID:-}" ]; then
-            kill "$WATCHDOG_PID" 2>/dev/null || true
+            if [ ! -f "${WATCHDOG_FIRED_FLAG:-/tmp/.no-such-flag}" ]; then
+              kill "$WATCHDOG_PID" 2>/dev/null || true
+            fi
             wait "$WATCHDOG_PID" 2>/dev/null || true
           fi
 

From 65e89c51a3b1c1003daf8596e319df12844fcd44 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 26 Feb 2026 03:53:16 +0000
Subject: [PATCH 11/12] fix: address sync PR review feedback from coding agents

- Remove "task" from the non-issue prefix filter in
  extractIssueNumberFromPull so "Task #123" is correctly treated as
  an issue reference (flagged by Codex on PAEM sync PR)
- Make --legacy-peer-deps retry conditional on ERESOLVE/peer-dep
  errors instead of only firing on the first attempt (flagged by
  Copilot on TMP sync PR)
- Add test for "Task #N" being treated as a valid issue ref

https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6
---
 .github/actions/setup-api-client/action.yml                 | 6 +++---
 .github/scripts/__tests__/agents-pr-meta-keepalive.test.js  | 5 +++++
 .github/scripts/agents_pr_meta_keepalive.js                 | 2 +-
 .../.github/actions/setup-api-client/action.yml             | 6 +++---
 .../.github/scripts/agents_pr_meta_keepalive.js             | 2 +-
 5 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/.github/actions/setup-api-client/action.yml b/.github/actions/setup-api-client/action.yml
index 5fead358b..b9539f4a9 100644
--- a/.github/actions/setup-api-client/action.yml
+++ b/.github/actions/setup-api-client/action.yml
@@ -271,9 +271,9 @@ runs:
             echo "$npm_err"
             echo "::endgroup::"
 
-            # On first failure, also try --legacy-peer-deps in case it's a peer dep conflict
-            if [ "$attempt" -eq 1 ]; then
-              echo "::warning::Retrying with --legacy-peer-deps"
+            # On peer-dep / ERESOLVE failures, also try --legacy-peer-deps
+            if echo "$npm_err" | grep -qiE 'ERESOLVE|peer dep|Could not resolve dependency'; then
+              echo "::warning::Detected peer dependency conflict, retrying with --legacy-peer-deps"
               npm_output=$(mktemp)
               if npm install --no-save --legacy-peer-deps --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then
                 rm -f "$npm_output"
diff --git a/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js b/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js
index 64cec070d..e653529bb 100644
--- a/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js
+++ b/.github/scripts/__tests__/agents-pr-meta-keepalive.test.js
@@ -590,6 +590,11 @@ test('extractIssueNumberFromPull skips "step #N" in body', () => {
   assert.equal(extractIssueNumberFromPull(pull), null);
 });
 
+test('extractIssueNumberFromPull treats "Task #N" as a valid issue ref', () => {
+  const pull = { body: 'Task #42 is ready for review', head: { ref: 'feature' }, title: 'stuff' };
+  assert.equal(extractIssueNumberFromPull(pull), 42);
+});
+
 test('extractIssueNumberFromPull skips "version #N" in body', () => {
   const pull = { body: 'Upgraded to version #4', head: { ref: 'feature' }, title: 'stuff' };
   assert.equal(extractIssueNumberFromPull(pull), null);
diff --git a/.github/scripts/agents_pr_meta_keepalive.js b/.github/scripts/agents_pr_meta_keepalive.js
index 10eab2c81..32cfa95c8 100644
--- a/.github/scripts/agents_pr_meta_keepalive.js
+++ b/.github/scripts/agents_pr_meta_keepalive.js
@@ -242,7 +242,7 @@ function extractIssueNumberFromPull(pull) {
     }
     // Skip non-issue refs like "Run #123", "run #123", "attempt #2"
     const preceding = bodyText.slice(Math.max(0, match.index - 20), match.index);
-    if (/\b(?:run|attempt|step|job|check|task|version|v)\s*$/i.test(preceding)) {
+    if (/\b(?:run|attempt|step|job|check|version|v)\s*$/i.test(preceding)) {
       continue;
     }
     candidates.push(match[1]);
diff --git a/templates/consumer-repo/.github/actions/setup-api-client/action.yml b/templates/consumer-repo/.github/actions/setup-api-client/action.yml
index 24736497c..b6ed888ad 100644
--- a/templates/consumer-repo/.github/actions/setup-api-client/action.yml
+++ b/templates/consumer-repo/.github/actions/setup-api-client/action.yml
@@ -271,9 +271,9 @@ runs:
             echo "$npm_err"
             echo "::endgroup::"
 
-            # On first failure, also try --legacy-peer-deps in case it's a peer dep conflict
-            if [ "$attempt" -eq 1 ]; then
-              echo "::warning::Retrying with --legacy-peer-deps"
+            # On peer-dep / ERESOLVE failures, also try --legacy-peer-deps
+            if echo "$npm_err" | grep -qiE 'ERESOLVE|peer dep|Could not resolve dependency'; then
+              echo "::warning::Detected peer dependency conflict, retrying with --legacy-peer-deps"
               npm_output=$(mktemp)
               if npm install --no-save --legacy-peer-deps --location=project "${NPM_PACKAGES[@]}" 2>"$npm_output"; then
                 rm -f "$npm_output"
diff --git a/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js b/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js
index 10eab2c81..32cfa95c8 100644
--- a/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js
+++ b/templates/consumer-repo/.github/scripts/agents_pr_meta_keepalive.js
@@ -242,7 +242,7 @@ function extractIssueNumberFromPull(pull) {
     }
     // Skip non-issue refs like "Run #123", "run #123", "attempt #2"
     const preceding = bodyText.slice(Math.max(0, match.index - 20), match.index);
-    if (/\b(?:run|attempt|step|job|check|task|version|v)\s*$/i.test(preceding)) {
+    if (/\b(?:run|attempt|step|job|check|version|v)\s*$/i.test(preceding)) {
       continue;
     }
     candidates.push(match[1]);

From 9e89707e896fde87d63bf2b99ad46a9e52162a4b Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 26 Feb 2026 05:13:51 +0000
Subject: [PATCH 12/12] fix: install js-yaml locally instead of globally in
 label sync workflow

The label sync workflow (maint-69-sync-labels.yml) has been failing
since Feb 2 because npm install -g js-yaml installs to the global
prefix which actions/github-script can't resolve. Install locally
so Node's module resolution finds it in node_modules/.

https://claude.ai/code/session_01JhCWWDJG8PqwaSbVPCGfm6
---
 .github/workflows/maint-69-sync-labels.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/maint-69-sync-labels.yml b/.github/workflows/maint-69-sync-labels.yml
index 965c4898f..8f73338eb 100644
--- a/.github/workflows/maint-69-sync-labels.yml
+++ b/.github/workflows/maint-69-sync-labels.yml
@@ -40,7 +40,7 @@ jobs:
           github_token: ${{ github.token }}
 
       - name: Install js-yaml
-        run: npm install -g js-yaml
+        run: npm install js-yaml
 
       - name: Parse labels-core.yml
         id: parse