diff --git a/.github/scripts/__tests__/agents-verifier-context.test.js b/.github/scripts/__tests__/agents-verifier-context.test.js index 4a6b69397..88479f200 100644 --- a/.github/scripts/__tests__/agents-verifier-context.test.js +++ b/.github/scripts/__tests__/agents-verifier-context.test.js @@ -424,16 +424,22 @@ test('buildVerifierContext selects CI results for the merge commit SHA', async ( workflow_name: 'Gate', conclusion: 'success', run_url: 'https://ci/gate-merge', + error_category: '', + error_message: '', }, { workflow_name: 'Selftest CI', conclusion: 'success', run_url: 'https://ci/selftest-merge', + error_category: '', + error_message: '', }, { workflow_name: 'PR 11 - Minimal invariant CI', conclusion: 'success', run_url: 'https://ci/pr11-merge', + error_category: '', + error_message: '', }, ]); @@ -546,16 +552,22 @@ test('buildVerifierContext falls back to head SHA when merge runs are missing', workflow_name: 'Gate', conclusion: 'success', run_url: 'https://ci/pr-00-gate.yml', + error_category: '', + error_message: '', }, { workflow_name: 'Selftest CI', conclusion: 'success', run_url: 'https://ci/selftest-ci.yml', + error_category: '', + error_message: '', }, { workflow_name: 'PR 11 - Minimal invariant CI', conclusion: 'success', run_url: 'https://ci/pr-11-ci-smoke.yml', + error_category: '', + error_message: '', }, ]); for (const workflowId of workflowIds) { @@ -627,16 +639,22 @@ test('buildVerifierContext uses merge commit SHA for push events', async () => { workflow_name: 'Gate', conclusion: 'success', run_url: 'https://ci/gate-push', + error_category: '', + error_message: '', }, { workflow_name: 'Selftest CI', conclusion: 'success', run_url: 'https://ci/selftest-push', + error_category: '', + error_message: '', }, { workflow_name: 'PR 11 - Minimal invariant CI', conclusion: 'success', run_url: 'https://ci/pr11-push', + error_category: '', + error_message: '', }, ]); diff --git a/.github/scripts/__tests__/verifier-ci-query.test.js b/.github/scripts/__tests__/verifier-ci-query.test.js index 2b29a91ee..1f39b38ec 100644 --- a/.github/scripts/__tests__/verifier-ci-query.test.js +++ b/.github/scripts/__tests__/verifier-ci-query.test.js @@ -14,7 +14,9 @@ const buildGithubStub = ({ actions: { async listWorkflowRuns({ workflow_id: workflowId, head_sha: headSha }) { if (errorWorkflow && workflowId === errorWorkflow) { - throw new Error('boom'); + const error = new Error('boom'); + error.status = 404; + throw error; } if (listWorkflowRunsHook) { const hooked = await listWorkflowRunsHook({ workflow_id: workflowId, head_sha: headSha }); @@ -59,16 +61,22 @@ test('queryVerifierCiResults selects runs and reports conclusions', async () => workflow_name: 'Gate', conclusion: 'success', run_url: 'gate-url', + error_category: '', + error_message: '', }); assert.deepEqual(results[1], { workflow_name: 'Selftest CI', conclusion: 'in_progress', run_url: 'selftest-url', + error_category: '', + error_message: '', }); assert.deepEqual(results[2], { workflow_name: 'PR 11', conclusion: 'not_found', run_url: '', + error_category: '', + error_message: '', }); }); @@ -97,11 +105,13 @@ test('queryVerifierCiResults supports workflowId/workflowName aliases', async () workflow_name: 'Selftest CI', conclusion: 'success', run_url: 'selftest-alias-url', + error_category: '', + error_message: '', }, ]); }); -test('queryVerifierCiResults treats query errors as not_found', async () => { +test('queryVerifierCiResults treats query errors as api_error', async () => { const github = buildGithubStub({ errorWorkflow: 'pr-00-gate.yml' }); const context = { repo: { owner: 'octo', repo: 'workflows' } }; const workflows = [{ workflow_name: 'Gate', workflow_id: 'pr-00-gate.yml' }]; @@ -116,8 +126,10 @@ test('queryVerifierCiResults treats query errors as not_found', async () => { assert.deepEqual(results, [ { workflow_name: 'Gate', - conclusion: 'not_found', + conclusion: 'api_error', run_url: '', + error_category: 'resource', + error_message: 'listWorkflowRuns:pr-00-gate.yml failed after 1 attempt(s): boom', }, ]); }); @@ -145,6 +157,8 @@ test('queryVerifierCiResults uses latest run when no target SHA is provided', as workflow_name: 'Gate', conclusion: 'success', run_url: 'gate-latest-url', + error_category: '', + error_message: '', }, ]); }); @@ -184,6 +198,8 @@ test('queryVerifierCiResults falls back to secondary SHA when primary has no run workflow_name: 'Gate', conclusion: 'success', run_url: 'head-url', + error_category: '', + error_message: '', }, ]); assert.deepEqual(headShas, ['merge-sha', 'head-sha']); @@ -216,16 +232,22 @@ test('queryVerifierCiResults falls back to default workflows', async () => { workflow_name: 'Gate', conclusion: 'success', run_url: 'gate-default-url', + error_category: '', + error_message: '', }, { workflow_name: 'Selftest CI', conclusion: 'failure', run_url: 'selftest-default-url', + error_category: '', + error_message: '', }, { workflow_name: 'PR 11 - Minimal invariant CI', conclusion: 'success', run_url: 'pr11-default-url', + error_category: '', + error_message: '', }, ]); }); @@ -251,6 +273,8 @@ test('queryVerifierCiResults uses API url when html_url is missing', async () => workflow_name: 'Gate', conclusion: 'success', run_url: 'api-url', + error_category: '', + error_message: '', }, ]); }); @@ -276,6 +300,89 @@ test('queryVerifierCiResults treats completed runs without conclusion as unknown workflow_name: 'Gate', conclusion: 'unknown', run_url: 'gate-url', + error_category: '', + error_message: '', + }, + ]); +}); + +test('queryVerifierCiResults retries transient errors and returns success', async () => { + let attempts = 0; + const warnings = []; + const github = buildGithubStub({ + listWorkflowRunsHook: async () => { + attempts += 1; + if (attempts < 3) { + const error = new Error('Service unavailable'); + error.status = 503; + throw error; + } + return { + data: { + workflow_runs: [ + { head_sha: 'retry-sha', conclusion: 'success', html_url: 'retry-url' }, + ], + }, + }; + }, + }); + const context = { repo: { owner: 'octo', repo: 'workflows' } }; + const workflows = [{ workflow_name: 'Gate', workflow_id: 'pr-00-gate.yml' }]; + + const results = await queryVerifierCiResults({ + github, + context, + targetSha: 'retry-sha', + workflows, + core: { warning: (message) => warnings.push(String(message)) }, + retryOptions: { sleepFn: async () => {} }, + }); + + assert.equal(attempts, 3); + assert.equal(warnings.length, 2); + assert.deepEqual(results, [ + { + workflow_name: 'Gate', + conclusion: 'success', + run_url: 'retry-url', + error_category: '', + error_message: '', + }, + ]); +}); + +test('queryVerifierCiResults returns api_error after max retries', async () => { + let attempts = 0; + const warnings = []; + const github = buildGithubStub({ + listWorkflowRunsHook: async () => { + attempts += 1; + const error = new Error('timeout'); + error.status = 504; + throw error; + }, + }); + const context = { repo: { owner: 'octo', repo: 'workflows' } }; + const workflows = [{ workflow_name: 'Gate', workflow_id: 'pr-00-gate.yml' }]; + + const results = await queryVerifierCiResults({ + github, + context, + targetSha: 'retry-sha', + workflows, + core: { warning: (message) => warnings.push(String(message)) }, + retryOptions: { sleepFn: async () => {} }, + }); + + assert.equal(attempts, 4); + assert.equal(warnings.length, 4); + assert.deepEqual(results, [ + { + workflow_name: 'Gate', + conclusion: 'api_error', + run_url: '', + error_category: 'transient', + error_message: 'listWorkflowRuns:pr-00-gate.yml failed after 4 attempt(s): timeout', }, ]); }); diff --git a/.github/scripts/verifier_ci_query.js b/.github/scripts/verifier_ci_query.js index 17bdc3876..19a833290 100644 --- a/.github/scripts/verifier_ci_query.js +++ b/.github/scripts/verifier_ci_query.js @@ -1,11 +1,16 @@ 'use strict'; +const { classifyError, ERROR_CATEGORIES } = require('./error_classifier'); + const DEFAULT_WORKFLOWS = [ { workflow_name: 'Gate', workflow_id: 'pr-00-gate.yml' }, { workflow_name: 'Selftest CI', workflow_id: 'selftest-ci.yml' }, { workflow_name: 'PR 11 - Minimal invariant CI', workflow_id: 'pr-11-ci-smoke.yml' }, ]; +const DEFAULT_RETRY_DELAYS_MS = [1000, 2000, 4000]; +const DEFAULT_MAX_RETRIES = DEFAULT_RETRY_DELAYS_MS.length; + function normalizeConclusion(run) { if (!run) { return 'not_found'; @@ -19,42 +24,114 @@ function normalizeConclusion(run) { return 'unknown'; } -async function fetchWorkflowRun({ github, owner, repo, workflowId, headShas, core }) { +function getErrorCategory(error) { + if (error && error.category) { + return error.category; + } + return classifyError(error).category; +} + +async function sleep(ms) { + await new Promise((resolve) => setTimeout(resolve, ms)); +} + +function buildRetryError(error, category, label, attempts) { + const message = error?.message || 'Unknown error'; + const retryError = new Error(`${label} failed after ${attempts} attempt(s): ${message}`); + retryError.cause = error; + retryError.category = category; + return retryError; +} + +async function withRetry(apiCall, options = {}) { + const { + label = 'GitHub API call', + delays = DEFAULT_RETRY_DELAYS_MS, + core = null, + sleepFn = sleep, + } = options; + + let lastError = null; + for (let attempt = 0; attempt <= delays.length; attempt += 1) { + try { + return await apiCall(); + } catch (error) { + lastError = error; + const category = getErrorCategory(error); + const canRetry = category === ERROR_CATEGORIES.transient && attempt < delays.length; + + if (!canRetry) { + throw buildRetryError(error, category, label, attempt + 1); + } + + const delayMs = delays[attempt]; + if (core?.warning) { + core.warning( + `Retrying ${label}; category=${category} attempt=${attempt + 1}/${delays.length + 1} delayMs=${delayMs}` + ); + } + await sleepFn(delayMs); + } + } + + throw buildRetryError(lastError || new Error('Unknown error'), ERROR_CATEGORIES.unknown, label, delays.length + 1); +} + +async function fetchWorkflowRun({ + github, + owner, + repo, + workflowId, + headShas, + core, + retryOptions, +}) { const candidates = Array.isArray(headShas) ? headShas.map((sha) => String(sha || '').trim()).filter(Boolean) : []; try { if (!candidates.length) { - const response = await github.rest.actions.listWorkflowRuns({ - owner, - repo, - workflow_id: workflowId, - per_page: 10, - }); + const response = await withRetry( + () => + github.rest.actions.listWorkflowRuns({ + owner, + repo, + workflow_id: workflowId, + per_page: 10, + }), + { label: `listWorkflowRuns:${workflowId}`, core, ...retryOptions } + ); const runs = response?.data?.workflow_runs || []; - return runs[0] || null; + return { run: runs[0] || null, error: null }; } for (const sha of candidates) { - const response = await github.rest.actions.listWorkflowRuns({ - owner, - repo, - workflow_id: workflowId, - head_sha: sha, - per_page: 10, - }); + const response = await withRetry( + () => + github.rest.actions.listWorkflowRuns({ + owner, + repo, + workflow_id: workflowId, + head_sha: sha, + per_page: 10, + }), + { label: `listWorkflowRuns:${workflowId}`, core, ...retryOptions } + ); const runs = response?.data?.workflow_runs || []; if (!runs.length) { continue; } const exact = runs.find((run) => run.head_sha === sha); - return exact || runs[0]; + return { run: exact || runs[0], error: null }; } - return null; + return { run: null, error: null }; } catch (error) { - core?.warning?.(`Failed to fetch workflow runs for ${workflowId}: ${error.message}`); - return null; + const category = getErrorCategory(error); + core?.warning?.( + `Failed to fetch workflow runs for ${workflowId}: ${error.message}; category=${category}` + ); + return { run: null, error: { category, message: error.message } }; } } @@ -65,6 +142,7 @@ async function queryVerifierCiResults({ targetSha, targetShas, workflows, + retryOptions, } = {}) { const { owner, repo } = context.repo; const candidates = []; @@ -86,18 +164,22 @@ async function queryVerifierCiResults({ for (const target of targets) { const workflowId = target.workflow_id || target.workflowId; const workflowName = target.workflow_name || target.workflowName || workflowId || 'workflow'; - const run = await fetchWorkflowRun({ + const { run, error } = await fetchWorkflowRun({ github, owner, repo, workflowId, headShas: candidates, core, + retryOptions, }); + const conclusion = error ? 'api_error' : normalizeConclusion(run); results.push({ workflow_name: workflowName, - conclusion: normalizeConclusion(run), + conclusion, run_url: run?.html_url || run?.url || '', + error_category: error?.category || '', + error_message: error?.message || '', }); } diff --git a/agents/codex-164.md b/agents/codex-164.md new file mode 100644 index 000000000..408c743f2 --- /dev/null +++ b/agents/codex-164.md @@ -0,0 +1 @@ + diff --git a/codex-output.md b/codex-output.md index 41ec4ca5f..665ff2be9 100644 --- a/codex-output.md +++ b/codex-output.md @@ -1,4 +1,6 @@ -Updated verifier guidance to explicitly require marking test-related criteria NOT MET when CI evidence is missing, and tightened the prompt test to enforce the new instruction. I also reconciled the task checklist and acceptance criteria in `codex-prompt.md` to reflect the verified CI integration work. +Updated the verifier context expectations to include the new `error_category`/`error_message` fields emitted by `queryVerifierCiResults`, so the deep-equal assertions match the current output shape in `.github/scripts/__tests__/agents-verifier-context.test.js`. -Tests: -- `node --test .github/scripts/__tests__/verifier-acceptance-prompt.test.js .github/scripts/__tests__/verifier-ci-query.test.js .github/scripts/__tests__/agents-verifier-context.test.js` \ No newline at end of file +Tests run: +- `node .github/scripts/__tests__/agents-verifier-context.test.js` + +If you want, I can re-run the full suite with `node --test .github/scripts/__tests__/*.test.js`. \ No newline at end of file diff --git a/codex-prompt.md b/codex-prompt.md index bad0d6809..6554849af 100644 --- a/codex-prompt.md +++ b/codex-prompt.md @@ -101,83 +101,23 @@ You should assume you're running in `agent-standard` unless explicitly told othe # Task Prompt -# Keepalive Next Task +# Autofix from CI failure -Your objective is to satisfy the **Acceptance Criteria** by completing each **Task** within the defined **Scope**. +You are Codex running in autofix mode after a CI failure. Use the available logs and repository context to repair the failing checks. -**This round you MUST:** -1. Implement actual code or test changes that advance at least one incomplete task toward acceptance. -2. Commit meaningful source code (.py, .yml, .js, etc.)—not just status/docs updates. -3. Mark a task checkbox complete ONLY after verifying the implementation works. -4. Focus on the FIRST unchecked task unless blocked, then move to the next. - -**Guidelines:** -- Keep edits scoped to the current task rather than reshaping the entire PR. -- Use repository instructions, conventions, and tests to validate work. -- Prefer small, reviewable commits; leave clear notes when follow-up is required. -- Do NOT work on unrelated improvements until all PR tasks are complete. - -**The Tasks and Acceptance Criteria are provided in the appendix below.** Work through them in order. +Guidance: +- Inspect the latest CI output provided by the caller (logs or summaries) to pinpoint the root cause. +- Focus on minimal, targeted fixes that unblock the failing job. +- Leave diagnostic breadcrumbs when a failure cannot be reproduced or fully addressed. +- Re-run or suggest the smallest relevant checks to verify the fix. ## Run context ---- -## PR Tasks and Acceptance Criteria - -**Progress:** 21/21 tasks complete, 0 remaining - -### ⚠️ IMPORTANT: Task Reconciliation Required - -The previous iteration changed **2 file(s)** but did not update task checkboxes. - -**Before continuing, you MUST:** -1. Review the recent commits to understand what was changed -2. Determine which task checkboxes should be marked complete -3. Update the PR body to check off completed tasks -4. Then continue with remaining tasks - -_Failure to update checkboxes means progress is not being tracked properly._ - -### Scope -- [ ] The post-merge verifier (agents-verifier.yml) currently runs tests locally in a read-only sandbox to verify acceptance criteria. This approach has critical flaws exposed by PR #154: -- [ ] 1. **Stale State**: Verifier may run against incomplete merge state, causing false test failures -- [ ] 2. **No CI Access**: Cannot verify "Selftest CI passes" criterion - always marks as NOT MET -- [ ] 3. **Duplicate Work**: Re-runs tests that CI already validated, wasting compute -- [ ] 4. **False Negatives**: PR #154 created Issue #155 claiming 4 test suites failed when all 301 tests actually pass -- [ ] ### Evidence from Issue #155 -- [ ] The verifier incorrectly reported: -- [ ] - `agents-verifier-context.test.js` failing (unrelated to PR scope) -- [ ] - `comment-dedupe.test.js` failing (unrelated to PR scope) -- [ ] - `coverage-normalize.test.js` failing (unrelated to PR scope) -- [ ] - `maint-post-ci.test.js` failing (unrelated to PR scope) -- [ ] Reality: All tests pass. The verifier created a bogus follow-up issue that would have caused duplicate work. - -### Tasks -Complete these in order. Mark checkbox done ONLY after implementation is verified: - -- [x] ### Round 1: Add CI result querying -- [x] Create `verifier_ci_query.js` script to fetch workflow run results for a commit -- [x] Query Gate, Selftest CI, and PR 11 workflow conclusions -- [x] Return structured data: `{ workflow_name, conclusion, run_url }` -- [x] ### Round 2: Integrate into verifier context -- [x] Modify `agents_verifier_context.js` to include CI results in context -- [x] Add "CI Verification" section to verifier prompt with actual results -- [x] Remove instruction to run tests locally (rely on CI results) -- [x] ### Round 3: Update verifier prompt -- [x] Update `.github/codex/prompts/verifier_acceptance_check.md` -- [x] Instruct verifier to check CI results section instead of running tests -- [x] Keep file existence and pattern checks as local verification -- [x] ### Round 4: Testing -- [x] Add tests for `verifier_ci_query.js` -- [x] Test with a merged PR to verify CI results are correctly fetched -- [x] Verify verifier no longer produces false negatives - -### Acceptance Criteria -The PR is complete when ALL of these are satisfied: - -- [x] Verifier context includes CI workflow results (Gate, Selftest CI conclusions) -- [x] Verifier prompt instructs to use CI results for test pass/fail verification -- [x] "Selftest CI passes" criterion can be verified as PASS when CI actually passed -- [x] No false negatives from stale local test runs -- [x] Tests exist for the new CI query functionality - ---- +Gate run: https://github.com/stranske/Workflows/actions/runs/20515491056 +Conclusion: failure +PR: #166 +Head SHA: cb79002f67016ab2f196efa524734a75d832223a +Autofix attempts for this head: 1 / 3 +Fix scope: src/, tests/, tools/, scripts/, agents/, templates/, .github/ +Failing jobs: +- github scripts tests (failure) + - steps: Run node --test .github/scripts/__tests__/*.test.js (failure)