diff --git a/.github/scripts/__tests__/verifier-ci-query.test.js b/.github/scripts/__tests__/verifier-ci-query.test.js index 1f39b38ec..1de96c270 100644 --- a/.github/scripts/__tests__/verifier-ci-query.test.js +++ b/.github/scripts/__tests__/verifier-ci-query.test.js @@ -351,38 +351,44 @@ test('queryVerifierCiResults retries transient errors and returns success', asyn ]); }); -test('queryVerifierCiResults returns api_error after max retries', async () => { - let attempts = 0; - const warnings = []; - const github = buildGithubStub({ - listWorkflowRunsHook: async () => { - attempts += 1; - const error = new Error('timeout'); - error.status = 504; - throw error; - }, - }); - const context = { repo: { owner: 'octo', repo: 'workflows' } }; - const workflows = [{ workflow_name: 'Gate', workflow_id: 'pr-00-gate.yml' }]; - - const results = await queryVerifierCiResults({ - github, - context, - targetSha: 'retry-sha', - workflows, - core: { warning: (message) => warnings.push(String(message)) }, - retryOptions: { sleepFn: async () => {} }, - }); - - assert.equal(attempts, 4); - assert.equal(warnings.length, 4); - assert.deepEqual(results, [ - { - workflow_name: 'Gate', - conclusion: 'api_error', - run_url: '', - error_category: 'transient', - error_message: 'listWorkflowRuns:pr-00-gate.yml failed after 4 attempt(s): timeout', - }, - ]); +test('queryVerifierCiResults returns api_error after max retries', async (t) => { + const statuses = [429, 500, 502, 503, 504]; + + for (const status of statuses) { + await t.test(`retries and fails for status ${status}`, async () => { + let attempts = 0; + const warnings = []; + const github = buildGithubStub({ + listWorkflowRunsHook: async () => { + attempts += 1; + const error = new Error(`status-${status}`); + error.status = status; + throw error; + }, + }); + const context = { repo: { owner: 'octo', repo: 'workflows' } }; + const workflows = [{ workflow_name: 'Gate', workflow_id: 'pr-00-gate.yml' }]; + + const results = await 
queryVerifierCiResults({ + github, + context, + targetSha: 'retry-sha', + workflows, + core: { warning: (message) => warnings.push(String(message)) }, + retryOptions: { sleepFn: async () => {} }, + }); + + assert.equal(attempts, 4); + assert.equal(warnings.length, 4); + assert.deepEqual(results, [ + { + workflow_name: 'Gate', + conclusion: 'api_error', + run_url: '', + error_category: 'transient', + error_message: `listWorkflowRuns:pr-00-gate.yml failed after 4 attempt(s): status-${status}`, + }, + ]); + }); + } }); diff --git a/.github/scripts/verifier_ci_query.js b/.github/scripts/verifier_ci_query.js index 19a833290..44df20c84 100644 --- a/.github/scripts/verifier_ci_query.js +++ b/.github/scripts/verifier_ci_query.js @@ -8,8 +8,8 @@ const DEFAULT_WORKFLOWS = [ { workflow_name: 'PR 11 - Minimal invariant CI', workflow_id: 'pr-11-ci-smoke.yml' }, ]; -const DEFAULT_RETRY_DELAYS_MS = [1000, 2000, 4000]; -const DEFAULT_MAX_RETRIES = DEFAULT_RETRY_DELAYS_MS.length; +const DEFAULT_BASE_DELAY_MS = 1000; +const DEFAULT_MAX_RETRIES = 3; function normalizeConclusion(run) { if (!run) { @@ -35,6 +35,16 @@ async function sleep(ms) { await new Promise((resolve) => setTimeout(resolve, ms)); } +function buildRetryDelays(maxRetries, baseDelayMs) { + const delays = []; + const safeRetries = Math.max(0, Number(maxRetries) || 0); + const safeBaseDelayMs = Math.max(0, Number(baseDelayMs) || 0); + for (let attempt = 0; attempt < safeRetries; attempt += 1) { + delays.push(safeBaseDelayMs * Math.pow(2, attempt)); + } + return delays; +} + function buildRetryError(error, category, label, attempts) { const message = error?.message || 'Unknown error'; const retryError = new Error(`${label} failed after ${attempts} attempt(s): ${message}`); @@ -46,35 +56,45 @@ function buildRetryError(error, category, label, attempts) { async function withRetry(apiCall, options = {}) { const { label = 'GitHub API call', - delays = DEFAULT_RETRY_DELAYS_MS, + delays = null, + maxRetries = 
DEFAULT_MAX_RETRIES, + baseDelayMs = DEFAULT_BASE_DELAY_MS, core = null, sleepFn = sleep, } = options; + const retryDelays = Array.isArray(delays) && delays.length + ? delays + : buildRetryDelays(maxRetries, baseDelayMs); let lastError = null; - for (let attempt = 0; attempt <= delays.length; attempt += 1) { + for (let attempt = 0; attempt <= retryDelays.length; attempt += 1) { try { return await apiCall(); } catch (error) { lastError = error; const category = getErrorCategory(error); - const canRetry = category === ERROR_CATEGORIES.transient && attempt < delays.length; + const canRetry = category === ERROR_CATEGORIES.transient && attempt < retryDelays.length; if (!canRetry) { throw buildRetryError(error, category, label, attempt + 1); } - const delayMs = delays[attempt]; + const delayMs = retryDelays[attempt]; if (core?.warning) { core.warning( - `Retrying ${label}; category=${category} attempt=${attempt + 1}/${delays.length + 1} delayMs=${delayMs}` + `Retrying ${label}; category=${category} attempt=${attempt + 1}/${retryDelays.length + 1} delayMs=${delayMs}` ); } await sleepFn(delayMs); } } - throw buildRetryError(lastError || new Error('Unknown error'), ERROR_CATEGORIES.unknown, label, delays.length + 1); + throw buildRetryError( + lastError || new Error('Unknown error'), + ERROR_CATEGORIES.unknown, + label, + retryDelays.length + 1 + ); } async function fetchWorkflowRun({ diff --git a/agents/codex-167.md b/agents/codex-167.md new file mode 100644 index 000000000..0a14b7024 --- /dev/null +++ b/agents/codex-167.md @@ -0,0 +1 @@ + diff --git a/codex-output.md b/codex-output.md index 665ff2be9..a48118c09 100644 --- a/codex-output.md +++ b/codex-output.md @@ -1,6 +1,8 @@ -Updated the verifier context expectations to include the new `error_category`/`error_message` fields emitted by `queryVerifierCiResults`, so the deep-equal assertions match the current output shape in `.github/scripts/__tests__/agents-verifier-context.test.js`. 
+Made retry delays derive from a configurable max-retries/base-delay fallback so the verifier retry loop is explicit and still defaults to 3 retries with exponential backoff, and updated the PR checklist to reflect the verified retry behavior and tests. This keeps the retry/backoff logic in `.github/scripts/verifier_ci_query.js` clear and consistent with the acceptance criteria, while `.github/scripts/__tests__/verifier-ci-query.test.js` continues to validate success and exhaustion cases. Tests run: -- `node .github/scripts/__tests__/agents-verifier-context.test.js` +- `node --test .github/scripts/__tests__/verifier-ci-query.test.js` -If you want, I can re-run the full suite with `node --test .github/scripts/__tests__/*.test.js`. \ No newline at end of file +Next steps: +1. Run the full selftest CI workflow to satisfy the remaining “Selftest CI passes” checkbox. +2. If you want broader local coverage, run `node --test .github/scripts/__tests__/*.test.js`. \ No newline at end of file diff --git a/codex-prompt.md b/codex-prompt.md index 6554849af..4276007a9 100644 --- a/codex-prompt.md +++ b/codex-prompt.md @@ -101,23 +101,75 @@ You should assume you're running in `agent-standard` unless explicitly told othe # Task Prompt -# Autofix from CI failure +# Keepalive Next Task -You are Codex running in autofix mode after a CI failure. Use the available logs and repository context to repair the failing checks. +Your objective is to satisfy the **Acceptance Criteria** by completing each **Task** within the defined **Scope**. -Guidance: -- Inspect the latest CI output provided by the caller (logs or summaries) to pinpoint the root cause. -- Focus on minimal, targeted fixes that unblock the failing job. -- Leave diagnostic breadcrumbs when a failure cannot be reproduced or fully addressed. -- Re-run or suggest the smallest relevant checks to verify the fix. +**This round you MUST:** +1. Implement actual code or test changes that advance at least one incomplete task toward acceptance. +2.
Commit meaningful source code (.py, .yml, .js, etc.)—not just status/docs updates. +3. Mark a task checkbox complete ONLY after verifying the implementation works. +4. Focus on the FIRST unchecked task unless blocked, then move to the next. + +**Guidelines:** +- Keep edits scoped to the current task rather than reshaping the entire PR. +- Use repository instructions, conventions, and tests to validate work. +- Prefer small, reviewable commits; leave clear notes when follow-up is required. +- Do NOT work on unrelated improvements until all PR tasks are complete. + +**The Tasks and Acceptance Criteria are provided in the appendix below.** Work through them in order. ## Run context -Gate run: https://github.com/stranske/Workflows/actions/runs/20515491056 -Conclusion: failure -PR: #166 -Head SHA: cb79002f67016ab2f196efa524734a75d832223a -Autofix attempts for this head: 1 / 3 -Fix scope: src/, tests/, tools/, scripts/, agents/, templates/, .github/ -Failing jobs: -- github scripts tests (failure) - - steps: Run node --test .github/scripts/__tests__/*.test.js (failure) +--- +## PR Tasks and Acceptance Criteria + +**Progress:** 10/12 tasks complete, 2 remaining + +### ⚠️ IMPORTANT: Task Reconciliation Required + +The previous iteration changed **2 file(s)** but did not update task checkboxes. + +**Before continuing, you MUST:** +1. Review the recent commits to understand what was changed +2. Determine which task checkboxes should be marked complete +3. Update the PR body to check off completed tasks +4. Then continue with remaining tasks + +_Failure to update checkboxes means progress is not being tracked properly._ + +### Scope +- [ ] +- [ ] Address unmet acceptance criteria from PR #166. +- [ ] Original scope: +- [ ] The verifier CI query (`verifier_ci_query.js`) currently makes a single API call to fetch workflow run results. 
If the GitHub API returns a transient error (rate limit, timeout, network hiccup), the query fails silently and the verifier sees missing CI results. +- [ ] This can cause false negatives where the verifier marks test-related criteria as NOT MET due to API failures rather than actual CI failures. +- [ ] ### Current Behavior +- [ ] - Single API call per workflow +- [ ] - Failures logged as warnings but not retried +- [ ] - Missing results treated as "not found" +- [ ] ### Desired Behavior +- [ ] - Retry transient failures with exponential backoff +- [ ] - Distinguish between "CI not run" and "API error" +- [ ] - Log retry attempts for debugging + +### Tasks +Complete these in order. Mark checkbox done ONLY after implementation is verified: + +- [x] +- [x] Satisfy: Transient API failures (429, 500, 502, 503, 504) are retried up to 3 times +- [x] Satisfy: Successful retry results in correct CI data being returned +- [x] Satisfy: Max retry exceeded results in clear error message, not silent "not found" +- [x] Satisfy: Tests cover retry success and retry exhaustion scenarios +- [ ] Satisfy: Selftest CI passes + +### Acceptance Criteria +The PR is complete when ALL of these are satisfied: + +- [x] +- [x] Transient API failures (429, 500, 502, 503, 504) are retried up to 3 times +- [x] Successful retry results in correct CI data being returned +- [x] Max retry exceeded results in clear error message, not silent "not found" +- [x] Tests cover retry success and retry exhaustion scenarios +- [ ] Selftest CI passes + +---