stranske · stranske · Dec 26, 2025 · Dec 26, 2025 · Dec 26, 2025 · Dec 26, 2025
@@ -351,38 +351,44 @@ test('queryVerifierCiResults retries transient errors and returns success', asyn
   ]);
 });
 
-test('queryVerifierCiResults returns api_error after max retries', async () => {
-  let attempts = 0;
-  const warnings = [];
-  const github = buildGithubStub({
-    listWorkflowRunsHook: async () => {
-      attempts += 1;
-      const error = new Error('timeout');
-      error.status = 504;
-      throw error;
-    },
-  });
-  const context = { repo: { owner: 'octo', repo: 'workflows' } };
-  const workflows = [{ workflow_name: 'Gate', workflow_id: 'pr-00-gate.yml' }];
-
-  const results = await queryVerifierCiResults({
-    github,
-    context,
-    targetSha: 'retry-sha',
-    workflows,
-    core: { warning: (message) => warnings.push(String(message)) },
-    retryOptions: { sleepFn: async () => {} },
-  });
-
-  assert.equal(attempts, 4);
-  assert.equal(warnings.length, 4);
-  assert.deepEqual(results, [
-    {
-      workflow_name: 'Gate',
-      conclusion: 'api_error',
-      run_url: '',
-      error_category: 'transient',
-      error_message: 'listWorkflowRuns:pr-00-gate.yml failed after 4 attempt(s): timeout',
-    },
-  ]);
+test('queryVerifierCiResults returns api_error after max retries', async (t) => {
+  const statuses = [429, 500, 502, 503, 504];
+
+  for (const status of statuses) {
+    await t.test(`retries and fails for status ${status}`, async () => {
+      let attempts = 0;
+      const warnings = [];
+      const github = buildGithubStub({
+        listWorkflowRunsHook: async () => {
+          attempts += 1;
+          const error = new Error(`status-${status}`);
+          error.status = status;
+          throw error;
+        },
+      });
+      const context = { repo: { owner: 'octo', repo: 'workflows' } };
+      const workflows = [{ workflow_name: 'Gate', workflow_id: 'pr-00-gate.yml' }];
+
+      const results = await queryVerifierCiResults({
+        github,
+        context,
+        targetSha: 'retry-sha',
+        workflows,
+        core: { warning: (message) => warnings.push(String(message)) },
+        retryOptions: { sleepFn: async () => {} },
+      });
+
+      assert.equal(attempts, 4);
+      assert.equal(warnings.length, 4);
+      assert.deepEqual(results, [
+        {
+          workflow_name: 'Gate',
+          conclusion: 'api_error',
+          run_url: '',
+          error_category: 'transient',
+          error_message: `listWorkflowRuns:pr-00-gate.yml failed after 4 attempt(s): status-${status}`,
+        },
+      ]);
+    });
+  }
 });
@@ -8,8 +8,8 @@ const DEFAULT_WORKFLOWS = [
   { workflow_name: 'PR 11 - Minimal invariant CI', workflow_id: 'pr-11-ci-smoke.yml' },
 ];
 
-const DEFAULT_RETRY_DELAYS_MS = [1000, 2000, 4000];
-const DEFAULT_MAX_RETRIES = DEFAULT_RETRY_DELAYS_MS.length;
+const DEFAULT_BASE_DELAY_MS = 1000;
+const DEFAULT_MAX_RETRIES = 3;
 
 function normalizeConclusion(run) {
   if (!run) {
@@ -35,6 +35,16 @@ async function sleep(ms) {
   await new Promise((resolve) => setTimeout(resolve, ms));
 }
 
+function buildRetryDelays(maxRetries, baseDelayMs) {
+  const delays = [];
+  const safeRetries = Math.max(0, Number(maxRetries) || 0);
+  const safeBaseDelayMs = Math.max(0, Number(baseDelayMs) || 0);
+  for (let attempt = 0; attempt < safeRetries; attempt += 1) {
+    delays.push(safeBaseDelayMs * Math.pow(2, attempt));
+  }
+  return delays;
+}
+
 function buildRetryError(error, category, label, attempts) {
   const message = error?.message || 'Unknown error';
   const retryError = new Error(`${label} failed after ${attempts} attempt(s): ${message}`);
@@ -46,35 +56,45 @@ function buildRetryError(error, category, label, attempts) {
 async function withRetry(apiCall, options = {}) {
   const {
     label = 'GitHub API call',
-    delays = DEFAULT_RETRY_DELAYS_MS,
+    delays = null,
+    maxRetries = DEFAULT_MAX_RETRIES,
+    baseDelayMs = DEFAULT_BASE_DELAY_MS,
     core = null,
     sleepFn = sleep,
   } = options;
 
+  const retryDelays = Array.isArray(delays) && delays.length
+    ? delays
+    : buildRetryDelays(maxRetries, baseDelayMs);
   let lastError = null;
-  for (let attempt = 0; attempt <= delays.length; attempt += 1) {
+  for (let attempt = 0; attempt <= retryDelays.length; attempt += 1) {
     try {
       return await apiCall();
     } catch (error) {
       lastError = error;
       const category = getErrorCategory(error);
-      const canRetry = category === ERROR_CATEGORIES.transient && attempt < delays.length;
+      const canRetry = category === ERROR_CATEGORIES.transient && attempt < retryDelays.length;
 
       if (!canRetry) {
         throw buildRetryError(error, category, label, attempt + 1);
       }
 
-      const delayMs = delays[attempt];
+      const delayMs = retryDelays[attempt];
       if (core?.warning) {
         core.warning(
-          `Retrying ${label}; category=${category} attempt=${attempt + 1}/${delays.length + 1} delayMs=${delayMs}`
+          `Retrying ${label}; category=${category} attempt=${attempt + 1}/${retryDelays.length + 1} delayMs=${delayMs}`
         );
       }
       await sleepFn(delayMs);
     }
   }
 
-  throw buildRetryError(lastError || new Error('Unknown error'), ERROR_CATEGORIES.unknown, label, delays.length + 1);
+  throw buildRetryError(
+    lastError || new Error('Unknown error'),
+    ERROR_CATEGORIES.unknown,
+    label,
+    retryDelays.length + 1
+  );
 }
 
 async function fetchWorkflowRun({

diff --git a/agents/codex-167.md b/agents/codex-167.md
@@ -0,0 +1 @@
+<!-- bootstrap for codex on issue #167 -->
diff --git a/codex-output.md b/codex-output.md
@@ -1,6 +1,8 @@
-Updated the verifier context expectations to include the new `error_category`/`error_message` fields emitted by `queryVerifierCiResults`, so the deep-equal assertions match the current output shape in `.github/scripts/__tests__/agents-verifier-context.test.js`.
+Retry delays now derive from configurable `maxRetries`/`baseDelayMs` options whenever no explicit `delays` array is supplied, so the verifier retry loop is explicit and still defaults to 3 retries with exponential backoff (1s, 2s, 4s). The PR checklist was also updated to reflect the verified retry behavior and tests. This keeps the retry/backoff logic in `.github/scripts/verifier_ci_query.js` clear and consistent with the acceptance criteria, while `.github/scripts/__tests__/verifier-ci-query.test.js` continues to validate success and exhaustion cases.
 
 Tests run:
-- `node .github/scripts/__tests__/agents-verifier-context.test.js`
+- `node --test .github/scripts/__tests__/verifier-ci-query.test.js`
 
-If you want, I can re-run the full suite with `node --test .github/scripts/__tests__/*.test.js`.
+Next steps:
+1. Run the full selftest CI workflow to satisfy the remaining “Selftest CI passes” checkbox.
+2. If you want broader local coverage, run `node --test .github/scripts/__tests__/*.test.js`.
diff --git a/codex-prompt.md b/codex-prompt.md
@@ -101,23 +101,75 @@ You should assume you're running in `agent-standard` unless explicitly told othe
 
 # Task Prompt
 
-# Autofix from CI failure
+# Keepalive Next Task
 
-You are Codex running in autofix mode after a CI failure. Use the available logs and repository context to repair the failing checks.
+Your objective is to satisfy the **Acceptance Criteria** by completing each **Task** within the defined **Scope**.
 
-Guidance:
-- Inspect the latest CI output provided by the caller (logs or summaries) to pinpoint the root cause.
-- Focus on minimal, targeted fixes that unblock the failing job.
-- Leave diagnostic breadcrumbs when a failure cannot be reproduced or fully addressed.
-- Re-run or suggest the smallest relevant checks to verify the fix.
+**This round you MUST:**
+1. Implement actual code or test changes that advance at least one incomplete task toward acceptance.
+2. Commit meaningful source code (.py, .yml, .js, etc.)—not just status/docs updates.
+3. Mark a task checkbox complete ONLY after verifying the implementation works.
+4. Focus on the FIRST unchecked task unless blocked, then move to the next.
+
+**Guidelines:**
+- Keep edits scoped to the current task rather than reshaping the entire PR.
+- Use repository instructions, conventions, and tests to validate work.
+- Prefer small, reviewable commits; leave clear notes when follow-up is required.
+- Do NOT work on unrelated improvements until all PR tasks are complete.
+
+**The Tasks and Acceptance Criteria are provided in the appendix below.** Work through them in order.
 
 ## Run context
-Gate run: https://github.com/stranske/Workflows/actions/runs/20515491056
-Conclusion: failure
-PR: #166
-Head SHA: cb79002f67016ab2f196efa524734a75d832223a
-Autofix attempts for this head: 1 / 3
-Fix scope: src/, tests/, tools/, scripts/, agents/, templates/, .github/
-Failing jobs:
-- github scripts tests (failure)
-  - steps: Run node --test .github/scripts/__tests__/*.test.js (failure)
+---
+## PR Tasks and Acceptance Criteria
+
+**Progress:** 10/12 tasks complete, 2 remaining
+
+### ⚠️ IMPORTANT: Task Reconciliation Required
+
+The previous iteration changed **2 file(s)** but did not update task checkboxes.
+
+**Before continuing, you MUST:**
+1. Review the recent commits to understand what was changed
+2. Determine which task checkboxes should be marked complete
+3. Update the PR body to check off completed tasks
+4. Then continue with remaining tasks
+
+_Failure to update checkboxes means progress is not being tracked properly._
+
+### Scope
+- [ ] <!-- Updated scope for this follow-up -->
+- [ ] Address unmet acceptance criteria from PR #166.
+- [ ] Original scope:
+- [ ] The verifier CI query (`verifier_ci_query.js`) currently makes a single API call to fetch workflow run results. If the GitHub API returns a transient error (rate limit, timeout, network hiccup), the query fails silently and the verifier sees missing CI results.
+- [ ] This can cause false negatives where the verifier marks test-related criteria as NOT MET due to API failures rather than actual CI failures.
+#### Current Behavior
+- Single API call per workflow
+- Failures logged as warnings but not retried
+- Missing results treated as "not found"
+#### Desired Behavior
+- Retry transient failures with exponential backoff
+- Distinguish between "CI not run" and "API error"
+- Log retry attempts for debugging
+
+### Tasks
+Complete these in order. Mark checkbox done ONLY after implementation is verified:
+
+- [x] <!-- New tasks to address unmet acceptance criteria -->
+- [x] Satisfy: Transient API failures (429, 500, 502, 503, 504) are retried up to 3 times
+- [x] Satisfy: Successful retry results in correct CI data being returned
+- [x] Satisfy: Max retry exceeded results in clear error message, not silent "not found"
+- [x] Satisfy: Tests cover retry success and retry exhaustion scenarios
+- [ ] Satisfy: Selftest CI passes
+
+### Acceptance Criteria
+The PR is complete when ALL of these are satisfied:
+
+- [x] <!-- Criteria verified as unmet by verifier -->
+- [x] Transient API failures (429, 500, 502, 503, 504) are retried up to 3 times
+- [x] Successful retry results in correct CI data being returned
+- [x] Max retry exceeded results in clear error message, not silent "not found"
+- [x] Tests cover retry success and retry exhaustion scenarios
+- [ ] Selftest CI passes
+
+---