Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 40 additions & 34 deletions .github/scripts/__tests__/verifier-ci-query.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -351,38 +351,44 @@ test('queryVerifierCiResults retries transient errors and returns success', asyn
]);
});

test('queryVerifierCiResults returns api_error after max retries', async () => {
let attempts = 0;
const warnings = [];
const github = buildGithubStub({
listWorkflowRunsHook: async () => {
attempts += 1;
const error = new Error('timeout');
error.status = 504;
throw error;
},
});
const context = { repo: { owner: 'octo', repo: 'workflows' } };
const workflows = [{ workflow_name: 'Gate', workflow_id: 'pr-00-gate.yml' }];

const results = await queryVerifierCiResults({
github,
context,
targetSha: 'retry-sha',
workflows,
core: { warning: (message) => warnings.push(String(message)) },
retryOptions: { sleepFn: async () => {} },
});

assert.equal(attempts, 4);
assert.equal(warnings.length, 4);
assert.deepEqual(results, [
{
workflow_name: 'Gate',
conclusion: 'api_error',
run_url: '',
error_category: 'transient',
error_message: 'listWorkflowRuns:pr-00-gate.yml failed after 4 attempt(s): timeout',
},
]);
test('queryVerifierCiResults returns api_error after max retries', async (t) => {
  // Each of these statuses is expected to be reported with error_category 'transient'.
  const transientStatuses = [429, 500, 502, 503, 504];

  for (const status of transientStatuses) {
    // Subtests are awaited one at a time so every status gets fresh counters.
    await t.test(`retries and fails for status ${status}`, async () => {
      const capturedWarnings = [];
      let callCount = 0;

      // The stubbed listWorkflowRuns call never succeeds: it records the call
      // and rejects with the HTTP status under test.
      const alwaysFail = async () => {
        callCount += 1;
        throw Object.assign(new Error(`status-${status}`), { status });
      };
      const github = buildGithubStub({ listWorkflowRunsHook: alwaysFail });

      const results = await queryVerifierCiResults({
        github,
        context: { repo: { owner: 'octo', repo: 'workflows' } },
        targetSha: 'retry-sha',
        workflows: [{ workflow_name: 'Gate', workflow_id: 'pr-00-gate.yml' }],
        core: { warning: (message) => capturedWarnings.push(String(message)) },
        // No-op sleep keeps the backoff from slowing the test down.
        retryOptions: { sleepFn: async () => {} },
      });

      // The API must have been called four times before giving up.
      assert.equal(callCount, 4);
      assert.equal(capturedWarnings.length, 4);

      // Exhaustion is surfaced as an explicit api_error entry for the workflow.
      const expected = [
        {
          workflow_name: 'Gate',
          conclusion: 'api_error',
          run_url: '',
          error_category: 'transient',
          error_message: `listWorkflowRuns:pr-00-gate.yml failed after 4 attempt(s): status-${status}`,
        },
      ];
      assert.deepEqual(results, expected);
    });
  }
});
36 changes: 28 additions & 8 deletions .github/scripts/verifier_ci_query.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ const DEFAULT_WORKFLOWS = [
{ workflow_name: 'PR 11 - Minimal invariant CI', workflow_id: 'pr-11-ci-smoke.yml' },
];

const DEFAULT_RETRY_DELAYS_MS = [1000, 2000, 4000];
const DEFAULT_MAX_RETRIES = DEFAULT_RETRY_DELAYS_MS.length;
const DEFAULT_BASE_DELAY_MS = 1000;
const DEFAULT_MAX_RETRIES = 3;

function normalizeConclusion(run) {
if (!run) {
Expand All @@ -35,6 +35,16 @@ async function sleep(ms) {
await new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Build the list of exponential backoff delays used between retry attempts.
 *
 * The delay for retry `i` (0-based) is `baseDelayMs * 2^i`, so the defaults of
 * 3 retries and a 1000 ms base produce `[1000, 2000, 4000]`.
 *
 * Both inputs are coerced with `Number()`. Anything that is not a finite,
 * positive number (NaN, negative values, +/-Infinity, non-numeric strings,
 * null/undefined) is treated as 0. Rejecting non-finite values matters for
 * `maxRetries`: an infinite retry count would otherwise make this function
 * loop forever while growing the array.
 *
 * @param {number|string} maxRetries - Number of retries to schedule. Fractional
 *   values are rounded up.
 * @param {number|string} baseDelayMs - Delay before the first retry, in milliseconds.
 * @returns {number[]} One delay (in milliseconds) per retry; empty when no
 *   retries are requested.
 */
function buildRetryDelays(maxRetries, baseDelayMs) {
  // Coerce to a finite, non-negative number; everything else collapses to 0.
  const toFiniteNonNegative = (value) => {
    const parsed = Number(value);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
  };

  // Math.ceil keeps the long-standing result for fractional counts (2.5 -> 3 delays).
  const retryCount = Math.ceil(toFiniteNonNegative(maxRetries));
  const safeBaseDelayMs = toFiniteNonNegative(baseDelayMs);

  return Array.from({ length: retryCount }, (_, attempt) => safeBaseDelayMs * 2 ** attempt);
}

function buildRetryError(error, category, label, attempts) {
const message = error?.message || 'Unknown error';
const retryError = new Error(`${label} failed after ${attempts} attempt(s): ${message}`);
Expand All @@ -46,35 +56,45 @@ function buildRetryError(error, category, label, attempts) {
async function withRetry(apiCall, options = {}) {
const {
label = 'GitHub API call',
delays = DEFAULT_RETRY_DELAYS_MS,
delays = null,
maxRetries = DEFAULT_MAX_RETRIES,
baseDelayMs = DEFAULT_BASE_DELAY_MS,
core = null,
sleepFn = sleep,
} = options;

const retryDelays = Array.isArray(delays) && delays.length
? delays
: buildRetryDelays(maxRetries, baseDelayMs);
let lastError = null;
for (let attempt = 0; attempt <= delays.length; attempt += 1) {
for (let attempt = 0; attempt <= retryDelays.length; attempt += 1) {
try {
return await apiCall();
} catch (error) {
lastError = error;
const category = getErrorCategory(error);
const canRetry = category === ERROR_CATEGORIES.transient && attempt < delays.length;
const canRetry = category === ERROR_CATEGORIES.transient && attempt < retryDelays.length;

if (!canRetry) {
throw buildRetryError(error, category, label, attempt + 1);
}

const delayMs = delays[attempt];
const delayMs = retryDelays[attempt];
if (core?.warning) {
core.warning(
`Retrying ${label}; category=${category} attempt=${attempt + 1}/${delays.length + 1} delayMs=${delayMs}`
`Retrying ${label}; category=${category} attempt=${attempt + 1}/${retryDelays.length + 1} delayMs=${delayMs}`
);
}
await sleepFn(delayMs);
}
}

throw buildRetryError(lastError || new Error('Unknown error'), ERROR_CATEGORIES.unknown, label, delays.length + 1);
throw buildRetryError(
lastError || new Error('Unknown error'),
ERROR_CATEGORIES.unknown,
label,
retryDelays.length + 1
);
}

async function fetchWorkflowRun({
Expand Down
1 change: 1 addition & 0 deletions agents/codex-167.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<!-- bootstrap for codex on issue #167 -->
8 changes: 5 additions & 3 deletions codex-output.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
Updated the verifier context expectations to include the new `error_category`/`error_message` fields emitted by `queryVerifierCiResults`, so the deep-equal assertions match the current output shape in `.github/scripts/__tests__/agents-verifier-context.test.js`.
Retry delays are now derived from a configurable max-retries/base-delay pair whenever no explicit `delays` array is supplied, so the verifier retry loop is explicit and still defaults to 3 retries with exponential backoff (1s, 2s, 4s). The PR checklist was also updated to reflect the verified retry behavior and tests. This keeps the retry/backoff logic in `.github/scripts/verifier_ci_query.js` clear and consistent with the acceptance criteria, while `.github/scripts/__tests__/verifier-ci-query.test.js` continues to validate success and exhaustion cases.

Tests run:
- `node .github/scripts/__tests__/agents-verifier-context.test.js`
- `node --test .github/scripts/__tests__/verifier-ci-query.test.js`

If you want, I can re-run the full suite with `node --test .github/scripts/__tests__/*.test.js`.
Next steps:
1. Run the full selftest CI workflow to satisfy the remaining “Selftest CI passes” checkbox.
2. If you want broader local coverage, run `node --test .github/scripts/__tests__/*.test.js`.
84 changes: 68 additions & 16 deletions codex-prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,23 +101,75 @@ You should assume you're running in `agent-standard` unless explicitly told othe

# Task Prompt

# Autofix from CI failure
# Keepalive Next Task

You are Codex running in autofix mode after a CI failure. Use the available logs and repository context to repair the failing checks.
Your objective is to satisfy the **Acceptance Criteria** by completing each **Task** within the defined **Scope**.

Guidance:
- Inspect the latest CI output provided by the caller (logs or summaries) to pinpoint the root cause.
- Focus on minimal, targeted fixes that unblock the failing job.
- Leave diagnostic breadcrumbs when a failure cannot be reproduced or fully addressed.
- Re-run or suggest the smallest relevant checks to verify the fix.
**This round you MUST:**
1. Implement actual code or test changes that advance at least one incomplete task toward acceptance.
2. Commit meaningful source code (.py, .yml, .js, etc.)—not just status/docs updates.
3. Mark a task checkbox complete ONLY after verifying the implementation works.
4. Focus on the FIRST unchecked task unless blocked, then move to the next.

**Guidelines:**
- Keep edits scoped to the current task rather than reshaping the entire PR.
- Use repository instructions, conventions, and tests to validate work.
- Prefer small, reviewable commits; leave clear notes when follow-up is required.
- Do NOT work on unrelated improvements until all PR tasks are complete.

**The Tasks and Acceptance Criteria are provided in the appendix below.** Work through them in order.

## Run context
Gate run: https://github.com/stranske/Workflows/actions/runs/20515491056
Conclusion: failure
PR: #166
Head SHA: cb79002f67016ab2f196efa524734a75d832223a
Autofix attempts for this head: 1 / 3
Fix scope: src/, tests/, tools/, scripts/, agents/, templates/, .github/
Failing jobs:
- github scripts tests (failure)
- steps: Run node --test .github/scripts/__tests__/*.test.js (failure)
---
## PR Tasks and Acceptance Criteria

**Progress:** 10/12 tasks complete, 2 remaining

### ⚠️ IMPORTANT: Task Reconciliation Required

The previous iteration changed **2 file(s)** but did not update task checkboxes.

**Before continuing, you MUST:**
1. Review the recent commits to understand what was changed
2. Determine which task checkboxes should be marked complete
3. Update the PR body to check off completed tasks
4. Then continue with remaining tasks

_Failure to update checkboxes means progress is not being tracked properly._

### Scope
- [ ] <!-- Updated scope for this follow-up -->
- [ ] Address unmet acceptance criteria from PR #166.
- [ ] Original scope:
- [ ] The verifier CI query (`verifier_ci_query.js`) currently makes a single API call to fetch workflow run results. If the GitHub API returns a transient error (rate limit, timeout, network hiccup), the query fails silently and the verifier sees missing CI results.
- [ ] This can cause false negatives where the verifier marks test-related criteria as NOT MET due to API failures rather than actual CI failures.
- [ ] ### Current Behavior
- [ ] - Single API call per workflow
- [ ] - Failures logged as warnings but not retried
- [ ] - Missing results treated as "not found"
- [ ] ### Desired Behavior
- [ ] - Retry transient failures with exponential backoff
- [ ] - Distinguish between "CI not run" and "API error"
- [ ] - Log retry attempts for debugging

### Tasks
Complete these in order. Mark checkbox done ONLY after implementation is verified:

- [x] <!-- New tasks to address unmet acceptance criteria -->
- [x] Satisfy: Transient API failures (429, 500, 502, 503, 504) are retried up to 3 times
- [x] Satisfy: Successful retry results in correct CI data being returned
- [x] Satisfy: Max retry exceeded results in clear error message, not silent "not found"
- [x] Satisfy: Tests cover retry success and retry exhaustion scenarios
- [ ] Satisfy: Selftest CI passes

### Acceptance Criteria
The PR is complete when ALL of these are satisfied:

- [x] <!-- Criteria verified as unmet by verifier -->
- [x] Transient API failures (429, 500, 502, 503, 504) are retried up to 3 times
- [x] Successful retry results in correct CI data being returned
- [x] Max retry exceeded results in clear error message, not silent "not found"
- [x] Tests cover retry success and retry exhaustion scenarios
- [ ] Selftest CI passes

---
Loading