diff --git a/.github/scripts/__tests__/keepalive-loop.test.js b/.github/scripts/__tests__/keepalive-loop.test.js index af722f03c..16dfcd235 100644 --- a/.github/scripts/__tests__/keepalive-loop.test.js +++ b/.github/scripts/__tests__/keepalive-loop.test.js @@ -11,6 +11,8 @@ const { evaluateKeepaliveLoop, updateKeepaliveLoopSummary, markAgentRunning, + analyzeTaskCompletion, + autoReconcileTasks, } = require('../keepalive_loop.js'); const { formatStateComment } = require('../keepalive_state.js'); @@ -188,13 +190,14 @@ test('evaluateKeepaliveLoop stops when tasks are complete', async () => { assert.equal(result.reason, 'tasks-complete'); }); -test('evaluateKeepaliveLoop stops when max iterations are reached', async () => { +test('evaluateKeepaliveLoop stops when max iterations reached AND unproductive', async () => { const pr = { number: 404, head: { ref: 'feature/four', sha: 'sha-4' }, labels: [{ name: 'agent:codex' }], body: '## Tasks\n- [ ] one\n## Acceptance Criteria\n- [ ] a\n', }; + // No previous state with file changes = unproductive const github = buildGithubStub({ pr, workflowRuns: [{ head_sha: 'sha-4', conclusion: 'success' }], @@ -205,7 +208,39 @@ test('evaluateKeepaliveLoop stops when max iterations are reached', async () => core: buildCore(), }); assert.equal(result.action, 'stop'); - assert.equal(result.reason, 'max-iterations'); + assert.equal(result.reason, 'max-iterations-unproductive'); +}); + +test('evaluateKeepaliveLoop continues past max iterations when productive', async () => { + const pr = { + number: 405, + head: { ref: 'feature/extended', sha: 'sha-ext' }, + labels: [{ name: 'agent:codex' }], + body: '## Tasks\n- [ ] one\n## Acceptance Criteria\n- [ ] a', + }; + // State shows productive work (files changed, no failures) + const stateComment = formatStateComment({ + trace: '', + iteration: 6, + max_iterations: 5, + last_files_changed: 3, + failure: {}, + }); + const comments = [ + { id: 22, body: stateComment, html_url: 
'https://example.com/22' }, + ]; + const github = buildGithubStub({ + pr, + comments, + workflowRuns: [{ head_sha: 'sha-ext', conclusion: 'success' }], + }); + const result = await evaluateKeepaliveLoop({ + github, + context: buildContext(pr.number), + core: buildCore(), + }); + assert.equal(result.action, 'run', 'Should continue running when productive'); + assert.equal(result.reason, 'ready-extended', 'Should show extended mode'); }); test('evaluateKeepaliveLoop waits when gate has not succeeded', async () => { @@ -289,7 +324,7 @@ test('updateKeepaliveLoopSummary increments iteration and clears failures on suc assert.equal(github.actions.length, 1); assert.equal(github.actions[0].type, 'update'); - assert.match(github.actions[0].body, /Iteration \*\*3\/5\*\*/); + assert.match(github.actions[0].body, /Iteration 3\/5/); assert.match(github.actions[0].body, /Iteration progress \| \[######----\] 3\/5 \|/); assert.match(github.actions[0].body, /### Last Codex Run/); assert.match(github.actions[0].body, /✅ Success/); @@ -386,7 +421,7 @@ test('updateKeepaliveLoopSummary uses state iteration when inputs have stale val assert.equal(github.actions[0].type, 'update'); // Should preserve iteration=2 from state, NOT use stale iteration=0 from inputs assert.match(github.actions[0].body, /"iteration":2/); - assert.match(github.actions[0].body, /Iteration \*\*2\/5\*\*/); + assert.match(github.actions[0].body, /Iteration 2\/5/); }); test('updateKeepaliveLoopSummary pauses after repeated failures and adds label', async () => { @@ -661,3 +696,207 @@ test('markAgentRunning creates comment when none exists', async () => { assert.ok(body.includes('Claude is actively working'), 'Should capitalize agent name'); assert.ok(body.includes('Iteration | 1 of 3'), 'Should show iteration 1 (0+1)'); }); + +// ===================================================== +// Task Reconciliation Tests +// ===================================================== + +test('analyzeTaskCompletion identifies 
high-confidence matches', async () => { + const commits = [ + { sha: 'abc123', commit: { message: 'feat: add step summary output to keepalive loop' } }, + { sha: 'def456', commit: { message: 'test: add tests for step summary emission' } }, + ]; + const files = [ + { filename: '.github/workflows/agents-keepalive-loop.yml' }, + { filename: '.github/scripts/keepalive_loop.js' }, + ]; + + const github = { + rest: { + repos: { + async compareCommits() { + return { data: { commits } }; + }, + }, + pulls: { + async listFiles() { + return { data: files }; + }, + }, + }, + }; + + const taskText = ` +- [ ] Add step summary output to agents-keepalive-loop.yml after agent run +- [ ] Include: iteration number, tasks completed, files changed, outcome +- [ ] Ensure summary is visible in workflow run UI +- [ ] Unrelated task about something else entirely +`; + + const result = await analyzeTaskCompletion({ + github, + context: { repo: { owner: 'test', repo: 'repo' } }, + prNumber: 1, + baseSha: 'base123', + headSha: 'head456', + taskText, + core: buildCore(), + }); + + assert.ok(result.matches.length > 0, 'Should find at least one match'); + + // Should match the step summary task with high confidence + const stepSummaryMatch = result.matches.find(m => + m.task.toLowerCase().includes('step summary') + ); + assert.ok(stepSummaryMatch, 'Should match step summary task'); + assert.equal(stepSummaryMatch.confidence, 'high', 'Should be high confidence'); +}); + +test('analyzeTaskCompletion returns empty for unrelated commits', async () => { + const commits = [ + { sha: 'abc123', commit: { message: 'fix: typo in readme' } }, + ]; + const files = [ + { filename: 'README.md' }, + ]; + + const github = { + rest: { + repos: { + async compareCommits() { + return { data: { commits } }; + }, + }, + pulls: { + async listFiles() { + return { data: files }; + }, + }, + }, + }; + + const taskText = ` +- [ ] Implement complex feature in keepalive workflow +- [ ] Add database migrations +`; + + const 
result = await analyzeTaskCompletion({ + github, + context: { repo: { owner: 'test', repo: 'repo' } }, + prNumber: 1, + baseSha: 'base123', + headSha: 'head456', + taskText, + core: buildCore(), + }); + + // Should find no high-confidence matches + const highConfidence = result.matches.filter(m => m.confidence === 'high'); + assert.equal(highConfidence.length, 0, 'Should not find high-confidence matches for unrelated commits'); +}); + +test('autoReconcileTasks updates PR body for high-confidence matches', async () => { + const prBody = `## Tasks +- [ ] Add step summary output to keepalive loop +- [ ] Add tests for step summary +- [x] Already completed task +`; + + const commits = [ + { sha: 'abc123', commit: { message: 'feat: add step summary output to keepalive loop' } }, + ]; + const files = [ + { filename: '.github/scripts/keepalive_loop.js' }, + ]; + + let updatedBody = null; + const github = { + rest: { + pulls: { + async get() { + return { data: { body: prBody } }; + }, + async update({ body }) { + updatedBody = body; + return { data: {} }; + }, + async listFiles() { + return { data: files }; + }, + }, + repos: { + async compareCommits() { + return { data: { commits } }; + }, + }, + }, + }; + + const result = await autoReconcileTasks({ + github, + context: { repo: { owner: 'test', repo: 'repo' } }, + prNumber: 1, + baseSha: 'base123', + headSha: 'head456', + core: buildCore(), + }); + + assert.ok(result.updated, 'Should update PR body'); + assert.ok(result.tasksChecked > 0, 'Should check at least one task'); + + if (updatedBody) { + assert.ok(updatedBody.includes('[x] Add step summary'), 'Should check off matched task'); + assert.ok(updatedBody.includes('[x] Already completed'), 'Should preserve already-checked tasks'); + } +}); + +test('autoReconcileTasks skips when no high-confidence matches', async () => { + const prBody = `## Tasks +- [ ] Implement feature X +- [ ] Add tests for feature Y +`; + + const commits = [ + { sha: 'abc123', commit: { message: 
'docs: update readme' } }, + ]; + const files = [ + { filename: 'README.md' }, + ]; + + let updateCalled = false; + const github = { + rest: { + pulls: { + async get() { + return { data: { body: prBody } }; + }, + async update() { + updateCalled = true; + return { data: {} }; + }, + async listFiles() { + return { data: files }; + }, + }, + repos: { + async compareCommits() { + return { data: { commits } }; + }, + }, + }, + }; + + const result = await autoReconcileTasks({ + github, + context: { repo: { owner: 'test', repo: 'repo' } }, + prNumber: 1, + baseSha: 'base123', + headSha: 'head456', + core: buildCore(), + }); + + assert.equal(result.updated, false, 'Should not update PR body'); + assert.equal(result.tasksChecked, 0, 'Should not check any tasks'); + assert.equal(updateCalled, false, 'Should not call update API'); +}); diff --git a/.github/scripts/__tests__/keepalive-state.test.js b/.github/scripts/__tests__/keepalive-state.test.js index 4c5423eb7..845bf2b46 100644 --- a/.github/scripts/__tests__/keepalive-state.test.js +++ b/.github/scripts/__tests__/keepalive-state.test.js @@ -13,16 +13,31 @@ const { const buildGithubStub = ({ comments = [] } = {}) => { const actions = []; + const commentStore = comments.map((comment) => ({ ...comment })); + let nextId = 101 + commentStore.length; const github = { actions, rest: { issues: { - listComments: async () => ({ data: comments }), + listComments: async () => ({ data: commentStore }), + getComment: async ({ comment_id: commentId }) => { + const match = commentStore.find((comment) => comment.id === commentId); + return { data: match || { id: commentId, body: '' } }; + }, createComment: async ({ body }) => { + const id = nextId++; + const record = { id, body, html_url: `https://example.com/${id}` }; + commentStore.push(record); actions.push({ type: 'create', body }); - return { data: { id: 101, html_url: 'https://example.com/101' } }; + return { data: { id, html_url: record.html_url } }; }, updateComment: async ({ 
body, comment_id: commentId }) => { + const match = commentStore.find((comment) => comment.id === commentId); + if (match) { + match.body = body; + } else { + commentStore.push({ id: commentId, body, html_url: `https://example.com/${commentId}` }); + } actions.push({ type: 'update', body, commentId }); return { data: { id: commentId } }; }, @@ -85,6 +100,31 @@ test('createKeepaliveStateManager updates existing comment', async () => { assert.match(github.actions[0].body, /"status":"success"/); }); +test('createKeepaliveStateManager preserves summary body when updating state', async () => { + const initialBody = [ + '## Keepalive Summary', + '', + formatStateComment({ trace: 'trace-1', round: '7', pr_number: 42 }), + ].join('\n'); + const github = buildGithubStub({ + comments: [ + { id: 77, body: initialBody, html_url: 'https://example.com/77' }, + ], + }); + const manager = await createKeepaliveStateManager({ + github, + context: { repo: { owner: 'o', repo: 'r' } }, + prNumber: 42, + trace: 'trace-1', + round: '7', + }); + await manager.save({ result: { status: 'success' } }); + assert.equal(github.actions.length, 1); + assert.equal(github.actions[0].type, 'update'); + assert.match(github.actions[0].body, /## Keepalive Summary/); + assert.match(github.actions[0].body, /"status":"success"/); +}); + test('loadKeepaliveState returns stored payload when present', async () => { const storedBody = formatStateComment({ trace: 'trace-x', head_sha: 'def', version: 'v1' }); const github = buildGithubStub({ comments: [{ id: 99, body: storedBody, html_url: 'https://example.com/99' }] }); diff --git a/.github/scripts/keepalive_loop.js b/.github/scripts/keepalive_loop.js index c79abab8c..3e0fdf94c 100644 --- a/.github/scripts/keepalive_loop.js +++ b/.github/scripts/keepalive_loop.js @@ -410,10 +410,22 @@ async function evaluateKeepaliveLoop({ github, context, core }) { trace: config.trace, }); const state = stateResult.state || {}; - const iteration = toNumber(config.iteration ?? 
state.iteration, 0); + // Prefer state iteration unless config explicitly sets it (0 from config is default, not explicit) + const configHasExplicitIteration = config.iteration > 0; + const iteration = configHasExplicitIteration ? config.iteration : toNumber(state.iteration, 0); const maxIterations = toNumber(config.max_iterations ?? state.max_iterations, 5); const failureThreshold = toNumber(config.failure_threshold ?? state.failure_threshold, 3); + // Productivity tracking: determine if recent iterations have been productive + // An iteration is productive if it made file changes or completed tasks + const lastFilesChanged = toNumber(state.last_files_changed, 0); + const hasRecentFailures = Boolean(state.failure?.count > 0); + const isProductive = lastFilesChanged > 0 && !hasRecentFailures; + + // max_iterations is a "stuck detection" threshold, not a hard cap + // Continue past max if productive work is happening + const shouldStopForMaxIterations = iteration >= maxIterations && !isProductive; + // Build task appendix for the agent prompt (after state load for reconciliation info) const taskAppendix = buildTaskAppendix(normalisedSections, checkboxCounts, state); @@ -432,20 +444,21 @@ async function evaluateKeepaliveLoop({ github, context, core }) { } else if (allComplete) { action = 'stop'; reason = 'tasks-complete'; - } else if (iteration >= maxIterations) { + } else if (shouldStopForMaxIterations) { action = 'stop'; - reason = 'max-iterations'; + reason = isProductive ? 'max-iterations' : 'max-iterations-unproductive'; } else if (gateNormalized !== 'success') { action = 'wait'; reason = gateNormalized ? 'gate-not-success' : 'gate-pending'; } else if (tasksRemaining) { action = 'run'; - reason = 'ready'; + reason = iteration >= maxIterations ? 
'ready-extended' : 'ready'; } return { prNumber, prRef: pr.head.ref || '', + headSha: pr.head.sha || '', action, reason, gateConclusion, @@ -546,19 +559,27 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) { // Capitalize agent name for display const agentDisplayName = agentType.charAt(0).toUpperCase() + agentType.slice(1); + + // Determine if we're in extended mode (past max_iterations but still productive) + const inExtendedMode = nextIteration > maxIterations && maxIterations > 0; + const iterationDisplay = inExtendedMode + ? `**${nextIteration}/${maxIterations}** 🚀 extended` + : `${nextIteration}/${maxIterations || '∞'}`; const summaryLines = [ '', `## 🤖 Keepalive Loop Status`, '', - `**PR #${prNumber}** | Agent: **${agentDisplayName}** | Iteration **${nextIteration}/${maxIterations || '∞'}**`, + `**PR #${prNumber}** | Agent: **${agentDisplayName}** | Iteration ${iterationDisplay}`, '', '### Current State', `| Metric | Value |`, `|--------|-------|`, `| Iteration progress | ${ maxIterations > 0 - ? formatProgressBar(nextIteration, maxIterations) + ? inExtendedMode + ? `${formatProgressBar(maxIterations, maxIterations)} +${nextIteration - maxIterations} extended` + : formatProgressBar(nextIteration, maxIterations) : 'n/a (unbounded)' } |`, `| Action | ${action || 'unknown'} (${summaryReason || 'n/a'}) |`, @@ -788,6 +809,228 @@ async function markAgentRunning({ github, context, core, inputs }) { } } +/** + * Analyze commits and files changed to infer which tasks may have been completed. + * Uses keyword matching and file path analysis to suggest task completions. 
/**
 * Build a logger that emits each message exactly once.
 *
 * `core.info` from the Actions toolkit returns `undefined`, so the common
 * `core?.info?.(msg) || console.log(msg)` shortcut prints every message twice.
 * The sink is chosen up front instead: `core.info` when it is a function,
 * `console.log` otherwise.
 * @param {object} [core] - Optional Actions core object
 * @returns {(msg: string) => void} Logging function
 */
function taskReconcileLogger(core) {
  if (typeof core?.info === 'function') {
    return (msg) => {
      core.info(msg);
    };
  }
  return (msg) => {
    console.log(msg);
  };
}

/**
 * Analyze commits and files changed to infer which tasks may have been completed.
 * Uses keyword matching and file path analysis to suggest task completions.
 *
 * Only unchecked checkbox lines (`- [ ] ...`) in `taskText` are considered.
 * A task is reported with `high` confidence when at least half of its words
 * appear in the commit messages / changed paths AND a changed file or a commit
 * message mentions one of its words; `medium` when the overlap is at least 30%
 * or a changed file mentions one of its words. Other tasks are not reported.
 * @param {object} params - Parameters
 * @param {object} params.github - GitHub API client
 * @param {object} params.context - GitHub Actions context
 * @param {number} params.prNumber - PR number
 * @param {string} params.baseSha - Base SHA to compare from
 * @param {string} params.headSha - Head SHA to compare to
 * @param {string} params.taskText - The raw task/acceptance text from PR body
 * @param {object} [params.core] - Optional core for logging
 * @returns {Promise<{matches: Array<{task: string, reason: string, confidence: string}>, summary: string}>}
 */
async function analyzeTaskCompletion({ github, context, prNumber, baseSha, headSha, taskText, core }) {
  const matches = [];
  const log = taskReconcileLogger(core);

  if (!taskText || !baseSha || !headSha) {
    return { matches, summary: 'Insufficient data for task analysis' };
  }

  // Get commits between base and head; without them there is nothing to analyze.
  let commits = [];
  try {
    const { data } = await github.rest.repos.compareCommits({
      owner: context.repo.owner,
      repo: context.repo.repo,
      base: baseSha,
      head: headSha,
    });
    commits = data.commits || [];
  } catch (error) {
    log(`Failed to get commits: ${error.message}`);
    return { matches, summary: `Failed to analyze: ${error.message}` };
  }

  // Get files changed. Best effort: a failure here only removes the file signal.
  let filesChanged = [];
  try {
    const params = {
      owner: context.repo.owner,
      repo: context.repo.repo,
      pull_number: prNumber,
      per_page: 100,
    };
    // Page through the listing when the client supports it so PRs touching
    // more than 100 files are not silently truncated.
    const files = typeof github.paginate === 'function'
      ? await github.paginate(github.rest.pulls.listFiles, params)
      : (await github.rest.pulls.listFiles(params)).data;
    filesChanged = (Array.isArray(files) ? files : [])
      .map((f) => f?.filename)
      .filter((name) => typeof name === 'string');
  } catch (error) {
    log(`Failed to get files: ${error.message}`);
  }

  // Parse tasks into individual items (unchecked checkboxes only)
  const taskLines = taskText.split('\n')
    .map((line) => line.match(/^\s*[-*+]\s*\[\s*\]\s*(.+)$/))
    .filter(Boolean)
    .map((found) => found[1].trim())
    .filter(Boolean);

  log(`Analyzing ${commits.length} commits against ${taskLines.length} unchecked tasks`);

  // Lower-case everything once; a commit without a message counts as empty text
  // instead of crashing the analysis.
  const messages = commits.map((c) => String(c?.commit?.message ?? '').toLowerCase());
  const paths = filesChanged.map((f) => f.toLowerCase());

  // Build keyword set from commit messages...
  const commitKeywords = new Set(messages.join(' ').match(/\b[a-z_-]{3,}\b/g) || []);

  // ...and from the segments of every changed file path
  for (const path of paths) {
    for (const part of path.replace(/[^a-z0-9_/-]/g, ' ').split(/[\s/]+/)) {
      if (part.length > 2) {
        commitKeywords.add(part);
      }
    }
  }

  // Match tasks to commits/files
  for (const task of taskLines) {
    const taskWords = task.toLowerCase().match(/\b[a-z_-]{3,}\b/g) || [];

    // Overlap score: share of task words seen in commit messages or paths
    const matchingWords = taskWords.filter((w) => commitKeywords.has(w));
    const score = taskWords.length > 0 ? matchingWords.length / taskWords.length : 0;

    // A changed file path contains one of the task words
    const fileMatch = paths.some((path) => taskWords.some((w) => path.includes(w)));

    // A single commit message contains one of the longer (> 4 chars) task words
    const commitMatch = messages.some((msg) => taskWords.some((w) => w.length > 4 && msg.includes(w)));

    if (score >= 0.5 && (fileMatch || commitMatch)) {
      matches.push({
        task,
        reason: `${Math.round(score * 100)}% keyword match, ${fileMatch ? 'file match' : 'commit match'}`,
        confidence: 'high',
      });
    } else if (score >= 0.3 || fileMatch) {
      matches.push({
        task,
        reason: `${Math.round(score * 100)}% keyword match${fileMatch ? ', file touched' : ''}`,
        confidence: 'medium',
      });
    }
  }

  const summary = matches.length > 0
    ? `Found ${matches.length} potential task completion(s): ${matches.filter(m => m.confidence === 'high').length} high, ${matches.filter(m => m.confidence === 'medium').length} medium confidence`
    : 'No clear task matches found in commits';

  log(summary);
  return { matches, summary };
}

/**
 * Auto-reconcile task checkboxes in PR body based on commit analysis.
 * Updates the PR body to check off tasks that appear to be completed.
 *
 * Only `high` confidence matches from `analyzeTaskCompletion` are checked off.
 * A checkbox is only rewritten when the task text runs to the end of its line,
 * so a task that is a prefix of a longer task never ticks the longer one.
 * @param {object} params - Parameters
 * @param {object} params.github - GitHub API client
 * @param {object} params.context - GitHub Actions context
 * @param {number} params.prNumber - PR number
 * @param {string} params.baseSha - Base SHA (before agent work)
 * @param {string} params.headSha - Head SHA (after agent work)
 * @param {object} [params.core] - Optional core for logging
 * @returns {Promise<{updated: boolean, tasksChecked: number, details: string}>}
 */
async function autoReconcileTasks({ github, context, prNumber, baseSha, headSha, core }) {
  const log = taskReconcileLogger(core);

  // Get current PR body
  let pr;
  try {
    const { data } = await github.rest.pulls.get({
      owner: context.repo.owner,
      repo: context.repo.repo,
      pull_number: prNumber,
    });
    pr = data;
  } catch (error) {
    log(`Failed to get PR: ${error.message}`);
    return { updated: false, tasksChecked: 0, details: `Failed to get PR: ${error.message}` };
  }

  const sections = parseScopeTasksAcceptanceSections(pr.body || '');
  const taskText = [sections.tasks, sections.acceptance].filter(Boolean).join('\n');

  if (!taskText) {
    return { updated: false, tasksChecked: 0, details: 'No tasks found in PR body' };
  }

  // Analyze what tasks may have been completed
  const analysis = await analyzeTaskCompletion({
    github, context, prNumber, baseSha, headSha, taskText, core,
  });

  // Only auto-check high-confidence matches
  const highConfidence = analysis.matches.filter((m) => m.confidence === 'high');

  if (highConfidence.length === 0) {
    log('No high-confidence task matches to auto-check');
    return {
      updated: false,
      tasksChecked: 0,
      details: analysis.summary + ' (no high-confidence matches for auto-check)',
    };
  }

  // Update PR body to check off matched tasks
  let updatedBody = pr.body || '';
  const checkedTasks = [];

  for (const match of highConfidence) {
    // Escape special regex characters in task text
    const escaped = match.task.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // The lookahead pins the task text to the end of its line; without it a
    // task that is a prefix of an earlier, longer task would tick that one.
    const pattern = new RegExp(`([-*+]\\s*)\\[\\s*\\](\\s*${escaped})(?=[ \\t]*\\r?$)`, 'im');
    const nextBody = updatedBody.replace(pattern, (_whole, bullet, text) => `${bullet}[x]${text}`);

    if (nextBody !== updatedBody) {
      updatedBody = nextBody;
      checkedTasks.push(match.task);
      log(`Auto-checked task: ${match.task.slice(0, 50)}... (${match.reason})`);
    }
  }

  const checkedCount = checkedTasks.length;
  if (checkedCount === 0) {
    return {
      updated: false,
      tasksChecked: 0,
      details: 'Tasks matched but patterns not found in body',
    };
  }

  // Update the PR body
  try {
    await github.rest.pulls.update({
      owner: context.repo.owner,
      repo: context.repo.repo,
      pull_number: prNumber,
      body: updatedBody,
    });
    log(`Updated PR body, checked ${checkedCount} task(s)`);
  } catch (error) {
    log(`Failed to update PR body: ${error.message}`);
    return {
      updated: false,
      tasksChecked: 0,
      details: `Failed to update PR: ${error.message}`,
    };
  }

  // Report only the tasks whose checkbox was actually rewritten.
  return {
    updated: true,
    tasksChecked: checkedCount,
    details: `Auto-checked ${checkedCount} task(s): ${checkedTasks.map(t => t.slice(0, 30) + '...').join(', ')}`,
  };
}
upsertStateCommentBody(body, stateComment) { + const existing = String(body ?? ''); + const marker = String(stateComment ?? '').trim(); + if (!marker) { + return existing; + } + if (!existing.trim()) { + return marker; + } + if (STATE_REGEX.test(existing)) { + return existing.replace(STATE_REGEX, () => marker); + } + const trimmed = existing.trimEnd(); + const separator = trimmed ? '\n\n' : ''; + return `${trimmed}${separator}${marker}`; +} + async function listAllComments({ github, owner, repo, prNumber }) { if (!github?.paginate || !github?.rest?.issues?.listComments) { return []; @@ -127,6 +144,7 @@ async function createKeepaliveStateManager({ github, context, prNumber, trace, r let state = existing?.state && typeof existing.state === 'object' ? { ...existing.state } : {}; let commentId = existing?.comment?.id ? Number(existing.comment.id) : 0; let commentUrl = existing?.comment?.html_url || ''; + let commentBody = existing?.comment?.body || ''; const ensureDefaults = () => { if (trace && normalise(state.trace) !== trace) { @@ -149,12 +167,29 @@ async function createKeepaliveStateManager({ github, context, prNumber, trace, r const body = formatStateComment(state); if (commentId) { + let latestBody = commentBody; + if (github?.rest?.issues?.getComment) { + try { + const response = await github.rest.issues.getComment({ + owner, + repo, + comment_id: commentId, + }); + if (response?.data?.body) { + latestBody = response.data.body; + } + } catch (error) { + // fall back to cached body if lookup fails + } + } + const updatedBody = upsertStateCommentBody(latestBody, body); await github.rest.issues.updateComment({ owner, repo, comment_id: commentId, - body, + body: updatedBody, }); + commentBody = updatedBody; } else { const { data } = await github.rest.issues.createComment({ owner, @@ -164,6 +199,7 @@ async function createKeepaliveStateManager({ github, context, prNumber, trace, r }); commentId = data?.id ? 
Number(data.id) : 0; commentUrl = data?.html_url || ''; + commentBody = body; } return { state: { ...state }, commentId, commentUrl }; @@ -205,5 +241,6 @@ module.exports = { loadKeepaliveState, parseStateComment, formatStateComment, + upsertStateCommentBody, deepMerge, }; diff --git a/.github/workflows/agents-keepalive-loop.yml b/.github/workflows/agents-keepalive-loop.yml index 0a39b5359..d259ca61a 100644 --- a/.github/workflows/agents-keepalive-loop.yml +++ b/.github/workflows/agents-keepalive-loop.yml @@ -25,6 +25,7 @@ jobs: outputs: pr_number: ${{ steps.evaluate.outputs.pr_number }} pr_ref: ${{ steps.evaluate.outputs.pr_ref }} + head_sha: ${{ steps.evaluate.outputs.head_sha }} action: ${{ steps.evaluate.outputs.action }} reason: ${{ steps.evaluate.outputs.reason }} gate_conclusion: ${{ steps.evaluate.outputs.gate_conclusion }} @@ -111,6 +112,7 @@ jobs: const output = { pr_number: String(result.prNumber || ''), pr_ref: String(result.prRef || ''), + head_sha: String(result.headSha || ''), action: result.action || '', reason: result.reason || '', gate_conclusion: result.gateConclusion || '', @@ -319,6 +321,41 @@ jobs: retention-days: 30 if-no-files-found: error + - name: Auto-reconcile task checkboxes + if: needs.run-codex.outputs.changes-made == 'true' + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const { autoReconcileTasks } = require('./.github/scripts/keepalive_loop.js'); + + const prNumber = Number('${{ needs.evaluate.outputs.pr_number }}') || 0; + const beforeSha = '${{ needs.evaluate.outputs.head_sha }}'; // SHA before agent ran + const headSha = '${{ needs.run-codex.outputs.commit-sha }}'; // SHA after agent ran + + if (!prNumber || !beforeSha || !headSha) { + core.info('Missing required inputs for task reconciliation'); + return; + } + + core.info(`Auto-reconciling tasks for PR #${prNumber}`); + core.info(`Comparing ${beforeSha.slice(0, 7)} → ${headSha.slice(0, 7)}`); + + const result = await 
autoReconcileTasks({ + github, context, prNumber, baseSha: beforeSha, headSha, core + }); + + if (result.updated) { + core.info(`✅ ${result.details}`); + core.notice(`Auto-checked ${result.tasksChecked} task(s) based on commit analysis`); + } else { + core.info(`ℹ️ ${result.details}`); + } + + // Output for step summary + core.setOutput('tasks_checked', result.tasksChecked); + core.setOutput('reconciliation_details', result.details); + - name: Update summary comment uses: actions/github-script@v7 env: diff --git a/.github/workflows/agents-verifier.yml b/.github/workflows/agents-verifier.yml index 9a163324a..d68475cc5 100644 --- a/.github/workflows/agents-verifier.yml +++ b/.github/workflows/agents-verifier.yml @@ -65,12 +65,22 @@ jobs: if: steps.context.outputs.should_run == 'true' env: CODEX_AUTH_JSON: ${{ secrets.CODEX_AUTH_JSON }} + CODEX_HOME: ${{ runner.temp }}/.codex-verifier run: | set -euo pipefail + + # Setup auth in default location mkdir -p ~/.codex echo "$CODEX_AUTH_JSON" > ~/.codex/auth.json chmod 600 ~/.codex/auth.json + + # Also setup in CODEX_HOME for the action + mkdir -p "$CODEX_HOME" + cp ~/.codex/auth.json "$CODEX_HOME/auth.json" + chmod 600 "$CODEX_HOME/auth.json" + echo "Codex auth configured from CODEX_AUTH_JSON secret" + echo "Auth files created at ~/.codex/auth.json and $CODEX_HOME/auth.json" # Check token expiration python3 << 'PYEOF' @@ -149,13 +159,18 @@ jobs: id: codex if: steps.context.outputs.should_run == 'true' uses: openai/codex-action@v1 + env: + CODEX_HOME: ${{ runner.temp }}/.codex-verifier with: # Auth is pre-configured via ~/.codex/auth.json from CODEX_AUTH_JSON secret - openai-api-key: 'auth-via-codex-auth-json' + # When openai-api-key is empty, codex-action skips the proxy and uses auth.json directly + codex-home: ${{ runner.temp }}/.codex-verifier prompt-file: ${{ steps.prepare.outputs.prompt_file }} output-file: codex-output.md sandbox: read-only - safety-strategy: drop-sudo + # Use 'unsafe' since we're not using the API 
proxy (no key to protect) + # The sandbox: read-only already limits Codex's capabilities + safety-strategy: unsafe - name: Parse verifier verdict id: verdict diff --git a/codex-output.md b/codex-output.md index 4b78718f0..712274c75 100644 --- a/codex-output.md +++ b/codex-output.md @@ -1,13 +1,11 @@ -Implemented gate-summary suppression for agent-labeled PRs by short‑circuiting comment upserts when an `agent:*` label is present, and added coverage for that behavior. Updated the task appendix to reflect the verified Conditional Status Summary and keepalive summary work. +Updated keepalive state updates to preserve any existing summary content by replacing or appending the `keepalive-state` marker instead of overwriting the whole comment, so the summary stays the single source of truth. This is implemented in `.github/scripts/keepalive_state.js` and verified with new/updated coverage in `.github/scripts/__tests__/keepalive-state.test.js` to ensure summary text survives state updates. -Details -- Gate summary skip logic lives in `.github/scripts/comment-dedupe.js`; it now checks PR labels before upserting gate summaries. -- Added a unit test in `.github/scripts/__tests__/comment-dedupe.test.js` to ensure no comment is created/updated when an `agent:*` label is present. -- Checked off verified tasks and updated progress in `codex-prompt.md`. +Tests run: +- `node --test .github/scripts/__tests__/keepalive-state.test.js .github/scripts/__tests__/keepalive-loop.test.js .github/scripts/__tests__/agents-pr-meta-update-body.test.js .github/scripts/__tests__/comment-dedupe.test.js` -Tests -- `node --test .github/scripts/__tests__/comment-dedupe.test.js .github/scripts/__tests__/agents-pr-meta-update-body.test.js .github/scripts/__tests__/keepalive-loop.test.js` +Notes: +- Workflow-file changes (e.g., passing `agent_type` in `agents-pr-meta-v4.yml` or any other `.github/workflows/**` edits) still require a human with `agent-high-privilege`. -Next steps -1. 
Run the pipeline validation steps on a real PR with `agent:codex` to verify keepalive updates and prompt appendix visibility. -2. Apply the remaining workflow‑file changes (e.g., `agents-pr-meta` and gate summary gating) via a human‑approved edit, since `.github/workflows` is protected. \ No newline at end of file +If you want next steps, I can: +1) Add a unit test for CLI label suppression of keepalive instruction comments. +2) Outline the exact workflow diffs needed for the remaining workflow-only tasks.