diff --git a/.github/scripts/agents-guard.js b/.github/scripts/agents-guard.js index fddae9c0..0fc2971e 100644 --- a/.github/scripts/agents-guard.js +++ b/.github/scripts/agents-guard.js @@ -447,7 +447,9 @@ function evaluateGuard({ const hasCodeownerApproval = hasExternalApproval || authorIsCodeowner; const hasProtectedChanges = modifiedProtectedPaths.size > 0; - const needsApproval = hasProtectedChanges && !hasCodeownerApproval; + // Allow label to bypass approval for automated PRs (dependabot, renovate) + const isAutomatedPR = normalizedAuthor && (normalizedAuthor === 'dependabot[bot]' || normalizedAuthor === 'renovate[bot]'); + const needsApproval = hasProtectedChanges && !hasCodeownerApproval && !(hasAllowLabel && isAutomatedPR); const needsLabel = hasProtectedChanges && !hasAllowLabel && !hasCodeownerApproval; const failureReasons = []; diff --git a/.github/scripts/error_classifier.js b/.github/scripts/error_classifier.js index b37208e3..8bf50b4d 100644 --- a/.github/scripts/error_classifier.js +++ b/.github/scripts/error_classifier.js @@ -41,6 +41,13 @@ const TRANSIENT_PATTERNS = [ 'bad gateway', 'gateway timeout', 'eai_again', + // Git workspace state issues - agent encountered unexpected changes + 'unexpected changes', + 'untracked', + '.workflows-lib is modified', + 'codex-session', + 'existing changes', + 'how would you like me to proceed', ]; const AUTH_PATTERNS = [ diff --git a/.github/scripts/keepalive_instruction_template.js b/.github/scripts/keepalive_instruction_template.js index 52186301..55f0ee91 100644 --- a/.github/scripts/keepalive_instruction_template.js +++ b/.github/scripts/keepalive_instruction_template.js @@ -2,63 +2,109 @@ const fs = require('fs'); const path = require('path'); +const { resolvePromptMode } = require('./keepalive_prompt_routing'); /** - * Path to the canonical keepalive instruction template. - * Edit .github/templates/keepalive-instruction.md to change the instruction text. 
/**
 * Absolute path of the generic keepalive instruction template.
 * It is used whenever a resolved prompt mode has no dedicated template.
 * Edit .github/templates/keepalive-instruction.md to change the fallback text.
 */
const TEMPLATE_PATH = path.resolve(__dirname, '../templates/keepalive-instruction.md');
const NEXT_TASK_TEMPLATE_PATH = path.resolve(__dirname, '../codex/prompts/keepalive_next_task.md');
const FIX_TEMPLATE_PATH = path.resolve(__dirname, '../codex/prompts/fix_ci_failures.md');
const VERIFY_TEMPLATE_PATH = path.resolve(__dirname, '../codex/prompts/verifier_acceptance_check.md');

/** Prompt mode (as returned by resolvePromptMode) -> template file. */
const TEMPLATE_PATHS = {
  normal: NEXT_TASK_TEMPLATE_PATH,
  fix_ci: FIX_TEMPLATE_PATH,
  verify: VERIFY_TEMPLATE_PATH,
};

/**
 * Instruction text that has already been loaded, keyed by template path.
 * @type {Map<string, string>}
 */
const instructionCache = new Map();

/**
 * Coerces any value to a trimmed string; null and undefined become ''.
 *
 * @param {*} value
 * @returns {string}
 */
function normalise(value) {
  const text = value ?? '';
  return String(text).trim();
}

/**
 * Picks the template file for a keepalive round.
 *
 * A non-blank `templatePath` always wins and is reported as mode 'custom'.
 * Otherwise the mode comes from resolvePromptMode and is mapped through
 * TEMPLATE_PATHS, with TEMPLATE_PATH as the default for unmapped modes.
 *
 * @param {object} [options]
 * @param {string} [options.templatePath] - Explicit template file to use.
 * @param {string} [options.mode]
 * @param {string} [options.action]
 * @param {string} [options.reason]
 * @param {string} [options.scenario]
 * @returns {{mode: string, path: string}}
 */
function resolveTemplatePath({ templatePath, mode, action, reason, scenario } = {}) {
  const customPath = normalise(templatePath);
  if (customPath !== '') {
    return { mode: 'custom', path: customPath };
  }

  const promptMode = resolvePromptMode({ mode, action, reason, scenario });
  const mappedPath = TEMPLATE_PATHS[promptMode];
  return { mode: promptMode, path: mappedPath || TEMPLATE_PATH };
}
/**
 * Built-in instruction text used when no template file can be read.
 *
 * @returns {string} The hard-coded keepalive directive (without @agent prefix)
 */
function getFallbackInstruction() {
  return [
    'Your objective is to satisfy the **Acceptance Criteria** by completing each **Task** within the defined **Scope**.',
    '',
    '**This round you MUST:**',
    '1. Implement actual code or test changes that advance at least one incomplete task toward acceptance.',
    '2. Commit meaningful source code (.py, .yml, .js, etc.)—not just status/docs updates.',
    '3. **UPDATE THE CHECKBOXES** in the Tasks and Acceptance Criteria sections below to mark completed items.',
    '4. Change `- [ ]` to `- [x]` for items you have completed and verified.',
    '',
    '**CRITICAL - Checkbox Updates:**',
    'When you complete a task or acceptance criterion, update its checkbox directly in this prompt file.',
    'Change the `[ ]` to `[x]` for completed items. The automation will read these checkboxes and update the PR status summary.',
    '',
    '**Example:**',
    'Before: `- [ ] Add validation for user input`',
    'After: `- [x] Add validation for user input`',
    '',
    '**DO NOT:**',
    '- Commit only status files, markdown summaries, or documentation when tasks require code.',
    '- Mark checkboxes complete without actually implementing and verifying the work.',
    '- Close the round without source-code changes when acceptance criteria require them.',
    '- Change the text of checkboxes—only change `[ ]` to `[x]`.',
    '',
    'Review the Scope/Tasks/Acceptance below, identify the next incomplete task that requires code, implement it, then **update the checkboxes** to mark completed items.',
  ].join('\n');
}

/**
 * Loads (and caches) the instruction text for a template file.
 *
 * Resolution order:
 *   1. the requested template (or TEMPLATE_PATH when none is given);
 *   2. TEMPLATE_PATH, when the requested file cannot be read and
 *      `allowDefaultFallback` is true;
 *   3. the built-in text from getFallbackInstruction().
 *
 * Every failed read is reported with the path that actually failed, so a
 * missing mode-specific prompt is never replaced silently.
 * The result is cached under the requested path, so later calls for the same
 * path neither touch the file system nor warn again.
 *
 * @param {string} [templatePath] - Template file to read.
 * @param {object} [options]
 * @param {boolean} [options.allowDefaultFallback=true] - Try TEMPLATE_PATH
 *   before falling back to the built-in text.
 * @returns {string} Trimmed instruction text
 */
function loadInstruction(templatePath, { allowDefaultFallback = true } = {}) {
  const resolvedPath = templatePath || TEMPLATE_PATH;
  if (instructionCache.has(resolvedPath)) {
    return instructionCache.get(resolvedPath);
  }

  let content = null;
  try {
    content = fs.readFileSync(resolvedPath, 'utf8').trim();
  } catch (err) {
    console.warn(`Warning: Could not load keepalive instruction template from ${resolvedPath}: ${err.message}`);
    if (allowDefaultFallback && resolvedPath !== TEMPLATE_PATH) {
      try {
        content = fs.readFileSync(TEMPLATE_PATH, 'utf8').trim();
      } catch (fallbackError) {
        // Report the default template's own path next to its own error.
        console.warn(
          `Warning: Could not load keepalive instruction template from ${TEMPLATE_PATH}: ${fallbackError.message}`
        );
      }
    }
  }

  if (content === null) {
    content = getFallbackInstruction();
  }

  instructionCache.set(resolvedPath, content);
  return content;
}
/**
 * Returns the keepalive instruction directive text for a round.
 *
 * The template file is chosen by resolveTemplatePath from the given options
 * and read through loadInstruction with the default-template fallback
 * enabled. Anything that is not an object is treated as "no options".
 *
 * @param {object} [options] - Routing hints: templatePath, mode, action,
 *   reason, scenario.
 * @returns {string} The instruction directive (without @agent prefix)
 */
function getKeepaliveInstruction(options = {}) {
  const isOptionsObject = Boolean(options) && typeof options === 'object';
  const { path: templateFile } = resolveTemplatePath(isOptionsObject ? options : {});
  return loadInstruction(templateFile, { allowDefaultFallback: true });
}

/**
 * Returns the keepalive instruction prefixed with an @agent mention.
 *
 * Two call shapes are supported:
 *   - getKeepaliveInstructionWithMention('codex', { mode: 'verify' })
 *   - getKeepaliveInstructionWithMention({ agent: 'codex', mode: 'verify' })
 * In the second shape the first argument doubles as the options object and
 * the second argument is ignored.
 *
 * @param {string|object} [agent='codex'] - Agent alias, or an options object
 *   carrying an `agent` property.
 * @param {object} [options] - Options forwarded to getKeepaliveInstruction.
 * @returns {string} The full instruction with @agent prefix
 */
function getKeepaliveInstructionWithMention(agent = 'codex', options = {}) {
  const calledWithOptionsObject = Boolean(agent) && typeof agent === 'object';
  const params = calledWithOptionsObject ? agent : options;
  const requestedAgent = calledWithOptionsObject ? agent.agent : agent;

  // A blank or missing alias falls back to the default agent.
  const alias = String(requestedAgent || '').trim() || 'codex';
  const instruction = getKeepaliveInstruction(params);
  return `@${alias} ${instruction}`;
}
*/ function clearCache() { - cachedInstruction = null; + instructionCache.clear(); } module.exports = { TEMPLATE_PATH, + NEXT_TASK_TEMPLATE_PATH, + FIX_TEMPLATE_PATH, + VERIFY_TEMPLATE_PATH, getKeepaliveInstruction, getKeepaliveInstructionWithMention, clearCache, diff --git a/.github/scripts/keepalive_loop.js b/.github/scripts/keepalive_loop.js index d9b9e30e..cdce7012 100644 --- a/.github/scripts/keepalive_loop.js +++ b/.github/scripts/keepalive_loop.js @@ -5,13 +5,37 @@ const path = require('path'); const { parseScopeTasksAcceptanceSections } = require('./issue_scope_parser'); const { loadKeepaliveState, formatStateComment } = require('./keepalive_state'); +const { resolvePromptMode } = require('./keepalive_prompt_routing'); const { classifyError, ERROR_CATEGORIES } = require('./error_classifier'); const { formatFailureComment } = require('./failure_comment_formatter'); +const ATTEMPT_HISTORY_LIMIT = 5; +const ATTEMPTED_TASK_LIMIT = 6; + +const PROMPT_ROUTES = { + fix_ci: { + mode: 'fix_ci', + file: '.github/codex/prompts/fix_ci_failures.md', + }, + verify: { + mode: 'verify', + file: '.github/codex/prompts/verifier_acceptance_check.md', + }, + normal: { + mode: 'normal', + file: '.github/codex/prompts/keepalive_next_task.md', + }, +}; + function normalise(value) { return String(value ?? 
/**
 * Resolves the prompt route (mode + prompt file) for a keepalive round.
 * The mode comes from resolvePromptMode; modes without an entry in
 * PROMPT_ROUTES use the normal route.
 *
 * @param {object} [options]
 * @param {string} [options.scenario]
 * @param {string} [options.mode]
 * @param {string} [options.action]
 * @param {string} [options.reason]
 * @returns {{mode: string, file: string}}
 */
function resolvePromptRouting({ scenario, mode, action, reason } = {}) {
  const routeKey = resolvePromptMode({ scenario, mode, action, reason });
  return PROMPT_ROUTES[routeKey] || PROMPT_ROUTES.normal;
}

/**
 * Builds one attempt-history record for the persisted keepalive state.
 *
 * `iteration`, `action` and `reason` are always present (action defaults to
 * 'unknown', reason defaults to the action). The remaining fields are only
 * added when a truthy value was supplied.
 *
 * @param {object} details
 * @returns {object} Attempt entry with snake_case keys
 */
function buildAttemptEntry({
  iteration,
  action,
  reason,
  runResult,
  promptMode,
  promptFile,
  gateConclusion,
  errorCategory,
  errorType,
}) {
  const actionValue = normalise(action) || 'unknown';
  const entry = {
    iteration: Math.max(0, toNumber(iteration, 0)),
    action: actionValue,
    reason: normalise(reason) || actionValue,
  };

  const optionalFields = [
    ['run_result', runResult],
    ['prompt_mode', promptMode],
    ['prompt_file', promptFile],
    ['gate', gateConclusion],
    ['error_category', errorCategory],
    ['error_type', errorType],
  ];
  for (const [key, raw] of optionalFields) {
    if (raw) {
      entry[key] = normalise(raw);
    }
  }

  return entry;
}

/**
 * Returns the last `limit` attempts of `list`.
 * `slice(-limit)` alone is not enough: `slice(-0)` returns the whole array and
 * a negative limit slices from the front, so limits below 1 (or NaN) are
 * handled explicitly and keep nothing.
 */
function keepLastAttempts(list, limit) {
  const cap = Math.floor(Number(limit));
  if (!(cap >= 1)) {
    return [];
  }
  return list.slice(-cap);
}

/**
 * Appends an attempt to the bounded attempt history without mutating it.
 *
 * Non-object items in `existing` are dropped. When the newest stored attempt
 * has the same iteration, action and reason as `nextEntry`, the two are merged
 * (fields of `nextEntry` win) instead of adding a duplicate.
 *
 * @param {Array<object>} existing - Previously stored attempts (any value tolerated).
 * @param {object} nextEntry - Attempt to record; ignored when not an object.
 * @param {number} [limit=ATTEMPT_HISTORY_LIMIT] - Maximum entries to keep;
 *   values below 1 keep none.
 * @returns {Array<object>} New history array, oldest first
 */
function updateAttemptHistory(existing, nextEntry, limit = ATTEMPT_HISTORY_LIMIT) {
  const history = Array.isArray(existing)
    ? existing.filter((item) => item && typeof item === 'object')
    : [];
  const trimmed = keepLastAttempts(history, limit);
  if (!nextEntry || typeof nextEntry !== 'object') {
    return trimmed;
  }

  const last = trimmed[trimmed.length - 1];
  const repeatsLast =
    Boolean(last) &&
    last.iteration === nextEntry.iteration &&
    last.action === nextEntry.action &&
    last.reason === nextEntry.reason;
  if (repeatsLast) {
    return [...trimmed.slice(0, -1), { ...last, ...nextEntry }];
  }
  return keepLastAttempts([...trimmed, nextEntry], limit);
}
/**
 * Collapses every whitespace run in a task description to a single space and
 * trims the ends; null and undefined become ''.
 *
 * @param {*} value
 * @returns {string}
 */
function normaliseTaskText(value) {
  return String(value ?? '').replace(/\s+/g, ' ').trim();
}

/**
 * Case-insensitive comparison key for a task description.
 *
 * @param {*} value
 * @returns {string}
 */
function normaliseTaskKey(value) {
  return normaliseTaskText(value).toLowerCase();
}

/**
 * Normalises a stored attempted-task list.
 *
 * Accepts plain strings and objects carrying `task` (or `text`). Entries
 * without any task text, and entries of any other type, are dropped. Object
 * entries keep their extra fields and gain normalised `task` and `key`.
 *
 * @param {*} value - Stored list; anything that is not an array yields [].
 * @returns {Array<{task: string, key: string}>}
 */
function normaliseAttemptedTasks(value) {
  if (!Array.isArray(value)) {
    return [];
  }

  const entries = [];
  for (const entry of value) {
    if (typeof entry === 'string') {
      const task = normaliseTaskText(entry);
      if (task) {
        entries.push({ task, key: normaliseTaskKey(task) });
      }
      continue;
    }
    if (!entry || typeof entry !== 'object') {
      continue;
    }
    const task = normaliseTaskText(entry.task || entry.text || '');
    if (task) {
      entries.push({ ...entry, task, key: normaliseTaskKey(entry.key || task) });
    }
  }
  return entries;
}

/**
 * Returns the last `limit` tasks of `list`.
 * `slice(-limit)` alone is not enough: `slice(-0)` returns the whole array and
 * a negative limit slices from the front, so limits below 1 (or NaN) are
 * handled explicitly and keep nothing.
 */
function keepLastTasks(list, limit) {
  const cap = Math.floor(Number(limit));
  if (!(cap >= 1)) {
    return [];
  }
  return list.slice(-cap);
}

/**
 * Records a task as attempted in the bounded attempted-task list.
 *
 * An earlier entry for the same task (matched by normalised key) is removed,
 * so the task moves to the end of the list with a fresh iteration/timestamp.
 * A blank `nextTask` only normalises and trims the existing list.
 *
 * @param {*} existing - Previously stored list (see normaliseAttemptedTasks).
 * @param {string} nextTask - Task text that was just attempted.
 * @param {number} iteration - Iteration of the attempt; clamped to >= 0.
 * @param {number} [limit=ATTEMPTED_TASK_LIMIT] - Maximum entries to keep;
 *   values below 1 keep none.
 * @returns {Array<object>} New list, oldest first
 */
function updateAttemptedTasks(existing, nextTask, iteration, limit = ATTEMPTED_TASK_LIMIT) {
  const history = normaliseAttemptedTasks(existing);
  const taskText = normaliseTaskText(nextTask);
  if (!taskText) {
    return keepLastTasks(history, limit);
  }

  const key = normaliseTaskKey(taskText);
  const others = history.filter((entry) => entry.key !== key);
  const entry = {
    task: taskText,
    key,
    iteration: Math.max(0, toNumber(iteration, 0)),
    timestamp: new Date().toISOString(),
  };
  return keepLastTasks([...others, entry], limit);
}
/**
 * Extracts markdown task-list items (`- [ ] text`, `* [x] text`, `1. [X] text`)
 * from a block of markdown.
 *
 * The bullet, the checkbox and the item text must all sit on the same line.
 * Only horizontal whitespace is skipped between them, so a checkbox with no
 * text is ignored instead of capturing the following line as its text.
 *
 * @param {string} markdown - Markdown to scan; falsy values yield [].
 * @returns {Array<{text: string, checked: boolean}>} Items in document order
 */
function extractChecklistItems(markdown) {
  const content = String(markdown || '');
  // [^\S\r\n] = any whitespace except line breaks. The pattern is created per
  // call, so the global flag's lastIndex state is never shared between calls.
  const itemPattern = /^[^\S\r\n]*(?:[-*+]|\d+[.)])[^\S\r\n]*\[( |x|X)\][^\S\r\n]*(.+)/gm;

  const items = [];
  for (const [, marker, rawText] of content.matchAll(itemPattern)) {
    const text = normaliseTaskText(rawText);
    if (text) {
      items.push({ text, checked: marker.toLowerCase() === 'x' });
    }
  }
  return items;
}
/**
 * Fetches the body text of a pull request through the GitHub REST client.
 *
 * This is best effort: it resolves to '' when the client or repository
 * coordinates are missing, when the request fails, or when the PR has no
 * body. A failed request is reported through `core.info` when `core` is given.
 *
 * @param {object} params
 * @param {object} params.github - Client exposing `rest.pulls.get`.
 * @param {object} params.context - Context providing `repo.owner` and `repo.repo`.
 * @param {number} params.prNumber - Pull request number.
 * @param {object} [params.core] - Logger exposing `info`.
 * @returns {Promise<string>} The PR body, or '' when unavailable
 */
async function fetchPrBody({ github, context, prNumber, core }) {
  const owner = context?.repo?.owner;
  const repo = context?.repo?.repo;
  const canQuery = Boolean(github?.rest?.pulls?.get) && Boolean(owner) && Boolean(repo);
  if (!canQuery) {
    return '';
  }

  try {
    const response = await github.rest.pulls.get({
      owner,
      repo,
      pull_number: prNumber,
    });
    const { data } = response;
    return String(data?.body || '');
  } catch (error) {
    if (core) {
      core.info(`Failed to fetch PR body for task focus: ${error.message}`);
    }
    return '';
  }
}
config : {}; const trace = normalise(cfg.trace || cfg.keepalive_trace); + const promptMode = normalise(cfg.prompt_mode ?? cfg.promptMode); + const promptFile = normalise(cfg.prompt_file ?? cfg.promptFile); + const promptScenario = normalise(cfg.prompt_scenario ?? cfg.promptScenario); return { keepalive_enabled: toBool( cfg.keepalive_enabled ?? cfg.enable_keepalive ?? cfg.keepalive, @@ -438,6 +657,9 @@ function normaliseConfig(config = {}) { max_iterations: toNumber(cfg.max_iterations ?? cfg.keepalive_max_iterations, 5), failure_threshold: toNumber(cfg.failure_threshold ?? cfg.keepalive_failure_threshold, 3), trace, + prompt_mode: promptMode, + prompt_file: promptFile, + prompt_scenario: promptScenario, }; } @@ -866,6 +1088,9 @@ async function evaluateKeepaliveLoop({ github, context, core, payload: overrideP let action = 'wait'; let reason = 'pending'; + const verificationStatus = normalise(state?.verification?.status); + const verificationDone = ['done', 'verified', 'complete'].includes(verificationStatus.toLowerCase()); + const needsVerification = allComplete && !verificationDone; if (!hasAgentLabel) { action = 'wait'; @@ -876,16 +1101,6 @@ async function evaluateKeepaliveLoop({ github, context, core, payload: overrideP } else if (!tasksPresent) { action = 'stop'; reason = 'no-checklists'; - } else if (allComplete) { - action = 'stop'; - reason = 'tasks-complete'; - } else if (shouldStopEarly) { - // Evidence-based early stopping: diminishing returns detected - action = 'stop'; - reason = 'diminishing-returns'; - } else if (shouldStopForMaxIterations) { - action = 'stop'; - reason = isProductive ? 'max-iterations' : 'max-iterations-unproductive'; } else if (gateNormalized !== 'success') { if (gateNormalized === 'cancelled') { gateRateLimit = await detectRateLimitCancellation({ @@ -919,16 +1134,37 @@ async function evaluateKeepaliveLoop({ github, context, core, payload: overrideP reason = gateNormalized ? 
'gate-not-success' : 'gate-pending'; } } + } else if (allComplete) { + if (needsVerification) { + action = 'run'; + reason = 'verify-acceptance'; + } else { + action = 'stop'; + reason = 'tasks-complete'; + } + } else if (shouldStopEarly) { + // Evidence-based early stopping: diminishing returns detected + action = 'stop'; + reason = 'diminishing-returns'; + } else if (shouldStopForMaxIterations) { + action = 'stop'; + reason = isProductive ? 'max-iterations' : 'max-iterations-unproductive'; } else if (tasksRemaining) { action = 'run'; reason = iteration >= maxIterations ? 'ready-extended' : 'ready'; } - // Determine prompt mode based on action - const promptMode = action === 'fix' ? 'fix_ci' : 'normal'; - const promptFile = action === 'fix' - ? '.github/codex/prompts/fix_ci_failures.md' - : '.github/codex/prompts/keepalive_next_task.md'; + const promptScenario = normalise(config.prompt_scenario); + const promptModeOverride = normalise(config.prompt_mode); + const promptFileOverride = normalise(config.prompt_file); + const promptRoute = resolvePromptRouting({ + scenario: promptScenario, + mode: promptModeOverride, + action, + reason, + }); + const promptMode = promptModeOverride || promptRoute.mode; + const promptFile = promptFileOverride || promptRoute.file; return { prNumber, @@ -982,6 +1218,17 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) { const agentFilesChanged = toNumber(inputs.agent_files_changed ?? inputs.agentFilesChanged ?? inputs.codex_files_changed ?? inputs.codexFilesChanged, 0); const agentSummary = normalise(inputs.agent_summary ?? inputs.agentSummary ?? inputs.codex_summary ?? inputs.codexSummary); const runUrl = normalise(inputs.run_url ?? inputs.runUrl); + const promptModeInput = normalise(inputs.prompt_mode ?? inputs.promptMode); + const promptFileInput = normalise(inputs.prompt_file ?? inputs.promptFile); + const promptScenarioInput = normalise(inputs.prompt_scenario ?? 
inputs.promptScenario); + const promptRoute = resolvePromptRouting({ + scenario: promptScenarioInput, + mode: promptModeInput, + action, + reason, + }); + const promptMode = promptModeInput || promptRoute.mode; + const promptFile = promptFileInput || promptRoute.file; // LLM task analysis details const llmProvider = normalise(inputs.llm_provider ?? inputs.llmProvider); @@ -1003,6 +1250,12 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) { trace: stateTrace, }); const previousFailure = previousState?.failure || {}; + const prBody = await fetchPrBody({ github, context, prNumber, core }); + const focusSections = prBody ? normaliseChecklistSections(parseScopeTasksAcceptanceSections(prBody)) : {}; + const focusItems = extractChecklistItems(focusSections.tasks || focusSections.acceptance || ''); + const focusUnchecked = focusItems.filter((item) => !item.checked); + const currentFocus = normaliseTaskText(previousState?.current_focus || ''); + const fallbackFocus = focusUnchecked[0]?.text || ''; // Use the iteration from the CURRENT persisted state, not the stale value from evaluate. // This prevents race conditions where another run updated state between evaluate and summary. 
@@ -1026,12 +1279,12 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) { agentExitCode, agentSummary, }); - const runFailed = action === 'run' && runResult && runResult !== 'success'; - const isTransientFailure = + const runFailed = action === 'run' && runResult && - runResult !== 'success' && - transientDetails.category === ERROR_CATEGORIES.transient; + !['success', 'skipped', 'cancelled'].includes(runResult); + const isTransientFailure = + runFailed && transientDetails.category === ERROR_CATEGORIES.transient; const waitLikeAction = action === 'wait' || action === 'defer'; const waitIsTransientReason = [ 'gate-pending', @@ -1059,7 +1312,16 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) { nextIteration = currentIteration + 1; failure = {}; } else if (runResult) { - if (isTransientFailure) { + // If the job was skipped/cancelled, it usually means the workflow condition + // prevented execution (e.g. gate not ready, label missing, concurrency). + // Don't treat this as an agent failure. 
+ if (runResult === 'skipped') { + failure = {}; + summaryReason = 'agent-run-skipped'; + } else if (runResult === 'cancelled') { + failure = {}; + summaryReason = 'agent-run-cancelled'; + } else if (isTransientFailure) { failure = {}; summaryReason = 'agent-run-transient'; } else { @@ -1210,6 +1472,20 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) { if (agentCommitSha) { summaryLines.push(`| Commit | [\`${agentCommitSha.slice(0, 7)}\`](../commit/${agentCommitSha}) |`); } + } else if (runResult === 'skipped') { + summaryLines.push( + `| Result | Value |`, + `|--------|-------|`, + `| Status | ⏭️ Skipped |`, + `| Reason | ${summaryReason || 'agent-run-skipped'} |`, + ); + } else if (runResult === 'cancelled') { + summaryLines.push( + `| Result | Value |`, + `|--------|-------|`, + `| Status | 🚫 Cancelled |`, + `| Reason | ${summaryReason || 'agent-run-cancelled'} |`, + ); } else { summaryLines.push( `| Result | Value |`, @@ -1371,6 +1647,27 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) { ); } + const focusTask = currentFocus || fallbackFocus; + const shouldRecordAttempt = action === 'run' && reason !== 'verify-acceptance'; + let attemptedTasks = normaliseAttemptedTasks(previousState?.attempted_tasks); + if (shouldRecordAttempt && focusTask) { + attemptedTasks = updateAttemptedTasks(attemptedTasks, focusTask, metricsIteration); + } + + let verification = previousState?.verification && typeof previousState.verification === 'object' + ? { ...previousState.verification } + : {}; + if (tasksUnchecked > 0) { + verification = {}; + } else if (reason === 'verify-acceptance') { + verification = { + status: runResult === 'success' ? 
'done' : 'pending', + iteration: nextIteration, + last_result: runResult || '', + updated_at: new Date().toISOString(), + }; + } + const newState = { trace: stateTrace || previousState?.trace || '', pr_number: prNumber, @@ -1392,7 +1689,22 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) { // Quality metrics for analysis validation last_effort_score: sessionEffortScore, last_data_quality: sessionDataQuality, + attempted_tasks: attemptedTasks, + last_focus: focusTask || '', + verification, }; + const attemptEntry = buildAttemptEntry({ + iteration: metricsIteration, + action, + reason: summaryReason, + runResult, + promptMode, + promptFile, + gateConclusion, + errorCategory, + errorType, + }); + newState.attempts = updateAttemptHistory(previousState?.attempts, attemptEntry); const summaryOutcome = runResult || summaryReason || action || 'unknown'; if (action === 'run' || runResult) { @@ -1501,6 +1813,13 @@ async function markAgentRunning({ github, context, core, inputs }) { prNumber, trace: stateTrace, }); + const prBody = await fetchPrBody({ github, context, prNumber, core }); + const focusSections = prBody ? 
normaliseChecklistSections(parseScopeTasksAcceptanceSections(prBody)) : {}; + const focusItems = extractChecklistItems(focusSections.tasks || focusSections.acceptance || ''); + const focusUnchecked = focusItems.filter((item) => !item.checked); + const attemptedTasks = normaliseAttemptedTasks(previousState?.attempted_tasks); + const attemptedKeys = new Set(attemptedTasks.map((entry) => entry.key)); + const suggestedFocus = focusUnchecked.find((item) => !attemptedKeys.has(normaliseTaskKey(item.text))) || focusUnchecked[0]; // Capitalize agent name for display const agentDisplayName = agentType.charAt(0).toUpperCase() + agentType.slice(1); @@ -1543,6 +1862,10 @@ async function markAgentRunning({ github, context, core, inputs }) { const preservedState = previousState || {}; preservedState.running = true; preservedState.running_since = new Date().toISOString(); + if (suggestedFocus?.text) { + preservedState.current_focus = suggestedFocus.text; + preservedState.current_focus_set_at = new Date().toISOString(); + } summaryLines.push('', formatStateComment(preservedState)); const body = summaryLines.join('\n'); diff --git a/.github/scripts/keepalive_prompt_routing.js b/.github/scripts/keepalive_prompt_routing.js new file mode 100644 index 00000000..8b598433 --- /dev/null +++ b/.github/scripts/keepalive_prompt_routing.js @@ -0,0 +1,75 @@ +'use strict'; + +function normalise(value) { + return String(value ?? 
'').trim().toLowerCase(); +} + +const FIX_SCENARIOS = new Set([ + 'ci', + 'ci-failure', + 'ci_failure', + 'fix', + 'fix-ci', + 'fix_ci', + 'fix-ci-failure', +]); + +const VERIFY_SCENARIOS = new Set([ + 'verify', + 'verification', + 'verify-acceptance', + 'acceptance', +]); + +const FEATURE_SCENARIOS = new Set([ + 'feature', + 'feature-work', + 'feature_work', + 'task', + 'next-task', + 'next_task', + 'nexttask', +]); + +const FIX_MODES = new Set(['fix', 'fix-ci', 'fix_ci', 'ci', 'ci-failure']); +const VERIFY_MODES = new Set(['verify', 'verification', 'verify-acceptance', 'acceptance']); + +function resolvePromptMode({ scenario, mode, action, reason } = {}) { + const modeValue = normalise(mode); + if (modeValue) { + if (FIX_MODES.has(modeValue)) { + return 'fix_ci'; + } + if (VERIFY_MODES.has(modeValue)) { + return 'verify'; + } + } + + const actionValue = normalise(action); + const reasonValue = normalise(reason); + if (actionValue === 'fix' || reasonValue.startsWith('fix-')) { + return 'fix_ci'; + } + if (actionValue === 'verify' || reasonValue === 'verify-acceptance') { + return 'verify'; + } + + const scenarioValue = normalise(scenario); + if (scenarioValue) { + if (FIX_SCENARIOS.has(scenarioValue)) { + return 'fix_ci'; + } + if (VERIFY_SCENARIOS.has(scenarioValue)) { + return 'verify'; + } + if (FEATURE_SCENARIOS.has(scenarioValue)) { + return 'normal'; + } + } + + return 'normal'; +} + +module.exports = { + resolvePromptMode, +};