Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions .github/scripts/__tests__/keepalive-loop.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1989,3 +1989,123 @@ test('normaliseChecklistSection preserves non-list content', () => {

assert.equal(result, expected);
});

test('updateKeepaliveLoopSummary displays LLM provider analysis details', async () => {
  // Seed an existing state comment so the update path (not create) is exercised.
  const stateBody = formatStateComment({
    trace: 'trace-llm',
    iteration: 1,
    max_iterations: 5,
    failure_threshold: 3,
  });
  const githubStub = buildGithubStub({
    comments: [{ id: 77, body: stateBody, html_url: 'https://example.com/77' }],
  });

  // Inputs mirror a successful run where the primary LLM provider handled analysis.
  const inputs = {
    prNumber: 123,
    action: 'run',
    runResult: 'success',
    gateConclusion: 'success',
    tasksTotal: 4,
    tasksUnchecked: 2,
    keepaliveEnabled: true,
    autofixEnabled: false,
    iteration: 1,
    maxIterations: 5,
    failureThreshold: 3,
    trace: 'trace-llm',
    llm_provider: 'github-models',
    llm_confidence: 0.95,
    llm_analysis_run: true,
  };

  await updateKeepaliveLoopSummary({
    github: githubStub,
    context: buildContext(123),
    core: buildCore(),
    inputs,
  });

  // Exactly one comment update, carrying the analysis section for the primary provider.
  assert.equal(githubStub.actions.length, 1);
  const [action] = githubStub.actions;
  assert.equal(action.type, 'update');
  assert.match(action.body, /### 🧠 Task Analysis/);
  assert.match(action.body, /GitHub Models \(primary\)/);
  assert.match(action.body, /Confidence \| 95%/);
});

test('updateKeepaliveLoopSummary shows fallback warning for OpenAI provider', async () => {
  // Existing state comment so the summary is updated in place rather than created.
  const stateBody = formatStateComment({
    trace: 'trace-openai',
    iteration: 1,
    max_iterations: 5,
    failure_threshold: 3,
  });
  const githubStub = buildGithubStub({
    comments: [{ id: 78, body: stateBody, html_url: 'https://example.com/78' }],
  });

  // Same successful run shape, but analysis fell back to the OpenAI provider.
  const inputs = {
    prNumber: 123,
    action: 'run',
    runResult: 'success',
    gateConclusion: 'success',
    tasksTotal: 4,
    tasksUnchecked: 2,
    keepaliveEnabled: true,
    autofixEnabled: false,
    iteration: 1,
    maxIterations: 5,
    failureThreshold: 3,
    trace: 'trace-openai',
    llm_provider: 'openai',
    llm_confidence: 0.87,
    llm_analysis_run: true,
  };

  await updateKeepaliveLoopSummary({
    github: githubStub,
    context: buildContext(123),
    core: buildCore(),
    inputs,
  });

  // A single update whose body labels the fallback provider and warns about the primary.
  assert.equal(githubStub.actions.length, 1);
  const [action] = githubStub.actions;
  assert.equal(action.type, 'update');
  assert.match(action.body, /### 🧠 Task Analysis/);
  assert.match(action.body, /OpenAI \(fallback\)/);
  assert.match(action.body, /Primary provider.*was unavailable/);
});

test('updateKeepaliveLoopSummary shows regex fallback warning', async () => {
  // Pre-seed a state comment so the function takes the update branch.
  const stateBody = formatStateComment({
    trace: 'trace-regex',
    iteration: 1,
    max_iterations: 5,
    failure_threshold: 3,
  });
  const githubStub = buildGithubStub({
    comments: [{ id: 79, body: stateBody, html_url: 'https://example.com/79' }],
  });

  // Analysis degraded all the way to the regex-based fallback.
  const inputs = {
    prNumber: 123,
    action: 'run',
    runResult: 'success',
    gateConclusion: 'success',
    tasksTotal: 4,
    tasksUnchecked: 2,
    keepaliveEnabled: true,
    autofixEnabled: false,
    iteration: 1,
    maxIterations: 5,
    failureThreshold: 3,
    trace: 'trace-regex',
    llm_provider: 'regex-fallback',
    llm_confidence: 0.7,
    llm_analysis_run: true,
  };

  await updateKeepaliveLoopSummary({
    github: githubStub,
    context: buildContext(123),
    core: buildCore(),
    inputs,
  });

  // One update, labelled as the regex fallback with the unavailable-primary warning.
  assert.equal(githubStub.actions.length, 1);
  const [action] = githubStub.actions;
  assert.equal(action.type, 'update');
  assert.match(action.body, /### 🧠 Task Analysis/);
  assert.match(action.body, /Regex \(fallback\)/);
  assert.match(action.body, /Primary provider.*was unavailable/);
});
81 changes: 74 additions & 7 deletions .github/scripts/keepalive_loop.js
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,11 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) {
const agentSummary = normalise(inputs.agent_summary ?? inputs.agentSummary ?? inputs.codex_summary ?? inputs.codexSummary);
const runUrl = normalise(inputs.run_url ?? inputs.runUrl);

// LLM task analysis details
const llmProvider = normalise(inputs.llm_provider ?? inputs.llmProvider);
const llmConfidence = toNumber(inputs.llm_confidence ?? inputs.llmConfidence, 0);
const llmAnalysisRun = toBool(inputs.llm_analysis_run ?? inputs.llmAnalysisRun, false);

const { state: previousState, commentId } = await loadKeepaliveState({
github,
context,
Expand Down Expand Up @@ -1211,6 +1216,29 @@ async function updateKeepaliveLoopSummary({ github, context, core, inputs }) {
}
}

// LLM analysis details - show which provider was used for task completion detection
if (llmAnalysisRun && llmProvider) {
const providerIcon = llmProvider === 'github-models' ? '✅' :
llmProvider === 'openai' ? '⚠️' :
llmProvider === 'regex-fallback' ? '🔶' : 'ℹ️';
const providerLabel = llmProvider === 'github-models' ? 'GitHub Models (primary)' :
llmProvider === 'openai' ? 'OpenAI (fallback)' :
llmProvider === 'regex-fallback' ? 'Regex (fallback)' : llmProvider;
const confidencePercent = Math.round(llmConfidence * 100);
summaryLines.push(
'',
'### 🧠 Task Analysis',
`| Provider | ${providerIcon} ${providerLabel} |`,
`| Confidence | ${confidencePercent}% |`,
Comment on lines +1228 to +1232
Copy link

Copilot AI Jan 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The markdown table formatting is incomplete. Lines 1231-1232 create table rows without proper markdown table syntax (missing header separator and consistent column structure). The output will render as plain text rather than a table. Add proper table headers and separators, for example:

| Field | Value |
|-------|-------|
| Provider | ... |
| Confidence | ... |

Copilot uses AI. Check for mistakes.
);
if (llmProvider !== 'github-models') {
summaryLines.push(
'',
`> ⚠️ Primary provider (GitHub Models) was unavailable; used ${providerLabel} instead.`,
);
}
}

if (isTransientFailure) {
summaryLines.push(
'',
Expand Down Expand Up @@ -1682,12 +1710,13 @@ async function analyzeTaskCompletion({ github, context, prNumber, baseSha, headS
* @param {number} params.prNumber - PR number
* @param {string} params.baseSha - Base SHA (before agent work)
* @param {string} params.headSha - Head SHA (after agent work)
* @param {string[]} [params.llmCompletedTasks] - Tasks marked complete by LLM analysis
* @param {object} [params.core] - Optional core for logging
* @returns {Promise<{updated: boolean, tasksChecked: number, details: string}>}
*/
async function autoReconcileTasks({ github, context, prNumber, baseSha, headSha, core }) {
async function autoReconcileTasks({ github, context, prNumber, baseSha, headSha, llmCompletedTasks, core }) {
const log = (msg) => core?.info?.(msg) || console.log(msg);

// Get current PR body
let pr;
try {
Expand All @@ -1710,13 +1739,39 @@ async function autoReconcileTasks({ github, context, prNumber, baseSha, headSha,
return { updated: false, tasksChecked: 0, details: 'No tasks found in PR body' };
}

// Analyze what tasks may have been completed
// Build high-confidence matches from multiple sources
let highConfidence = [];

// Source 1: LLM analysis (highest priority if available)
if (llmCompletedTasks && Array.isArray(llmCompletedTasks) && llmCompletedTasks.length > 0) {
log(`LLM analysis found ${llmCompletedTasks.length} completed task(s)`);
for (const task of llmCompletedTasks) {
highConfidence.push({
task,
reason: 'LLM session analysis',
confidence: 'high',
source: 'llm',
});
}
}

// Source 2: Commit/file analysis (fallback or supplementary)
const analysis = await analyzeTaskCompletion({
github, context, prNumber, baseSha, headSha, taskText, core
});

// Only auto-check high-confidence matches
const highConfidence = analysis.matches.filter(m => m.confidence === 'high');
// Add commit-based matches that aren't already covered by LLM
const llmTasksLower = new Set((llmCompletedTasks || []).map(t => t.toLowerCase()));
const commitMatches = analysis.matches
.filter(m => m.confidence === 'high')
.filter(m => !llmTasksLower.has(m.task.toLowerCase()));

if (commitMatches.length > 0) {
log(`Commit analysis found ${commitMatches.length} additional task(s)`);
for (const match of commitMatches) {
highConfidence.push({ ...match, source: 'commit' });
}
}

if (highConfidence.length === 0) {
log('No high-confidence task matches to auto-check');
Expand Down Expand Up @@ -1766,14 +1821,26 @@ async function autoReconcileTasks({ github, context, prNumber, baseSha, headSha,
return {
updated: false,
tasksChecked: 0,
details: `Failed to update PR: ${error.message}`
details: `Failed to update PR: ${error.message}`,
sources: { llm: 0, commit: 0 },
};
}

// Count matches by source for reporting
const llmCount = highConfidence.filter(m => m.source === 'llm').length;
const commitCount = highConfidence.filter(m => m.source === 'commit').length;

// Build detailed description
const sourceDesc = [];
if (llmCount > 0) sourceDesc.push(`${llmCount} from LLM analysis`);
if (commitCount > 0) sourceDesc.push(`${commitCount} from commit analysis`);
const sourceInfo = sourceDesc.length > 0 ? ` (${sourceDesc.join(', ')})` : '';

return {
updated: true,
tasksChecked: checkedCount,
details: `Auto-checked ${checkedCount} task(s): ${highConfidence.map(m => m.task.slice(0, 30) + '...').join(', ')}`
details: `Auto-checked ${checkedCount} task(s)${sourceInfo}: ${highConfidence.map(m => m.task.slice(0, 30) + '...').join(', ')}`,
sources: { llm: llmCount, commit: commitCount },
};
}

Expand Down
35 changes: 32 additions & 3 deletions .github/workflows/agents-keepalive-loop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,26 @@ jobs:
const beforeSha = '${{ needs.evaluate.outputs.head_sha }}'; // SHA before agent ran
const headSha = '${{ needs.run-codex.outputs.commit-sha }}'; // SHA after agent ran

// LLM analysis metadata
const llmProvider = '${{ needs.run-codex.outputs.llm-provider || '' }}';
const llmConfidence = '${{ needs.run-codex.outputs.llm-confidence || '' }}';
const llmAnalysisRun = '${{ needs.run-codex.outputs.llm-analysis-run }}' === 'true';

// Parse LLM completed tasks if available
let llmCompletedTasks = [];
const llmTasksJson = '${{ needs.run-codex.outputs.llm-completed-tasks || '[]' }}';
try {
llmCompletedTasks = JSON.parse(llmTasksJson);
if (llmCompletedTasks.length > 0) {
core.info(`LLM analysis found ${llmCompletedTasks.length} completed task(s)`);
if (llmProvider) {
core.info(`LLM provider: ${llmProvider} (confidence: ${llmConfidence})`);
}
}
} catch (e) {
core.debug(`Failed to parse LLM tasks: ${e.message}`);
}

if (!prNumber || !beforeSha || !headSha) {
core.info('Missing required inputs for task reconciliation');
return;
Expand All @@ -371,19 +391,24 @@ jobs:
core.info(`Comparing ${beforeSha.slice(0, 7)} → ${headSha.slice(0, 7)}`);

const result = await autoReconcileTasks({
github, context, prNumber, baseSha: beforeSha, headSha, core
github, context, prNumber, baseSha: beforeSha, headSha, llmCompletedTasks, core
});

if (result.updated) {
core.info(`✅ ${result.details}`);
core.notice(`Auto-checked ${result.tasksChecked} task(s) based on commit analysis`);
core.notice(`Auto-checked ${result.tasksChecked} task(s) based on analysis`);
} else {
core.info(`ℹ️ ${result.details}`);
}

// Output for step summary
// Output for step summary and downstream reporting
core.setOutput('tasks_checked', result.tasksChecked);
core.setOutput('reconciliation_details', result.details);
core.setOutput('llm_provider', llmProvider);
core.setOutput('llm_confidence', llmConfidence);
core.setOutput('llm_analysis_run', llmAnalysisRun);
core.setOutput('llm_tasks_count', llmCompletedTasks.length);
core.setOutput('commit_tasks_count', result.sources?.commit || 0);

- name: Update summary comment
uses: actions/github-script@v7
Expand Down Expand Up @@ -415,5 +440,9 @@ jobs:
agent_commit_sha: '${{ needs.run-codex.outputs.commit-sha }}',
agent_files_changed: '${{ needs.run-codex.outputs.files-changed }}',
agent_summary: process.env.CODEX_SUMMARY || '',
// LLM analysis details for task completion reporting
llm_provider: '${{ needs.run-codex.outputs.llm-provider || '' }}',
llm_confidence: '${{ needs.run-codex.outputs.llm-confidence || '' }}',
llm_analysis_run: '${{ needs.run-codex.outputs.llm-analysis-run }}' === 'true',
};
await updateKeepaliveLoopSummary({ github, context, core, inputs });
Loading
Loading