Skip to content
Closed
177 changes: 177 additions & 0 deletions .github/workflows/agents-autofix-loop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ jobs:
stop_reason: ${{ steps.evaluate.outputs.stop_reason }}
attempts: ${{ steps.evaluate.outputs.attempts }}
max_attempts: ${{ steps.evaluate.outputs.max_attempts }}
trigger_reason: ${{ steps.evaluate.outputs.trigger_reason }}
trigger_job: ${{ steps.evaluate.outputs.trigger_job }}
trigger_step: ${{ steps.evaluate.outputs.trigger_step }}
gate_conclusion: ${{ steps.evaluate.outputs.gate_conclusion }}
gate_run_id: ${{ steps.evaluate.outputs.gate_run_id }}
security_blocked: ${{ steps.security_gate.outputs.blocked }}
security_reason: ${{ steps.security_gate.outputs.reason }}
steps:
Expand Down Expand Up @@ -110,6 +115,11 @@ jobs:
stop_reason: '',
attempts: '0',
max_attempts: '3',
trigger_reason: 'unknown',
trigger_job: '',
trigger_step: '',
gate_conclusion: String(run?.conclusion || run?.status || ''),
gate_run_id: String(run?.id || ''),
};

const stop = (reason, stopReason = '') => {
Expand Down Expand Up @@ -202,6 +212,8 @@ jobs:
outputs.max_attempts = String(maxAttempts);

const failingJobs = [];
let triggerJob = null;
let triggerStep = null;
for (const job of jobs) {
const conclusion = (job.conclusion || job.status || '').toLowerCase();
if (!conclusion || ['success', 'skipped'].includes(conclusion)) {
Expand All @@ -223,8 +235,36 @@ jobs:
detailLines.push(` - steps: ${failingSteps.join('; ')}`);
}
failingJobs.push(detailLines.join('\n'));

// Only the first failing job encountered is recorded as the trigger; later
// failing jobs leave triggerJob/triggerStep untouched.
if (!triggerJob) {
triggerJob = job;
// Within that job, pick the first step whose conclusion (or, when there is no
// conclusion, its status) is non-empty and is neither "success" nor "skipped".
const failingStep = Array.isArray(job.steps)
? job.steps.find((step) => {
const stepConclusion = (step.conclusion || step.status || '').toLowerCase();
return stepConclusion && !['success', 'skipped'].includes(stepConclusion);
})
: null;
// Normalise "no match" (undefined from find) to null.
triggerStep = failingStep || null;
}
}

/**
 * Classify the failing gate job/step into a coarse trigger category.
 *
 * The job and step names are lowercased, joined with a space, and checked in
 * priority order: mypy, then lint (lint / flake8 / ruff), then tests.
 *
 * Test detection works on whole words (split on non-alphanumerics) instead of
 * a raw substring search, because "test" is a substring of unrelated words
 * that are common in CI names -- most notably "latest" in runner labels such
 * as "build (ubuntu-latest)", and "attest" -- which would otherwise be
 * misreported as pytest failures.
 *
 * @param {{name?: string}|null|undefined} job  Failing job, if any.
 * @param {{name?: string}|null|undefined} step Failing step of that job, if any.
 * @returns {'mypy'|'lint'|'pytest'|'unknown'} Category, or 'unknown' when no
 *   name is available or nothing matches.
 */
const inferTriggerReason = (job, step) => {
  const text = [job?.name, step?.name]
    .filter(Boolean)
    .map((value) => String(value).toLowerCase())
    .join(' ');

  if (!text) return 'unknown';
  if (text.includes('mypy')) return 'mypy';
  if (text.includes('lint') || text.includes('flake8') || text.includes('ruff')) return 'lint';

  // A word counts as test-related when it starts with test/pytest/unittest
  // (test, tests, testing, pytest, unittest, ...) or ends in "tests"
  // (e2etests, integrationtests, ...).
  const words = text.split(/[^a-z0-9]+/).filter(Boolean);
  const looksLikeTest = words.some((word) => /^(?:py|unit)?test|tests$/.test(word));
  if (looksLikeTest) return 'pytest';

  return 'unknown';
};

// Publish the classified trigger together with the names of the job and step
// that produced it; the job falls back to its id, and missing values become ''.
Object.assign(outputs, {
  trigger_reason: inferTriggerReason(triggerJob, triggerStep),
  trigger_job: triggerJob?.name || triggerJob?.id || '',
  trigger_step: triggerStep?.name || '',
});

const appendixLines = [
`Gate run: ${run.html_url || run.id}`,
`Conclusion: ${run.conclusion || run.status || 'unknown'}`,
Expand Down Expand Up @@ -319,3 +359,140 @@ jobs:
issue_number: prNumber,
body,
});

# Records one metrics row per autofix-loop run: what triggered the attempt,
# whether the PR head moved afterwards, and the gate result for the new head.
# The row is shown in the job summary and uploaded as an NDJSON artifact.
metrics:
  name: Record autofix metrics
  needs:
    - prepare
    - autofix
  # Run even when prepare/autofix failed or were skipped so every run is recorded.
  if: always()
  runs-on: ubuntu-latest
  environment: agent-standard
  steps:
    - name: Collect metrics
      id: collect
      uses: actions/github-script@v7
      env:
        # Job outputs are passed as environment variables instead of being
        # spliced into the script text. Values such as job and step names are
        # free-form; a single quote inside one would otherwise break the
        # script or allow script injection.
        PR_NUMBER: ${{ needs.prepare.outputs.pr_number }}
        ATTEMPTS: ${{ needs.prepare.outputs.attempts }}
        MAX_ATTEMPTS: ${{ needs.prepare.outputs.max_attempts }}
        HEAD_SHA: ${{ needs.prepare.outputs.head_sha }}
        GATE_CONCLUSION: ${{ needs.prepare.outputs.gate_conclusion }}
        GATE_RUN_ID: ${{ needs.prepare.outputs.gate_run_id }}
        TRIGGER_REASON: ${{ needs.prepare.outputs.trigger_reason || 'unknown' }}
        TRIGGER_JOB: ${{ needs.prepare.outputs.trigger_job }}
        TRIGGER_STEP: ${{ needs.prepare.outputs.trigger_step }}
        STOP_REASON: ${{ needs.prepare.outputs.stop_reason }}
        AUTOFIX_RESULT: ${{ needs.autofix.result }}
      with:
        github-token: ${{ secrets.GITHUB_TOKEN }}
        script: |
          // Inputs come from the step's env block; missing outputs are empty strings.
          const env = process.env;
          const prNumber = Number(env.PR_NUMBER) || 0;
          const attemptNumber = Number(env.ATTEMPTS) || 0;
          const attemptLimit = Number(env.MAX_ATTEMPTS) || 0;
          const headShaBefore = env.HEAD_SHA || '';
          // Fall back to the triggering workflow_run payload when prepare produced no value.
          const gateConclusionBefore = env.GATE_CONCLUSION || (context.payload.workflow_run?.conclusion || '');
          const gateRunId = env.GATE_RUN_ID || String(context.payload.workflow_run?.id || '');
          const triggerReason = env.TRIGGER_REASON || 'unknown';
          const triggerJob = env.TRIGGER_JOB || '';
          const triggerStep = env.TRIGGER_STEP || '';
          const stopReason = env.STOP_REASON || '';
          const autofixResult = env.AUTOFIX_RESULT || '';

          const { owner, repo } = context.repo;
          let fixApplied = false;
          let headShaAfter = headShaBefore;
          let gateResultAfter = gateConclusionBefore || 'unknown';

          if (prNumber) {
            try {
              const { data: pr } = await github.rest.pulls.get({
                owner,
                repo,
                pull_number: prNumber,
              });
              headShaAfter = pr.head?.sha || headShaAfter;
              // A fix counts as applied when the PR head moved since prepare ran.
              fixApplied = Boolean(headShaBefore && headShaAfter && headShaBefore !== headShaAfter);

              // Only the first listed gate run for the current head is needed,
              // so request a single item instead of paginating every page.
              const gateWorkflow = 'pr-00-gate.yml';
              const { data: gateRuns } = await github.rest.actions.listWorkflowRuns({
                owner,
                repo,
                workflow_id: gateWorkflow,
                head_sha: headShaAfter,
                per_page: 1,
              });
              const latestGateRun = gateRuns.workflow_runs?.[0];
              if (latestGateRun) {
                gateResultAfter = latestGateRun.conclusion || latestGateRun.status || 'unknown';
              } else {
                gateResultAfter = 'not-found';
              }
            } catch (error) {
              // Best effort: keep the values gathered so far and still emit metrics.
              core.warning(`Failed to resolve PR or gate status: ${error.message}`);
            }
          }

          const metrics = {
            workflow_run_id: gateRunId,
            pr_number: prNumber,
            attempt_number: attemptNumber,
            attempt_limit: attemptLimit,
            trigger_reason: triggerReason || 'unknown',
            trigger_job: triggerJob,
            trigger_step: triggerStep,
            fix_applied: fixApplied,
            gate_result_after: gateResultAfter || 'unknown',
            gate_conclusion_before: gateConclusionBefore || 'unknown',
            stop_reason: stopReason || '',
            autofix_result: autofixResult || 'unknown',
            head_sha_before: headShaBefore,
            head_sha_after: headShaAfter,
            recorded_at: new Date().toISOString(),
          };

          core.setOutput('metrics_json', JSON.stringify(metrics));

    - name: Write summary and artifact
      env:
        METRICS_JSON: ${{ steps.collect.outputs.metrics_json }}
      run: |
        set -euo pipefail
        if [ -z "${METRICS_JSON:-}" ]; then
          echo "No metrics JSON captured; skipping summary."
          exit 0
        fi

        python3 - <<'PY'
        import json
        import os

        metrics = json.loads(os.environ["METRICS_JSON"])
        # Display order of the summary table rows.
        order = [
            "pr_number",
            "attempt_number",
            "attempt_limit",
            "trigger_reason",
            "trigger_job",
            "trigger_step",
            "fix_applied",
            "gate_conclusion_before",
            "gate_result_after",
            "autofix_result",
            "stop_reason",
            "workflow_run_id",
            "head_sha_before",
            "head_sha_after",
            "recorded_at",
        ]


        def cell(value):
            """Render a value as an inline-code table cell that cannot break the row."""
            # Newlines end a table row and unescaped pipes split a cell, so
            # flatten the former and escape the latter.
            text = str(value).replace("\r", " ").replace("\n", " ").replace("|", "\\|")
            # An empty code span would render as two literal backticks.
            return f"`{text}`" if text else ""


        lines = ["## Autofix loop metrics", "", "| Field | Value |", "| --- | --- |"]
        lines.extend(f"| {key} | {cell(metrics.get(key, ''))} |" for key in order)

        summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
        if summary_path:
            with open(summary_path, "a", encoding="utf-8") as fp:
                fp.write("\n".join(lines) + "\n")

        # json.dumps emits a single line, so each append is one NDJSON record.
        out_path = "autofix-metrics.ndjson"
        with open(out_path, "a", encoding="utf-8") as fp:
            fp.write(json.dumps(metrics) + "\n")
        print(f"Wrote metrics to {out_path}")
        PY

    - name: Upload metrics artifact
      uses: actions/upload-artifact@v4
      with:
        name: agents-autofix-metrics
        path: autofix-metrics.ndjson
        retention-days: 30
79 changes: 79 additions & 0 deletions .github/workflows/agents-keepalive-loop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,16 @@ jobs:
autofix_enabled: ${{ steps.evaluate.outputs.autofix_enabled }}
has_agent_label: ${{ steps.evaluate.outputs.has_agent_label }}
trace: ${{ steps.evaluate.outputs.trace }}
start_ts: ${{ steps.timestamps.outputs.start_ts }}
security_blocked: ${{ steps.security_gate.outputs.blocked }}
security_reason: ${{ steps.security_gate.outputs.reason }}
steps:
# Check out the repository contents using actions/checkout v4.
- name: Checkout
uses: actions/checkout@v4

# Expose the current time (epoch seconds, UTC) as this step's `start_ts` output.
- name: Capture timestamps
  id: timestamps
  run: |
    printf 'start_ts=%s\n' "$(date -u +%s)" >> "$GITHUB_OUTPUT"
- name: Security gate - prompt injection guard
id: security_gate
uses: actions/github-script@v7
Expand Down Expand Up @@ -146,6 +150,81 @@ jobs:
# Check out the repository contents using actions/checkout v4.
- name: Checkout
uses: actions/checkout@v4

# Build one keepalive metrics record from the evaluate job's outputs, render it
# as a table in the job summary, and append it to keepalive-metrics.ndjson.
- name: Emit keepalive metrics
  id: keepalive-metrics
  env:
    PR_NUMBER: ${{ needs.evaluate.outputs.pr_number }}
    ACTION: ${{ needs.evaluate.outputs.action }}
    REASON: ${{ needs.evaluate.outputs.reason }}
    GATE_CONCLUSION: ${{ needs.evaluate.outputs.gate_conclusion }}
    ITERATION: ${{ needs.evaluate.outputs.iteration }}
    MAX_ITERATIONS: ${{ needs.evaluate.outputs.max_iterations }}
    TASKS_TOTAL: ${{ needs.evaluate.outputs.tasks_total }}
    TASKS_UNCHECKED: ${{ needs.evaluate.outputs.tasks_unchecked }}
    START_TS: ${{ needs.evaluate.outputs.start_ts }}
  run: |
    set -euo pipefail

    # Elapsed seconds since the evaluate job captured start_ts; 0 when the
    # value is missing, non-numeric, or lies in the future.
    now=$(date -u +%s)
    if [[ "${START_TS:-}" =~ ^[0-9]+$ ]]; then
      duration=$(( now - START_TS ))
      if [ "$duration" -lt 0 ]; then duration=0; fi
    else
      duration=0
    fi

    # Non-integer task counts are treated as 0; completed is clamped at 0.
    tasks_total=${TASKS_TOTAL:-0}
    tasks_unchecked=${TASKS_UNCHECKED:-0}
    if ! [[ "$tasks_total" =~ ^-?[0-9]+$ ]]; then tasks_total=0; fi
    if ! [[ "$tasks_unchecked" =~ ^-?[0-9]+$ ]]; then tasks_unchecked=0; fi
    tasks_completed=$(( tasks_total - tasks_unchecked ))
    if [ "$tasks_completed" -lt 0 ]; then tasks_completed=0; fi

    # -c emits the object on a single line. jq pretty-prints across several
    # lines by default, which would make the .ndjson file invalid (NDJSON is
    # one JSON value per line).
    metrics_json=$(jq -cn \
      --arg pr "${PR_NUMBER:-0}" \
      --arg iteration "${ITERATION:-0}" \
      --arg action "${ACTION:-}" \
      --arg stop_reason "${REASON:-}" \
      --arg gate_conclusion "${GATE_CONCLUSION:-}" \
      --arg tasks_total "$tasks_total" \
      --arg tasks_completed "$tasks_completed" \
      --arg duration "$duration" \
      '{
        pr_number: ($pr | tonumber? // 0),
        iteration_count: ($iteration | tonumber? // 0),
        action: $action,
        stop_reason: $stop_reason,
        gate_conclusion: $gate_conclusion,
        tasks_total: ($tasks_total | tonumber? // 0),
        tasks_completed: ($tasks_completed | tonumber? // 0),
        duration_seconds: ($duration | tonumber? // 0)
      }')

    {
      echo '### Keepalive metrics'
      echo ''
      echo '| Field | Value |'
      echo '| --- | --- |'
      # One jq pass renders every row, in this fixed field order.
      printf '%s\n' "$metrics_json" | jq -r '
        ("pr_number", "iteration_count", "action", "stop_reason",
         "gate_conclusion", "tasks_total", "tasks_completed",
         "duration_seconds") as $key
        | "| \($key) | \(.[$key]) |"
      '
    } >> "$GITHUB_STEP_SUMMARY"

    printf '%s\n' "$metrics_json" >> keepalive-metrics.ndjson

# Upload keepalive-metrics.ndjson as the `keepalive-metrics` artifact using
# actions/upload-artifact v4.
- name: Upload keepalive metrics artifact
uses: actions/upload-artifact@v4
with:
name: keepalive-metrics
path: keepalive-metrics.ndjson
# Keep the artifact for 30 days.
retention-days: 30
# Fail this step when the metrics file is missing.
if-no-files-found: error

- name: Update summary comment
uses: actions/github-script@v7
with:
Expand Down
Loading