diff --git a/.github/actions/setup-api-client/action.yml b/.github/actions/setup-api-client/action.yml index 3343ebfb5..109cc33a7 100644 --- a/.github/actions/setup-api-client/action.yml +++ b/.github/actions/setup-api-client/action.yml @@ -102,6 +102,22 @@ runs: if [ -d "node_modules/@octokit/rest" ]; then echo "✅ @octokit/rest already installed" else + # Snapshot vendored package metadata before npm install. + # npm may overwrite transitive deps (e.g. minimatch) that are + # committed as vendored packages with intentional version pins. + VENDORED_SNAPSHOT="" + if [ -f "node_modules/minimatch/package.json" ]; then + VENDORED_SNAPSHOT=$(mktemp -d) + for pkg_dir in node_modules/*/; do + if [ -f "${pkg_dir}package.json" ]; then + pkg_name=$(basename "$pkg_dir") + mkdir -p "${VENDORED_SNAPSHOT}/${pkg_name}" + cp "${pkg_dir}package.json" "${VENDORED_SNAPSHOT}/${pkg_name}/package.json" + fi + done + echo "📸 Snapshotted vendored package metadata" + fi + # Install with pinned versions for consistency # Capture stderr for debugging if the command fails npm_output=$(mktemp) @@ -122,6 +138,20 @@ runs: @octokit/plugin-paginate-rest@9.1.5 \ @octokit/auth-app@6.0.3 fi + + # Restore vendored package metadata that npm may have overwritten + if [ -n "${VENDORED_SNAPSHOT:-}" ] && [ -d "${VENDORED_SNAPSHOT}" ]; then + for pkg_backup in "${VENDORED_SNAPSHOT}"/*/; do + pkg_name=$(basename "$pkg_backup") + if [ -f "node_modules/${pkg_name}/package.json" ] && \ + [ -f "${pkg_backup}package.json" ]; then + cp "${pkg_backup}package.json" "node_modules/${pkg_name}/package.json" + fi + done + rm -rf "${VENDORED_SNAPSHOT}" + echo "📸 Restored vendored package metadata" + fi + echo "✅ @octokit dependencies installed" fi diff --git a/.github/scripts/__tests__/detect-changes.test.js b/.github/scripts/__tests__/detect-changes.test.js index bd7b8c119..09fe2604c 100644 --- a/.github/scripts/__tests__/detect-changes.test.js +++ b/.github/scripts/__tests__/detect-changes.test.js @@ -82,3 +82,65 @@ test('detectChanges fetches files via callback', async () => { assert.equal(result.outputs.run_core, 'true'); assert.equal(result.outputs.workflow_changed, 'false'); }); + +test('detectChanges falls back to conservative defaults when listFiles is inaccessible', async () => { + const warnings = []; + const result = await detectChanges({ + core: { + warning(message) { + warnings.push(String(message)); + }, + setOutput() {}, + }, + context: { + eventName: 'pull_request', + repo: { owner: 'octo', repo: 'demo' }, + payload: { pull_request: { number: 42 } }, + }, + github: { + rest: { + pulls: { + listFiles: async () => ({ data: [] }), + }, + }, + paginate: { + iterator: () => { + const error = new Error('Resource not accessible by integration'); + error.status = 403; + throw error; + }, + }, + }, + }); + + assert.equal(result.outputs.doc_only, 'false'); + assert.equal(result.outputs.run_core, 'true'); + assert.equal(result.outputs.reason, 'rate_limited'); + assert.equal(result.outputs.docker_changed, 'false'); + assert.equal(result.outputs.workflow_changed, 'true'); + assert.equal(warnings.length, 1); + assert.match(warnings[0], /Unable to determine changed files via API/); +}); + +test('detectChanges supports clients without paginate.iterator', async () => { + const result = await detectChanges({ + context: { + eventName: 'pull_request', + repo: { owner: 'octo', repo: 'demo' }, + payload: { pull_request: { number: 1 } }, + }, + github: { + rest: { + pulls: { + listFiles: async () => ({ data: [] }), + }, + }, + paginate: async () => [{ filename: 'docs/README.md' }], + }, + }); + + assert.equal(result.outputs.doc_only, 'true'); + assert.equal(result.outputs.run_core, 'false'); + assert.equal(result.outputs.reason, 'docs_only'); + assert.equal(result.outputs.workflow_changed, 'false'); +}); diff --git a/.github/scripts/detect-changes.js b/.github/scripts/detect-changes.js index 45c31c4f5..64d34cb68 100644 --- a/.github/scripts/detect-changes.js +++ b/.github/scripts/detect-changes.js @@ -168,6 +168,28 @@ function isRateLimitError(error) { return message.includes('rate limit') || message.includes('ratelimit'); } +function isNonFatalListFilesError(error) { + if (!error) { + return false; + } + if (isRateLimitError(error)) { + return true; + } + const status = error.status || error?.response?.status; + if ([401, 403, 404, 422].includes(status)) { + return true; + } + const message = String(error.message || error?.response?.data?.message || '').toLowerCase(); + return ( + message.includes('resource not accessible by integration') || + message.includes('insufficient permission') || + message.includes('requires higher permissions') || + message.includes('not found') || + message.includes('unprocessable') || + message.includes('validation failed') + ); +} + async function listChangedFiles({ github, context }) { const pull = context?.payload?.pull_request; const number = pull?.number; @@ -175,25 +197,42 @@ async function listChangedFiles({ github, context }) { return []; } try { - const iterator = github.paginate.iterator(github.rest.pulls.listFiles, { + const files = []; + const params = { owner: context.repo.owner, repo: context.repo.repo, pull_number: number, per_page: 100, - }); - const files = []; - for await (const page of iterator) { - if (Array.isArray(page.data)) { - for (const item of page.data) { + }; + if (typeof github?.paginate?.iterator === 'function') { + const iterator = github.paginate.iterator(github.rest.pulls.listFiles, params); + for await (const page of iterator) { + if (Array.isArray(page.data)) { + for (const item of page.data) { + if (item && typeof item.filename === 'string') { + files.push(item.filename); + } + } + } + } + return files; + } + + if (typeof github?.paginate === 'function') { + const items = await github.paginate(github.rest.pulls.listFiles, params); + if (Array.isArray(items)) { + for (const item of items) { if (item && typeof item.filename === 'string') { files.push(item.filename); } } } + return files; } - return files; + + throw new Error('GitHub paginate API is unavailable'); } catch (error) { - if (isRateLimitError(error)) { + if (isNonFatalListFilesError(error)) { return null; } throw error; @@ -261,7 +300,7 @@ async function detectChanges({ github, context, core, files, fetchFiles } = {}) workflow_changed: 'true', }; const warn = core?.warning ? core.warning.bind(core) : console.warn.bind(console); - warn('Rate limit reached while determining changed files; assuming code changes (but not docker).'); + warn('Unable to determine changed files via API; assuming code changes (but not docker).'); if (core) { for (const [key, value] of Object.entries(outputs)) { core.setOutput(key, value); diff --git a/.github/workflows/agents-72-codex-belt-worker.yml b/.github/workflows/agents-72-codex-belt-worker.yml index ec87d6d2f..15f8271f4 100644 --- a/.github/workflows/agents-72-codex-belt-worker.yml +++ b/.github/workflows/agents-72-codex-belt-worker.yml @@ -903,9 +903,12 @@ jobs: gh_output = os.environ.get('GITHUB_OUTPUT') if gh_output: + task = start_info['task'] or {} + task_title = (task.get('title') or '').replace('\r', ' ').replace('\n', ' ') with open(gh_output, 'a', encoding='utf-8') as handle: - handle.write(f"task_id={start_info['task']['id'] if start_info['task'] else ''}\n") - handle.write(f"task_status={start_info['task']['current_status'] if start_info['task'] else ''}\n") + handle.write(f"task_id={task.get('id', '')}\n") + handle.write(f"task_title={task_title}\n") + handle.write(f"task_status={task.get('current_status', '')}\n") handle.write(f"ledger_changed={'true' if changed else 'false'}\n") handle.write(f"ledger_created={'true' if start_info['created'] else 'false'}\n") handle.write(f"ledger_base_aligned={'true' if base_aligned else 'false'}\n") @@ -1187,12 +1190,28 @@ jobs: const prNumber = Number('${{ steps.pr.outputs.number }}'); const branch = ('${{ steps.ctx.outputs.branch }}' || '').trim() || '(unknown branch)'; const dryRun = '${{ steps.mode.outputs.dry_run }}' === 'true'; + const taskId = ('${{ steps.ledger_start.outputs.task_id }}' || '').trim(); + const taskTitle = ('${{ steps.ledger_start.outputs.task_title }}' || '').trim(); const { owner, repo } = context.repo; const marker = ''; const summary = dryRun ? `Codex Worker activated for branch \`${branch}\` (dry run preview).` : `Codex Worker activated for branch \`${branch}\`.`; - const body = `${marker}\n${summary}\n\n@codex start\n\nAutomated belt worker prepared this PR. Please continue implementing the requested changes.`; + // Direct Codex to focus on the single next ledger task for higher + // first-commit success probability. Full issue context is in the + // PR body; this comment narrows the immediate scope. + let taskDirective = ''; + if (taskId && taskTitle) { + taskDirective = [ + '', + `**Focus on this task first:** \`${taskId}\` — ${taskTitle}`, + '', + 'Implement **only** this task in your first commit.', + 'Ensure the code compiles and existing tests pass before moving on.', + 'The keepalive loop will assign subsequent tasks after this one is complete.', + ].join('\n'); + } + const body = `${marker}\n${summary}\n\n@codex start${taskDirective}`; try { const comments = await paginateWithRetry( diff --git a/.github/workflows/agents-autofix-loop.yml b/.github/workflows/agents-autofix-loop.yml index f88518eb5..a989768c3 100644 --- a/.github/workflows/agents-autofix-loop.yml +++ b/.github/workflows/agents-autofix-loop.yml @@ -162,7 +162,7 @@ jobs: appendix: '', stop_reason: '', attempts: '0', - max_attempts: '3', + max_attempts: '2', trigger_reason: 'unknown', trigger_job: '', trigger_step: '', @@ -287,7 +287,7 @@ jobs: // Reduce attempts for auto-escalated PRs (they weren't agent-initiated) const isEscalated = labels.includes('autofix:escalated'); const maxAttempts = isEscalated - ? Math.min(2, Number(outputs.max_attempts)) + ? 1 : Number(outputs.max_attempts); const previousRuns = await paginateWithRetry( github, diff --git a/.github/workflows/agents-pr-meta-v4.yml b/.github/workflows/agents-pr-meta-v4.yml index 117b658f2..a7b4a7390 100644 --- a/.github/workflows/agents-pr-meta-v4.yml +++ b/.github/workflows/agents-pr-meta-v4.yml @@ -37,9 +37,11 @@ concurrency: && github.event.comment && github.event.comment.id && format('agents-pr-meta-comment-{0}', github.event.comment.id) + || github.event_name == 'pull_request' + && format('agents-pr-meta-pr-{0}', github.event.pull_request.number) || format('agents-pr-meta-run-{0}', github.run_id) }} - cancel-in-progress: false + cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: comment_event_context: diff --git a/.github/workflows/reusable-18-autofix.yml b/.github/workflows/reusable-18-autofix.yml index 564e38157..b83a1f0f8 100644 --- a/.github/workflows/reusable-18-autofix.yml +++ b/.github/workflows/reusable-18-autofix.yml @@ -794,6 +794,8 @@ jobs: git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" git add -A + # Unstage vendored node_modules that may have been modified by npm install + git reset HEAD -- .github/scripts/node_modules node_modules .workflows-lib/.github/scripts/node_modules 2>/dev/null || true git commit -m "${AUTOFIX_COMMIT_PREFIX} formatting/lint" echo "AUTOFIX_COMMIT_SHA=$(git rev-parse HEAD)" >> "$GITHUB_ENV" @@ -865,6 +867,8 @@ jobs: git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" git add -A + # Unstage vendored node_modules that may have been modified by npm install + git reset HEAD -- .github/scripts/node_modules node_modules .workflows-lib/.github/scripts/node_modules 2>/dev/null || true git commit -m "${AUTOFIX_COMMIT_PREFIX} formatting/lint (patch)" || true git format-patch -1 --stdout > autofix.patch diff --git a/.github/workflows/reusable-agents-issue-bridge.yml b/.github/workflows/reusable-agents-issue-bridge.yml index d193a16ce..16dea6053 100644 --- a/.github/workflows/reusable-agents-issue-bridge.yml +++ b/.github/workflows/reusable-agents-issue-bridge.yml @@ -580,7 +580,9 @@ jobs: git checkout -B "$HEAD_BRANCH" "origin/${BASE_BRANCH}" mkdir -p agents printf "\n" "$AGENT" "$ISSUE_NUM" > "agents/${AGENT}-${ISSUE_NUM}.md" - git add -A || true + # Stage only the intended bootstrap file — 'git add -A' would capture + # vendored node_modules changes made by setup-api-client npm install. + git add "agents/${AGENT}-${ISSUE_NUM}.md" || true if ! git diff --cached --quiet; then git commit -m "chore(${AGENT}): bootstrap PR for issue #${ISSUE_NUM}" else diff --git a/scripts/langchain/capability_check.py b/scripts/langchain/capability_check.py index 0ff398491..8ef5db729 100755 --- a/scripts/langchain/capability_check.py +++ b/scripts/langchain/capability_check.py @@ -161,8 +161,8 @@ def _is_multi_action_task(task: str) -> bool: def _requires_admin_access(task: str) -> bool: patterns = [ r"\bgithub\s+secrets?\b", - r"\b(?:manage|configure|set|create|update|delete|add|modify|rotate)\s+secrets?\b", - r"\bsecrets?\s+(?:management|configuration|rotation)\b", + r"\b(?:manage|configure|set|create|update|delete|add|modify|rotate)\b.{0,30}\bsecrets?\b", + r"\bsecrets?\b.{0,30}\b(?:management|configuration|rotation)\b", r"\brepository\s+settings\b", r"\brepo\s+settings\b", r"\bbranch\s+protection\b", diff --git a/scripts/langchain/verdict_policy.py b/scripts/langchain/verdict_policy.py index 809bdcadb..86d422350 100644 --- a/scripts/langchain/verdict_policy.py +++ b/scripts/langchain/verdict_policy.py @@ -16,7 +16,7 @@ "fail": 3, } -CONCERNS_NEEDS_HUMAN_THRESHOLD = 0.50 +CONCERNS_NEEDS_HUMAN_THRESHOLD = 0.85 @dataclass(frozen=True) @@ -193,11 +193,11 @@ def evaluate_verdict_policy( needs_human_reason = "" if split_verdict: confidence_value = concerns_confidence or 0.0 - if confidence_value < CONCERNS_NEEDS_HUMAN_THRESHOLD: + if confidence_value >= CONCERNS_NEEDS_HUMAN_THRESHOLD: needs_human = True needs_human_reason = ( - "Provider verdicts split with low-confidence concerns; " - f"dissenting confidence {confidence_value:.2f} < " + "Provider verdicts split with high-confidence concerns; " + f"dissenting confidence {confidence_value:.2f} >= " f"{CONCERNS_NEEDS_HUMAN_THRESHOLD:.2f}. " "Requires human review before starting another automated follow-up." ) diff --git a/scripts/ledger_migrate_base.py b/scripts/ledger_migrate_base.py index 2a551ecac..9f87285b0 100644 --- a/scripts/ledger_migrate_base.py +++ b/scripts/ledger_migrate_base.py @@ -174,14 +174,27 @@ def main(argv: Iterable[str] | None = None) -> int: mismatches: list[LedgerResult] = [] updated: list[LedgerResult] = [] + skipped: list[tuple[Path, str]] = [] for ledger_path in ledgers: - result = migrate_ledger(ledger_path, default_branch, check=args.check) + try: + result = migrate_ledger(ledger_path, default_branch, check=args.check) + except (MigrationError, yaml.YAMLError) as exc: + # One corrupt ledger must not block processing of the remaining files. + reason = str(exc).replace("\n", " ").replace("\r", " ") + print(f"::warning::Skipping {ledger_path.name}: {reason}") + skipped.append((ledger_path, reason)) + continue if args.check: if result.previous != default_branch: mismatches.append(result) elif result.changed: updated.append(result) + if skipped: + print(f"Skipped {len(skipped)} corrupt ledger(s):") + for path, reason in skipped: + print(f" - {path.name}: {reason}") + if args.check: if mismatches: print("Found ledgers with stale base values:") diff --git a/templates/consumer-repo/.github/actions/setup-api-client/action.yml b/templates/consumer-repo/.github/actions/setup-api-client/action.yml index 3343ebfb5..109cc33a7 100644 --- a/templates/consumer-repo/.github/actions/setup-api-client/action.yml +++ b/templates/consumer-repo/.github/actions/setup-api-client/action.yml @@ -102,6 +102,22 @@ runs: if [ -d "node_modules/@octokit/rest" ]; then echo "✅ @octokit/rest already installed" else + # Snapshot vendored package metadata before npm install. + # npm may overwrite transitive deps (e.g. minimatch) that are + # committed as vendored packages with intentional version pins. + VENDORED_SNAPSHOT="" + if [ -f "node_modules/minimatch/package.json" ]; then + VENDORED_SNAPSHOT=$(mktemp -d) + for pkg_dir in node_modules/*/; do + if [ -f "${pkg_dir}package.json" ]; then + pkg_name=$(basename "$pkg_dir") + mkdir -p "${VENDORED_SNAPSHOT}/${pkg_name}" + cp "${pkg_dir}package.json" "${VENDORED_SNAPSHOT}/${pkg_name}/package.json" + fi + done + echo "📸 Snapshotted vendored package metadata" + fi + # Install with pinned versions for consistency # Capture stderr for debugging if the command fails npm_output=$(mktemp) @@ -122,6 +138,20 @@ runs: @octokit/plugin-paginate-rest@9.1.5 \ @octokit/auth-app@6.0.3 fi + + # Restore vendored package metadata that npm may have overwritten + if [ -n "${VENDORED_SNAPSHOT:-}" ] && [ -d "${VENDORED_SNAPSHOT}" ]; then + for pkg_backup in "${VENDORED_SNAPSHOT}"/*/; do + pkg_name=$(basename "$pkg_backup") + if [ -f "node_modules/${pkg_name}/package.json" ] && \ + [ -f "${pkg_backup}package.json" ]; then + cp "${pkg_backup}package.json" "node_modules/${pkg_name}/package.json" + fi + done + rm -rf "${VENDORED_SNAPSHOT}" + echo "📸 Restored vendored package metadata" + fi + echo "✅ @octokit dependencies installed" fi diff --git a/templates/consumer-repo/.github/scripts/detect-changes.js b/templates/consumer-repo/.github/scripts/detect-changes.js index 45c31c4f5..64d34cb68 100644 --- a/templates/consumer-repo/.github/scripts/detect-changes.js +++ b/templates/consumer-repo/.github/scripts/detect-changes.js @@ -168,6 +168,28 @@ function isRateLimitError(error) { return message.includes('rate limit') || message.includes('ratelimit'); } +function isNonFatalListFilesError(error) { + if (!error) { + return false; + } + if (isRateLimitError(error)) { + return true; + } + const status = error.status || error?.response?.status; + if ([401, 403, 404, 422].includes(status)) { + return true; + } + const message = String(error.message || error?.response?.data?.message || '').toLowerCase(); + return ( + message.includes('resource not accessible by integration') || + message.includes('insufficient permission') || + message.includes('requires higher permissions') || + message.includes('not found') || + message.includes('unprocessable') || + message.includes('validation failed') + ); +} + async function listChangedFiles({ github, context }) { const pull = context?.payload?.pull_request; const number = pull?.number; @@ -175,25 +197,42 @@ async function listChangedFiles({ github, context }) { return []; } try { - const iterator = github.paginate.iterator(github.rest.pulls.listFiles, { + const files = []; + const params = { owner: context.repo.owner, repo: context.repo.repo, pull_number: number, per_page: 100, - }); - const files = []; - for await (const page of iterator) { - if (Array.isArray(page.data)) { - for (const item of page.data) { + }; + if (typeof github?.paginate?.iterator === 'function') { + const iterator = github.paginate.iterator(github.rest.pulls.listFiles, params); + for await (const page of iterator) { + if (Array.isArray(page.data)) { + for (const item of page.data) { + if (item && typeof item.filename === 'string') { + files.push(item.filename); + } + } + } + } + return files; + } + + if (typeof github?.paginate === 'function') { + const items = await github.paginate(github.rest.pulls.listFiles, params); + if (Array.isArray(items)) { + for (const item of items) { if (item && typeof item.filename === 'string') { files.push(item.filename); } } } + return files; } - return files; + + throw new Error('GitHub paginate API is unavailable'); } catch (error) { - if (isRateLimitError(error)) { + if (isNonFatalListFilesError(error)) { return null; } throw error; @@ -261,7 +300,7 @@ async function detectChanges({ github, context, core, files, fetchFiles } = {}) workflow_changed: 'true', }; const warn = core?.warning ? core.warning.bind(core) : console.warn.bind(console); - warn('Rate limit reached while determining changed files; assuming code changes (but not docker).'); + warn('Unable to determine changed files via API; assuming code changes (but not docker).'); if (core) { for (const [key, value] of Object.entries(outputs)) { core.setOutput(key, value); diff --git a/templates/consumer-repo/.github/workflows/agents-72-codex-belt-worker.yml b/templates/consumer-repo/.github/workflows/agents-72-codex-belt-worker.yml index 14e6f2bfb..ab0801413 100644 --- a/templates/consumer-repo/.github/workflows/agents-72-codex-belt-worker.yml +++ b/templates/consumer-repo/.github/workflows/agents-72-codex-belt-worker.yml @@ -903,9 +903,12 @@ jobs: gh_output = os.environ.get('GITHUB_OUTPUT') if gh_output: + task = start_info['task'] or {} + task_title = (task.get('title') or '').replace('\r', ' ').replace('\n', ' ') with open(gh_output, 'a', encoding='utf-8') as handle: - handle.write(f"task_id={start_info['task']['id'] if start_info['task'] else ''}\n") - handle.write(f"task_status={start_info['task']['current_status'] if start_info['task'] else ''}\n") + handle.write(f"task_id={task.get('id', '')}\n") + handle.write(f"task_title={task_title}\n") + handle.write(f"task_status={task.get('current_status', '')}\n") handle.write(f"ledger_changed={'true' if changed else 'false'}\n") handle.write(f"ledger_created={'true' if start_info['created'] else 'false'}\n") handle.write(f"ledger_base_aligned={'true' if base_aligned else 'false'}\n") @@ -1187,12 +1190,28 @@ jobs: const prNumber = Number('${{ steps.pr.outputs.number }}'); const branch = ('${{ steps.ctx.outputs.branch }}' || '').trim() || '(unknown branch)'; const dryRun = '${{ steps.mode.outputs.dry_run }}' === 'true'; + const taskId = ('${{ steps.ledger_start.outputs.task_id }}' || '').trim(); + const taskTitle = ('${{ steps.ledger_start.outputs.task_title }}' || '').trim(); const { owner, repo } = context.repo; const marker = ''; const summary = dryRun ? `Codex Worker activated for branch \`${branch}\` (dry run preview).` : `Codex Worker activated for branch \`${branch}\`.`; - const body = `${marker}\n${summary}\n\n@codex start\n\nAutomated belt worker prepared this PR. Please continue implementing the requested changes.`; + // Direct Codex to focus on the single next ledger task for higher + // first-commit success probability. Full issue context is in the + // PR body; this comment narrows the immediate scope. + let taskDirective = ''; + if (taskId && taskTitle) { + taskDirective = [ + '', + `**Focus on this task first:** \`${taskId}\` — ${taskTitle}`, + '', + 'Implement **only** this task in your first commit.', + 'Ensure the code compiles and existing tests pass before moving on.', + 'The keepalive loop will assign subsequent tasks after this one is complete.', + ].join('\n'); + } + const body = `${marker}\n${summary}\n\n@codex start${taskDirective}`; try { const comments = await paginateWithRetry( diff --git a/templates/consumer-repo/.github/workflows/agents-autofix-loop.yml b/templates/consumer-repo/.github/workflows/agents-autofix-loop.yml index 677da056a..902cdf831 100644 --- a/templates/consumer-repo/.github/workflows/agents-autofix-loop.yml +++ b/templates/consumer-repo/.github/workflows/agents-autofix-loop.yml @@ -155,7 +155,7 @@ jobs: appendix: '', stop_reason: '', attempts: '0', - max_attempts: '3', + max_attempts: '2', trigger_reason: 'unknown', trigger_job: '', trigger_step: '', @@ -280,7 +280,7 @@ jobs: // Reduce attempts for auto-escalated PRs (they weren't agent-initiated) const isEscalated = labels.includes('autofix:escalated'); const maxAttempts = isEscalated - ? Math.min(2, Number(outputs.max_attempts)) + ? 1 : Number(outputs.max_attempts); const previousRuns = await paginateWithRetry( github, diff --git a/templates/consumer-repo/.github/workflows/agents-pr-meta.yml b/templates/consumer-repo/.github/workflows/agents-pr-meta.yml index f07a33ee7..24a480544 100644 --- a/templates/consumer-repo/.github/workflows/agents-pr-meta.yml +++ b/templates/consumer-repo/.github/workflows/agents-pr-meta.yml @@ -47,8 +47,10 @@ concurrency: group: >- ${{ github.event_name == 'issue_comment' && format('agents-pr-meta-comment-{0}', github.event.comment.id) || + github.event_name == 'pull_request' && + format('agents-pr-meta-pr-{0}', github.event.pull_request.number) || format('agents-pr-meta-run-{0}', github.run_id) }} - cancel-in-progress: false + cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: # Resolve PR context for issue_comment events diff --git a/templates/consumer-repo/scripts/langchain/capability_check.py b/templates/consumer-repo/scripts/langchain/capability_check.py index b53886821..0bd8ab361 100755 --- a/templates/consumer-repo/scripts/langchain/capability_check.py +++ b/templates/consumer-repo/scripts/langchain/capability_check.py @@ -160,8 +160,8 @@ def _is_multi_action_task(task: str) -> bool: def _requires_admin_access(task: str) -> bool: patterns = [ r"\bgithub\s+secrets?\b", - r"\b(?:manage|configure|set|create|update|delete|add|modify|rotate)\s+secrets?\b", - r"\bsecrets?\s+(?:management|configuration|rotation)\b", + r"\b(?:manage|configure|set|create|update|delete|add|modify|rotate)\b.{0,30}\bsecrets?\b", + r"\bsecrets?\b.{0,30}\b(?:management|configuration|rotation)\b", r"\brepository\s+settings\b", r"\brepo\s+settings\b", r"\bbranch\s+protection\b", diff --git a/tests/scripts/test_capability_check.py b/tests/scripts/test_capability_check.py index e35788188..d462e057e 100644 --- a/tests/scripts/test_capability_check.py +++ b/tests/scripts/test_capability_check.py @@ -434,6 +434,21 @@ def test_fallback_flags_manage_secrets(self) -> None: assert result.recommendation == "BLOCKED" assert "admin" in result.blocked_tasks[0]["reason"].lower() + def test_fallback_flags_set_repository_secret(self) -> None: + """Regression: 'Set repository secret TOKEN' must be blocked even with + intervening words between the verb and 'secret'.""" + with mock.patch("scripts.langchain.capability_check._get_llm_client", return_value=None): + result = classify_capabilities(["Set repository secret TOKEN"], "") + assert result.recommendation == "BLOCKED" + assert "admin" in result.blocked_tasks[0]["reason"].lower() + + def test_fallback_flags_update_actions_secret(self) -> None: + """Regression: 'Update GitHub Actions secret FOO' must be blocked.""" + with mock.patch("scripts.langchain.capability_check._get_llm_client", return_value=None): + result = classify_capabilities(["Update GitHub Actions secret FOO"], "") + assert result.recommendation == "BLOCKED" + assert "admin" in result.blocked_tasks[0]["reason"].lower() + def test_fallback_suggests_decomposition(self) -> None: with mock.patch("scripts.langchain.capability_check._get_llm_client", return_value=None): result = classify_capabilities(["Refactor auth + add tests + update docs"], "") diff --git a/tests/scripts/test_ledger_migrate_base.py b/tests/scripts/test_ledger_migrate_base.py index 0fe7bac40..180eaac5a 100644 --- a/tests/scripts/test_ledger_migrate_base.py +++ b/tests/scripts/test_ledger_migrate_base.py @@ -318,3 +318,48 @@ def test_main_reports_no_updates(monkeypatch, capsys, tmp_path) -> None: assert exit_code == 0 out = capsys.readouterr().out assert "Ledgers already matched the default branch; no updates written." in out + + +def test_main_skips_corrupt_ledger_and_continues(monkeypatch, capsys, tmp_path) -> None: + """A corrupt YAML ledger must not block processing of other ledgers.""" + agents_dir = tmp_path / ".agents" + agents_dir.mkdir() + # Write a corrupt ledger with invalid YAML + corrupt = agents_dir / "issue-10-ledger.yml" + corrupt.write_text("base: main\ntasks:\n - title: `backtick breaks yaml`\n", encoding="utf-8") + # Write a valid ledger that should still be processed + valid = agents_dir / "issue-20-ledger.yml" + _write_ledger(valid, {"base": "develop", "items": []}) + + monkeypatch.setattr(ledger_migrate_base, "find_repo_root", lambda: tmp_path) + monkeypatch.setattr(ledger_migrate_base, "detect_default_branch", lambda _=None: "main") + + exit_code = ledger_migrate_base.main([]) + + assert exit_code == 0 + out = capsys.readouterr().out + # The corrupt ledger was skipped with a warning + assert "Skipping issue-10-ledger.yml" in out + assert "Skipped 1 corrupt ledger(s):" in out + # The valid ledger was still updated + assert yaml.safe_load(valid.read_text(encoding="utf-8"))["base"] == "main" + + +def test_main_check_skips_corrupt_ledger(monkeypatch, capsys, tmp_path) -> None: + """--check mode also skips corrupt ledgers without failing.""" + agents_dir = tmp_path / ".agents" + agents_dir.mkdir() + corrupt = agents_dir / "issue-5-ledger.yml" + corrupt.write_text("not: valid: yaml: `oops`\n", encoding="utf-8") + valid = agents_dir / "issue-6-ledger.yml" + valid.write_text("base: main\n", encoding="utf-8") + + monkeypatch.setattr(ledger_migrate_base, "find_repo_root", lambda: tmp_path) + monkeypatch.setattr(ledger_migrate_base, "detect_default_branch", lambda _=None: "main") + + exit_code = ledger_migrate_base.main(["--check"]) + + assert exit_code == 0 + out = capsys.readouterr().out + assert "Skipping issue-5-ledger.yml" in out + assert "All ledgers already track the default branch." in out diff --git a/tests/test_followup_issue_generator.py b/tests/test_followup_issue_generator.py index 7570cd103..c3f613be6 100755 --- a/tests/test_followup_issue_generator.py +++ b/tests/test_followup_issue_generator.py @@ -528,12 +528,12 @@ def test_advisory_concerns_are_notes(self): assert "- [ ] Address: Could add a clarifying comment" not in followup.body assert "## Notes" in followup.body - def test_split_low_confidence_requires_needs_human(self): - """Low-confidence split verdicts should trigger needs-human labeling.""" + def test_split_high_confidence_requires_needs_human(self): + """High-confidence CONCERNS in a split verdict should trigger needs-human labeling.""" verification_data = VerificationData( provider_verdicts={ "openai": {"verdict": "PASS", "confidence": 90}, - "anthropic": {"verdict": "CONCERNS", "confidence": 49}, + "anthropic": {"verdict": "CONCERNS", "confidence": 92}, }, concerns=["Missing test coverage"], ) diff --git a/tests/test_verdict_extract.py b/tests/test_verdict_extract.py index 4e7c70c7f..38d43456a 100644 --- a/tests/test_verdict_extract.py +++ b/tests/test_verdict_extract.py @@ -25,7 +25,7 @@ def _parse_github_output(raw: str) -> dict[str, str]: def test_verdict_extract_emits_structured_github_outputs(tmp_path): summary = _build_summary( "| openai | gpt-5.2 | PASS | 0.92 | Looks good. |", - "| anthropic | claude-sonnet-4-5 | CONCERNS | 0.49 | Missing edge case. |", + "| anthropic | claude-sonnet-4-5 | CONCERNS | 0.90 | Missing edge case. |", ) result = verdict_extract.build_verdict_result(summary, policy="worst") output_path = tmp_path / "github_output.txt" diff --git a/tests/test_verdict_policy.py b/tests/test_verdict_policy.py index 7e134a5d5..bfe631e05 100755 --- a/tests/test_verdict_policy.py +++ b/tests/test_verdict_policy.py @@ -51,6 +51,7 @@ def test_select_verdict_majority_policy(): def test_needs_human_threshold_boundary(): + """At exactly the threshold, needs_human should fire (>= comparison).""" verdicts = [ ProviderVerdict("openai", "gpt-5.2", "PASS", 0.92), ProviderVerdict( @@ -60,30 +61,30 @@ def test_needs_human_threshold_boundary(): result = evaluate_verdict_policy(verdicts, policy="worst") - assert result.needs_human is False + assert result.needs_human is True -def test_needs_human_true_below_threshold(): - """Concerns below the threshold should trigger needs_human.""" +def test_needs_human_true_above_threshold(): + """Concerns above the threshold should trigger needs_human.""" verdicts = [ ProviderVerdict("openai", "gpt-5.2", "PASS", 0.92), - ProviderVerdict("anthropic", "claude-sonnet-4-5", "CONCERNS", 0.40), + ProviderVerdict("anthropic", "claude-sonnet-4-5", "CONCERNS", 0.90), ] result = evaluate_verdict_policy(verdicts, policy="worst") assert result.needs_human is True assert result.split_verdict is True - assert "low-confidence" in result.needs_human_reason + assert "high-confidence" in result.needs_human_reason def test_moderate_confidence_concerns_do_not_block(): - """Regression: 72% concerns in a split verdict should not trigger needs_human. + """Moderate-confidence concerns in a split verdict should not trigger needs_human. - Previously CONCERNS_NEEDS_HUMAN_THRESHOLD was 0.85, which caused any - split verdict with <85% concerns to be flagged. The lowered threshold - (0.50) allows moderate-confidence concerns to proceed with automatic - follow-up creation. + needs_human only fires when the CONCERNS provider is highly confident + (>= 0.85), indicating the LLM is quite sure there are real problems. + Moderate confidence means the LLM is uncertain — that's a weaker signal + and shouldn't block follow-up automation. """ verdicts = [ ProviderVerdict("openai", "gpt-5.2", "CONCERNS", 72), diff --git a/tests/test_verdict_policy_integration.py b/tests/test_verdict_policy_integration.py index c5b29fde3..40562e48f 100755 --- a/tests/test_verdict_policy_integration.py +++ b/tests/test_verdict_policy_integration.py @@ -23,7 +23,8 @@ def _build_summary(*rows: str) -> str: return f"## Provider Summary\n\n{header}{body}\n" -def test_split_verdict_confidence_boundary_needs_human_false(): +def test_split_verdict_confidence_boundary_needs_human_true(): + """At exactly the threshold (0.85), needs_human should be True.""" summary = _build_summary( "| openai | gpt-5.2 | PASS | 0.92 | Looks good. |", "| anthropic | claude-sonnet-4-5 | CONCERNS | 0.85 | Missing edge case. |", @@ -33,22 +34,23 @@ def test_split_verdict_confidence_boundary_needs_human_false(): followup_result = _followup_result(summary) assert workflow_result.verdict == followup_result.verdict == "CONCERNS" - assert workflow_result.needs_human is False - assert followup_result.needs_human is False + assert workflow_result.needs_human is True + assert followup_result.needs_human is True -def test_split_verdict_low_confidence_needs_human_true(): +def test_split_verdict_low_confidence_needs_human_false(): + """Below threshold, low-confidence concerns should NOT trigger needs_human.""" summary = _build_summary( "| openai | gpt-5.2 | PASS | 0.92 | Looks good. |", - "| anthropic | claude-sonnet-4-5 | CONCERNS | 0.49 | Missing edge case. |", + "| anthropic | claude-sonnet-4-5 | CONCERNS | 0.40 | Missing edge case. |", ) workflow_result = _workflow_result(summary) followup_result = _followup_result(summary) assert workflow_result.verdict == followup_result.verdict == "CONCERNS" - assert workflow_result.needs_human is True - assert followup_result.needs_human is True + assert workflow_result.needs_human is False + assert followup_result.needs_human is False def test_split_verdict_row_order_invariance():