Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/actions/setup-api-client/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,22 @@ runs:
if [ -d "node_modules/@octokit/rest" ]; then
echo "✅ @octokit/rest already installed"
else
# Snapshot vendored package metadata before npm install.
# npm may overwrite transitive deps (e.g. minimatch) that are
# committed as vendored packages with intentional version pins.
VENDORED_SNAPSHOT=""
if [ -f "node_modules/minimatch/package.json" ]; then
VENDORED_SNAPSHOT=$(mktemp -d)
for pkg_dir in node_modules/*/; do
if [ -f "${pkg_dir}package.json" ]; then
pkg_name=$(basename "$pkg_dir")
mkdir -p "${VENDORED_SNAPSHOT}/${pkg_name}"
cp "${pkg_dir}package.json" "${VENDORED_SNAPSHOT}/${pkg_name}/package.json"
fi
done
echo "📸 Snapshotted vendored package metadata"
fi

# Install with pinned versions for consistency
# Capture stderr for debugging if the command fails
npm_output=$(mktemp)
Expand All @@ -122,6 +138,20 @@ runs:
@octokit/plugin-paginate-rest@9.1.5 \
@octokit/auth-app@6.0.3
fi

# Restore vendored package metadata that npm may have overwritten
if [ -n "${VENDORED_SNAPSHOT:-}" ] && [ -d "${VENDORED_SNAPSHOT}" ]; then
for pkg_backup in "${VENDORED_SNAPSHOT}"/*/; do
pkg_name=$(basename "$pkg_backup")
if [ -f "node_modules/${pkg_name}/package.json" ] && \
[ -f "${pkg_backup}package.json" ]; then
cp "${pkg_backup}package.json" "node_modules/${pkg_name}/package.json"
fi
done
rm -rf "${VENDORED_SNAPSHOT}"
echo "📸 Restored vendored package metadata"
fi

echo "✅ @octokit dependencies installed"
fi

Expand Down
19 changes: 18 additions & 1 deletion .github/workflows/agents-72-codex-belt-worker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,7 @@ jobs:
if gh_output:
with open(gh_output, 'a', encoding='utf-8') as handle:
handle.write(f"task_id={start_info['task']['id'] if start_info['task'] else ''}\n")
handle.write(f"task_title={start_info['task']['title'] if start_info['task'] else ''}\n")
Comment thread
stranske marked this conversation as resolved.
Outdated
handle.write(f"task_status={start_info['task']['current_status'] if start_info['task'] else ''}\n")

Copilot AI Feb 12, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

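Writing task_title to $GITHUB_OUTPUT as a single key=value line will break output parsing if a ledger task title contains a newline (or other characters that require the multiline output format): the output file is read line by line, so whatever follows the line break is no longer part of the task_title value. Consider sanitizing task_title (e.g., replacing \r and \n with spaces) or emitting it with the official multiline name<<DELIMITER syntax, using a delimiter that cannot occur in the title, so any title is safe.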

Suggested change
with open(gh_output, 'a', encoding='utf-8') as handle:
handle.write(f"task_id={start_info['task']['id'] if start_info['task'] else ''}\n")
handle.write(f"task_title={start_info['task']['title'] if start_info['task'] else ''}\n")
handle.write(f"task_status={start_info['task']['current_status'] if start_info['task'] else ''}\n")
# Prepare safe, single-line task fields for GitHub outputs
task = start_info['task'] or {}
task_id = task.get('id', '') if task else ''
task_title = task.get('title', '') if task else ''
# Sanitize title to avoid breaking GITHUB_OUTPUT parsing
task_title = task_title.replace('\r', ' ').replace('\n', ' ')
task_status = task.get('current_status', '') if task else ''
with open(gh_output, 'a', encoding='utf-8') as handle:
handle.write(f"task_id={task_id}\n")
handle.write(f"task_title={task_title}\n")
handle.write(f"task_status={task_status}\n")

Copilot uses AI. Check for mistakes.
handle.write(f"ledger_changed={'true' if changed else 'false'}\n")
handle.write(f"ledger_created={'true' if start_info['created'] else 'false'}\n")
Expand Down Expand Up @@ -1187,12 +1188,28 @@ jobs:
const prNumber = Number('${{ steps.pr.outputs.number }}');
const branch = ('${{ steps.ctx.outputs.branch }}' || '').trim() || '(unknown branch)';
const dryRun = '${{ steps.mode.outputs.dry_run }}' === 'true';
const taskId = ('${{ steps.ledger_start.outputs.task_id }}' || '').trim();
const taskTitle = ('${{ steps.ledger_start.outputs.task_title }}' || '').trim();
const { owner, repo } = context.repo;
const marker = '<!-- codex-activation-marker -->';
const summary = dryRun
? `Codex Worker activated for branch \`${branch}\` (dry run preview).`
: `Codex Worker activated for branch \`${branch}\`.`;
const body = `${marker}\n${summary}\n\n@codex start\n\nAutomated belt worker prepared this PR. Please continue implementing the requested changes.`;
// Direct Codex to focus on the single next ledger task for higher
// first-commit success probability. Full issue context is in the
// PR body; this comment narrows the immediate scope.
let taskDirective = '';
if (taskId && taskTitle) {
taskDirective = [
'',
`**Focus on this task first:** \`${taskId}\` — ${taskTitle}`,
'',
'Implement **only** this task in your first commit.',
'Ensure the code compiles and existing tests pass before moving on.',
'The keepalive loop will assign subsequent tasks after this one is complete.',
].join('\n');
}
const body = `${marker}\n${summary}\n\n@codex start${taskDirective}`;

try {
const comments = await paginateWithRetry(
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/agents-autofix-loop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ jobs:
appendix: '',
stop_reason: '',
attempts: '0',
max_attempts: '3',
max_attempts: '2',
trigger_reason: 'unknown',
trigger_job: '',
trigger_step: '',
Expand Down Expand Up @@ -287,7 +287,7 @@ jobs:
// Reduce attempts for auto-escalated PRs (they weren't agent-initiated)
const isEscalated = labels.includes('autofix:escalated');
const maxAttempts = isEscalated
? Math.min(2, Number(outputs.max_attempts))
? 1
: Number(outputs.max_attempts);
const previousRuns = await paginateWithRetry(
github,
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/agents-pr-meta-v4.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ concurrency:
&& github.event.comment
&& github.event.comment.id
&& format('agents-pr-meta-comment-{0}', github.event.comment.id)
|| github.event_name == 'pull_request'
&& format('agents-pr-meta-pr-{0}', github.event.pull_request.number)
|| format('agents-pr-meta-run-{0}', github.run_id)
}}
cancel-in-progress: false
cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:
comment_event_context:
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/reusable-18-autofix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,8 @@ jobs:
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add -A
# Unstage vendored node_modules that may have been modified by npm install
git reset HEAD -- .github/scripts/node_modules node_modules .workflows-lib/.github/scripts/node_modules 2>/dev/null || true
Comment thread
stranske marked this conversation as resolved.
git commit -m "${AUTOFIX_COMMIT_PREFIX} formatting/lint"
echo "AUTOFIX_COMMIT_SHA=$(git rev-parse HEAD)" >> "$GITHUB_ENV"

Expand Down Expand Up @@ -865,6 +867,8 @@ jobs:
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add -A
# Unstage vendored node_modules that may have been modified by npm install
git reset HEAD -- .github/scripts/node_modules node_modules .workflows-lib/.github/scripts/node_modules 2>/dev/null || true
git commit -m "${AUTOFIX_COMMIT_PREFIX} formatting/lint (patch)" || true
git format-patch -1 --stdout > autofix.patch

Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/reusable-agents-issue-bridge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,9 @@ jobs:
git checkout -B "$HEAD_BRANCH" "origin/${BASE_BRANCH}"
mkdir -p agents
printf "<!-- bootstrap for %s on issue #%s -->\n" "$AGENT" "$ISSUE_NUM" > "agents/${AGENT}-${ISSUE_NUM}.md"
git add -A || true
# Stage only the intended bootstrap file — 'git add -A' would capture
# vendored node_modules changes made by setup-api-client npm install.
git add "agents/${AGENT}-${ISSUE_NUM}.md" || true
if ! git diff --cached --quiet; then
git commit -m "chore(${AGENT}): bootstrap PR for issue #${ISSUE_NUM}"
else
Expand Down
4 changes: 2 additions & 2 deletions scripts/langchain/capability_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ def _is_multi_action_task(task: str) -> bool:
def _requires_admin_access(task: str) -> bool:
patterns = [
r"\bgithub\s+secrets?\b",
r"\b(?:manage|configure|set|create|update|delete|add|modify|rotate)\s+secrets?\b",
r"\bsecrets?\s+(?:management|configuration|rotation)\b",
r"\b(?:manage|configure|set|create|update|delete|add|modify|rotate)\b.{0,30}\bsecrets?\b",
r"\bsecrets?\b.{0,30}\b(?:management|configuration|rotation)\b",
r"\brepository\s+settings\b",
r"\brepo\s+settings\b",
r"\bbranch\s+protection\b",
Expand Down
8 changes: 4 additions & 4 deletions scripts/langchain/verdict_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"fail": 3,
}

CONCERNS_NEEDS_HUMAN_THRESHOLD = 0.50
CONCERNS_NEEDS_HUMAN_THRESHOLD = 0.85


@dataclass(frozen=True)
Expand Down Expand Up @@ -193,11 +193,11 @@ def evaluate_verdict_policy(
needs_human_reason = ""
if split_verdict:
confidence_value = concerns_confidence or 0.0
if confidence_value < CONCERNS_NEEDS_HUMAN_THRESHOLD:
if confidence_value >= CONCERNS_NEEDS_HUMAN_THRESHOLD:
needs_human = True
needs_human_reason = (
"Provider verdicts split with low-confidence concerns; "
f"dissenting confidence {confidence_value:.2f} < "
"Provider verdicts split with high-confidence concerns; "
f"dissenting confidence {confidence_value:.2f} >= "
f"{CONCERNS_NEEDS_HUMAN_THRESHOLD:.2f}. "
"Requires human review before starting another automated follow-up."
)
Expand Down
14 changes: 13 additions & 1 deletion scripts/ledger_migrate_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,14 +174,26 @@ def main(argv: Iterable[str] | None = None) -> int:

mismatches: list[LedgerResult] = []
updated: list[LedgerResult] = []
skipped: list[tuple[Path, str]] = []
for ledger_path in ledgers:
result = migrate_ledger(ledger_path, default_branch, check=args.check)
try:
result = migrate_ledger(ledger_path, default_branch, check=args.check)
except (MigrationError, yaml.YAMLError) as exc:
# One corrupt ledger must not block processing of the remaining files.
print(f"::warning::Skipping {ledger_path.name}: {exc}")
skipped.append((ledger_path, str(exc)))
Comment thread
stranske marked this conversation as resolved.
Outdated
continue
if args.check:
if result.previous != default_branch:
mismatches.append(result)
elif result.changed:
updated.append(result)

if skipped:
print(f"Skipped {len(skipped)} corrupt ledger(s):")
for path, reason in skipped:
print(f" - {path.name}: {reason}")

if args.check:
if mismatches:
print("Found ledgers with stale base values:")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,22 @@ runs:
if [ -d "node_modules/@octokit/rest" ]; then
echo "✅ @octokit/rest already installed"
else
# Snapshot vendored package metadata before npm install.
# npm may overwrite transitive deps (e.g. minimatch) that are
# committed as vendored packages with intentional version pins.
VENDORED_SNAPSHOT=""
if [ -f "node_modules/minimatch/package.json" ]; then
VENDORED_SNAPSHOT=$(mktemp -d)
for pkg_dir in node_modules/*/; do
if [ -f "${pkg_dir}package.json" ]; then
pkg_name=$(basename "$pkg_dir")
mkdir -p "${VENDORED_SNAPSHOT}/${pkg_name}"
cp "${pkg_dir}package.json" "${VENDORED_SNAPSHOT}/${pkg_name}/package.json"
fi
done
echo "📸 Snapshotted vendored package metadata"
fi

# Install with pinned versions for consistency
# Capture stderr for debugging if the command fails
npm_output=$(mktemp)
Expand All @@ -122,6 +138,20 @@ runs:
@octokit/plugin-paginate-rest@9.1.5 \
@octokit/auth-app@6.0.3
fi

# Restore vendored package metadata that npm may have overwritten
if [ -n "${VENDORED_SNAPSHOT:-}" ] && [ -d "${VENDORED_SNAPSHOT}" ]; then
for pkg_backup in "${VENDORED_SNAPSHOT}"/*/; do
pkg_name=$(basename "$pkg_backup")
if [ -f "node_modules/${pkg_name}/package.json" ] && \
[ -f "${pkg_backup}package.json" ]; then
cp "${pkg_backup}package.json" "node_modules/${pkg_name}/package.json"
fi
done
rm -rf "${VENDORED_SNAPSHOT}"
echo "📸 Restored vendored package metadata"
fi

echo "✅ @octokit dependencies installed"
fi

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,7 @@ jobs:
if gh_output:
with open(gh_output, 'a', encoding='utf-8') as handle:
handle.write(f"task_id={start_info['task']['id'] if start_info['task'] else ''}\n")
handle.write(f"task_title={start_info['task']['title'] if start_info['task'] else ''}\n")

Copilot AI Feb 12, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

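Writing task_title to $GITHUB_OUTPUT as a single key=value line will break output parsing if a ledger task title contains a newline (or other characters that require the multiline output format): the output file is read line by line, so whatever follows the line break is no longer part of the task_title value. Consider sanitizing task_title (e.g., replacing \r and \n with spaces) or emitting it with the official multiline name<<DELIMITER syntax, using a delimiter that cannot occur in the title, so any title is safe.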

Suggested change
handle.write(f"task_title={start_info['task']['title'] if start_info['task'] else ''}\n")
title = start_info['task']['title'] if start_info['task'] else ''
handle.write("task_title<<TASK_TITLE_EOF\n")
handle.write(f"{title}\n")
handle.write("TASK_TITLE_EOF\n")

Copilot uses AI. Check for mistakes.
handle.write(f"task_status={start_info['task']['current_status'] if start_info['task'] else ''}\n")
handle.write(f"ledger_changed={'true' if changed else 'false'}\n")
handle.write(f"ledger_created={'true' if start_info['created'] else 'false'}\n")
Expand Down Expand Up @@ -1187,12 +1188,28 @@ jobs:
const prNumber = Number('${{ steps.pr.outputs.number }}');
const branch = ('${{ steps.ctx.outputs.branch }}' || '').trim() || '(unknown branch)';
const dryRun = '${{ steps.mode.outputs.dry_run }}' === 'true';
const taskId = ('${{ steps.ledger_start.outputs.task_id }}' || '').trim();
const taskTitle = ('${{ steps.ledger_start.outputs.task_title }}' || '').trim();
const { owner, repo } = context.repo;
const marker = '<!-- codex-activation-marker -->';
const summary = dryRun
? `Codex Worker activated for branch \`${branch}\` (dry run preview).`
: `Codex Worker activated for branch \`${branch}\`.`;
const body = `${marker}\n${summary}\n\n@codex start\n\nAutomated belt worker prepared this PR. Please continue implementing the requested changes.`;
// Direct Codex to focus on the single next ledger task for higher
// first-commit success probability. Full issue context is in the
// PR body; this comment narrows the immediate scope.
let taskDirective = '';
if (taskId && taskTitle) {
taskDirective = [
'',
`**Focus on this task first:** \`${taskId}\` — ${taskTitle}`,
'',
'Implement **only** this task in your first commit.',
'Ensure the code compiles and existing tests pass before moving on.',
'The keepalive loop will assign subsequent tasks after this one is complete.',
].join('\n');
}
const body = `${marker}\n${summary}\n\n@codex start${taskDirective}`;

try {
const comments = await paginateWithRetry(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ jobs:
appendix: '',
stop_reason: '',
attempts: '0',
max_attempts: '3',
max_attempts: '2',
trigger_reason: 'unknown',
trigger_job: '',
trigger_step: '',
Expand Down Expand Up @@ -280,7 +280,7 @@ jobs:
// Reduce attempts for auto-escalated PRs (they weren't agent-initiated)
const isEscalated = labels.includes('autofix:escalated');
const maxAttempts = isEscalated
? Math.min(2, Number(outputs.max_attempts))
? 1
: Number(outputs.max_attempts);
const previousRuns = await paginateWithRetry(
github,
Expand Down
4 changes: 3 additions & 1 deletion templates/consumer-repo/.github/workflows/agents-pr-meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,10 @@ concurrency:
group: >-
${{ github.event_name == 'issue_comment' &&
format('agents-pr-meta-comment-{0}', github.event.comment.id) ||
github.event_name == 'pull_request' &&
format('agents-pr-meta-pr-{0}', github.event.pull_request.number) ||
format('agents-pr-meta-run-{0}', github.run_id) }}
cancel-in-progress: false
cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:
# Resolve PR context for issue_comment events
Expand Down
4 changes: 2 additions & 2 deletions templates/consumer-repo/scripts/langchain/capability_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ def _is_multi_action_task(task: str) -> bool:
def _requires_admin_access(task: str) -> bool:
patterns = [
r"\bgithub\s+secrets?\b",
r"\b(?:manage|configure|set|create|update|delete|add|modify|rotate)\s+secrets?\b",
r"\bsecrets?\s+(?:management|configuration|rotation)\b",
r"\b(?:manage|configure|set|create|update|delete|add|modify|rotate)\b.{0,30}\bsecrets?\b",
r"\bsecrets?\b.{0,30}\b(?:management|configuration|rotation)\b",
r"\brepository\s+settings\b",
r"\brepo\s+settings\b",
r"\bbranch\s+protection\b",
Expand Down
15 changes: 15 additions & 0 deletions tests/scripts/test_capability_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,21 @@ def test_fallback_flags_manage_secrets(self) -> None:
assert result.recommendation == "BLOCKED"
assert "admin" in result.blocked_tasks[0]["reason"].lower()

def test_fallback_flags_set_repository_secret(self) -> None:
    """Regression: 'Set repository secret TOKEN' must be blocked even though
    other words sit between the verb and 'secret'."""
    task_list = ["Set repository secret TOKEN"]
    patch_target = "scripts.langchain.capability_check._get_llm_client"
    # Patch _get_llm_client to return None for the duration of the call.
    with mock.patch(patch_target, return_value=None):
        outcome = classify_capabilities(task_list, "")
    assert outcome.recommendation == "BLOCKED"
    # The first blocked task's reason has to mention admin access.
    blocked_reason = outcome.blocked_tasks[0]["reason"]
    assert "admin" in blocked_reason.lower()

def test_fallback_flags_update_actions_secret(self) -> None:
    """Regression: 'Update GitHub Actions secret FOO' must be blocked."""
    task_list = ["Update GitHub Actions secret FOO"]
    patch_target = "scripts.langchain.capability_check._get_llm_client"
    # Patch _get_llm_client to return None for the duration of the call.
    with mock.patch(patch_target, return_value=None):
        outcome = classify_capabilities(task_list, "")
    assert outcome.recommendation == "BLOCKED"
    # The first blocked task's reason has to mention admin access.
    blocked_reason = outcome.blocked_tasks[0]["reason"]
    assert "admin" in blocked_reason.lower()

def test_fallback_suggests_decomposition(self) -> None:
with mock.patch("scripts.langchain.capability_check._get_llm_client", return_value=None):
result = classify_capabilities(["Refactor auth + add tests + update docs"], "")
Expand Down
Loading
Loading