stranske · stranske · Feb 12, 2026 · Feb 12, 2026 · Feb 12, 2026 · Feb 12, 2026
diff --git a/.github/actions/setup-api-client/action.yml b/.github/actions/setup-api-client/action.yml
@@ -102,6 +102,22 @@ runs:
         if [ -d "node_modules/@octokit/rest" ]; then
           echo "✅ @octokit/rest already installed"
         else
+          # Snapshot vendored package metadata before npm install.
+          # npm may overwrite transitive deps (e.g. minimatch) that are
+          # committed as vendored packages with intentional version pins.
+          VENDORED_SNAPSHOT=""
+          if [ -f "node_modules/minimatch/package.json" ]; then
+            VENDORED_SNAPSHOT=$(mktemp -d)
+            for pkg_dir in node_modules/*/; do
+              if [ -f "${pkg_dir}package.json" ]; then
+                pkg_name=$(basename "$pkg_dir")
+                mkdir -p "${VENDORED_SNAPSHOT}/${pkg_name}"
+                cp "${pkg_dir}package.json" "${VENDORED_SNAPSHOT}/${pkg_name}/package.json"
+              fi
+            done
+            echo "📸 Snapshotted vendored package metadata"
+          fi
+
           # Install with pinned versions for consistency
           # Capture stderr for debugging if the command fails
           npm_output=$(mktemp)
@@ -122,6 +138,20 @@ runs:
               @octokit/plugin-paginate-rest@9.1.5 \
               @octokit/auth-app@6.0.3
           fi
+
+          # Restore vendored package metadata that npm may have overwritten
+          if [ -n "${VENDORED_SNAPSHOT:-}" ] && [ -d "${VENDORED_SNAPSHOT}" ]; then
+            for pkg_backup in "${VENDORED_SNAPSHOT}"/*/; do
+              pkg_name=$(basename "$pkg_backup")
+              if [ -f "node_modules/${pkg_name}/package.json" ] && \
+                 [ -f "${pkg_backup}package.json" ]; then
+                cp "${pkg_backup}package.json" "node_modules/${pkg_name}/package.json"
+              fi
+            done
+            rm -rf "${VENDORED_SNAPSHOT}"
+            echo "📸 Restored vendored package metadata"
+          fi
+
           echo "✅ @octokit dependencies installed"
         fi
 

@@ -794,6 +794,8 @@ jobs:
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
           git add -A
+          # Unstage vendored node_modules that may have been modified by npm install
+          git reset HEAD -- .github/scripts/node_modules node_modules .workflows-lib/.github/scripts/node_modules 2>/dev/null || true
           git commit -m "${AUTOFIX_COMMIT_PREFIX} formatting/lint"
           echo "AUTOFIX_COMMIT_SHA=$(git rev-parse HEAD)" >> "$GITHUB_ENV"
 
@@ -865,6 +867,8 @@ jobs:
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
           git add -A
+          # Unstage vendored node_modules that may have been modified by npm install
+          git reset HEAD -- .github/scripts/node_modules node_modules .workflows-lib/.github/scripts/node_modules 2>/dev/null || true
           git commit -m "${AUTOFIX_COMMIT_PREFIX} formatting/lint (patch)" || true
           git format-patch -1 --stdout > autofix.patch
 

@@ -580,7 +580,9 @@ jobs:
           git checkout -B "$HEAD_BRANCH" "origin/${BASE_BRANCH}"
           mkdir -p agents
           printf "<!-- bootstrap for %s on issue #%s -->\n" "$AGENT" "$ISSUE_NUM" > "agents/${AGENT}-${ISSUE_NUM}.md"
-          git add -A || true
+          # Stage only the intended bootstrap file — 'git add -A' would capture
+          # vendored node_modules changes made by setup-api-client npm install.
+          git add "agents/${AGENT}-${ISSUE_NUM}.md" || true
           if ! git diff --cached --quiet; then
             git commit -m "chore(${AGENT}): bootstrap PR for issue #${ISSUE_NUM}"
           else

diff --git a/scripts/check_issue_consistency.py b/scripts/check_issue_consistency.py
@@ -260,6 +260,8 @@ def _is_fallback_error(message: str) -> bool:
         for snippet in (
             "no merge base",
             "bad object",
+            "bad revision",
+            "invalid revision range",
             "invalid object name",
             "not a valid object name",
             "ambiguous argument",

diff --git a/scripts/langchain/capability_check.py b/scripts/langchain/capability_check.py
@@ -161,8 +161,8 @@ def _is_multi_action_task(task: str) -> bool:
 def _requires_admin_access(task: str) -> bool:
     patterns = [
         r"\bgithub\s+secrets?\b",
-        r"\b(?:manage|configure|set|create|update|delete|add|modify|rotate)\s+secrets?\b",
-        r"\bsecrets?\s+(?:management|configuration|rotation)\b",
+        r"\b(?:manage|configure|set|create|update|delete|add|modify|rotate)\b.{0,30}\bsecrets?\b",
+        r"\bsecrets?\b.{0,30}\b(?:management|configuration|rotation)\b",
         r"\brepository\s+settings\b",
         r"\brepo\s+settings\b",
         r"\bbranch\s+protection\b",

diff --git a/scripts/langchain/verdict_policy.py b/scripts/langchain/verdict_policy.py
@@ -16,7 +16,7 @@
     "fail": 3,
 }
 
-CONCERNS_NEEDS_HUMAN_THRESHOLD = 0.50
+CONCERNS_NEEDS_HUMAN_THRESHOLD = 0.85
 
 
 @dataclass(frozen=True)
@@ -193,11 +193,11 @@ def evaluate_verdict_policy(
     needs_human_reason = ""
     if split_verdict:
         confidence_value = concerns_confidence or 0.0
-        if confidence_value < CONCERNS_NEEDS_HUMAN_THRESHOLD:
+        if confidence_value >= CONCERNS_NEEDS_HUMAN_THRESHOLD:
             needs_human = True
             needs_human_reason = (
-                "Provider verdicts split with low-confidence concerns; "
-                f"dissenting confidence {confidence_value:.2f} < "
+                "Provider verdicts split with high-confidence concerns; "
+                f"dissenting confidence {confidence_value:.2f} >= "
                 f"{CONCERNS_NEEDS_HUMAN_THRESHOLD:.2f}. "
                 "Requires human review before starting another automated follow-up."
             )

diff --git a/templates/consumer-repo/.github/actions/setup-api-client/action.yml b/templates/consumer-repo/.github/actions/setup-api-client/action.yml
@@ -102,6 +102,22 @@ runs:
         if [ -d "node_modules/@octokit/rest" ]; then
           echo "✅ @octokit/rest already installed"
         else
+          # Snapshot vendored package metadata before npm install.
+          # npm may overwrite transitive deps (e.g. minimatch) that are
+          # committed as vendored packages with intentional version pins.
+          VENDORED_SNAPSHOT=""
+          if [ -f "node_modules/minimatch/package.json" ]; then
+            VENDORED_SNAPSHOT=$(mktemp -d)
+            for pkg_dir in node_modules/*/; do
+              if [ -f "${pkg_dir}package.json" ]; then
+                pkg_name=$(basename "$pkg_dir")
+                mkdir -p "${VENDORED_SNAPSHOT}/${pkg_name}"
+                cp "${pkg_dir}package.json" "${VENDORED_SNAPSHOT}/${pkg_name}/package.json"
+              fi
+            done
+            echo "📸 Snapshotted vendored package metadata"
+          fi
+
           # Install with pinned versions for consistency
           # Capture stderr for debugging if the command fails
           npm_output=$(mktemp)
@@ -122,6 +138,20 @@ runs:
               @octokit/plugin-paginate-rest@9.1.5 \
               @octokit/auth-app@6.0.3
           fi
+
+          # Restore vendored package metadata that npm may have overwritten
+          if [ -n "${VENDORED_SNAPSHOT:-}" ] && [ -d "${VENDORED_SNAPSHOT}" ]; then
+            for pkg_backup in "${VENDORED_SNAPSHOT}"/*/; do
+              pkg_name=$(basename "$pkg_backup")
+              if [ -f "node_modules/${pkg_name}/package.json" ] && \
+                 [ -f "${pkg_backup}package.json" ]; then
+                cp "${pkg_backup}package.json" "node_modules/${pkg_name}/package.json"
+              fi
+            done
+            rm -rf "${VENDORED_SNAPSHOT}"
+            echo "📸 Restored vendored package metadata"
+          fi
+
           echo "✅ @octokit dependencies installed"
         fi
 

diff --git a/templates/consumer-repo/scripts/langchain/capability_check.py b/templates/consumer-repo/scripts/langchain/capability_check.py
@@ -160,8 +160,8 @@ def _is_multi_action_task(task: str) -> bool:
 def _requires_admin_access(task: str) -> bool:
     patterns = [
         r"\bgithub\s+secrets?\b",
-        r"\b(?:manage|configure|set|create|update|delete|add|modify|rotate)\s+secrets?\b",
-        r"\bsecrets?\s+(?:management|configuration|rotation)\b",
+        r"\b(?:manage|configure|set|create|update|delete|add|modify|rotate)\b.{0,30}\bsecrets?\b",
+        r"\bsecrets?\b.{0,30}\b(?:management|configuration|rotation)\b",
         r"\brepository\s+settings\b",
         r"\brepo\s+settings\b",
         r"\bbranch\s+protection\b",

diff --git a/tests/scripts/test_capability_check.py b/tests/scripts/test_capability_check.py
@@ -434,6 +434,21 @@ def test_fallback_flags_manage_secrets(self) -> None:
             assert result.recommendation == "BLOCKED"
             assert "admin" in result.blocked_tasks[0]["reason"].lower()
 
+    def test_fallback_flags_set_repository_secret(self) -> None:
+        """Regression: 'Set repository secret TOKEN' must be blocked even with
+        intervening words between the verb and 'secret'."""
+        with mock.patch("scripts.langchain.capability_check._get_llm_client", return_value=None):
+            result = classify_capabilities(["Set repository secret TOKEN"], "")
+            assert result.recommendation == "BLOCKED"
+            assert "admin" in result.blocked_tasks[0]["reason"].lower()
+
+    def test_fallback_flags_update_actions_secret(self) -> None:
+        """Regression: 'Update GitHub Actions secret FOO' must be blocked."""
+        with mock.patch("scripts.langchain.capability_check._get_llm_client", return_value=None):
+            result = classify_capabilities(["Update GitHub Actions secret FOO"], "")
+            assert result.recommendation == "BLOCKED"
+            assert "admin" in result.blocked_tasks[0]["reason"].lower()
+
     def test_fallback_suggests_decomposition(self) -> None:
         with mock.patch("scripts.langchain.capability_check._get_llm_client", return_value=None):
             result = classify_capabilities(["Refactor auth + add tests + update docs"], "")

diff --git a/tests/scripts/test_issue_consistency_check.py b/tests/scripts/test_issue_consistency_check.py
@@ -174,6 +174,27 @@ def fake_run_git(args: list[str]) -> str:
     assert calls == [["log"], ["log", "-n", "1"]]
 
 
+def test_run_git_with_fallback_handles_bad_revision(monkeypatch) -> None:
+    calls = []
+
+    def fake_run_git(args: list[str]) -> str:
+        calls.append(args)
+        if args == ["log"]:
+            raise RuntimeError("fatal: bad revision 'deadbeef..HEAD'")
+        return "ok"
+
+    monkeypatch.setattr(check_issue_consistency, "_run_git", fake_run_git)
+
+    output, used_fallback = check_issue_consistency._run_git_with_fallback_and_flag(
+        ["log"],
+        ["log", "-n", "1"],
+    )
+
+    assert output == "ok"
+    assert used_fallback is True
+    assert calls == [["log"], ["log", "-n", "1"]]
+
+
 def test_resolve_base_sha_prefers_merge_base(monkeypatch) -> None:
     calls = []
 

diff --git a/tests/test_followup_issue_generator.py b/tests/test_followup_issue_generator.py
@@ -528,12 +528,12 @@ def test_advisory_concerns_are_notes(self):
         assert "- [ ] Address: Could add a clarifying comment" not in followup.body
         assert "## Notes" in followup.body
 
-    def test_split_low_confidence_requires_needs_human(self):
-        """Low-confidence split verdicts should trigger needs-human labeling."""
+    def test_split_high_confidence_requires_needs_human(self):
+        """High-confidence CONCERNS in a split verdict should trigger needs-human labeling."""
         verification_data = VerificationData(
             provider_verdicts={
                 "openai": {"verdict": "PASS", "confidence": 90},
-                "anthropic": {"verdict": "CONCERNS", "confidence": 49},
+                "anthropic": {"verdict": "CONCERNS", "confidence": 92},
             },
             concerns=["Missing test coverage"],
         )

diff --git a/tests/test_verdict_extract.py b/tests/test_verdict_extract.py
@@ -25,7 +25,7 @@ def _parse_github_output(raw: str) -> dict[str, str]:
 def test_verdict_extract_emits_structured_github_outputs(tmp_path):
     summary = _build_summary(
         "| openai | gpt-5.2 | PASS | 0.92 | Looks good. |",
-        "| anthropic | claude-sonnet-4-5 | CONCERNS | 0.49 | Missing edge case. |",
+        "| anthropic | claude-sonnet-4-5 | CONCERNS | 0.90 | Missing edge case. |",
     )
     result = verdict_extract.build_verdict_result(summary, policy="worst")
     output_path = tmp_path / "github_output.txt"

diff --git a/tests/test_verdict_policy.py b/tests/test_verdict_policy.py
@@ -51,6 +51,7 @@ def test_select_verdict_majority_policy():
 
 
 def test_needs_human_threshold_boundary():
+    """At exactly the threshold, needs_human should fire (>= comparison)."""
     verdicts = [
         ProviderVerdict("openai", "gpt-5.2", "PASS", 0.92),
         ProviderVerdict(
@@ -60,30 +61,30 @@ def test_needs_human_threshold_boundary():
 
     result = evaluate_verdict_policy(verdicts, policy="worst")
 
-    assert result.needs_human is False
+    assert result.needs_human is True
 
 
-def test_needs_human_true_below_threshold():
-    """Concerns below the threshold should trigger needs_human."""
+def test_needs_human_true_above_threshold():
+    """Concerns above the threshold should trigger needs_human."""
     verdicts = [
         ProviderVerdict("openai", "gpt-5.2", "PASS", 0.92),
-        ProviderVerdict("anthropic", "claude-sonnet-4-5", "CONCERNS", 0.40),
+        ProviderVerdict("anthropic", "claude-sonnet-4-5", "CONCERNS", 0.90),
     ]
 
     result = evaluate_verdict_policy(verdicts, policy="worst")
 
     assert result.needs_human is True
     assert result.split_verdict is True
-    assert "low-confidence" in result.needs_human_reason
+    assert "high-confidence" in result.needs_human_reason
 
 
 def test_moderate_confidence_concerns_do_not_block():
-    """Regression: 72% concerns in a split verdict should not trigger needs_human.
+    """Moderate-confidence concerns in a split verdict should not trigger needs_human.
 
-    Previously CONCERNS_NEEDS_HUMAN_THRESHOLD was 0.85, which caused any
-    split verdict with <85% concerns to be flagged.  The lowered threshold
-    (0.50) allows moderate-confidence concerns to proceed with automatic
-    follow-up creation.
+    needs_human only fires when the CONCERNS provider is highly confident
+    (>= 0.85), indicating the LLM is quite sure there are real problems.
+    Moderate confidence means the LLM is uncertain — that's a weaker signal
+    and shouldn't block follow-up automation.
     """
     verdicts = [
         ProviderVerdict("openai", "gpt-5.2", "CONCERNS", 72),

diff --git a/tests/test_verdict_policy_integration.py b/tests/test_verdict_policy_integration.py
@@ -23,7 +23,8 @@ def _build_summary(*rows: str) -> str:
     return f"## Provider Summary\n\n{header}{body}\n"
 
 
-def test_split_verdict_confidence_boundary_needs_human_false():
+def test_split_verdict_confidence_boundary_needs_human_true():
+    """At exactly the threshold (0.85), needs_human should be True."""
     summary = _build_summary(
         "| openai | gpt-5.2 | PASS | 0.92 | Looks good. |",
         "| anthropic | claude-sonnet-4-5 | CONCERNS | 0.85 | Missing edge case. |",
@@ -33,22 +34,23 @@ def test_split_verdict_confidence_boundary_needs_human_false():
     followup_result = _followup_result(summary)
 
     assert workflow_result.verdict == followup_result.verdict == "CONCERNS"
-    assert workflow_result.needs_human is False
-    assert followup_result.needs_human is False
+    assert workflow_result.needs_human is True
+    assert followup_result.needs_human is True
 
 
-def test_split_verdict_low_confidence_needs_human_true():
+def test_split_verdict_low_confidence_needs_human_false():
+    """Below threshold, low-confidence concerns should NOT trigger needs_human."""
     summary = _build_summary(
         "| openai | gpt-5.2 | PASS | 0.92 | Looks good. |",
-        "| anthropic | claude-sonnet-4-5 | CONCERNS | 0.49 | Missing edge case. |",
+        "| anthropic | claude-sonnet-4-5 | CONCERNS | 0.40 | Missing edge case. |",
     )
 
     workflow_result = _workflow_result(summary)
     followup_result = _followup_result(summary)
 
     assert workflow_result.verdict == followup_result.verdict == "CONCERNS"
-    assert workflow_result.needs_human is True
-    assert followup_result.needs_human is True
+    assert workflow_result.needs_human is False
+    assert followup_result.needs_human is False
 
 
 def test_split_verdict_row_order_invariance():