stranske · stranske · Mar 2, 2026
@@ -0,0 +1,50 @@
+# ---------------------------------------------------------------------------
+# Agents PR Health — Consumer Repo Thin Wrapper
+# ---------------------------------------------------------------------------
+# Periodically scans open PRs for merge conflicts and failing checks,
+# then routes remediation to the coding agent that most recently worked
+# on each PR.
+#
+# Copy this file to: .github/workflows/agents-pr-health.yml
+#
+# Required secrets (inherited from org / repo through `secrets: inherit`) — one of:
+#   - WORKFLOWS_APP_ID / WORKFLOWS_APP_PRIVATE_KEY (preferred)
+#   - ACTIONS_BOT_PAT or SERVICE_BOT_PAT (fallback)
+# ---------------------------------------------------------------------------
+
+name: Agents PR Health
+
+on:
+  schedule:
+    # Every 6 hours — frequent enough to catch drift, infrequent enough to
+    # stay well within rate limits and agent cost budgets.
+    - cron: '0 */6 * * *'
+  workflow_dispatch:
+    inputs:
+      dry_run:
+        description: 'Preview mode — scan and report without pushing or dispatching'
+        required: false
+        default: false
+        type: boolean
+      max_prs:
+        description: 'Maximum PRs to process'
+        required: false
+        default: 10
+        type: number
+
+permissions:
+  contents: write
+  pull-requests: write
+  actions: write
+
+concurrency:
+  group: pr-health-${{ github.repository }}
+  cancel-in-progress: true
+
+jobs:
+  health:
+    uses: stranske/Workflows/.github/workflows/reusable-agents-pr-health.yml@main
+    with:
+      dry_run: ${{ inputs.dry_run || false }}
+      max_prs: ${{ inputs.max_prs || 10 }}
+    secrets: inherit
diff --git a/scripts/langchain/integration_layer.py b/scripts/langchain/integration_layer.py
@@ -11,11 +11,9 @@
 from typing import Any
 
 try:
-    from scripts.langchain import label_matcher as _label_matcher
+    from scripts.langchain import label_matcher
 except ModuleNotFoundError:
-    import label_matcher as _label_matcher  # type: ignore[no-redef]
-
-label_matcher = _label_matcher
+    import label_matcher
 
 
 @dataclass
@@ -68,7 +66,7 @@ def _build_issue_text(issue: IssueData) -> str:
     return "\n\n".join(parts)
 
 
-def _build_label_store(labels: Iterable[Any]) -> Any | None:
+def _build_label_store(labels: Iterable[Any]) -> label_matcher.LabelVectorStore | None:
     label_records = _collect_label_records(labels)
     if not label_records:
         return None
@@ -86,15 +84,15 @@ def _build_label_store(labels: Iterable[Any]) -> Any | None:
     )
 
 
-def _collect_label_records(labels: Iterable[Any]) -> list[Any]:
+def _collect_label_records(labels: Iterable[Any]) -> list[label_matcher.LabelRecord]:
     if labels is None:
         raise ValueError("labels must be an iterable of label records, not None.")
     if isinstance(labels, (str, bytes)):
         raise ValueError("labels must be an iterable of label records, not a string.")
     if not isinstance(labels, Iterable):
         raise ValueError("labels must be an iterable of label records.")
 
-    records: list[Any] = []
+    records: list[label_matcher.LabelRecord] = []
     for index, item in enumerate(labels):
         record = _coerce_label_record(item)
         if record is not None:
@@ -108,7 +106,7 @@ def _collect_label_records(labels: Iterable[Any]) -> list[Any]:
     return records
 
 
-def _coerce_label_record(item: Any) -> Any | None:
+def _coerce_label_record(item: Any) -> label_matcher.LabelRecord | None:
     if isinstance(item, label_matcher.LabelRecord):
         return item
     if isinstance(item, (str, bytes)):
@@ -137,7 +135,7 @@ def _coerce_label_record(item: Any) -> Any | None:
 
 
 def _select_label_names(
-    matches: Sequence[Any],
+    matches: Sequence[label_matcher.LabelMatch],
     *,
     max_labels: int | None = None,
 ) -> list[str]:

diff --git a/scripts/langchain/issue_dedup.py b/scripts/langchain/issue_dedup.py
@@ -12,11 +12,9 @@
 from typing import Any
 
 try:
-    from scripts.langchain import semantic_matcher as _semantic_matcher
+    from scripts.langchain import semantic_matcher
 except ModuleNotFoundError:
-    import semantic_matcher as _semantic_matcher  # type: ignore[no-redef]
-
-semantic_matcher = _semantic_matcher
+    import semantic_matcher
 
 
 @dataclass(frozen=True)
@@ -92,7 +90,7 @@ def _issue_text(issue: IssueRecord) -> str:
 def build_issue_vector_store(
     issues: Iterable[Any],
     *,
-    client_info: Any | None = None,
+    client_info: semantic_matcher.EmbeddingClientInfo | None = None,
     model: str | None = None,
 ) -> IssueVectorStore | None:
     issue_records: list[IssueRecord] = []

diff --git a/scripts/langchain/issue_formatter.py b/scripts/langchain/issue_formatter.py
@@ -17,13 +17,9 @@
 from typing import Any
 
 try:
-    from scripts.langchain.injection_guard import check_prompt_injection as _check_prompt_injection
+    from scripts.langchain.injection_guard import check_prompt_injection
 except ImportError:  # pragma: no cover - fallback for direct invocation
-    from injection_guard import (
-        check_prompt_injection as _check_prompt_injection,  # type: ignore[no-redef]
-    )
-
-check_prompt_injection = _check_prompt_injection
+    from injection_guard import check_prompt_injection
 
 # Maximum issue body size to prevent OpenAI rate limit errors (30k TPM limit)
 # ~4 chars per token, so 50k chars ≈ 12.5k tokens, leaving headroom for prompt + output
@@ -394,15 +390,15 @@ def _validate_and_refine_tasks(formatted: str, *, use_llm: bool) -> tuple[str, s
         return formatted, None
 
     try:
-        from scripts.langchain import task_validator as _task_validator
+        from scripts.langchain import task_validator
     except ImportError:
         try:
-            import task_validator as _task_validator  # type: ignore[no-redef]
+            import task_validator
         except ImportError:
             return formatted, None
 
     # Run validation
-    result = _task_validator.validate_tasks(tasks, context=formatted, use_llm=use_llm)
+    result = task_validator.validate_tasks(tasks, context=formatted, use_llm=use_llm)
 
     # If no changes, return original
     if set(result.tasks) == set(tasks) and len(result.tasks) == len(tasks):
@@ -484,7 +480,7 @@ def format_issue_body(issue_body: str, *, use_llm: bool = True) -> dict[str, Any
 
                 prompt = _load_prompt()
                 template = ChatPromptTemplate.from_template(prompt)
-                chain: Any = template | client  # type: ignore[operator]
+                chain = template | client
                 try:
                     response = chain.invoke({"issue_body": issue_body})
                 except Exception as e:
@@ -493,7 +489,7 @@ def format_issue_body(issue_body: str, *, use_llm: bool = True) -> dict[str, Any
                         fallback_info = _get_llm_client(force_openai=True)
                         if fallback_info:
                             client, provider = fallback_info
-                            chain = template | client  # type: ignore[operator]
+                            chain = template | client
                             response = chain.invoke({"issue_body": issue_body})
                         else:
                             raise

diff --git a/scripts/langchain/label_matcher.py b/scripts/langchain/label_matcher.py
@@ -13,11 +13,9 @@
 from typing import Any
 
 try:
-    from scripts.langchain import semantic_matcher as _semantic_matcher
+    from scripts.langchain import semantic_matcher
 except ModuleNotFoundError:
-    import semantic_matcher as _semantic_matcher  # type: ignore[no-redef]
-
-semantic_matcher = _semantic_matcher
+    import semantic_matcher
 
 
 @dataclass(frozen=True)
@@ -255,7 +253,7 @@ def _label_text(label: LabelRecord) -> str:
 def build_label_vector_store(
     labels: Iterable[Any],
     *,
-    client_info: Any | None = None,
+    client_info: semantic_matcher.EmbeddingClientInfo | None = None,
     model: str | None = None,
 ) -> LabelVectorStore | None:
     label_records: list[LabelRecord] = []
@@ -439,9 +437,9 @@ def find_similar_labels(
         search_fn = store.similarity_search_with_score
         score_type = "distance"
     else:
-        keyword_hits = _keyword_matches(label_store.labels, query, threshold=threshold)
-        keyword_hits.sort(key=lambda match: match.score, reverse=True)
-        return keyword_hits
+        matches = _keyword_matches(label_store.labels, query, threshold=threshold)
+        matches.sort(key=lambda match: match.score, reverse=True)
+        return matches
 
     limit = k or DEFAULT_LABEL_SIMILARITY_K
     try:

diff --git a/scripts/langchain/semantic_matcher.py b/scripts/langchain/semantic_matcher.py
@@ -12,7 +12,6 @@
 import os
 from collections.abc import Iterable
 from dataclasses import dataclass
-from typing import Protocol
 
 from tools.embedding_provider import (
     EmbeddingProvider,
@@ -25,15 +24,9 @@
 DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
 
 
-class EmbeddingClient(Protocol):
-    def embed_documents(self, texts: list[str]) -> list[list[float]]: ...
-
-    def embed_query(self, text: str) -> list[float]: ...
-
-
 @dataclass
 class EmbeddingClientInfo:
-    client: EmbeddingClient
+    client: object
     provider: str
     model: str
     is_fallback: bool
@@ -137,7 +130,7 @@ def generate_embeddings(
     if resolved is None:
         return None
 
-    vectors = resolved.client.embed_documents(items)  # type: ignore[attr-defined]
+    vectors = resolved.client.embed_documents(items)
     dimensions = len(vectors[0]) if vectors else None
     return EmbeddingResult(
         vectors=vectors,