diff --git a/.github/workflows/agents-pr-health.yml b/.github/workflows/agents-pr-health.yml new file mode 100644 index 00000000..c2f0c6c2 --- /dev/null +++ b/.github/workflows/agents-pr-health.yml @@ -0,0 +1,50 @@ +# --------------------------------------------------------------------------- +# Agents PR Health — Consumer Repo Thin Wrapper +# --------------------------------------------------------------------------- +# Periodically scans open PRs for merge conflicts and failing checks, +# then routes remediation to the coding agent that most recently worked +# on each PR. +# +# Copy this file to: .github/workflows/agents-pr-health.yml +# +# Required secrets (inherited from org / repo): +# - WORKFLOWS_APP_ID / WORKFLOWS_APP_PRIVATE_KEY (preferred) +# - ACTIONS_BOT_PAT or SERVICE_BOT_PAT (fallback) +# --------------------------------------------------------------------------- + +name: Agents PR Health + +on: + schedule: + # Every 6 hours — frequent enough to catch drift, rare enough to + # stay well within rate limits and agent cost budgets. + - cron: '0 */6 * * *' + workflow_dispatch: + inputs: + dry_run: + description: 'Preview mode — scan and report without pushing or dispatching' + required: false + default: false + type: boolean + max_prs: + description: 'Maximum PRs to process' + required: false + default: 10 + type: number + +permissions: + contents: write + pull-requests: write + actions: write + +concurrency: + group: pr-health-${{ github.repository }} + cancel-in-progress: true + +jobs: + health: + uses: stranske/Workflows/.github/workflows/reusable-agents-pr-health.yml@main + with: + dry_run: ${{ inputs.dry_run || false }} + max_prs: ${{ inputs.max_prs || 10 }} + secrets: inherit diff --git a/scripts/langchain/integration_layer.py b/scripts/langchain/integration_layer.py index 9720c17c..839ed8f3 100755 --- a/scripts/langchain/integration_layer.py +++ b/scripts/langchain/integration_layer.py @@ -11,11 +11,9 @@ from typing import Any try: - from scripts.langchain import label_matcher as _label_matcher + from scripts.langchain import label_matcher except ModuleNotFoundError: - import label_matcher as _label_matcher # type: ignore[no-redef] - -label_matcher = _label_matcher + import label_matcher @dataclass @@ -68,7 +66,7 @@ def _build_issue_text(issue: IssueData) -> str: return "\n\n".join(parts) -def _build_label_store(labels: Iterable[Any]) -> Any | None: +def _build_label_store(labels: Iterable[Any]) -> label_matcher.LabelVectorStore | None: label_records = _collect_label_records(labels) if not label_records: return None @@ -86,7 +84,7 @@ def _build_label_store(labels: Iterable[Any]) -> Any | None: ) -def _collect_label_records(labels: Iterable[Any]) -> list[Any]: +def _collect_label_records(labels: Iterable[Any]) -> list[label_matcher.LabelRecord]: if labels is None: raise ValueError("labels must be an iterable of label records, not None.") if isinstance(labels, (str, bytes)): @@ -94,7 +92,7 @@ def _collect_label_records(labels: Iterable[Any]) -> list[Any]: if not isinstance(labels, Iterable): raise ValueError("labels must be an iterable of label records.") - records: list[Any] = [] + records: list[label_matcher.LabelRecord] = [] for index, item in enumerate(labels): record = _coerce_label_record(item) if record is not None: @@ -108,7 +106,7 @@ def _collect_label_records(labels: Iterable[Any]) -> list[Any]: return records -def _coerce_label_record(item: Any) -> Any | None: +def _coerce_label_record(item: Any) -> label_matcher.LabelRecord | None: if isinstance(item, label_matcher.LabelRecord): return item if isinstance(item, (str, bytes)): @@ -137,7 +135,7 @@ def _coerce_label_record(item: Any) -> Any | None: def _select_label_names( - matches: Sequence[Any], + matches: Sequence[label_matcher.LabelMatch], *, max_labels: int | None = None, ) -> list[str]: diff --git a/scripts/langchain/issue_dedup.py b/scripts/langchain/issue_dedup.py index afa3f9a3..44bf8639 100755 --- a/scripts/langchain/issue_dedup.py +++ b/scripts/langchain/issue_dedup.py @@ -12,11 +12,9 @@ from typing import Any try: - from scripts.langchain import semantic_matcher as _semantic_matcher + from scripts.langchain import semantic_matcher except ModuleNotFoundError: - import semantic_matcher as _semantic_matcher # type: ignore[no-redef] - -semantic_matcher = _semantic_matcher + import semantic_matcher @dataclass(frozen=True) @@ -92,7 +90,7 @@ def _issue_text(issue: IssueRecord) -> str: def build_issue_vector_store( issues: Iterable[Any], *, - client_info: Any | None = None, + client_info: semantic_matcher.EmbeddingClientInfo | None = None, model: str | None = None, ) -> IssueVectorStore | None: issue_records: list[IssueRecord] = [] diff --git a/scripts/langchain/issue_formatter.py b/scripts/langchain/issue_formatter.py index 48736cdd..71fe6163 100755 --- a/scripts/langchain/issue_formatter.py +++ b/scripts/langchain/issue_formatter.py @@ -17,13 +17,9 @@ from typing import Any try: - from scripts.langchain.injection_guard import check_prompt_injection as _check_prompt_injection + from scripts.langchain.injection_guard import check_prompt_injection except ImportError: # pragma: no cover - fallback for direct invocation - from injection_guard import ( - check_prompt_injection as _check_prompt_injection, # type: ignore[no-redef] - ) - -check_prompt_injection = _check_prompt_injection + from injection_guard import check_prompt_injection # Maximum issue body size to prevent OpenAI rate limit errors (30k TPM limit) # ~4 chars per token, so 50k chars ≈ 12.5k tokens, leaving headroom for prompt + output @@ -394,15 +390,15 @@ def _validate_and_refine_tasks(formatted: str, *, use_llm: bool) -> tuple[str, s return formatted, None try: - from scripts.langchain import task_validator as _task_validator + from scripts.langchain import task_validator except ImportError: try: - import task_validator as _task_validator # type: ignore[no-redef] + import task_validator except ImportError: return formatted, None # Run validation - result = _task_validator.validate_tasks(tasks, context=formatted, use_llm=use_llm) + result = task_validator.validate_tasks(tasks, context=formatted, use_llm=use_llm) # If no changes, return original if set(result.tasks) == set(tasks) and len(result.tasks) == len(tasks): @@ -484,7 +480,7 @@ def format_issue_body(issue_body: str, *, use_llm: bool = True) -> dict[str, Any prompt = _load_prompt() template = ChatPromptTemplate.from_template(prompt) - chain: Any = template | client # type: ignore[operator] + chain = template | client try: response = chain.invoke({"issue_body": issue_body}) except Exception as e: @@ -493,7 +489,7 @@ def format_issue_body(issue_body: str, *, use_llm: bool = True) -> dict[str, Any fallback_info = _get_llm_client(force_openai=True) if fallback_info: client, provider = fallback_info - chain = template | client # type: ignore[operator] + chain = template | client response = chain.invoke({"issue_body": issue_body}) else: raise diff --git a/scripts/langchain/label_matcher.py b/scripts/langchain/label_matcher.py index b03a52c0..2d02b744 100755 --- a/scripts/langchain/label_matcher.py +++ b/scripts/langchain/label_matcher.py @@ -13,11 +13,9 @@ from typing import Any try: - from scripts.langchain import semantic_matcher as _semantic_matcher + from scripts.langchain import semantic_matcher except ModuleNotFoundError: - import semantic_matcher as _semantic_matcher # type: ignore[no-redef] - -semantic_matcher = _semantic_matcher + import semantic_matcher @dataclass(frozen=True) @@ -255,7 +253,7 @@ def _label_text(label: LabelRecord) -> str: def build_label_vector_store( labels: Iterable[Any], *, - client_info: Any | None = None, + client_info: semantic_matcher.EmbeddingClientInfo | None = None, model: str | None = None, ) -> LabelVectorStore | None: label_records: list[LabelRecord] = [] @@ -439,9 +437,9 @@ def find_similar_labels( search_fn = store.similarity_search_with_score score_type = "distance" else: - keyword_hits = _keyword_matches(label_store.labels, query, threshold=threshold) - keyword_hits.sort(key=lambda match: match.score, reverse=True) - return keyword_hits + matches = _keyword_matches(label_store.labels, query, threshold=threshold) + matches.sort(key=lambda match: match.score, reverse=True) + return matches limit = k or DEFAULT_LABEL_SIMILARITY_K try: diff --git a/scripts/langchain/semantic_matcher.py b/scripts/langchain/semantic_matcher.py index 613e2136..71585062 100755 --- a/scripts/langchain/semantic_matcher.py +++ b/scripts/langchain/semantic_matcher.py @@ -12,7 +12,6 @@ import os from collections.abc import Iterable from dataclasses import dataclass -from typing import Protocol from tools.embedding_provider import ( EmbeddingProvider, @@ -25,15 +24,9 @@ DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small" -class EmbeddingClient(Protocol): - def embed_documents(self, texts: list[str]) -> list[list[float]]: ... - - def embed_query(self, text: str) -> list[float]: ... - - @dataclass class EmbeddingClientInfo: - client: EmbeddingClient + client: object provider: str model: str is_fallback: bool @@ -137,7 +130,7 @@ def generate_embeddings( if resolved is None: return None - vectors = resolved.client.embed_documents(items) # type: ignore[attr-defined] + vectors = resolved.client.embed_documents(items) dimensions = len(vectors[0]) if vectors else None return EmbeddingResult( vectors=vectors,