diff --git a/.github/scripts/agents-guard.js b/.github/scripts/agents-guard.js index 0fc2971e..4bc11a9a 100644 --- a/.github/scripts/agents-guard.js +++ b/.github/scripts/agents-guard.js @@ -447,7 +447,9 @@ function evaluateGuard({ const hasCodeownerApproval = hasExternalApproval || authorIsCodeowner; const hasProtectedChanges = modifiedProtectedPaths.size > 0; - // Allow label to bypass approval for automated PRs (dependabot, renovate) + // Security note: Allow `agents:allow-change` label to bypass CODEOWNER approval + // ONLY for automated dependency PRs from known bots (dependabot, renovate). + // Human PRs or other bot PRs still require CODEOWNER approval even with label. const isAutomatedPR = normalizedAuthor && (normalizedAuthor === 'dependabot[bot]' || normalizedAuthor === 'renovate[bot]'); const needsApproval = hasProtectedChanges && !hasCodeownerApproval && !(hasAllowLabel && isAutomatedPR); const needsLabel = hasProtectedChanges && !hasAllowLabel && !hasCodeownerApproval; diff --git a/.github/scripts/keepalive_loop.js b/.github/scripts/keepalive_loop.js index cdce7012..10cef603 100644 --- a/.github/scripts/keepalive_loop.js +++ b/.github/scripts/keepalive_loop.js @@ -1088,8 +1088,8 @@ async function evaluateKeepaliveLoop({ github, context, core, payload: overrideP let action = 'wait'; let reason = 'pending'; - const verificationStatus = normalise(state?.verification?.status); - const verificationDone = ['done', 'verified', 'complete'].includes(verificationStatus.toLowerCase()); + const verificationStatus = normalise(state?.verification?.status)?.toLowerCase(); + const verificationDone = ['done', 'verified', 'complete'].includes(verificationStatus); const needsVerification = allComplete && !verificationDone; if (!hasAgentLabel) { diff --git a/.github/scripts/keepalive_prompt_routing.js b/.github/scripts/keepalive_prompt_routing.js index 8b598433..b6d9c1ba 100644 --- a/.github/scripts/keepalive_prompt_routing.js +++ 
b/.github/scripts/keepalive_prompt_routing.js @@ -31,7 +31,7 @@ const FEATURE_SCENARIOS = new Set([ 'nexttask', ]); -const FIX_MODES = new Set(['fix', 'fix-ci', 'fix_ci', 'ci', 'ci-failure']); +const FIX_MODES = new Set(['fix', 'fix-ci', 'fix_ci', 'ci', 'ci-failure', 'ci_failure', 'fix-ci-failure']); const VERIFY_MODES = new Set(['verify', 'verification', 'verify-acceptance', 'acceptance']); function resolvePromptMode({ scenario, mode, action, reason } = {}) { diff --git a/docs/LABELS.md b/docs/LABELS.md index bd503fbd..87df350a 100644 --- a/docs/LABELS.md +++ b/docs/LABELS.md @@ -200,5 +200,5 @@ To add new label-triggered functionality: --- -*Last updated: December 25, 2025* +*Last updated: January 5, 2026* *Source of truth: Workflows repository* diff --git a/scripts/langchain/issue_formatter.py b/scripts/langchain/issue_formatter.py new file mode 100755 index 00000000..ed6256f4 --- /dev/null +++ b/scripts/langchain/issue_formatter.py @@ -0,0 +1,366 @@ +#!/usr/bin/env python3 +""" +Format raw issue text into the AGENT_ISSUE_TEMPLATE structure. + +Run with: + python scripts/langchain/issue_formatter.py \ + --input-file issue.md --output-file formatted.md +""" + +from __future__ import annotations + +import argparse +import json +import os +import re +import sys +from pathlib import Path +from typing import Any + +ISSUE_FORMATTER_PROMPT = """ +You are a formatting assistant. Convert the raw GitHub issue body into the +AGENT_ISSUE_TEMPLATE format with the exact section headers in order: + +## Why +## Scope +## Non-Goals +## Tasks +## Acceptance Criteria +## Implementation Notes + +Rules: +- Use bullet points ONLY in Tasks and Acceptance Criteria. +- Every task/criterion must be specific, verifiable, and sized for ~10 minutes. +- Use unchecked checkboxes: "- [ ]". +- Preserve file paths and concrete details when mentioned. +- If a section lacks content, use "_Not provided._" (or "- [ ] _Not provided._" + for Tasks/Acceptance). 
+- Output ONLY the formatted markdown with these sections (no extra commentary). + +Raw issue body: +{issue_body} +""".strip() + +PROMPT_PATH = Path(__file__).resolve().parent / "prompts" / "format_issue.md" +FEEDBACK_PROMPT_PATH = Path(__file__).resolve().parent / "prompts" / "format_issue_feedback.md" + +SECTION_ALIASES = { + "why": ["why", "motivation", "summary", "goals"], + "scope": ["scope", "background", "context", "overview"], + "non_goals": ["non-goals", "nongoals", "out of scope", "constraints", "exclusions"], + "tasks": ["tasks", "task list", "tasklist", "todo", "to do", "implementation"], + "acceptance": [ + "acceptance criteria", + "acceptance", + "definition of done", + "done criteria", + "success criteria", + ], + "implementation": [ + "implementation notes", + "implementation note", + "notes", + "details", + "technical notes", + ], +} + +SECTION_TITLES = { + "why": "Why", + "scope": "Scope", + "non_goals": "Non-Goals", + "tasks": "Tasks", + "acceptance": "Acceptance Criteria", + "implementation": "Implementation Notes", +} + +LIST_ITEM_REGEX = re.compile(r"^(\s*)([-*+]|\d+[.)])\s+(.*)$") +CHECKBOX_REGEX = re.compile(r"^\[([ xX])\]\s*(.*)$") + + +def _load_prompt() -> str: + if PROMPT_PATH.is_file(): + base_prompt = PROMPT_PATH.read_text(encoding="utf-8").strip() + else: + base_prompt = ISSUE_FORMATTER_PROMPT + + if FEEDBACK_PROMPT_PATH.is_file(): + feedback = FEEDBACK_PROMPT_PATH.read_text(encoding="utf-8").strip() + if feedback: + return f"{base_prompt}\n\n{feedback}\n" + return base_prompt + + +def _get_llm_client() -> tuple[object, str] | None: + try: + from langchain_openai import ChatOpenAI + except ImportError: + return None + + github_token = os.environ.get("GITHUB_TOKEN") + openai_token = os.environ.get("OPENAI_API_KEY") + if not github_token and not openai_token: + return None + + from tools.llm_provider import DEFAULT_MODEL, GITHUB_MODELS_BASE_URL + + if github_token: + return ( + ChatOpenAI( + model=DEFAULT_MODEL, + 
base_url=GITHUB_MODELS_BASE_URL, + api_key=github_token, + temperature=0.1, + ), + "github-models", + ) + return ( + ChatOpenAI( + model=DEFAULT_MODEL, + api_key=openai_token, + temperature=0.1, + ), + "openai", + ) + + +def _normalize_heading(text: str) -> str: + cleaned = re.sub(r"[#*_:]+", " ", text).strip().lower() + cleaned = re.sub(r"\s+", " ", cleaned) + return cleaned + + +def _resolve_section(label: str) -> str | None: + normalized = _normalize_heading(label) + for key, aliases in SECTION_ALIASES.items(): + for alias in aliases: + if normalized == _normalize_heading(alias): + return key + return None + + +def _strip_list_marker(line: str) -> str: + match = LIST_ITEM_REGEX.match(line) + if not match: + return line + return match.group(3).strip() + + +def _normalize_non_action_lines(lines: list[str]) -> list[str]: + cleaned: list[str] = [] + in_fence = False + for raw in lines: + stripped = raw.strip() + if stripped.startswith("```"): + in_fence = not in_fence + cleaned.append(raw) + continue + if in_fence: + cleaned.append(raw) + continue + if not stripped: + cleaned.append("") + continue + cleaned.append(_strip_list_marker(raw)) + return cleaned + + +def _normalize_checklist_lines(lines: list[str]) -> list[str]: + cleaned: list[str] = [] + in_fence = False + for raw in lines: + stripped = raw.strip() + if stripped.startswith("```"): + in_fence = not in_fence + cleaned.append(raw) + continue + if in_fence: + cleaned.append(raw) + continue + if not stripped: + continue + match = LIST_ITEM_REGEX.match(raw) + if match: + indent, _, remainder = match.groups() + checkbox = CHECKBOX_REGEX.match(remainder.strip()) + if checkbox: + mark = "x" if checkbox.group(1).lower() == "x" else " " + text = checkbox.group(2).strip() + if text: + cleaned.append(f"{indent}- [{mark}] {text}") + continue + cleaned.append(f"{indent}- [ ] {remainder.strip()}") + else: + cleaned.append(f"- [ ] {stripped}") + return cleaned + + +def _parse_sections(body: str) -> tuple[dict[str, 
list[str]], list[str]]: + sections: dict[str, list[str]] = {key: [] for key in SECTION_TITLES} + preamble: list[str] = [] + current: str | None = None + for line in body.splitlines(): + heading_match = re.match(r"^\s*#{1,6}\s+(.*)$", line) + if heading_match: + section_key = _resolve_section(heading_match.group(1)) + if section_key: + current = section_key + continue + if re.match(r"^\s*(?:\*\*|__)(.+?)(?:\*\*|__)\s*:?\s*$", line): + inner = re.sub(r"^\s*(?:\*\*|__)(.+?)(?:\*\*|__)\s*:?\s*$", r"\1", line) + section_key = _resolve_section(inner) + if section_key: + current = section_key + continue + if re.match(r"^\s*[A-Za-z][A-Za-z0-9\s-]{2,}:\s*$", line): + label = line.split(":", 1)[0] + section_key = _resolve_section(label) + if section_key: + current = section_key + continue + if current: + sections[current].append(line) + else: + preamble.append(line) + return sections, preamble + + +def _format_issue_fallback(issue_body: str) -> str: + body = issue_body.strip() + sections, preamble = _parse_sections(body) + + if preamble and not sections["scope"]: + sections["scope"] = preamble + + why_lines = _normalize_non_action_lines(sections["why"]) + scope_lines = _normalize_non_action_lines(sections["scope"]) + non_goals_lines = _normalize_non_action_lines(sections["non_goals"]) + impl_lines = _normalize_non_action_lines(sections["implementation"]) + + tasks_lines = _normalize_checklist_lines(sections["tasks"]) + acceptance_lines = _normalize_checklist_lines(sections["acceptance"]) + + def join_or_placeholder(lines: list[str], placeholder: str) -> str: + content = "\n".join(line for line in lines).strip() + return content if content else placeholder + + why_text = join_or_placeholder(why_lines, "_Not provided._") + scope_text = join_or_placeholder(scope_lines, "_Not provided._") + non_goals_text = join_or_placeholder(non_goals_lines, "_Not provided._") + impl_text = join_or_placeholder(impl_lines, "_Not provided._") + tasks_text = join_or_placeholder(tasks_lines, "- [ 
] _Not provided._") + acceptance_text = join_or_placeholder(acceptance_lines, "- [ ] _Not provided._") + + parts = [ + "## Why", + "", + why_text, + "", + "## Scope", + "", + scope_text, + "", + "## Non-Goals", + "", + non_goals_text, + "", + "## Tasks", + "", + tasks_text, + "", + "## Acceptance Criteria", + "", + acceptance_text, + "", + "## Implementation Notes", + "", + impl_text, + ] + return "\n".join(parts).strip() + + +def _formatted_output_valid(text: str) -> bool: + if not text: + return False + required = ["## Tasks", "## Acceptance Criteria"] + return all(section in text for section in required) + + +def format_issue_body(issue_body: str, *, use_llm: bool = True) -> dict[str, Any]: + if not issue_body: + issue_body = "" + + if use_llm: + client_info = _get_llm_client() + if client_info: + client, provider = client_info + try: + from langchain_core.prompts import ChatPromptTemplate + except ImportError: + pass + else: + prompt = _load_prompt() + template = ChatPromptTemplate.from_template(prompt) + chain = template | client + response = chain.invoke({"issue_body": issue_body}) + content = getattr(response, "content", None) or str(response) + formatted = content.strip() + if _formatted_output_valid(formatted): + return { + "formatted_body": formatted, + "provider_used": provider, + "used_llm": True, + } + + formatted = _format_issue_fallback(issue_body) + return { + "formatted_body": formatted, + "provider_used": None, + "used_llm": False, + } + + +def build_label_transition() -> dict[str, list[str]]: + return { + "add": ["agents:formatted"], + "remove": ["agents:format"], + } + + +def _load_input(args: argparse.Namespace) -> str: + if args.input_file: + return Path(args.input_file).read_text(encoding="utf-8") + if args.input_text: + return args.input_text + return sys.stdin.read() + + +def main() -> None: + parser = argparse.ArgumentParser(description="Format issues into AGENT_ISSUE_TEMPLATE.") + parser.add_argument("--input-file", help="Path to raw 
issue text.") + parser.add_argument("--input-text", help="Raw issue text (inline).") + parser.add_argument("--output-file", help="Path to write formatted output.") + parser.add_argument("--json", action="store_true", help="Emit JSON payload to stdout.") + parser.add_argument("--no-llm", action="store_true", help="Disable LLM usage.") + args = parser.parse_args() + + raw = _load_input(args) + result = format_issue_body(raw, use_llm=not args.no_llm) + + if args.output_file: + Path(args.output_file).write_text(result["formatted_body"], encoding="utf-8") + + if args.json: + payload = { + "formatted_body": result["formatted_body"], + "provider_used": result.get("provider_used"), + "used_llm": result.get("used_llm", False), + "labels": build_label_transition(), + } + print(json.dumps(payload, ensure_ascii=True)) + else: + print(result["formatted_body"]) + + +if __name__ == "__main__": + main() diff --git a/scripts/langchain/prompts/format_issue.md b/scripts/langchain/prompts/format_issue.md new file mode 100644 index 00000000..1610c14b --- /dev/null +++ b/scripts/langchain/prompts/format_issue.md @@ -0,0 +1,20 @@ +You are a formatting assistant. Convert the raw GitHub issue body into the +AGENT_ISSUE_TEMPLATE format with the exact section headers in order: + +## Why +## Scope +## Non-Goals +## Tasks +## Acceptance Criteria +## Implementation Notes + +Rules: +- Use bullet points ONLY in Tasks and Acceptance Criteria. +- Every task/criterion must be specific, verifiable, and sized for ~10 minutes. +- Use unchecked checkboxes: "- [ ]". +- Preserve file paths and concrete details when mentioned. +- If a section lacks content, use "_Not provided._" (or "- [ ] _Not provided._" for Tasks/Acceptance). +- Output ONLY the formatted markdown with these sections (no extra commentary). 
+ +Raw issue body: +{issue_body} diff --git a/tools/llm_provider.py b/tools/llm_provider.py new file mode 100644 index 00000000..1ab626d3 --- /dev/null +++ b/tools/llm_provider.py @@ -0,0 +1,597 @@ +""" +LLM Provider Abstraction with Fallback Chain + +Provides a unified interface for LLM calls with automatic fallback: +1. GitHub Models API (primary) - uses GITHUB_TOKEN +2. OpenAI API (fallback) - uses OPENAI_API_KEY +3. Regex patterns (last resort) - no API calls + +Usage: + from tools.llm_provider import get_llm_provider, LLMProvider + + provider = get_llm_provider() + result = provider.analyze_completion(session_text, tasks) +""" + +from __future__ import annotations + +import json +import logging +import os +from abc import ABC, abstractmethod +from dataclasses import dataclass + +logger = logging.getLogger(__name__) + +# GitHub Models API endpoint (OpenAI-compatible) +GITHUB_MODELS_BASE_URL = "https://models.inference.ai.azure.com" +DEFAULT_MODEL = "gpt-4o-mini" + + +@dataclass +class CompletionAnalysis: + """Result of task completion analysis.""" + + completed_tasks: list[str] # Task descriptions marked complete + in_progress_tasks: list[str] # Tasks currently being worked on + blocked_tasks: list[str] # Tasks that are blocked + confidence: float # 0.0 to 1.0 + reasoning: str # Explanation of the analysis + provider_used: str # Which provider generated this + + # Quality metrics for BS detection + raw_confidence: float | None = None # Original confidence before adjustment + confidence_adjusted: bool = False # Whether confidence was adjusted + quality_warnings: list[str] | None = None # Warnings about analysis quality + + +@dataclass +class SessionQualityContext: + """Context about session quality for validating LLM responses.""" + + has_agent_messages: bool = False + has_work_evidence: bool = False + file_change_count: int = 0 + successful_command_count: int = 0 + estimated_effort_score: int = 0 + data_quality: str = "unknown" # high, medium, low, minimal + 
analysis_text_length: int = 0 + + +class LLMProvider(ABC): + """Abstract base class for LLM providers.""" + + @property + @abstractmethod + def name(self) -> str: + """Provider name for logging.""" + pass + + @abstractmethod + def is_available(self) -> bool: + """Check if this provider can be used.""" + pass + + @abstractmethod + def analyze_completion( + self, + session_output: str, + tasks: list[str], + context: str | None = None, + ) -> CompletionAnalysis: + """ + Analyze session output to determine task completion status. + + Args: + session_output: Codex session output (summary or JSONL events) + tasks: List of task descriptions from PR checkboxes + context: Optional additional context (PR description, etc.) + + Returns: + CompletionAnalysis with task status breakdown + """ + pass + + +class GitHubModelsProvider(LLMProvider): + """LLM provider using GitHub Models API (OpenAI-compatible).""" + + @property + def name(self) -> str: + return "github-models" + + def is_available(self) -> bool: + return bool(os.environ.get("GITHUB_TOKEN")) + + def _get_client(self): + """Get LangChain ChatOpenAI client configured for GitHub Models.""" + try: + from langchain_openai import ChatOpenAI + except ImportError: + logger.warning("langchain_openai not installed") + return None + + return ChatOpenAI( + model=DEFAULT_MODEL, + base_url=GITHUB_MODELS_BASE_URL, + api_key=os.environ.get("GITHUB_TOKEN"), + temperature=0.1, # Low temperature for consistent analysis + ) + + def analyze_completion( + self, + session_output: str, + tasks: list[str], + context: str | None = None, + quality_context: SessionQualityContext | None = None, + ) -> CompletionAnalysis: + client = self._get_client() + if not client: + raise RuntimeError("LangChain OpenAI not available") + + prompt = self._build_analysis_prompt(session_output, tasks, context) + + try: + response = client.invoke(prompt) + return self._parse_response(response.content, tasks, quality_context) + except Exception as e: + 
logger.error(f"GitHub Models API error: {e}") + raise + + def _validate_confidence( + self, + raw_confidence: float, + completed_count: int, + in_progress_count: int, + quality_context: SessionQualityContext | None, + reasoning: str, + ) -> tuple[float, list[str]]: + """ + BS Detector: Validate and potentially adjust LLM confidence. + + This catches cases where the LLM reports high confidence but the + analysis is inconsistent with the session evidence. + + Args: + raw_confidence: The confidence reported by the LLM + completed_count: Number of tasks marked as completed + in_progress_count: Number of tasks marked as in progress + quality_context: Session quality metrics (if available) + reasoning: The LLM's reasoning text + + Returns: + Tuple of (adjusted_confidence, list of warnings) + """ + warnings = [] + confidence = raw_confidence + + # Sanity check: Confidence should be between 0 and 1 + confidence = max(0.0, min(1.0, confidence)) + + if quality_context is None: + # No context available, trust LLM but note it + return confidence, [] + + # BS Detection Rule 1: High confidence + zero work + evidence of work = suspicious + if ( + raw_confidence > 0.7 + and completed_count == 0 + and in_progress_count == 0 + and quality_context.has_work_evidence + ): + warnings.append( + f"High confidence ({raw_confidence:.0%}) but no tasks detected " + f"despite {quality_context.file_change_count} file changes and " + f"{quality_context.successful_command_count} successful commands" + ) + # Reduce confidence significantly - the LLM might have had insufficient data + confidence = min(confidence, 0.3) + logger.warning(f"BS detected: {warnings[-1]}") + + # BS Detection Rule 2: Very short analysis text = likely data loss + if quality_context.analysis_text_length < 200: + warnings.append( + f"Analysis text suspiciously short " + f"({quality_context.analysis_text_length} chars) - " + "possible data loss in pipeline" + ) + # Short text means limited evidence - cap confidence + 
confidence = min(confidence, 0.4) + logger.warning(f"Short analysis text: {quality_context.analysis_text_length} chars") + + # BS Detection Rule 3: Zero tasks + high effort score = something's wrong + if ( + quality_context.estimated_effort_score > 30 + and completed_count == 0 + and in_progress_count == 0 + ): + warnings.append( + f"Effort score ({quality_context.estimated_effort_score}) suggests work was done " + "but no tasks detected" + ) + confidence = min(confidence, 0.4) + + # BS Detection Rule 4: Reasoning mentions "no evidence" but there's evidence + no_evidence_phrases = ["no evidence", "no work", "nothing done", "no specific"] + reasoning_lower = reasoning.lower() + if ( + any(phrase in reasoning_lower for phrase in no_evidence_phrases) + and quality_context.has_work_evidence + ): + warnings.append("LLM claims 'no evidence' but session has file changes/commands") + confidence = min(confidence, 0.35) + + # BS Detection Rule 5: Data quality impacts confidence ceiling + quality_caps = { + "high": 1.0, + "medium": 0.8, + "low": 0.6, + "minimal": 0.4, + } + quality_cap = quality_caps.get(quality_context.data_quality, 0.5) + if confidence > quality_cap: + warnings.append( + f"Confidence capped from {raw_confidence:.0%} to {quality_cap:.0%} " + f"due to {quality_context.data_quality} data quality" + ) + confidence = quality_cap + + return confidence, warnings + + def _build_analysis_prompt( + self, + session_output: str, + tasks: list[str], + context: str | None = None, + ) -> str: + task_list = "\n".join(f"- [ ] {task}" for task in tasks) + + return f"""Analyze this Codex session output and determine which tasks have been completed. 
+
+## Tasks to Track
+{task_list}
+
+## Session Output
+{session_output[:8000]}
+
+## Instructions
+For each task, determine if it was:
+- COMPLETED: Clear evidence the task was finished
+- IN_PROGRESS: Work started but not finished
+- BLOCKED: Cannot proceed due to an issue
+- NOT_STARTED: No evidence of work on this task
+
+IMPORTANT: Base your analysis on CONCRETE EVIDENCE such as:
+- File modifications (files being created/edited)
+- Successful test runs
+- Command outputs showing completed work
+- Direct statements of completion
+
+If the session output is very short or lacks detail, lower your confidence accordingly.
+
+Respond in JSON format:
+{{
+  "completed": ["task description 1", ...],
+  "in_progress": ["task description 2", ...],
+  "blocked": ["task description 3", ...],
+  "confidence": 0.85,
+  "reasoning": "Brief explanation of your analysis with specific evidence cited"
+}}
+
+Only include tasks in completed/in_progress/blocked if you have evidence.
+Be conservative - if unsure, don't mark as completed.""" + + def _parse_response( + self, + content: str, + tasks: list[str], + quality_context: SessionQualityContext | None = None, + ) -> CompletionAnalysis: + """Parse LLM response into CompletionAnalysis with BS detection.""" + try: + # Try to extract JSON from response + json_start = content.find("{") + json_end = content.rfind("}") + 1 + if json_start >= 0 and json_end > json_start: + data = json.loads(content[json_start:json_end]) + else: + raise ValueError("No JSON found in response") + + raw_confidence = float(data.get("confidence", 0.5)) + completed = data.get("completed", []) + in_progress = data.get("in_progress", []) + reasoning = data.get("reasoning", "") + + # Apply BS detection to validate/adjust confidence + adjusted_confidence, warnings = self._validate_confidence( + raw_confidence=raw_confidence, + completed_count=len(completed), + in_progress_count=len(in_progress), + quality_context=quality_context, + reasoning=reasoning, + ) + + return CompletionAnalysis( + completed_tasks=completed, + in_progress_tasks=in_progress, + blocked_tasks=data.get("blocked", []), + confidence=adjusted_confidence, + reasoning=reasoning, + provider_used=self.name, + raw_confidence=raw_confidence if adjusted_confidence != raw_confidence else None, + confidence_adjusted=adjusted_confidence != raw_confidence, + quality_warnings=warnings if warnings else None, + ) + except (json.JSONDecodeError, ValueError) as e: + logger.warning(f"Failed to parse LLM response: {e}") + # Return empty analysis on parse failure + return CompletionAnalysis( + completed_tasks=[], + in_progress_tasks=[], + blocked_tasks=[], + confidence=0.0, + reasoning=f"Failed to parse response: {e}", + provider_used=self.name, + ) + + +class OpenAIProvider(LLMProvider): + """LLM provider using OpenAI API directly.""" + + @property + def name(self) -> str: + return "openai" + + def is_available(self) -> bool: + return bool(os.environ.get("OPENAI_API_KEY")) + + 
def _get_client(self): + """Get LangChain ChatOpenAI client.""" + try: + from langchain_openai import ChatOpenAI + except ImportError: + logger.warning("langchain_openai not installed") + return None + + return ChatOpenAI( + model=DEFAULT_MODEL, + api_key=os.environ.get("OPENAI_API_KEY"), + temperature=0.1, + ) + + def analyze_completion( + self, + session_output: str, + tasks: list[str], + context: str | None = None, + ) -> CompletionAnalysis: + client = self._get_client() + if not client: + raise RuntimeError("LangChain OpenAI not available") + + # Reuse the same prompt building logic + github_provider = GitHubModelsProvider() + prompt = github_provider._build_analysis_prompt(session_output, tasks, context) + + try: + response = client.invoke(prompt) + result = github_provider._parse_response(response.content, tasks) + # Override provider name + return CompletionAnalysis( + completed_tasks=result.completed_tasks, + in_progress_tasks=result.in_progress_tasks, + blocked_tasks=result.blocked_tasks, + confidence=result.confidence, + reasoning=result.reasoning, + provider_used=self.name, + ) + except Exception as e: + logger.error(f"OpenAI API error: {e}") + raise + + +class RegexFallbackProvider(LLMProvider): + """Fallback provider using regex pattern matching (no API calls).""" + + # Patterns indicating task completion + COMPLETION_PATTERNS = [ + r"(?:completed?|finished|done|implemented|fixed|resolved)\s+(?:the\s+)?(.+?)(?:\.|$)", + r"✓\s+(.+?)(?:\.|$)", + r"\[x\]\s+(.+?)(?:\.|$)", + r"successfully\s+(?:completed?|implemented|fixed)\s+(.+?)(?:\.|$)", + ] + + # Patterns indicating work in progress + PROGRESS_PATTERNS = [ + r"(?:working on|started|beginning|implementing)\s+(.+?)(?:\.|$)", + r"(?:in progress|ongoing):\s*(.+?)(?:\.|$)", + ] + + # Patterns indicating blockers + BLOCKER_PATTERNS = [ + r"(?:blocked|stuck|cannot|failed|error)\s+(?:on\s+)?(.+?)(?:\.|$)", + r"(?:issue|problem|bug)\s+(?:with\s+)?(.+?)(?:\.|$)", + ] + + @property + def name(self) -> str: + 
return "regex-fallback" + + def is_available(self) -> bool: + return True # Always available + + def analyze_completion( + self, + session_output: str, + tasks: list[str], + context: str | None = None, + ) -> CompletionAnalysis: + + output_lower = session_output.lower() + completed = [] + in_progress = [] + blocked = [] + + for task in tasks: + task_lower = task.lower() + # Simple keyword matching + task_words = set(task_lower.split()) + + # Check for completion signals + is_completed = any( + word in output_lower + and any( + p in output_lower + for p in ["completed", "finished", "done", "fixed", "✓", "[x]"] + ) + for word in task_words + if len(word) > 3 + ) + + # Check for progress signals + is_in_progress = any( + word in output_lower + and any( + p in output_lower + for p in ["working on", "started", "implementing", "in progress"] + ) + for word in task_words + if len(word) > 3 + ) + + # Check for blocker signals + is_blocked = any( + word in output_lower + and any( + p in output_lower for p in ["blocked", "stuck", "failed", "error", "cannot"] + ) + for word in task_words + if len(word) > 3 + ) + + if is_completed: + completed.append(task) + elif is_blocked: + blocked.append(task) + elif is_in_progress: + in_progress.append(task) + + return CompletionAnalysis( + completed_tasks=completed, + in_progress_tasks=in_progress, + blocked_tasks=blocked, + confidence=0.3, # Low confidence for regex + reasoning="Pattern-based analysis (no LLM available)", + provider_used=self.name, + ) + + +class FallbackChainProvider(LLMProvider): + """Provider that tries multiple providers in sequence.""" + + def __init__(self, providers: list[LLMProvider]): + self._providers = providers + self._active_provider: LLMProvider | None = None + + @property + def name(self) -> str: + if self._active_provider: + return f"fallback-chain({self._active_provider.name})" + return "fallback-chain" + + def is_available(self) -> bool: + return any(p.is_available() for p in self._providers) + + def 
analyze_completion( + self, + session_output: str, + tasks: list[str], + context: str | None = None, + ) -> CompletionAnalysis: + last_error = None + + for provider in self._providers: + if not provider.is_available(): + logger.debug(f"Provider {provider.name} not available, skipping") + continue + + try: + logger.info(f"Attempting analysis with {provider.name}") + self._active_provider = provider + result = provider.analyze_completion(session_output, tasks, context) + logger.info(f"Successfully analyzed with {provider.name}") + return result + except Exception as e: + logger.warning(f"Provider {provider.name} failed: {e}") + last_error = e + continue + + if last_error: + raise RuntimeError(f"All providers failed. Last error: {last_error}") + raise RuntimeError("No providers available") + + +def get_llm_provider(force_provider: str | None = None) -> LLMProvider: + """ + Get the best available LLM provider with fallback chain. + + Args: + force_provider: If set, use only this provider (for testing). + Options: "github-models", "openai", "regex-fallback" + + Returns a FallbackChainProvider that tries: + 1. GitHub Models API (if GITHUB_TOKEN set) + 2. OpenAI API (if OPENAI_API_KEY set) + 3. Regex fallback (always available) + """ + # Force a specific provider for testing + if force_provider: + provider_map = { + "github-models": GitHubModelsProvider, + "openai": OpenAIProvider, + "regex-fallback": RegexFallbackProvider, + } + if force_provider not in provider_map: + raise ValueError( + f"Unknown provider: {force_provider}. " f"Options: {list(provider_map.keys())}" + ) + provider = provider_map[force_provider]() + if not provider.is_available(): + raise RuntimeError( + f"Forced provider '{force_provider}' is not available. " + "Check required environment variables." 
+ ) + logger.info(f"Using forced provider: {force_provider}") + return provider + + providers = [ + GitHubModelsProvider(), + OpenAIProvider(), + RegexFallbackProvider(), + ] + + return FallbackChainProvider(providers) + + +def check_providers() -> dict[str, bool]: + """Check which providers are available.""" + return { + "github-models": GitHubModelsProvider().is_available(), + "openai": OpenAIProvider().is_available(), + "regex-fallback": True, + } + + +if __name__ == "__main__": + import sys + + # Quick test - log to stderr + logging.basicConfig(level=logging.INFO, stream=sys.stderr) + + print("Provider availability:") + for name, available in check_providers().items(): + status = "✓" if available else "✗" + print(f" {status} {name}") + + provider = get_llm_provider() + print(f"\nActive provider chain: {provider.name}")