diff --git a/.github/scripts/agents-guard.js b/.github/scripts/agents-guard.js index 0fc2971e..4bc11a9a 100644 --- a/.github/scripts/agents-guard.js +++ b/.github/scripts/agents-guard.js @@ -447,7 +447,9 @@ function evaluateGuard({ const hasCodeownerApproval = hasExternalApproval || authorIsCodeowner; const hasProtectedChanges = modifiedProtectedPaths.size > 0; - // Allow label to bypass approval for automated PRs (dependabot, renovate) + // Security note: Allow `agents:allow-change` label to bypass CODEOWNER approval + // ONLY for automated dependency PRs from known bots (dependabot, renovate). + // Human PRs or other bot PRs still require CODEOWNER approval even with label. const isAutomatedPR = normalizedAuthor && (normalizedAuthor === 'dependabot[bot]' || normalizedAuthor === 'renovate[bot]'); const needsApproval = hasProtectedChanges && !hasCodeownerApproval && !(hasAllowLabel && isAutomatedPR); const needsLabel = hasProtectedChanges && !hasAllowLabel && !hasCodeownerApproval; diff --git a/.github/scripts/keepalive_loop.js b/.github/scripts/keepalive_loop.js index cdce7012..10cef603 100644 --- a/.github/scripts/keepalive_loop.js +++ b/.github/scripts/keepalive_loop.js @@ -1088,8 +1088,8 @@ async function evaluateKeepaliveLoop({ github, context, core, payload: overrideP let action = 'wait'; let reason = 'pending'; - const verificationStatus = normalise(state?.verification?.status); - const verificationDone = ['done', 'verified', 'complete'].includes(verificationStatus.toLowerCase()); + const verificationStatus = normalise(state?.verification?.status)?.toLowerCase(); + const verificationDone = ['done', 'verified', 'complete'].includes(verificationStatus); const needsVerification = allComplete && !verificationDone; if (!hasAgentLabel) { diff --git a/.github/scripts/keepalive_prompt_routing.js b/.github/scripts/keepalive_prompt_routing.js index 8b598433..b6d9c1ba 100644 --- a/.github/scripts/keepalive_prompt_routing.js +++ 
b/.github/scripts/keepalive_prompt_routing.js @@ -31,7 +31,7 @@ const FEATURE_SCENARIOS = new Set([ 'nexttask', ]); -const FIX_MODES = new Set(['fix', 'fix-ci', 'fix_ci', 'ci', 'ci-failure']); +const FIX_MODES = new Set(['fix', 'fix-ci', 'fix_ci', 'ci', 'ci-failure', 'ci_failure', 'fix-ci-failure']); const VERIFY_MODES = new Set(['verify', 'verification', 'verify-acceptance', 'acceptance']); function resolvePromptMode({ scenario, mode, action, reason } = {}) { diff --git a/docs/LABELS.md b/docs/LABELS.md index bd503fbd..87df350a 100644 --- a/docs/LABELS.md +++ b/docs/LABELS.md @@ -200,5 +200,5 @@ To add new label-triggered functionality: --- -*Last updated: December 25, 2025* +*Last updated: January 5, 2026* *Source of truth: Workflows repository* diff --git a/scripts/langchain/issue_formatter.py b/scripts/langchain/issue_formatter.py new file mode 100755 index 00000000..ed6256f4 --- /dev/null +++ b/scripts/langchain/issue_formatter.py @@ -0,0 +1,366 @@ +#!/usr/bin/env python3 +""" +Format raw issue text into the AGENT_ISSUE_TEMPLATE structure. + +Run with: + python scripts/langchain/issue_formatter.py \ + --input-file issue.md --output-file formatted.md +""" + +from __future__ import annotations + +import argparse +import json +import os +import re +import sys +from pathlib import Path +from typing import Any + +ISSUE_FORMATTER_PROMPT = """ +You are a formatting assistant. Convert the raw GitHub issue body into the +AGENT_ISSUE_TEMPLATE format with the exact section headers in order: + +## Why +## Scope +## Non-Goals +## Tasks +## Acceptance Criteria +## Implementation Notes + +Rules: +- Use bullet points ONLY in Tasks and Acceptance Criteria. +- Every task/criterion must be specific, verifiable, and sized for ~10 minutes. +- Use unchecked checkboxes: "- [ ]". +- Preserve file paths and concrete details when mentioned. +- If a section lacks content, use "_Not provided._" (or "- [ ] _Not provided._" + for Tasks/Acceptance). 
+- Output ONLY the formatted markdown with these sections (no extra commentary). + +Raw issue body: +{issue_body} +""".strip() + +PROMPT_PATH = Path(__file__).resolve().parent / "prompts" / "format_issue.md" +FEEDBACK_PROMPT_PATH = Path(__file__).resolve().parent / "prompts" / "format_issue_feedback.md" + +SECTION_ALIASES = { + "why": ["why", "motivation", "summary", "goals"], + "scope": ["scope", "background", "context", "overview"], + "non_goals": ["non-goals", "nongoals", "out of scope", "constraints", "exclusions"], + "tasks": ["tasks", "task list", "tasklist", "todo", "to do", "implementation"], + "acceptance": [ + "acceptance criteria", + "acceptance", + "definition of done", + "done criteria", + "success criteria", + ], + "implementation": [ + "implementation notes", + "implementation note", + "notes", + "details", + "technical notes", + ], +} + +SECTION_TITLES = { + "why": "Why", + "scope": "Scope", + "non_goals": "Non-Goals", + "tasks": "Tasks", + "acceptance": "Acceptance Criteria", + "implementation": "Implementation Notes", +} + +LIST_ITEM_REGEX = re.compile(r"^(\s*)([-*+]|\d+[.)])\s+(.*)$") +CHECKBOX_REGEX = re.compile(r"^\[([ xX])\]\s*(.*)$") + + +def _load_prompt() -> str: + if PROMPT_PATH.is_file(): + base_prompt = PROMPT_PATH.read_text(encoding="utf-8").strip() + else: + base_prompt = ISSUE_FORMATTER_PROMPT + + if FEEDBACK_PROMPT_PATH.is_file(): + feedback = FEEDBACK_PROMPT_PATH.read_text(encoding="utf-8").strip() + if feedback: + return f"{base_prompt}\n\n{feedback}\n" + return base_prompt + + +def _get_llm_client() -> tuple[object, str] | None: + try: + from langchain_openai import ChatOpenAI + except ImportError: + return None + + github_token = os.environ.get("GITHUB_TOKEN") + openai_token = os.environ.get("OPENAI_API_KEY") + if not github_token and not openai_token: + return None + + from tools.llm_provider import DEFAULT_MODEL, GITHUB_MODELS_BASE_URL + + if github_token: + return ( + ChatOpenAI( + model=DEFAULT_MODEL, + 
base_url=GITHUB_MODELS_BASE_URL, + api_key=github_token, + temperature=0.1, + ), + "github-models", + ) + return ( + ChatOpenAI( + model=DEFAULT_MODEL, + api_key=openai_token, + temperature=0.1, + ), + "openai", + ) + + +def _normalize_heading(text: str) -> str: + cleaned = re.sub(r"[#*_:]+", " ", text).strip().lower() + cleaned = re.sub(r"\s+", " ", cleaned) + return cleaned + + +def _resolve_section(label: str) -> str | None: + normalized = _normalize_heading(label) + for key, aliases in SECTION_ALIASES.items(): + for alias in aliases: + if normalized == _normalize_heading(alias): + return key + return None + + +def _strip_list_marker(line: str) -> str: + match = LIST_ITEM_REGEX.match(line) + if not match: + return line + return match.group(3).strip() + + +def _normalize_non_action_lines(lines: list[str]) -> list[str]: + cleaned: list[str] = [] + in_fence = False + for raw in lines: + stripped = raw.strip() + if stripped.startswith("```"): + in_fence = not in_fence + cleaned.append(raw) + continue + if in_fence: + cleaned.append(raw) + continue + if not stripped: + cleaned.append("") + continue + cleaned.append(_strip_list_marker(raw)) + return cleaned + + +def _normalize_checklist_lines(lines: list[str]) -> list[str]: + cleaned: list[str] = [] + in_fence = False + for raw in lines: + stripped = raw.strip() + if stripped.startswith("```"): + in_fence = not in_fence + cleaned.append(raw) + continue + if in_fence: + cleaned.append(raw) + continue + if not stripped: + continue + match = LIST_ITEM_REGEX.match(raw) + if match: + indent, _, remainder = match.groups() + checkbox = CHECKBOX_REGEX.match(remainder.strip()) + if checkbox: + mark = "x" if checkbox.group(1).lower() == "x" else " " + text = checkbox.group(2).strip() + if text: + cleaned.append(f"{indent}- [{mark}] {text}") + continue + cleaned.append(f"{indent}- [ ] {remainder.strip()}") + else: + cleaned.append(f"- [ ] {stripped}") + return cleaned + + +def _parse_sections(body: str) -> tuple[dict[str, 
list[str]], list[str]]: + sections: dict[str, list[str]] = {key: [] for key in SECTION_TITLES} + preamble: list[str] = [] + current: str | None = None + for line in body.splitlines(): + heading_match = re.match(r"^\s*#{1,6}\s+(.*)$", line) + if heading_match: + section_key = _resolve_section(heading_match.group(1)) + if section_key: + current = section_key + continue + if re.match(r"^\s*(?:\*\*|__)(.+?)(?:\*\*|__)\s*:?\s*$", line): + inner = re.sub(r"^\s*(?:\*\*|__)(.+?)(?:\*\*|__)\s*:?\s*$", r"\1", line) + section_key = _resolve_section(inner) + if section_key: + current = section_key + continue + if re.match(r"^\s*[A-Za-z][A-Za-z0-9\s-]{2,}:\s*$", line): + label = line.split(":", 1)[0] + section_key = _resolve_section(label) + if section_key: + current = section_key + continue + if current: + sections[current].append(line) + else: + preamble.append(line) + return sections, preamble + + +def _format_issue_fallback(issue_body: str) -> str: + body = issue_body.strip() + sections, preamble = _parse_sections(body) + + if preamble and not sections["scope"]: + sections["scope"] = preamble + + why_lines = _normalize_non_action_lines(sections["why"]) + scope_lines = _normalize_non_action_lines(sections["scope"]) + non_goals_lines = _normalize_non_action_lines(sections["non_goals"]) + impl_lines = _normalize_non_action_lines(sections["implementation"]) + + tasks_lines = _normalize_checklist_lines(sections["tasks"]) + acceptance_lines = _normalize_checklist_lines(sections["acceptance"]) + + def join_or_placeholder(lines: list[str], placeholder: str) -> str: + content = "\n".join(line for line in lines).strip() + return content if content else placeholder + + why_text = join_or_placeholder(why_lines, "_Not provided._") + scope_text = join_or_placeholder(scope_lines, "_Not provided._") + non_goals_text = join_or_placeholder(non_goals_lines, "_Not provided._") + impl_text = join_or_placeholder(impl_lines, "_Not provided._") + tasks_text = join_or_placeholder(tasks_lines, "- [ 
] _Not provided._") + acceptance_text = join_or_placeholder(acceptance_lines, "- [ ] _Not provided._") + + parts = [ + "## Why", + "", + why_text, + "", + "## Scope", + "", + scope_text, + "", + "## Non-Goals", + "", + non_goals_text, + "", + "## Tasks", + "", + tasks_text, + "", + "## Acceptance Criteria", + "", + acceptance_text, + "", + "## Implementation Notes", + "", + impl_text, + ] + return "\n".join(parts).strip() + + +def _formatted_output_valid(text: str) -> bool: + if not text: + return False + required = ["## Tasks", "## Acceptance Criteria"] + return all(section in text for section in required) + + +def format_issue_body(issue_body: str, *, use_llm: bool = True) -> dict[str, Any]: + if not issue_body: + issue_body = "" + + if use_llm: + client_info = _get_llm_client() + if client_info: + client, provider = client_info + try: + from langchain_core.prompts import ChatPromptTemplate + except ImportError: + pass + else: + prompt = _load_prompt() + template = ChatPromptTemplate.from_template(prompt) + chain = template | client + response = chain.invoke({"issue_body": issue_body}) + content = getattr(response, "content", None) or str(response) + formatted = content.strip() + if _formatted_output_valid(formatted): + return { + "formatted_body": formatted, + "provider_used": provider, + "used_llm": True, + } + + formatted = _format_issue_fallback(issue_body) + return { + "formatted_body": formatted, + "provider_used": None, + "used_llm": False, + } + + +def build_label_transition() -> dict[str, list[str]]: + return { + "add": ["agents:formatted"], + "remove": ["agents:format"], + } + + +def _load_input(args: argparse.Namespace) -> str: + if args.input_file: + return Path(args.input_file).read_text(encoding="utf-8") + if args.input_text: + return args.input_text + return sys.stdin.read() + + +def main() -> None: + parser = argparse.ArgumentParser(description="Format issues into AGENT_ISSUE_TEMPLATE.") + parser.add_argument("--input-file", help="Path to raw 
issue text.") + parser.add_argument("--input-text", help="Raw issue text (inline).") + parser.add_argument("--output-file", help="Path to write formatted output.") + parser.add_argument("--json", action="store_true", help="Emit JSON payload to stdout.") + parser.add_argument("--no-llm", action="store_true", help="Disable LLM usage.") + args = parser.parse_args() + + raw = _load_input(args) + result = format_issue_body(raw, use_llm=not args.no_llm) + + if args.output_file: + Path(args.output_file).write_text(result["formatted_body"], encoding="utf-8") + + if args.json: + payload = { + "formatted_body": result["formatted_body"], + "provider_used": result.get("provider_used"), + "used_llm": result.get("used_llm", False), + "labels": build_label_transition(), + } + print(json.dumps(payload, ensure_ascii=True)) + else: + print(result["formatted_body"]) + + +if __name__ == "__main__": + main() diff --git a/scripts/langchain/prompts/format_issue.md b/scripts/langchain/prompts/format_issue.md new file mode 100644 index 00000000..1610c14b --- /dev/null +++ b/scripts/langchain/prompts/format_issue.md @@ -0,0 +1,20 @@ +You are a formatting assistant. Convert the raw GitHub issue body into the +AGENT_ISSUE_TEMPLATE format with the exact section headers in order: + +## Why +## Scope +## Non-Goals +## Tasks +## Acceptance Criteria +## Implementation Notes + +Rules: +- Use bullet points ONLY in Tasks and Acceptance Criteria. +- Every task/criterion must be specific, verifiable, and sized for ~10 minutes. +- Use unchecked checkboxes: "- [ ]". +- Preserve file paths and concrete details when mentioned. +- If a section lacks content, use "_Not provided._" (or "- [ ] _Not provided._" for Tasks/Acceptance). +- Output ONLY the formatted markdown with these sections (no extra commentary). 
+ +Raw issue body: +{issue_body} diff --git a/tools/llm_provider.py b/tools/llm_provider.py new file mode 100644 index 00000000..1ab626d3 --- /dev/null +++ b/tools/llm_provider.py @@ -0,0 +1,597 @@ +""" +LLM Provider Abstraction with Fallback Chain + +Provides a unified interface for LLM calls with automatic fallback: +1. GitHub Models API (primary) - uses GITHUB_TOKEN +2. OpenAI API (fallback) - uses OPENAI_API_KEY +3. Regex patterns (last resort) - no API calls + +Usage: + from tools.llm_provider import get_llm_provider, LLMProvider + + provider = get_llm_provider() + result = provider.analyze_completion(session_text, tasks) +""" + +from __future__ import annotations + +import json +import logging +import os +from abc import ABC, abstractmethod +from dataclasses import dataclass + +logger = logging.getLogger(__name__) + +# GitHub Models API endpoint (OpenAI-compatible) +GITHUB_MODELS_BASE_URL = "https://models.inference.ai.azure.com" +DEFAULT_MODEL = "gpt-4o-mini" + + +@dataclass +class CompletionAnalysis: + """Result of task completion analysis.""" + + completed_tasks: list[str] # Task descriptions marked complete + in_progress_tasks: list[str] # Tasks currently being worked on + blocked_tasks: list[str] # Tasks that are blocked + confidence: float # 0.0 to 1.0 + reasoning: str # Explanation of the analysis + provider_used: str # Which provider generated this + + # Quality metrics for BS detection + raw_confidence: float | None = None # Original confidence before adjustment + confidence_adjusted: bool = False # Whether confidence was adjusted + quality_warnings: list[str] | None = None # Warnings about analysis quality + + +@dataclass +class SessionQualityContext: + """Context about session quality for validating LLM responses.""" + + has_agent_messages: bool = False + has_work_evidence: bool = False + file_change_count: int = 0 + successful_command_count: int = 0 + estimated_effort_score: int = 0 + data_quality: str = "unknown" # high, medium, low, minimal + 
analysis_text_length: int = 0 + + +class LLMProvider(ABC): + """Abstract base class for LLM providers.""" + + @property + @abstractmethod + def name(self) -> str: + """Provider name for logging.""" + pass + + @abstractmethod + def is_available(self) -> bool: + """Check if this provider can be used.""" + pass + + @abstractmethod + def analyze_completion( + self, + session_output: str, + tasks: list[str], + context: str | None = None, + ) -> CompletionAnalysis: + """ + Analyze session output to determine task completion status. + + Args: + session_output: Codex session output (summary or JSONL events) + tasks: List of task descriptions from PR checkboxes + context: Optional additional context (PR description, etc.) + + Returns: + CompletionAnalysis with task status breakdown + """ + pass + + +class GitHubModelsProvider(LLMProvider): + """LLM provider using GitHub Models API (OpenAI-compatible).""" + + @property + def name(self) -> str: + return "github-models" + + def is_available(self) -> bool: + return bool(os.environ.get("GITHUB_TOKEN")) + + def _get_client(self): + """Get LangChain ChatOpenAI client configured for GitHub Models.""" + try: + from langchain_openai import ChatOpenAI + except ImportError: + logger.warning("langchain_openai not installed") + return None + + return ChatOpenAI( + model=DEFAULT_MODEL, + base_url=GITHUB_MODELS_BASE_URL, + api_key=os.environ.get("GITHUB_TOKEN"), + temperature=0.1, # Low temperature for consistent analysis + ) + + def analyze_completion( + self, + session_output: str, + tasks: list[str], + context: str | None = None, + quality_context: SessionQualityContext | None = None, + ) -> CompletionAnalysis: + client = self._get_client() + if not client: + raise RuntimeError("LangChain OpenAI not available") + + prompt = self._build_analysis_prompt(session_output, tasks, context) + + try: + response = client.invoke(prompt) + return self._parse_response(response.content, tasks, quality_context) + except Exception as e: + 
logger.error(f"GitHub Models API error: {e}") + raise + + def _validate_confidence( + self, + raw_confidence: float, + completed_count: int, + in_progress_count: int, + quality_context: SessionQualityContext | None, + reasoning: str, + ) -> tuple[float, list[str]]: + """ + BS Detector: Validate and potentially adjust LLM confidence. + + This catches cases where the LLM reports high confidence but the + analysis is inconsistent with the session evidence. + + Args: + raw_confidence: The confidence reported by the LLM + completed_count: Number of tasks marked as completed + in_progress_count: Number of tasks marked as in progress + quality_context: Session quality metrics (if available) + reasoning: The LLM's reasoning text + + Returns: + Tuple of (adjusted_confidence, list of warnings) + """ + warnings = [] + confidence = raw_confidence + + # Sanity check: Confidence should be between 0 and 1 + confidence = max(0.0, min(1.0, confidence)) + + if quality_context is None: + # No context available, trust LLM but note it + return confidence, [] + + # BS Detection Rule 1: High confidence + zero work + evidence of work = suspicious + if ( + raw_confidence > 0.7 + and completed_count == 0 + and in_progress_count == 0 + and quality_context.has_work_evidence + ): + warnings.append( + f"High confidence ({raw_confidence:.0%}) but no tasks detected " + f"despite {quality_context.file_change_count} file changes and " + f"{quality_context.successful_command_count} successful commands" + ) + # Reduce confidence significantly - the LLM might have had insufficient data + confidence = min(confidence, 0.3) + logger.warning(f"BS detected: {warnings[-1]}") + + # BS Detection Rule 2: Very short analysis text = likely data loss + if quality_context.analysis_text_length < 200: + warnings.append( + f"Analysis text suspiciously short " + f"({quality_context.analysis_text_length} chars) - " + "possible data loss in pipeline" + ) + # Short text means limited evidence - cap confidence + 
confidence = min(confidence, 0.4) + logger.warning(f"Short analysis text: {quality_context.analysis_text_length} chars") + + # BS Detection Rule 3: Zero tasks + high effort score = something's wrong + if ( + quality_context.estimated_effort_score > 30 + and completed_count == 0 + and in_progress_count == 0 + ): + warnings.append( + f"Effort score ({quality_context.estimated_effort_score}) suggests work was done " + "but no tasks detected" + ) + confidence = min(confidence, 0.4) + + # BS Detection Rule 4: Reasoning mentions "no evidence" but there's evidence + no_evidence_phrases = ["no evidence", "no work", "nothing done", "no specific"] + reasoning_lower = reasoning.lower() + if ( + any(phrase in reasoning_lower for phrase in no_evidence_phrases) + and quality_context.has_work_evidence + ): + warnings.append("LLM claims 'no evidence' but session has file changes/commands") + confidence = min(confidence, 0.35) + + # BS Detection Rule 5: Data quality impacts confidence ceiling + quality_caps = { + "high": 1.0, + "medium": 0.8, + "low": 0.6, + "minimal": 0.4, + } + quality_cap = quality_caps.get(quality_context.data_quality, 0.5) + if confidence > quality_cap: + warnings.append( + f"Confidence capped from {raw_confidence:.0%} to {quality_cap:.0%} " + f"due to {quality_context.data_quality} data quality" + ) + confidence = quality_cap + + return confidence, warnings + + def _build_analysis_prompt( + self, + session_output: str, + tasks: list[str], + context: str | None = None, + ) -> str: + task_list = "\n".join(f"- [ ] {task}" for task in tasks) + + return f"""Analyze this Codex session output and determine which tasks have been completed. 
+
+## Tasks to Track
+{task_list}
+
+## Session Output
+{session_output[:8000]}
+
+## Instructions
+For each task, determine if it was:
+- COMPLETED: Clear evidence the task was finished
+- IN_PROGRESS: Work started but not finished
+- BLOCKED: Cannot proceed due to an issue
+- NOT_STARTED: No evidence of work on this task
+
+IMPORTANT: Base your analysis on CONCRETE EVIDENCE such as:
+- File modifications (files being created/edited)
+- Successful test runs
+- Command outputs showing completed work
+- Direct statements of completion
+
+If the session output is very short or lacks detail, lower your confidence accordingly.
+
+Respond in JSON format:
+{{
+  "completed": ["task description 1", ...],
+  "in_progress": ["task description 2", ...],
+  "blocked": ["task description 3", ...],
+  "confidence": 0.85,
+  "reasoning": "Brief explanation of your analysis with specific evidence cited"
+}}
+
+Only include tasks in completed/in_progress/blocked if you have evidence.
+Be conservative - if unsure, don't mark as completed.""" + + def _parse_response( + self, + content: str, + tasks: list[str], + quality_context: SessionQualityContext | None = None, + ) -> CompletionAnalysis: + """Parse LLM response into CompletionAnalysis with BS detection.""" + try: + # Try to extract JSON from response + json_start = content.find("{") + json_end = content.rfind("}") + 1 + if json_start >= 0 and json_end > json_start: + data = json.loads(content[json_start:json_end]) + else: + raise ValueError("No JSON found in response") + + raw_confidence = float(data.get("confidence", 0.5)) + completed = data.get("completed", []) + in_progress = data.get("in_progress", []) + reasoning = data.get("reasoning", "") + + # Apply BS detection to validate/adjust confidence + adjusted_confidence, warnings = self._validate_confidence( + raw_confidence=raw_confidence, + completed_count=len(completed), + in_progress_count=len(in_progress), + quality_context=quality_context, + reasoning=reasoning, + ) + + return CompletionAnalysis( + completed_tasks=completed, + in_progress_tasks=in_progress, + blocked_tasks=data.get("blocked", []), + confidence=adjusted_confidence, + reasoning=reasoning, + provider_used=self.name, + raw_confidence=raw_confidence if adjusted_confidence != raw_confidence else None, + confidence_adjusted=adjusted_confidence != raw_confidence, + quality_warnings=warnings if warnings else None, + ) + except (json.JSONDecodeError, ValueError) as e: + logger.warning(f"Failed to parse LLM response: {e}") + # Return empty analysis on parse failure + return CompletionAnalysis( + completed_tasks=[], + in_progress_tasks=[], + blocked_tasks=[], + confidence=0.0, + reasoning=f"Failed to parse response: {e}", + provider_used=self.name, + ) + + +class OpenAIProvider(LLMProvider): + """LLM provider using OpenAI API directly.""" + + @property + def name(self) -> str: + return "openai" + + def is_available(self) -> bool: + return bool(os.environ.get("OPENAI_API_KEY")) + + 
def _get_client(self): + """Get LangChain ChatOpenAI client.""" + try: + from langchain_openai import ChatOpenAI + except ImportError: + logger.warning("langchain_openai not installed") + return None + + return ChatOpenAI( + model=DEFAULT_MODEL, + api_key=os.environ.get("OPENAI_API_KEY"), + temperature=0.1, + ) + + def analyze_completion( + self, + session_output: str, + tasks: list[str], + context: str | None = None, + ) -> CompletionAnalysis: + client = self._get_client() + if not client: + raise RuntimeError("LangChain OpenAI not available") + + # Reuse the same prompt building logic + github_provider = GitHubModelsProvider() + prompt = github_provider._build_analysis_prompt(session_output, tasks, context) + + try: + response = client.invoke(prompt) + result = github_provider._parse_response(response.content, tasks) + # Override provider name + return CompletionAnalysis( + completed_tasks=result.completed_tasks, + in_progress_tasks=result.in_progress_tasks, + blocked_tasks=result.blocked_tasks, + confidence=result.confidence, + reasoning=result.reasoning, + provider_used=self.name, + ) + except Exception as e: + logger.error(f"OpenAI API error: {e}") + raise + + +class RegexFallbackProvider(LLMProvider): + """Fallback provider using regex pattern matching (no API calls).""" + + # Patterns indicating task completion + COMPLETION_PATTERNS = [ + r"(?:completed?|finished|done|implemented|fixed|resolved)\s+(?:the\s+)?(.+?)(?:\.|$)", + r"✓\s+(.+?)(?:\.|$)", + r"\[x\]\s+(.+?)(?:\.|$)", + r"successfully\s+(?:completed?|implemented|fixed)\s+(.+?)(?:\.|$)", + ] + + # Patterns indicating work in progress + PROGRESS_PATTERNS = [ + r"(?:working on|started|beginning|implementing)\s+(.+?)(?:\.|$)", + r"(?:in progress|ongoing):\s*(.+?)(?:\.|$)", + ] + + # Patterns indicating blockers + BLOCKER_PATTERNS = [ + r"(?:blocked|stuck|cannot|failed|error)\s+(?:on\s+)?(.+?)(?:\.|$)", + r"(?:issue|problem|bug)\s+(?:with\s+)?(.+?)(?:\.|$)", + ] + + @property + def name(self) -> str: + 
return "regex-fallback" + + def is_available(self) -> bool: + return True # Always available + + def analyze_completion( + self, + session_output: str, + tasks: list[str], + context: str | None = None, + ) -> CompletionAnalysis: + + output_lower = session_output.lower() + completed = [] + in_progress = [] + blocked = [] + + for task in tasks: + task_lower = task.lower() + # Simple keyword matching + task_words = set(task_lower.split()) + + # Check for completion signals + is_completed = any( + word in output_lower + and any( + p in output_lower + for p in ["completed", "finished", "done", "fixed", "✓", "[x]"] + ) + for word in task_words + if len(word) > 3 + ) + + # Check for progress signals + is_in_progress = any( + word in output_lower + and any( + p in output_lower + for p in ["working on", "started", "implementing", "in progress"] + ) + for word in task_words + if len(word) > 3 + ) + + # Check for blocker signals + is_blocked = any( + word in output_lower + and any( + p in output_lower for p in ["blocked", "stuck", "failed", "error", "cannot"] + ) + for word in task_words + if len(word) > 3 + ) + + if is_completed: + completed.append(task) + elif is_blocked: + blocked.append(task) + elif is_in_progress: + in_progress.append(task) + + return CompletionAnalysis( + completed_tasks=completed, + in_progress_tasks=in_progress, + blocked_tasks=blocked, + confidence=0.3, # Low confidence for regex + reasoning="Pattern-based analysis (no LLM available)", + provider_used=self.name, + ) + + +class FallbackChainProvider(LLMProvider): + """Provider that tries multiple providers in sequence.""" + + def __init__(self, providers: list[LLMProvider]): + self._providers = providers + self._active_provider: LLMProvider | None = None + + @property + def name(self) -> str: + if self._active_provider: + return f"fallback-chain({self._active_provider.name})" + return "fallback-chain" + + def is_available(self) -> bool: + return any(p.is_available() for p in self._providers) + + def 
analyze_completion( + self, + session_output: str, + tasks: list[str], + context: str | None = None, + ) -> CompletionAnalysis: + last_error = None + + for provider in self._providers: + if not provider.is_available(): + logger.debug(f"Provider {provider.name} not available, skipping") + continue + + try: + logger.info(f"Attempting analysis with {provider.name}") + self._active_provider = provider + result = provider.analyze_completion(session_output, tasks, context) + logger.info(f"Successfully analyzed with {provider.name}") + return result + except Exception as e: + logger.warning(f"Provider {provider.name} failed: {e}") + last_error = e + continue + + if last_error: + raise RuntimeError(f"All providers failed. Last error: {last_error}") + raise RuntimeError("No providers available") + + +def get_llm_provider(force_provider: str | None = None) -> LLMProvider: + """ + Get the best available LLM provider with fallback chain. + + Args: + force_provider: If set, use only this provider (for testing). + Options: "github-models", "openai", "regex-fallback" + + Returns a FallbackChainProvider that tries: + 1. GitHub Models API (if GITHUB_TOKEN set) + 2. OpenAI API (if OPENAI_API_KEY set) + 3. Regex fallback (always available) + """ + # Force a specific provider for testing + if force_provider: + provider_map = { + "github-models": GitHubModelsProvider, + "openai": OpenAIProvider, + "regex-fallback": RegexFallbackProvider, + } + if force_provider not in provider_map: + raise ValueError( + f"Unknown provider: {force_provider}. " f"Options: {list(provider_map.keys())}" + ) + provider = provider_map[force_provider]() + if not provider.is_available(): + raise RuntimeError( + f"Forced provider '{force_provider}' is not available. " + "Check required environment variables." 
+ ) + logger.info(f"Using forced provider: {force_provider}") + return provider + + providers = [ + GitHubModelsProvider(), + OpenAIProvider(), + RegexFallbackProvider(), + ] + + return FallbackChainProvider(providers) + + +def check_providers() -> dict[str, bool]: + """Check which providers are available.""" + return { + "github-models": GitHubModelsProvider().is_available(), + "openai": OpenAIProvider().is_available(), + "regex-fallback": True, + } + + +if __name__ == "__main__": + import sys + + # Quick test - log to stderr + logging.basicConfig(level=logging.INFO, stream=sys.stderr) + + print("Provider availability:") + for name, available in check_providers().items(): + status = "✓" if available else "✗" + print(f" {status} {name}") + + provider = get_llm_provider() + print(f"\nActive provider chain: {provider.name}")