Closed
24 commits
636684c
test: use feature/langchain-analysis branch for testing
stranske Jan 2, 2026
c9268d0
test: use feature/langchain-analysis branch for testing
stranske Jan 2, 2026
cac0871
fix: pass workflows_ref to reusable workflow
stranske Jan 2, 2026
b7b405f
fix: pass workflows_ref to reusable workflow
stranske Jan 2, 2026
595e933
fix: pass workflows_ref to reusable workflow
stranske Jan 2, 2026
bba3b3f
fix: pass workflows_ref to reusable workflow
stranske Jan 2, 2026
8c920f7
chore(codex-keepalive): apply updates (PR #136)
github-actions[bot] Jan 2, 2026
35c3fc5
chore(codex-autofix): apply updates (PR #136)
github-actions[bot] Jan 2, 2026
53cb65d
chore: switch to @main after feature branch merge
stranske Jan 2, 2026
e7f3e90
chore: switch to @main after feature branch merge
stranske Jan 2, 2026
86179fa
chore: switch to @main after feature branch merge
stranske Jan 2, 2026
e8e6112
chore(codex-keepalive): apply updates (PR #136)
github-actions[bot] Jan 2, 2026
640f452
chore(autofix): formatting/lint
github-actions[bot] Jan 2, 2026
70816f0
fix: replace all feature/langchain-analysis refs with main
stranske Jan 2, 2026
d2a9747
fix: replace all feature/langchain-analysis refs with main
stranske Jan 2, 2026
0cb8e72
fix: replace all feature/langchain-analysis refs with main
stranske Jan 2, 2026
ffe348c
fix: replace all feature/langchain-analysis refs with main
stranske Jan 2, 2026
ddd84c5
Merge main to get synced workflow templates
stranske Jan 2, 2026
e804c0b
fix: add models:read permission for GitHub Models API
stranske Jan 2, 2026
496de16
Merge main to get models:read permission
stranske Jan 2, 2026
d62f567
feat: document LLM provider support
Jan 2, 2026
6fdfb9e
chore(codex-keepalive): apply updates (PR #136)
github-actions[bot] Jan 2, 2026
642cd65
test: trigger Gate for LLM analysis validation
stranske Jan 3, 2026
ba01b46
test: trigger Gate for LLM analysis validation
stranske Jan 3, 2026
2 changes: 2 additions & 0 deletions .project_modules.txt
Original file line number Diff line number Diff line change
@@ -4,3 +4,5 @@
# See: https://github.com/stranske/Workflows/pull/441
diff_holdings
embeddings
# root-level module imported by tests
langchain_analysis
2 changes: 1 addition & 1 deletion .workflows-lib
Submodule .workflows-lib updated from 48de64 to 79381d
12 changes: 12 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,12 @@
# Changelog

<!-- Follow Keep a Changelog; keep entries short and action-oriented. -->

All notable changes to this project will be documented in this file.

## [Unreleased]
- Document supported LLM providers and provider resolution order.

<!-- Commit-message checklist:
- [ ] docs: update CHANGELOG.md for LLM provider documentation
-->
2 changes: 2 additions & 0 deletions README.md
@@ -3,3 +3,5 @@
This repository contains the Manager-Database application.

For setup and usage instructions, see `README_bootstrap.md`.
<!-- [ ] Performance should be acceptable under LLM analysis test Sat Jan 3 00:21:14 UTC 2026 -->
<!-- [ ] Performance should be acceptable under LLM analysis test Sat Jan 3 00:21:17 UTC 2026 -->
2 changes: 1 addition & 1 deletion autofix_report_enriched.json
@@ -1 +1 @@
{"changed": true, "classification": {"total": 0, "new": 0, "allowed": 0}, "timestamp": "2026-01-01T08:32:52Z", "files": ["scripts/sync_test_dependencies.py"]}
{"changed": true, "classification": {"total": 0, "new": 0, "allowed": 0}, "timestamp": "2026-01-02T22:46:41Z", "files": ["langchain_analysis.py", "tests/test_langchain_analysis.py"]}
1 change: 1 addition & 0 deletions codex-analysis-136.json
@@ -0,0 +1 @@
{"provider": "github-models", "confidence": 0.9, "completed_tasks": [], "in_progress_tasks": [], "blocked_tasks": [], "reasoning": "The session output does not provide any evidence of modifications to the files related to the tasks specified. Since there are no details indicating that the README.md or CHANGELOG.md files were created or modified, I cannot conclude that any tasks have been completed or are in progress. Therefore, all tasks are marked as not started.", "data_source": "jsonl", "input_length": 36681, "analysis_text_length": 94, "session": {"event_count": 63, "message_count": 0, "command_count": 19, "file_change_count": 1, "todo_count": 0}}
63 changes: 63 additions & 0 deletions codex-session-136.jsonl

Large diffs are not rendered by default.

26 changes: 26 additions & 0 deletions docs/LLM_PROVIDER_CHAIN.md
@@ -0,0 +1,26 @@
# LLM provider chain

This repository resolves the LLM provider in a simple, ordered chain:

1. GitHub Models
- Selected when `GITHUB_MODELS_ENDPOINT` or `GITHUB_MODELS_TOKEN` is set.
- This is the first choice because it can proxy multiple model backends.
2. OpenAI
- Selected when `OPENAI_API_KEY` or `OPENAI_BASE_URL` is set and GitHub
Models is not configured.
3. Regex fallback
- If neither provider is configured, the provider is inferred by regex
matching on the model identifier (see `langchain_analysis.detect_llm_provider`).
- This is a best-effort heuristic used for logs and UI labels.

## Preferred provider override

Set `PREFERRED_LLM_PROVIDER` to force a specific provider name regardless of
chain position. This is helpful when the model identifier is ambiguous or when
running in a mixed environment.

Examples:

- `PREFERRED_LLM_PROVIDER=openai`
- `PREFERRED_LLM_PROVIDER=github models`
- `PREFERRED_LLM_PROVIDER=azure openai`
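The chain documented above can be sketched as one small standalone function. This is a simplified stand-in for `langchain_analysis.resolve_llm_provider`, with the regex fallback reduced to a single substring check purely for illustration:

```python
import os


def resolve_provider(model_id: str) -> str:
    """Minimal sketch of the documented resolution chain."""
    # 0. Explicit override always wins.
    preferred = os.getenv("PREFERRED_LLM_PROVIDER", "").strip()
    if preferred:
        return preferred
    # 1. GitHub Models: selected when its endpoint or token is configured.
    if os.getenv("GITHUB_MODELS_ENDPOINT") or os.getenv("GITHUB_MODELS_TOKEN"):
        return "github models"
    # 2. OpenAI: selected when a direct OpenAI configuration is present.
    if os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_BASE_URL"):
        return "openai"
    # 3. Best-effort fallback on the model identifier (the real code
    #    uses a fuller set of regex-style checks).
    return "anthropic" if "claude" in model_id.lower() else "unknown"
```

Note that step 0 never validates the override against the identifier, which is exactly what makes it useful for ambiguous model names in mixed environments.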
33 changes: 33 additions & 0 deletions docs/README.md
@@ -0,0 +1,33 @@
# Documentation index

<!-- Keep provider details aligned with langchain_analysis.py and docs/LLM_PROVIDER_CHAIN.md. -->

## Supported LLM providers

The LLM analysis layer recognizes the providers below. These names are the
canonical labels surfaced in logs and the UI.

- GitHub Models
- OpenAI
- Azure OpenAI
- Anthropic
- Cohere
- Mistral
- Google (Gemini/PaLM)
- Amazon Bedrock
- Hugging Face
- Unknown (fallback when no match is detected)

## How the provider is resolved

1. `PREFERRED_LLM_PROVIDER` overrides everything when set.
2. GitHub Models is selected if `GITHUB_MODELS_ENDPOINT` or
`GITHUB_MODELS_TOKEN` is configured.
3. OpenAI is selected if `OPENAI_API_KEY` or `OPENAI_BASE_URL` is configured.
4. If no configuration is present, the provider is inferred by regex matching
on the model identifier (see `docs/LLM_PROVIDER_CHAIN.md`).

<!-- Commit-message checklist:
- [ ] docs: add docs/README.md for supported LLM providers
- [ ] docs: keep provider resolution notes aligned with code
-->
70 changes: 70 additions & 0 deletions langchain_analysis.py
@@ -0,0 +1,70 @@
import os

from utils import format_provider_name

PREFERRED_LLM_PROVIDER_ENV = "PREFERRED_LLM_PROVIDER"
GITHUB_MODELS_ENDPOINT_ENV = "GITHUB_MODELS_ENDPOINT"
GITHUB_MODELS_TOKEN_ENV = "GITHUB_MODELS_TOKEN"
OPENAI_API_KEY_ENV = "OPENAI_API_KEY"
OPENAI_BASE_URL_ENV = "OPENAI_BASE_URL"

SUPPORTED_LLM_PROVIDERS = (
"github models",
"openai",
"azure openai",
"anthropic",
"cohere",
"mistral",
"google",
"bedrock",
"huggingface",
"unknown",
)


def list_supported_llm_providers() -> tuple[str, ...]:
"""Return known providers in display-ready form."""
# Keep order aligned with docs/README.md for consistent presentation.
return tuple(format_provider_name(provider) for provider in SUPPORTED_LLM_PROVIDERS)


def detect_llm_provider(model_identifier: str) -> str:
"""Infer the LLM provider based on a LangChain model identifier."""
if not model_identifier:
return format_provider_name("")

identifier = model_identifier.strip().lower()
# Order matters to avoid Azure OpenAI being classified as generic OpenAI.
if "azure" in identifier:
return format_provider_name("azure openai")
if any(token in identifier for token in ("openai", "gpt-", "text-", "o1-", "o3-")):
return format_provider_name("openai")
if "anthropic" in identifier or "claude" in identifier:
return format_provider_name("anthropic")
if "cohere" in identifier or "command" in identifier:
return format_provider_name("cohere")
if "mistral" in identifier or "mixtral" in identifier:
return format_provider_name("mistral")
if "gemini" in identifier or "palm" in identifier or "google" in identifier:
return format_provider_name("google")
if "bedrock" in identifier or "titan" in identifier or "aws" in identifier:
return format_provider_name("bedrock")
if "huggingface" in identifier or identifier.startswith("hf/"):
return format_provider_name("huggingface")

return format_provider_name("unknown")


def resolve_llm_provider(model_identifier: str) -> str:
"""Resolve the provider, honoring an explicit preference when configured."""
preferred = os.getenv(PREFERRED_LLM_PROVIDER_ENV, "").strip()
if preferred:
# Allow explicit preferences (ex: "openai") to override detection.
return format_provider_name(preferred)
if os.getenv(GITHUB_MODELS_ENDPOINT_ENV) or os.getenv(GITHUB_MODELS_TOKEN_ENV):
# Prefer GitHub Models when its endpoint or token is configured.
return format_provider_name("github models")
if os.getenv(OPENAI_API_KEY_ENV) or os.getenv(OPENAI_BASE_URL_ENV):
# Use OpenAI when a direct OpenAI configuration is present.
return format_provider_name("openai")
return detect_llm_provider(model_identifier)
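A condensed, self-contained sketch of the detection order above. The `format_provider_name` here is a simplified stand-in for the full version in `utils.py` (added later in this PR), and only a few of the provider branches are shown:

```python
def format_provider_name(provider: str) -> str:
    # Simplified stand-in for utils.format_provider_name.
    overrides = {"openai": "OpenAI", "azure openai": "Azure OpenAI",
                 "huggingface": "Hugging Face"}
    key = provider.strip().lower()
    return overrides.get(key, key.title() or "Unknown")


def detect(model_id: str) -> str:
    ident = model_id.strip().lower()
    # Check Azure first so "azure-openai:*" is not classified as plain OpenAI.
    if "azure" in ident:
        return format_provider_name("azure openai")
    if any(token in ident for token in ("openai", "gpt-")):
        return format_provider_name("openai")
    if "claude" in ident:
        return format_provider_name("anthropic")
    return format_provider_name("unknown")


print(detect("gpt-4o"))             # OpenAI
print(detect("azure-openai:gpt-4")) # Azure OpenAI
print(detect("mystery-model"))      # Unknown
```

The ordering comment in the real module is the key design point: substring checks overlap ("azure-openai" contains both "azure" and "openai"), so the more specific match must come first.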
14 changes: 7 additions & 7 deletions manager_database.egg-info/PKG-INFO
@@ -8,7 +8,7 @@ Requires-Python: >=3.11
Description-Content-Type: text/markdown
Requires-Dist: httpx
Requires-Dist: prefect
Requires-Dist: jsonschema<4.23.0,>=4.17.3
Requires-Dist: jsonschema<4.26.0,>=4.17.3
Requires-Dist: beautifulsoup4
Requires-Dist: boto3
Requires-Dist: psycopg[binary]
@@ -19,14 +19,14 @@ Requires-Dist: altair
Requires-Dist: fastapi
Requires-Dist: uvicorn
Provides-Extra: dev
Requires-Dist: pytest; extra == "dev"
Requires-Dist: pytest>=9.0.2; extra == "dev"
Requires-Dist: pytest-asyncio; extra == "dev"
Requires-Dist: pytest-cov; extra == "dev"
Requires-Dist: black; extra == "dev"
Requires-Dist: ruff; extra == "dev"
Requires-Dist: pytest-cov>=7.0.0; extra == "dev"
Requires-Dist: black>=25.12.0; extra == "dev"
Requires-Dist: ruff>=0.14.10; extra == "dev"
Requires-Dist: pre-commit; extra == "dev"
Requires-Dist: mypy; extra == "dev"
Requires-Dist: coverage; extra == "dev"
Requires-Dist: mypy>=1.19.1; extra == "dev"
Requires-Dist: coverage>=7.13.1; extra == "dev"
Requires-Dist: referencing; extra == "dev"

# Manager-Database
1 change: 1 addition & 0 deletions manager_database.egg-info/SOURCES.txt
@@ -32,6 +32,7 @@ tests/test_edgar_additional.py
tests/test_edgar_flow.py
tests/test_embeddings.py
tests/test_etl_flows_additional.py
tests/test_langchain_analysis.py
tests/test_manager_api.py
tests/test_open_issues.py
tests/test_placeholder.py
14 changes: 7 additions & 7 deletions manager_database.egg-info/requires.txt
@@ -1,6 +1,6 @@
httpx
prefect
jsonschema<4.23.0,>=4.17.3
jsonschema<4.26.0,>=4.17.3
beautifulsoup4
boto3
psycopg[binary]
@@ -12,12 +12,12 @@ fastapi
uvicorn

[dev]
pytest
pytest>=9.0.2
pytest-asyncio
pytest-cov
black
ruff
pytest-cov>=7.0.0
black>=25.12.0
ruff>=0.14.10
pre-commit
mypy
coverage
mypy>=1.19.1
coverage>=7.13.1
referencing
7 changes: 7 additions & 0 deletions pr_body.md
@@ -0,0 +1,7 @@
Testing the LangChain-enhanced task completion detection.

## Tasks
- [ ] Create a simple README.md file in a docs/ folder explaining what LLM providers are supported
- [ ] Add a CHANGELOG.md entry for this feature

This PR tests the LLM analysis feature from the Workflows repo.
61 changes: 61 additions & 0 deletions tests/test_langchain_analysis.py
@@ -0,0 +1,61 @@
import sys
from pathlib import Path

import pytest

sys.path.append(str(Path(__file__).resolve().parents[1]))

from langchain_analysis import (
GITHUB_MODELS_ENDPOINT_ENV,
GITHUB_MODELS_TOKEN_ENV,
OPENAI_API_KEY_ENV,
PREFERRED_LLM_PROVIDER_ENV,
detect_llm_provider,
resolve_llm_provider,
)


@pytest.mark.parametrize(
("identifier", "expected"),
[
("gpt-4o", "OpenAI"),
("azure-openai:gpt-4", "Azure OpenAI"),
("claude-3-opus", "Anthropic"),
("command-r", "Cohere"),
("gemini-1.5-pro", "Google"),
("mixtral-8x7b", "Mistral"),
("hf/tiiuae/falcon-7b", "Hugging Face"),
("mystery-model", "Unknown"),
],
)
def test_detect_llm_provider(identifier, expected):
# Exercise common LangChain model identifiers across providers.
assert detect_llm_provider(identifier) == expected


def test_resolve_llm_provider_prefers_env(monkeypatch):
# Explicit preference should override identifier-based detection.
monkeypatch.setenv(PREFERRED_LLM_PROVIDER_ENV, "openai")
assert resolve_llm_provider("claude-3-opus") == "OpenAI"


def test_resolve_llm_provider_falls_back_to_detection(monkeypatch):
# Without a preference, resolve uses the identifier heuristic.
monkeypatch.delenv(PREFERRED_LLM_PROVIDER_ENV, raising=False)
assert resolve_llm_provider("claude-3-opus") == "Anthropic"


def test_resolve_llm_provider_prefers_github_models(monkeypatch):
# GitHub Models configuration should win when no explicit preference exists.
monkeypatch.delenv(PREFERRED_LLM_PROVIDER_ENV, raising=False)
monkeypatch.setenv(GITHUB_MODELS_ENDPOINT_ENV, "https://models.github.example")
assert resolve_llm_provider("gpt-4o") == "GitHub Models"


def test_resolve_llm_provider_falls_back_to_openai(monkeypatch):
# OpenAI is used when GitHub Models is not configured.
monkeypatch.delenv(PREFERRED_LLM_PROVIDER_ENV, raising=False)
monkeypatch.delenv(GITHUB_MODELS_ENDPOINT_ENV, raising=False)
monkeypatch.delenv(GITHUB_MODELS_TOKEN_ENV, raising=False)
monkeypatch.setenv(OPENAI_API_KEY_ENV, "test-key")
assert resolve_llm_provider("gpt-4o") == "OpenAI"
24 changes: 24 additions & 0 deletions utils.py
@@ -0,0 +1,24 @@
def format_provider_name(provider: str) -> str:
"""Format a provider identifier into a user-facing name."""
if not provider or not provider.strip():
return "Unknown"

normalized = provider.strip().lower().replace("_", " ").replace("-", " ")
normalized = " ".join(normalized.split())
overrides = {
"openai": "OpenAI",
"azure openai": "Azure OpenAI",
"anthropic": "Anthropic",
"cohere": "Cohere",
"google": "Google",
"mistral": "Mistral",
"bedrock": "Bedrock",
"huggingface": "Hugging Face",
# Use brand capitalization for GitHub Models.
"github models": "GitHub Models",
}
# Preserve canonical branding for known providers.
if normalized in overrides:
return overrides[normalized]

return normalized.title()
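A runnable condensed copy of the function above, showing the normalization path (underscores and hyphens collapse to spaces before the override lookup) and the `.title()` fallback for names outside the override table:

```python
def format_provider_name(provider: str) -> str:
    # Condensed copy of utils.format_provider_name for demonstration.
    if not provider or not provider.strip():
        return "Unknown"
    normalized = provider.strip().lower().replace("_", " ").replace("-", " ")
    normalized = " ".join(normalized.split())
    overrides = {"openai": "OpenAI", "azure openai": "Azure OpenAI",
                 "huggingface": "Hugging Face",
                 "github models": "GitHub Models"}
    return overrides.get(normalized, normalized.title())


print(format_provider_name("GITHUB_MODELS"))  # GitHub Models
print(format_provider_name("   "))            # Unknown
print(format_provider_name("vertex-ai"))      # Vertex Ai
```

The last example shows the fallback's limitation: unknown multi-word names get naive title-casing ("Vertex Ai"), which is why brand names like "GitHub Models" need explicit override entries.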