From 9b2a17f3c03ea1a79056b433f296d1606e7068c8 Mon Sep 17 00:00:00 2001 From: Chillbruhhh Date: Thu, 18 Sep 2025 04:05:17 -0500 Subject: [PATCH 01/28] Add Anthropic and Grok provider support --- .../src/components/settings/RAGSettings.tsx | 7 --- .../src/server/services/credential_service.py | 25 +++++++++++ .../server/services/llm_provider_service.py | 43 +++++++++++++++++++ 3 files changed, 68 insertions(+), 7 deletions(-) diff --git a/archon-ui-main/src/components/settings/RAGSettings.tsx b/archon-ui-main/src/components/settings/RAGSettings.tsx index 83766b6c3a..d54cd51062 100644 --- a/archon-ui-main/src/components/settings/RAGSettings.tsx +++ b/archon-ui-main/src/components/settings/RAGSettings.tsx @@ -842,13 +842,6 @@ export const RAGSettings = ({ ); } })()} - {(provider.key === 'anthropic' || provider.key === 'grok' || provider.key === 'openrouter') && ( -
-                          <div>
-                            <div>
-                              Coming Soon
-                            </div>
-                          </div>
- )} ))} diff --git a/python/src/server/services/credential_service.py b/python/src/server/services/credential_service.py index a57c1abbbd..954c8ab7a2 100644 --- a/python/src/server/services/credential_service.py +++ b/python/src/server/services/credential_service.py @@ -239,6 +239,14 @@ async def set_credential( self._rag_cache_timestamp = None logger.debug(f"Invalidated RAG settings cache due to update of {key}") + # Also invalidate provider service cache to ensure immediate effect + try: + from .llm_provider_service import clear_provider_cache + clear_provider_cache() + logger.debug("Also cleared LLM provider service cache") + except Exception as e: + logger.warning(f"Failed to clear provider service cache: {e}") + # Also invalidate LLM provider service cache for provider config try: from . import llm_provider_service @@ -281,6 +289,14 @@ async def delete_credential(self, key: str) -> bool: self._rag_cache_timestamp = None logger.debug(f"Invalidated RAG settings cache due to deletion of {key}") + # Also invalidate provider service cache to ensure immediate effect + try: + from .llm_provider_service import clear_provider_cache + clear_provider_cache() + logger.debug("Also cleared LLM provider service cache") + except Exception as e: + logger.warning(f"Failed to clear provider service cache: {e}") + # Also invalidate LLM provider service cache for provider config try: from . import llm_provider_service @@ -464,6 +480,9 @@ async def _get_provider_api_key(self, provider: str) -> str | None: key_mapping = { "openai": "OPENAI_API_KEY", "google": "GOOGLE_API_KEY", + "openrouter": "OPENROUTER_API_KEY", + "anthropic": "ANTHROPIC_API_KEY", + "grok": "GROK_API_KEY", "ollama": None, # No API key needed } @@ -478,6 +497,12 @@ def _get_provider_base_url(self, provider: str, rag_settings: dict) -> str | Non return rag_settings.get("LLM_BASE_URL", "http://localhost:11434/v1") elif provider == "google": return "https://generativelanguage.googleapis.com/v1beta/openai/" + elif provider == "openrouter": + return "https://openrouter.ai/api/v1" + elif provider == "anthropic": + return "https://api.anthropic.com/v1" + elif provider == "grok": + return "https://api.x.ai/v1" return None # Use default for OpenAI async def set_active_provider(self, provider: str, service_type: str = "llm") -> bool: diff --git a/python/src/server/services/llm_provider_service.py b/python/src/server/services/llm_provider_service.py index f04f0741ba..ca99b08d63 100644 --- a/python/src/server/services/llm_provider_service.py +++ b/python/src/server/services/llm_provider_service.py @@ -97,6 +97,15 @@ async def get_llm_client(provider: str | None = None, use_embedding_provider: bo # For Ollama, don't use the base_url from config - let _get_optimal_ollama_instance decide base_url = provider_config["base_url"] if provider_name != "ollama" else None + # Validate provider name + allowed_providers = {"openai", "ollama", "google", "openrouter", "anthropic", "grok"} + if provider_name not in allowed_providers: + raise ValueError(f"Unsupported provider: {provider_name}. 
Allowed: {allowed_providers}")
+
+    # Validate API key format for security
+    if api_key and len(api_key.strip()) == 0:
+        api_key = None  # Treat empty strings as None
+
     logger.info(f"Creating LLM client for provider: {provider_name}")
 
     if provider_name == "openai":
@@ -155,6 +164,35 @@ async def get_llm_client(provider: str | None = None, use_embedding_provider: bo
             )
             logger.info("Google Gemini client created successfully")
 
+        elif provider_name == "openrouter":
+            if not api_key:
+                raise ValueError("OpenRouter API key not found")
+
+            client = openai.AsyncOpenAI(
+                api_key=api_key,
+                base_url=base_url or "https://openrouter.ai/api/v1",
+            )
+            logger.info("OpenRouter client created successfully")
+
+        elif provider_name == "anthropic":
+            if not api_key:
+                raise ValueError("Anthropic API key not found")
+
+            client = openai.AsyncOpenAI(
+                api_key=api_key,
+                base_url=base_url or "https://api.anthropic.com/v1",
+            )
+            logger.info("Anthropic client created successfully")
+
+        elif provider_name == "grok":
+            if not api_key:
+                raise ValueError("Grok API key not found")
+
+            client = openai.AsyncOpenAI(
+                api_key=api_key,
+                base_url=base_url or "https://api.x.ai/v1",
+            )
+            logger.info("Grok client created successfully")
         else:
             raise ValueError(f"Unsupported LLM provider: {provider_name}")
 
@@ -250,6 +288,11 @@ async def get_embedding_model(provider: str | None = None) -> str:
         provider_name = provider_config["provider"]
         custom_model = provider_config["embedding_model"]
 
+        # Validate provider name
+        allowed_providers = {"openai", "ollama", "google", "openrouter", "anthropic", "grok"}
+        if provider_name not in allowed_providers:
+            logger.warning(f"Unknown embedding provider: {provider_name}, falling back to OpenAI")
+            provider_name = "openai"
         # Use custom model if specified
         if custom_model:
             return custom_model

From 791ecd1e4729f20a96d11cf1579e95d54f6fa701 Mon Sep 17 00:00:00 2001
From: Chillbruhhh
Date: Fri, 19 Sep 2025 04:53:49 -0500
Subject: [PATCH 02/28] feat: Add crucial GPT-5 and reasoning model support for
 OpenRouter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add requires_max_completion_tokens() function for GPT-5, o1, o3, Grok-3 series
- Add prepare_chat_completion_params() for reasoning model compatibility
- Implement max_tokens → max_completion_tokens conversion for reasoning models
- Add temperature handling for reasoning models (must be 1.0 default)
- Enhanced provider validation and API key security in provider endpoints
- Streamlined retry logic (3→2 attempts) for faster issue detection
- Add failure tracking and circuit breaker analysis for debugging
- Support OpenRouter format detection (openai/gpt-5-nano, openai/o1-mini)
- Improved Grok provider empty response handling with structured fallbacks
- Enhanced contextual embedding with provider-aware model selection

Core provider functionality:
- OpenRouter, Grok, Anthropic provider support with full embedding integration
- Provider-specific model defaults and validation
- Secure API connectivity testing endpoints
- Provider context passing for code generation workflows

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude
---
 python/src/server/api_routes/knowledge_api.py |  20 +-
 python/src/server/api_routes/providers_api.py | 154 ++++++
 .../contextual_embedding_service.py           |  34 +-
 .../server/services/llm_provider_service.py   | 502 ++++++++++++++++--
 .../services/storage/code_storage_service.py  | 387 +++++++++++++-
 5 files changed, 1012 insertions(+), 85 deletions(-)
 create mode 100644 python/src/server/api_routes/providers_api.py

diff --git a/python/src/server/api_routes/knowledge_api.py b/python/src/server/api_routes/knowledge_api.py
index 5672583859..3cd0f3dbf3 100644
--- a/python/src/server/api_routes/knowledge_api.py
+++ b/python/src/server/api_routes/knowledge_api.py
@@ -18,6 +18,8 @@
 from fastapi import APIRouter, File, Form, HTTPException, UploadFile
 from pydantic import BaseModel
 
+# Basic validation - simplified inline version
+
 # Import unified logging
 from ..config.logfire_config import get_logger, safe_logfire_error, safe_logfire_info
 from ..services.crawler_manager import get_crawler
@@ -62,11 +64,25 @@ async def _validate_provider_api_key(provider: str = None) -> None:
     logger.info("🔑 Starting API key validation...")
 
     try:
+        # Basic provider validation
         if not provider:
             provider = "openai"
+        else:
+            # Simple provider validation
+            allowed_providers = {"openai", "ollama", "google", "openrouter", "anthropic", "grok"}
+            if provider not in allowed_providers:
+                raise HTTPException(
+                    status_code=400,
+                    detail={
+                        "error": "Invalid provider name",
+                        "message": f"Provider '{provider}' not supported",
+                        "error_type": "validation_error"
+                    }
+                )
 
-        logger.info(f"🔑 Testing {provider.title()} API key with minimal embedding request...")
-
+        # Basic sanitization for logging
+        safe_provider = provider[:20]  # Limit length
+        logger.info(f"🔑 Testing {safe_provider.title()} API key with minimal embedding request...")
         # Test API key with minimal embedding request - this will fail if key is invalid
         from ..services.embeddings.embedding_service import create_embedding
         test_result = await create_embedding(text="test")
diff --git a/python/src/server/api_routes/providers_api.py b/python/src/server/api_routes/providers_api.py
new file mode 100644
index 0000000000..9c405ecd43
--- /dev/null
+++ b/python/src/server/api_routes/providers_api.py
@@ -0,0 +1,154 @@
+"""
+Provider status API endpoints for testing connectivity
+
+Handles server-side provider connectivity testing without exposing API keys to frontend.
+""" + +import httpx +from fastapi import APIRouter, HTTPException, Path + +from ..config.logfire_config import logfire +from ..services.credential_service import credential_service +# Provider validation - simplified inline version + +router = APIRouter(prefix="/api/providers", tags=["providers"]) + + +async def test_openai_connection(api_key: str) -> bool: + """Test OpenAI API connectivity""" + try: + async with httpx.AsyncClient(timeout=10.0) as client: + response = await client.get( + "https://api.openai.com/v1/models", + headers={"Authorization": f"Bearer {api_key}"} + ) + return response.status_code == 200 + except Exception as e: + logfire.warning(f"OpenAI connectivity test failed: {e}") + return False + + +async def test_google_connection(api_key: str) -> bool: + """Test Google AI API connectivity""" + try: + async with httpx.AsyncClient(timeout=10.0) as client: + response = await client.get( + "https://generativelanguage.googleapis.com/v1/models", + headers={"x-goog-api-key": api_key} + ) + return response.status_code == 200 + except Exception: + logfire.warning("Google AI connectivity test failed") + return False + + +async def test_anthropic_connection(api_key: str) -> bool: + """Test Anthropic API connectivity""" + try: + async with httpx.AsyncClient(timeout=10.0) as client: + response = await client.get( + "https://api.anthropic.com/v1/models", + headers={ + "x-api-key": api_key, + "anthropic-version": "2023-06-01" + } + ) + return response.status_code == 200 + except Exception as e: + logfire.warning(f"Anthropic connectivity test failed: {e}") + return False + + +async def test_openrouter_connection(api_key: str) -> bool: + """Test OpenRouter API connectivity""" + try: + async with httpx.AsyncClient(timeout=10.0) as client: + response = await client.get( + "https://openrouter.ai/api/v1/models", + headers={"Authorization": f"Bearer {api_key}"} + ) + return response.status_code == 200 + except Exception as e: + logfire.warning(f"OpenRouter connectivity test failed: {e}") + return False + + +async def test_grok_connection(api_key: str) -> bool: + """Test Grok API connectivity""" + try: + async with httpx.AsyncClient(timeout=10.0) as client: + response = await client.get( + "https://api.x.ai/v1/models", + headers={"Authorization": f"Bearer {api_key}"} + ) + return response.status_code == 200 + except Exception as e: + logfire.warning(f"Grok connectivity test failed: {e}") + return False + + +PROVIDER_TESTERS = { + "openai": test_openai_connection, + "google": test_google_connection, + "anthropic": test_anthropic_connection, + "openrouter": test_openrouter_connection, + "grok": test_grok_connection, +} + + +@router.get("/{provider}/status") +async def get_provider_status( + provider: str = Path( + ..., + description="Provider name to test connectivity for", + regex="^[a-z0-9_]+$", + max_length=20 + ) +): + """Test provider connectivity using server-side API key (secure)""" + try: + # Basic provider validation + allowed_providers = {"openai", "ollama", "google", "openrouter", "anthropic", "grok"} + if provider not in allowed_providers: + raise HTTPException( + status_code=400, + detail=f"Invalid provider '{provider}'. 
Allowed providers: {sorted(allowed_providers)}" + ) + + # Basic sanitization for logging + safe_provider = provider[:20] # Limit length + logfire.info(f"Testing {safe_provider} connectivity server-side") + + if provider not in PROVIDER_TESTERS: + raise HTTPException( + status_code=400, + detail=f"Provider '{provider}' not supported for connectivity testing" + ) + + # Get API key server-side (never expose to client) + key_name = f"{provider.upper()}_API_KEY" + api_key = await credential_service.get_credential(key_name, decrypt=True) + + if not api_key or not isinstance(api_key, str) or not api_key.strip(): + logfire.info(f"No API key configured for {safe_provider}") + return {"ok": False, "reason": "no_key"} + + # Test connectivity using server-side key + tester = PROVIDER_TESTERS[provider] + is_connected = await tester(api_key) + + logfire.info(f"{safe_provider} connectivity test result: {is_connected}") + return { + "ok": is_connected, + "reason": "connected" if is_connected else "connection_failed", + "provider": provider # Echo back validated provider name + } + + except HTTPException: + # Re-raise HTTP exceptions (they're already properly formatted) + raise + except Exception as e: + # Basic error sanitization for logging + safe_error = str(e)[:100] # Limit length + logfire.error(f"Error testing {provider[:20]} connectivity: {safe_error}") + raise HTTPException(status_code=500, detail={"error": "Internal server error during connectivity test"}) diff --git a/python/src/server/services/embeddings/contextual_embedding_service.py b/python/src/server/services/embeddings/contextual_embedding_service.py index 76f3c59b31..559b7f11b7 100644 --- a/python/src/server/services/embeddings/contextual_embedding_service.py +++ b/python/src/server/services/embeddings/contextual_embedding_service.py @@ -12,6 +12,7 @@ from ...config.logfire_config import search_logger from ..llm_provider_service import get_llm_client from ..threading_service import get_threading_service +from ..credential_service import credential_service async def generate_contextual_embedding( @@ -32,8 +33,6 @@ async def generate_contextual_embedding( """ # Model choice is a RAG setting, get from credential service try: - from ...services.credential_service import credential_service - model_choice = await credential_service.get_credential("MODEL_CHOICE", "gpt-4.1-nano") except Exception as e: # Fallback to environment variable or default @@ -111,7 +110,7 @@ async def process_chunk_with_context( async def _get_model_choice(provider: str | None = None) -> str: - """Get model choice from credential service.""" + """Get model choice from credential service with centralized defaults.""" from ..credential_service import credential_service # Get the active provider configuration @@ -119,31 +118,36 @@ async def _get_model_choice(provider: str | None = None) -> str: model = provider_config.get("chat_model", "").strip() # Strip whitespace provider_name = provider_config.get("provider", "openai") - # Handle empty model case - fallback to provider-specific defaults or explicit config + # Handle empty model case - use centralized defaults if not model: - search_logger.warning(f"chat_model is empty for provider {provider_name}, using fallback logic") - + search_logger.warning(f"chat_model is empty for provider {provider_name}, using centralized defaults") + + # Special handling for Ollama to check specific credential if provider_name == "ollama": - # Try to get OLLAMA_CHAT_MODEL specifically try: ollama_model = await 
credential_service.get_credential("OLLAMA_CHAT_MODEL") if ollama_model and ollama_model.strip(): model = ollama_model.strip() search_logger.info(f"Using OLLAMA_CHAT_MODEL fallback: {model}") else: - # Use a sensible Ollama default + # Use default for Ollama model = "llama3.2:latest" - search_logger.info(f"Using Ollama default model: {model}") + search_logger.info(f"Using Ollama default: {model}") except Exception as e: search_logger.error(f"Error getting OLLAMA_CHAT_MODEL: {e}") model = "llama3.2:latest" - search_logger.info(f"Using Ollama fallback model: {model}") - elif provider_name == "google": - model = "gemini-1.5-flash" + search_logger.info(f"Using Ollama fallback: {model}") else: - # OpenAI or other providers - model = "gpt-4o-mini" - + # Use provider-specific defaults + provider_defaults = { + "openai": "gpt-4o-mini", + "openrouter": "anthropic/claude-3.5-sonnet", + "google": "gemini-1.5-flash", + "anthropic": "claude-3-5-haiku-20241022", + "grok": "grok-3-mini" + } + model = provider_defaults.get(provider_name, "gpt-4o-mini") + search_logger.debug(f"Using default model for provider {provider_name}: {model}") search_logger.debug(f"Using model from credential service: {model}") return model diff --git a/python/src/server/services/llm_provider_service.py b/python/src/server/services/llm_provider_service.py index ca99b08d63..1161939f41 100644 --- a/python/src/server/services/llm_provider_service.py +++ b/python/src/server/services/llm_provider_service.py @@ -16,28 +16,298 @@ logger = get_logger(__name__) -# Settings cache with TTL -_settings_cache: dict[str, tuple[Any, float]] = {} + +# Basic validation functions to avoid circular imports +def _is_valid_provider(provider: str) -> bool: + """Basic provider validation.""" + if not provider or not isinstance(provider, str): + return False + return provider.lower() in {"openai", "ollama", "google", "openrouter", "anthropic", "grok"} + + +def _sanitize_for_log(text: str) -> str: + """Basic text sanitization for logging.""" + if not text: + return "" + import re + sanitized = re.sub(r"sk-[a-zA-Z0-9-_]{20,}", "[REDACTED]", text) + sanitized = re.sub(r"xai-[a-zA-Z0-9-_]{20,}", "[REDACTED]", sanitized) + return sanitized[:100] + + +# Secure settings cache with TTL and validation +_settings_cache: dict[str, tuple[Any, float, str]] = {} # value, timestamp, checksum _CACHE_TTL_SECONDS = 300 # 5 minutes +_cache_access_log: list[dict] = [] # Track cache access patterns for security monitoring + + +def _calculate_cache_checksum(value: Any) -> str: + """Calculate checksum for cache entry integrity validation.""" + import hashlib + import json + + # Convert value to JSON string for consistent hashing + try: + value_str = json.dumps(value, sort_keys=True, default=str) + return hashlib.sha256(value_str.encode()).hexdigest()[:16] # First 16 chars for efficiency + except Exception: + # Fallback for non-serializable objects + return hashlib.sha256(str(value).encode()).hexdigest()[:16] + + +def _log_cache_access(key: str, action: str, hit: bool = None, security_event: str = None) -> None: + """Log cache access for security monitoring.""" + + access_entry = { + "timestamp": time.time(), + "key": _sanitize_for_log(key), + "action": action, # "get", "set", "invalidate", "clear" + "hit": hit, # For get operations + "security_event": security_event # "checksum_mismatch", "expired", etc. 
+ } + + # Keep only last 100 access entries to prevent memory growth + _cache_access_log.append(access_entry) + if len(_cache_access_log) > 100: + _cache_access_log.pop(0) + + # Log security events at warning level + if security_event: + safe_key = _sanitize_for_log(key) + logger.warning(f"Cache security event: {security_event} for key '{safe_key}'") def _get_cached_settings(key: str) -> Any | None: - """Get cached settings if not expired.""" - if key in _settings_cache: - value, timestamp = _settings_cache[key] - if time.time() - timestamp < _CACHE_TTL_SECONDS: + """Get cached settings if not expired and valid.""" + + try: + if key in _settings_cache: + value, timestamp, stored_checksum = _settings_cache[key] + current_time = time.time() + + # Check expiration with strict TTL enforcement + if current_time - timestamp >= _CACHE_TTL_SECONDS: + # Expired, remove from cache + del _settings_cache[key] + _log_cache_access(key, "get", hit=False, security_event="expired") + return None + + # Verify cache entry integrity + current_checksum = _calculate_cache_checksum(value) + if current_checksum != stored_checksum: + # Cache tampering detected, remove entry + del _settings_cache[key] + _log_cache_access(key, "get", hit=False, security_event="checksum_mismatch") + logger.error(f"Cache integrity violation detected for key: {_sanitize_for_log(key)}") + return None + + # Additional validation for provider configurations + if "provider_config" in key and isinstance(value, dict): + # Basic validation: check required fields + if not value.get("provider") or not _is_valid_provider(value.get("provider")): + # Invalid configuration in cache, remove it + del _settings_cache[key] + _log_cache_access(key, "get", hit=False, security_event="invalid_config") + return None + + _log_cache_access(key, "get", hit=True) return value - else: - # Expired, remove from cache - del _settings_cache[key] - return None + + _log_cache_access(key, "get", hit=False) + return None + + except Exception as e: + # Cache access error, log and return None for safety + _log_cache_access(key, "get", hit=False, security_event=f"access_error: {str(e)}") + return None def _set_cached_settings(key: str, value: Any) -> None: - """Cache settings with current timestamp.""" - _settings_cache[key] = (value, time.time()) + """Cache settings with current timestamp and integrity checksum.""" + + try: + # Validate provider configurations before caching + if "provider_config" in key and isinstance(value, dict): + # Basic validation: check required fields + if not value.get("provider") or not _is_valid_provider(value.get("provider")): + _log_cache_access(key, "set", security_event="invalid_config_rejected") + logger.warning(f"Rejected caching of invalid provider config for key: {_sanitize_for_log(key)}") + return + + # Calculate integrity checksum + checksum = _calculate_cache_checksum(value) + + # Store with timestamp and checksum + _settings_cache[key] = (value, time.time(), checksum) + _log_cache_access(key, "set") + + except Exception as e: + _log_cache_access(key, "set", security_event=f"set_error: {str(e)}") + logger.error(f"Failed to cache settings for key {_sanitize_for_log(key)}: {e}") + + +def clear_provider_cache() -> None: + """Clear the provider configuration cache to force refresh on next request.""" + global _settings_cache + + cache_size_before = len(_settings_cache) + _settings_cache.clear() + _log_cache_access("*", "clear") + logger.debug(f"Provider configuration cache cleared ({cache_size_before} entries removed)") + + +def 
invalidate_provider_cache(provider: str = None) -> None: + """ + Invalidate specific provider cache entries or all cache entries. + + Args: + provider: Optional provider name to invalidate. If None, clears all cache. + """ + global _settings_cache + + if provider is None: + # Clear entire cache + cache_size_before = len(_settings_cache) + _settings_cache.clear() + _log_cache_access("*", "invalidate") + logger.debug(f"All provider cache entries invalidated ({cache_size_before} entries)") + else: + # Validate provider name before processing + if not _is_valid_provider(provider): + _log_cache_access(provider, "invalidate", security_event="invalid_provider_name") + logger.warning(f"Rejected cache invalidation for invalid provider: {_sanitize_for_log(provider)}") + return + + # Clear specific provider entries + keys_to_remove = [] + for key in _settings_cache.keys(): + if provider in key: + keys_to_remove.append(key) + + for key in keys_to_remove: + del _settings_cache[key] + _log_cache_access(key, "invalidate") + + safe_provider = _sanitize_for_log(provider) + logger.debug(f"Cache entries for provider '{safe_provider}' invalidated: {len(keys_to_remove)} entries removed") + + +def get_cache_stats() -> dict[str, Any]: + """ + Get cache statistics with security metrics for monitoring and debugging. + + Returns: + Dictionary containing cache statistics and security metrics + """ + global _settings_cache, _cache_access_log + current_time = time.time() + + stats = { + "total_entries": len(_settings_cache), + "fresh_entries": 0, + "stale_entries": 0, + "cache_hit_potential": 0.0, + "security_metrics": { + "integrity_violations": 0, + "expired_access_attempts": 0, + "invalid_config_rejections": 0, + "access_errors": 0, + "total_security_events": 0 + }, + "access_patterns": { + "recent_cache_hits": 0, + "recent_cache_misses": 0, + "hit_rate": 0.0 + } + } + + # Analyze cache entries + for key, (value, timestamp, checksum) in _settings_cache.items(): + age = current_time - timestamp + if age < _CACHE_TTL_SECONDS: + stats["fresh_entries"] += 1 + else: + stats["stale_entries"] += 1 + + if stats["total_entries"] > 0: + stats["cache_hit_potential"] = stats["fresh_entries"] / stats["total_entries"] + + # Analyze security events from access log + recent_threshold = current_time - 3600 # Last hour + recent_hits = 0 + recent_misses = 0 + + for access in _cache_access_log: + if access["timestamp"] >= recent_threshold: + if access["action"] == "get": + if access["hit"]: + recent_hits += 1 + else: + recent_misses += 1 + # Count security events + if access["security_event"]: + stats["security_metrics"]["total_security_events"] += 1 + if "checksum_mismatch" in access["security_event"]: + stats["security_metrics"]["integrity_violations"] += 1 + elif "expired" in access["security_event"]: + stats["security_metrics"]["expired_access_attempts"] += 1 + elif "invalid_config" in access["security_event"]: + stats["security_metrics"]["invalid_config_rejections"] += 1 + elif "error" in access["security_event"]: + stats["security_metrics"]["access_errors"] += 1 + + # Calculate hit rate + total_recent_access = recent_hits + recent_misses + if total_recent_access > 0: + stats["access_patterns"]["hit_rate"] = recent_hits / total_recent_access + + stats["access_patterns"]["recent_cache_hits"] = recent_hits + stats["access_patterns"]["recent_cache_misses"] = recent_misses + + return stats + + +def get_cache_security_report() -> dict[str, Any]: + """ + Get detailed security report for cache monitoring. 
+ + Returns: + Detailed security analysis of cache operations + """ + global _cache_access_log + current_time = time.time() + + report = { + "timestamp": current_time, + "analysis_period_hours": 1, + "security_events": [], + "recommendations": [] + } + + # Extract security events from last hour + recent_threshold = current_time - 3600 + security_events = [ + access for access in _cache_access_log + if access["timestamp"] >= recent_threshold and access["security_event"] + ] + + report["security_events"] = security_events + + # Generate recommendations based on security events + if len(security_events) > 10: + report["recommendations"].append("High number of security events detected - investigate potential attacks") + + integrity_violations = sum(1 for event in security_events if "checksum_mismatch" in event.get("security_event", "")) + if integrity_violations > 0: + report["recommendations"].append(f"Cache integrity violations detected ({integrity_violations}) - check for memory corruption or attacks") + + invalid_configs = sum(1 for event in security_events if "invalid_config" in event.get("security_event", "")) + if invalid_configs > 3: + report["recommendations"].append(f"Multiple invalid configuration attempts ({invalid_configs}) - validate data sources") + + return report @asynccontextmanager async def get_llm_client(provider: str | None = None, use_embedding_provider: bool = False, instance_type: str | None = None, base_url: str | None = None): @@ -97,43 +367,68 @@ async def get_llm_client(provider: str | None = None, use_embedding_provider: bo # For Ollama, don't use the base_url from config - let _get_optimal_ollama_instance decide base_url = provider_config["base_url"] if provider_name != "ollama" else None - # Validate provider name - allowed_providers = {"openai", "ollama", "google", "openrouter", "anthropic", "grok"} - if provider_name not in allowed_providers: - raise ValueError(f"Unsupported provider: {provider_name}. 
Allowed: {allowed_providers}") + # Comprehensive provider validation with security checks + if not _is_valid_provider(provider_name): + raise ValueError(f"Provider validation failed: invalid provider '{provider_name}'") - # Validate API key format for security - if api_key and len(api_key.strip()) == 0: - api_key = None # Treat empty strings as None + # Validate API key format for security (prevent injection) + if api_key: + if len(api_key.strip()) == 0: + api_key = None # Treat empty strings as None + elif len(api_key) > 500: # Reasonable API key length limit + raise ValueError("API key length exceeds security limits") + # Additional security: check for suspicious patterns + if any(char in api_key for char in ['\n', '\r', '\t', '\0']): + raise ValueError("API key contains invalid characters") - logger.info(f"Creating LLM client for provider: {provider_name}") + # Sanitize provider name for logging + safe_provider_name = _sanitize_for_log(provider_name) + logger.info(f"Creating LLM client for provider: {safe_provider_name}") if provider_name == "openai": if not api_key: - # Check if Ollama instances are available as fallback - logger.warning("OpenAI API key not found, attempting Ollama fallback") + # Check if Ollama fallback is explicitly enabled (fail fast principle) try: - # Try to get an optimal Ollama instance for fallback - ollama_base_url = await _get_optimal_ollama_instance( - instance_type="embedding" if use_embedding_provider else "chat", - use_embedding_provider=use_embedding_provider - ) - if ollama_base_url: - logger.info(f"Falling back to Ollama instance: {ollama_base_url}") - provider_name = "ollama" - api_key = "ollama" # Ollama doesn't need a real API key - base_url = ollama_base_url - # Create Ollama client after fallback - client = openai.AsyncOpenAI( - api_key="ollama", - base_url=ollama_base_url, + enable_fallback = await credential_service.get_credential("ENABLE_OLLAMA_FALLBACK", "false") + enable_fallback = enable_fallback.lower() == "true" + except Exception: + enable_fallback = False # Default to false for fail-fast behavior + + if enable_fallback: + logger.warning("OpenAI API key not found, attempting configured Ollama fallback") + try: + # Try to get an optimal Ollama instance for fallback + ollama_base_url = await _get_optimal_ollama_instance( + instance_type="embedding" if use_embedding_provider else "chat", + use_embedding_provider=use_embedding_provider ) - logger.info(f"Ollama fallback client created successfully with base URL: {ollama_base_url}") - else: - raise ValueError("OpenAI API key not found and no Ollama instances available") - except Exception as ollama_error: - logger.error(f"Ollama fallback failed: {ollama_error}") - raise ValueError("OpenAI API key not found and Ollama fallback failed") from ollama_error + if ollama_base_url: + logger.info(f"Falling back to Ollama instance: {ollama_base_url}") + provider_name = "ollama" + api_key = "ollama" # Ollama doesn't need a real API key + base_url = ollama_base_url + # Create Ollama client after fallback + client = openai.AsyncOpenAI( + api_key="ollama", + base_url=ollama_base_url, + ) + logger.info(f"Ollama fallback client created successfully with base URL: {ollama_base_url}") + else: + raise ValueError("OpenAI API key not found and no Ollama instances available for fallback") + except Exception as ollama_error: + logger.error(f"Configured Ollama fallback failed: {ollama_error}") + raise ValueError("OpenAI API key not found and configured Ollama fallback failed") from ollama_error + else: + # Fail fast 
and loud - provide clear instructions + error_msg = ( + "OpenAI API key not found. To fix this:\n" + "1. Set OPENAI_API_KEY environment variable, OR\n" + "2. Configure OpenAI API key in the UI settings, OR\n" + "3. Enable Ollama fallback by setting ENABLE_OLLAMA_FALLBACK=true\n" + "Current provider configuration requires a valid OpenAI API key." + ) + logger.error(error_msg) + raise ValueError(error_msg) else: # Only create OpenAI client if we have an API key (didn't fallback to Ollama) client = openai.AsyncOpenAI(api_key=api_key) @@ -186,7 +481,19 @@ async def get_llm_client(provider: str | None = None, use_embedding_provider: bo elif provider_name == "grok": if not api_key: - raise ValueError("Grok API key not found") + raise ValueError("Grok API key not found - set GROK_API_KEY environment variable") + + # Enhanced Grok API key validation (secure - no key fragments logged) + key_format_valid = api_key.startswith("xai-") + key_length_valid = len(api_key) >= 20 + + if not key_format_valid: + logger.warning("Grok API key format validation failed - should start with 'xai-'") + + if not key_length_valid: + logger.warning("Grok API key validation failed - insufficient length") + + logger.debug(f"Grok API key validation: format_valid={key_format_valid}, length_valid={key_length_valid}") client = openai.AsyncOpenAI( api_key=api_key, @@ -288,14 +595,20 @@ async def get_embedding_model(provider: str | None = None) -> str: provider_name = provider_config["provider"] custom_model = provider_config["embedding_model"] - # Validate provider name - allowed_providers = {"openai", "ollama", "google", "openrouter", "anthropic", "grok"} - if provider_name not in allowed_providers: - logger.warning(f"Unknown embedding provider: {provider_name}, falling back to OpenAI") + # Comprehensive provider validation for embeddings + if not _is_valid_provider(provider_name): + safe_provider = _sanitize_for_log(provider_name) + logger.warning(f"Invalid embedding provider: {safe_provider}, falling back to OpenAI") provider_name = "openai" - # Use custom model if specified - if custom_model: - return custom_model + # Use custom model if specified (with validation) + if custom_model and len(custom_model.strip()) > 0: + custom_model = custom_model.strip() + # Basic model name validation (check length and basic characters) + if len(custom_model) <= 100 and not any(char in custom_model for char in ['\n', '\r', '\t', '\0']): + return custom_model + else: + safe_model = _sanitize_for_log(custom_model) + logger.warning(f"Invalid custom embedding model '{safe_model}' for provider '{provider_name}', using default") # Return provider-specific defaults if provider_name == "openai": @@ -305,7 +618,16 @@ async def get_embedding_model(provider: str | None = None) -> str: return "nomic-embed-text" elif provider_name == "google": # Google's embedding model - return "text-embedding-004" + return "gemini-embedding-001" + elif provider_name == "openrouter": + # OpenRouter supports OpenAI embedding models + return "text-embedding-3-small" + elif provider_name == "anthropic": + # Anthropic doesn't have native embedding models, fallback to OpenAI + return "text-embedding-3-small" + elif provider_name == "grok": + # Grok doesn't have embedding models yet, fallback to OpenAI + return "text-embedding-3-small" else: # Fallback to OpenAI's model return "text-embedding-3-small" @@ -426,3 +748,81 @@ async def validate_provider_instance(provider: str, instance_url: str | None = N "error_message": str(e), "validation_timestamp": time.time() } + + +def 
requires_max_completion_tokens(model_name: str) -> bool:
+    """
+    Check if a model requires max_completion_tokens instead of max_tokens.
+
+    OpenAI changed the parameter for reasoning models (o1, o3, GPT-5 series)
+    introduced in September 2024.
+
+    Args:
+        model_name: The model name to check
+
+    Returns:
+        True if the model requires max_completion_tokens, False otherwise
+    """
+    if not model_name:
+        return False
+
+    # Normalize to lowercase for comparison
+    model_lower = model_name.lower()
+
+    # Models that require max_completion_tokens (reasoning models)
+    reasoning_model_prefixes = [
+        "o1",      # o1, o1-mini, o1-preview, etc.
+        "o3",      # o3, o3-mini, etc.
+        "gpt-5",   # gpt-5, gpt-5-nano, gpt-5-mini, etc.
+        "grok-3",  # grok-3 series are reasoning models
+    ]
+
+    # Check for reasoning models (including OpenRouter prefixed models)
+    for prefix in reasoning_model_prefixes:
+        if model_lower.startswith(prefix):
+            return True
+        # Also check for OpenRouter format: "openai/gpt-5-nano", "openai/o1-mini", etc.
+        if f"openai/{prefix}" in model_lower:
+            return True
+
+    return False
+
+
+def prepare_chat_completion_params(model: str, params: dict) -> dict:
+    """
+    Convert parameters for compatibility with reasoning models (GPT-5, o1, o3 series).
+
+    OpenAI made several API changes for reasoning models:
+    1. max_tokens → max_completion_tokens
+    2. temperature must be 1.0 (default) - custom values not supported
+
+    This ensures compatibility with OpenAI's API changes for newer models
+    while maintaining backward compatibility for existing models.
+
+    Args:
+        model: The model name being used
+        params: Dictionary of API parameters
+
+    Returns:
+        Updated parameters dictionary with correct parameters for the model
+    """
+    if not model or not params:
+        return params
+
+    # Make a copy to avoid modifying the original
+    updated_params = params.copy()
+
+    is_reasoning_model = requires_max_completion_tokens(model)
+
+    # Convert max_tokens to max_completion_tokens for reasoning models
+    if is_reasoning_model and "max_tokens" in updated_params:
+        max_tokens_value = updated_params.pop("max_tokens")
+        updated_params["max_completion_tokens"] = max_tokens_value
+        logger.debug(f"Converted max_tokens to max_completion_tokens for model {model}")
+
+    # Remove custom temperature for reasoning models (they only support default temperature=1.0)
+    if is_reasoning_model and "temperature" in updated_params:
+        original_temp = updated_params.pop("temperature")
+        logger.debug(f"Removed custom temperature {original_temp} for reasoning model {model} (only supports default temperature=1.0)")
+
+    return updated_params
diff --git a/python/src/server/services/storage/code_storage_service.py b/python/src/server/services/storage/code_storage_service.py
index ece5ea1007..a8518c151f 100644
--- a/python/src/server/services/storage/code_storage_service.py
+++ b/python/src/server/services/storage/code_storage_service.py
@@ -19,23 +19,93 @@
 from ...config.logfire_config import search_logger
 from ..embeddings.contextual_embedding_service import generate_contextual_embeddings_batch
 from ..embeddings.embedding_service import create_embeddings_batch
+from ..llm_provider_service import get_llm_client, prepare_chat_completion_params, requires_max_completion_tokens
+from ..credential_service import credential_service
 
 
-def _get_model_choice() -> str:
-    """Get MODEL_CHOICE with direct fallback."""
+def _is_reasoning_model(model: str) -> bool:
+    """
+    Check if a model is a reasoning model that may return empty responses.
+ + Args: + model: The model identifier + + Returns: + True if the model is a reasoning model (GPT-5, o1, o3 series) + """ + return requires_max_completion_tokens(model) + + +def _supports_response_format(provider: str, model: str) -> bool: + """ + Determine if a specific provider/model combination supports response_format. + + Args: + provider: The LLM provider name + model: The model identifier + + Returns: + True if the model supports structured JSON output via response_format + """ + if not provider: + return True # Default to supporting it + + provider = provider.lower() + + if provider == "openai": + return True # OpenAI models generally support response_format + elif provider == "openrouter": + # OpenRouter: "OpenAI models, Nitro models, and some others" support it + model_lower = model.lower() + + # Known compatible model patterns on OpenRouter + compatible_patterns = [ + "openai/", # OpenAI models on OpenRouter + "gpt-", # GPT models + "nitro/", # Nitro models + "deepseek/", # DeepSeek models often support JSON + "google/", # Some Google models support it + ] + + for pattern in compatible_patterns: + if pattern in model_lower: + search_logger.debug(f"Model {model} supports response_format (pattern: {pattern})") + return True + + search_logger.debug(f"Model {model} may not support response_format, skipping") + return False + else: + # Conservative approach for other providers + return False + + +async def _get_model_choice() -> str: + """Get MODEL_CHOICE with provider-aware defaults from centralized service.""" try: - # Direct cache/env fallback - from ..credential_service import credential_service + # Get the active provider configuration + provider_config = await credential_service.get_active_provider("llm") + active_provider = provider_config.get("provider", "openai") + model = provider_config.get("chat_model") + + # If no custom model is set, use provider-specific defaults + if not model or model.strip() == "": + # Provider-specific defaults + provider_defaults = { + "openai": "gpt-4o-mini", + "openrouter": "anthropic/claude-3.5-sonnet", + "google": "gemini-1.5-flash", + "ollama": "llama3.2:latest", + "anthropic": "claude-3-5-haiku-20241022", + "grok": "grok-3-mini" + } + model = provider_defaults.get(active_provider, "gpt-4o-mini") + search_logger.debug(f"Using default model for provider {active_provider}: {model}") - if credential_service._cache_initialized and "MODEL_CHOICE" in credential_service._cache: - model = credential_service._cache["MODEL_CHOICE"] - else: - model = os.getenv("MODEL_CHOICE", "gpt-4.1-nano") - search_logger.debug(f"Using model choice: {model}") + search_logger.debug(f"Using model for provider {active_provider}: {model}") return model except Exception as e: search_logger.warning(f"Error getting model choice: {e}, using default") - return "gpt-4.1-nano" + return "gpt-4o-mini" def _get_max_workers() -> int: @@ -155,6 +225,130 @@ def score_block(block): return best_block +def _should_attempt_fallback(provider: str, model: str, is_reasoning: bool, error_context: dict) -> bool: + """ + Determine if fallback should be attempted based on error type and configuration. 
+
+    Args:
+        provider: The LLM provider name
+        model: The model identifier
+        is_reasoning: Whether this is a reasoning model
+        error_context: Context about the error that occurred
+
+    Returns:
+        True if fallback should be attempted
+    """
+    # Check for environment variable to disable fallbacks (fail-fast mode)
+    if os.getenv("DISABLE_LLM_FALLBACKS", "false").lower() == "true":
+        search_logger.debug("LLM fallbacks disabled by DISABLE_LLM_FALLBACKS environment variable")
+        return False
+
+    # Only attempt fallback for specific provider/model combinations
+    fallback_eligible_providers = ["grok", "openai"]  # Providers that support fallback
+
+    if provider not in fallback_eligible_providers:
+        search_logger.debug(f"Provider {provider} not eligible for fallback")
+        return False
+
+    # Only allow fallback for empty responses, not other error types
+    if error_context.get("response_type") != "empty_content":
+        search_logger.debug(f"Error type {error_context.get('response_type')} not eligible for fallback")
+        return False
+
+    # Allow fallback for Grok and reasoning models that commonly have empty responses
+    if provider == "grok" or is_reasoning:
+        search_logger.debug(f"Fallback enabled for {provider}/{model} (reasoning: {is_reasoning})")
+        return True
+
+    return False
+
+
+async def _attempt_single_fallback(
+    original_model: str,
+    original_provider: str,
+    is_reasoning: bool,
+    original_params: dict,
+    error_context: dict
+) -> str | None:
+    """
+    Attempt a single fallback to gpt-4o-mini with structured tracking.
+
+    Args:
+        original_model: The original model that failed
+        original_provider: The original provider that failed
+        is_reasoning: Whether original was a reasoning model
+        original_params: The original request parameters
+        error_context: Context about the original error
+
+    Returns:
+        Response content if fallback succeeded, None if failed
+    """
+    fallback_start_time = time.time()
+
+    # Always fallback to reliable gpt-4o-mini
+    fallback_model = "gpt-4o-mini"
+    fallback_provider = "openai"
+
+    fallback_context = {
+        "original_model": original_model,
+        "original_provider": original_provider,
+        "fallback_model": fallback_model,
+        "fallback_provider": fallback_provider,
+        "original_error": error_context,
+        "fallback_attempt_time": time.time()
+    }
+
+    search_logger.info(f"Attempting single fallback: {original_model} → {fallback_model}")
+
+    try:
+        # Prepare fallback parameters (simplified, no JSON format to avoid issues)
+        fallback_params = {
+            "model": fallback_model,
+            "messages": original_params["messages"],
+            "max_tokens": min(original_params.get("max_tokens", 500), 500),  # Cap for reliability
+            "temperature": original_params.get("temperature", 0.3)
+        }
+
+        # No response_format for fallback to maximize reliability
+        if "response_format" in fallback_params:
+            del fallback_params["response_format"]
+
+        async with get_llm_client(provider=fallback_provider) as fallback_client:
+            fallback_response = await fallback_client.chat.completions.create(**fallback_params)
+            fallback_content = fallback_response.choices[0].message.content
+
+            if fallback_content and fallback_content.strip():
+                fallback_time = time.time() - fallback_start_time
+                fallback_success = {
+                    **fallback_context,
+                    "fallback_succeeded": True,
+                    "fallback_time": f"{fallback_time:.2f}s",
+                    "fallback_content_length": len(fallback_content.strip())
+                }
+                search_logger.info(f"Fallback success: {fallback_success}")
+                return fallback_content.strip()
+            else:
+                # Fallback returned empty - log and return None
+                fallback_failure = {
**fallback_context, + "fallback_succeeded": False, + "fallback_error": "empty_response" + } + search_logger.error(f"Fallback returned empty response: {fallback_failure}") + return None + + except Exception as e: + fallback_time = time.time() - fallback_start_time + fallback_error = { + **fallback_context, + "fallback_succeeded": False, + "fallback_error": str(e), + "fallback_time": f"{fallback_time:.2f}s" + } + search_logger.error(f"Fallback exception: {fallback_error}") + return None + + def extract_code_blocks(markdown_content: str, min_length: int = None) -> list[dict[str, Any]]: """ Extract code blocks from markdown content along with context. @@ -168,8 +362,6 @@ def extract_code_blocks(markdown_content: str, min_length: int = None) -> list[d """ # Load all code extraction settings with direct fallback try: - from ...services.credential_service import credential_service - def _get_setting_fallback(key: str, default: str) -> str: if credential_service._cache_initialized and key in credential_service._cache: return credential_service._cache[key] @@ -570,7 +762,172 @@ async def _generate_code_example_summary_async( temperature=0.3, ) - response_content = response.choices[0].message.content.strip() + # Try to use response_format, but handle gracefully if not supported + # Note: Grok reasoning models don't work well with response_format + if provider in ["openai", "google", "anthropic"] or (provider == "openrouter" and model_choice.startswith("openai/")): + request_params["response_format"] = {"type": "json_object"} + + # Grok-specific parameter validation and filtering + if provider == "grok": + # Remove any parameters that Grok reasoning models don't support + # Based on xAI docs: presencePenalty, frequencyPenalty, stop are not supported + unsupported_params = ["presence_penalty", "frequency_penalty", "stop", "reasoning_effort"] + for param in unsupported_params: + if param in request_params: + removed_value = request_params.pop(param) + search_logger.warning(f"Removed unsupported Grok parameter '{param}': {removed_value}") + + # Validate that we're using supported parameters only + supported_params = ["model", "messages", "max_tokens", "temperature", "response_format", "stream", "tools", "tool_choice"] + for param in request_params: + if param not in supported_params: + search_logger.warning(f"Parameter '{param}' may not be supported by Grok reasoning models") + + start_time = time.time() # Initialize for all models + + if provider == "grok" or is_reasoning: + model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" + search_logger.debug(f"{model_type} request params: {request_params}") + search_logger.debug(f"{model_type} prompt length: {len(prompt)} characters") + search_logger.debug(f"{model_type} prompt preview: {prompt[:200]}...") + + # Simplified retry logic - reduced from 3 to 2 retries to surface issues faster + max_retries = 2 if (provider == "grok" or is_reasoning) else 1 + retry_delay = 1.0 # Start with 1 second delay + failure_reasons = [] # Track failure reasons for circuit breaker analysis + + for attempt in range(max_retries): + try: + if provider == "grok" and attempt > 0: + search_logger.info(f"Grok retry attempt {attempt + 1}/{max_retries} after {retry_delay:.1f}s delay") + await asyncio.sleep(retry_delay) + + response = await client.chat.completions.create(**request_params) + + # Check for empty response - handle Grok reasoning models + message = response.choices[0].message if response.choices else None + response_content = None + + # Enhanced 
Grok debugging - log both content fields + if provider == "grok" and message: + content_preview = message.content[:100] if message.content else "None" + reasoning_preview = getattr(message, 'reasoning_content', 'N/A')[:100] if hasattr(message, 'reasoning_content') and getattr(message, 'reasoning_content') else "None" + search_logger.debug(f"Grok response fields - content: '{content_preview}', reasoning_content: '{reasoning_preview}'") + + if message: + # For Grok reasoning models, check content first, then reasoning_content + if provider == "grok": + # First try content (where final answer should be) + if message.content and message.content.strip(): + response_content = message.content.strip() + search_logger.debug(f"Grok using content field: {len(response_content)} chars") + # Fallback to reasoning_content if content is empty + elif hasattr(message, 'reasoning_content') and message.reasoning_content: + response_content = message.reasoning_content.strip() + search_logger.debug(f"Grok fallback to reasoning_content: {len(response_content)} chars") + else: + search_logger.debug(f"Grok no content in either field: content='{message.content}', reasoning_content='{getattr(message, 'reasoning_content', 'N/A')}'") + elif message.content: + response_content = message.content + else: + search_logger.debug(f"No content in message: content={message.content}, reasoning_content={getattr(message, 'reasoning_content', 'N/A')}") + + if response_content and response_content.strip(): + # Success - break out of retry loop + if provider == "grok" and attempt > 0: + search_logger.info(f"Grok request succeeded on attempt {attempt + 1}") + break + elif (provider == "grok" or is_reasoning) and attempt < max_retries - 1: + # Empty response from Grok or reasoning models - track failure and retry + model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" + failure_reason = f"empty_response_attempt_{attempt + 1}" + failure_reasons.append(failure_reason) + + search_logger.warning(f"{model_type} empty response on attempt {attempt + 1}, retrying...") + + retry_delay *= 2 # Exponential backoff + continue + else: + # Final attempt failed or not Grok - handle below + break + + except Exception as e: + if (provider == "grok" or is_reasoning) and attempt < max_retries - 1: + model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" + failure_reason = f"exception_attempt_{attempt + 1}_{type(e).__name__}" + failure_reasons.append(failure_reason) + + search_logger.error(f"{model_type} request failed on attempt {attempt + 1}: {e}, retrying...") + retry_delay *= 2 + continue + else: + # Re-raise on final attempt or non-Grok/reasoning providers + if failure_reasons: + # Add structured failure analysis for circuit breaker pattern + failure_analysis = { + "total_attempts": attempt + 1, + "failure_pattern": failure_reasons, + "final_error": str(e), + "model": model_choice, + "provider": provider + } + search_logger.error(f"Circuit breaker analysis: {failure_analysis}") + raise + + # Log timing for Grok requests + if provider == "grok": + elapsed_time = time.time() - start_time + search_logger.debug(f"Grok total response time: {elapsed_time:.2f}s") + + # Handle empty response with streamlined fallback logic + if not response_content: + # Structured error analysis for debugging + error_context = { + "model": model_choice, + "provider": provider, + "request_time": f"{elapsed_time:.2f}s" if 'elapsed_time' in locals() else "unknown", + "response_type": "empty_content", + 
"response_choices_count": len(response.choices) if response.choices else 0 + } + search_logger.error(f"Empty response from LLM: {error_context}") + # Determine if fallback should be attempted based on error type and configuration + should_fallback = _should_attempt_fallback(provider, model_choice, is_reasoning, error_context) + + if should_fallback: + # Single fallback attempt with tracking + fallback_result = await _attempt_single_fallback( + model_choice, provider, is_reasoning, request_params, error_context + ) + if fallback_result: + response_content = fallback_result + search_logger.info(f"Fallback succeeded for {model_choice}") + else: + # Log fallback failure analysis for circuit breaker patterns + fallback_failure = { + "original_model": model_choice, + "original_provider": provider, + "fallback_attempted": True, + "fallback_succeeded": False, + "error_context": error_context + } + elif (provider == "grok" or is_reasoning) and attempt < max_retries - 1: + # Empty response from Grok or reasoning models - track failure and retry + model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" + failure_reason = f"empty_response_attempt_{attempt + 1}" + failure_reasons.append(failure_reason) + + search_logger.warning(f"{model_type} empty response on attempt {attempt + 1}, retrying...") + + else: + # No fallback attempted - fail fast with detailed context + search_logger.error(f"No fallback configured for {provider}/{model_choice} - failing fast") + raise ValueError(f"Empty response from {model_choice} (provider: {provider}). Check: API key validity, rate limits, model availability") + + if not response_content: + # This should not happen after fallback logic, but safety check + raise ValueError("No valid response content after all attempts") + + response_content = response_content.strip() search_logger.debug(f"LLM API response: {repr(response_content[:200])}...") result = json.loads(response_content) @@ -627,8 +984,6 @@ async def generate_code_summaries_batch( # Get max_workers from settings if not provided if max_workers is None: try: - from ...services.credential_service import credential_service - if ( credential_service._cache_initialized and "CODE_SUMMARY_MAX_WORKERS" in credential_service._cache @@ -759,8 +1114,6 @@ async def add_code_examples_to_supabase( # Check if contextual embeddings are enabled try: - from ..credential_service import credential_service - use_contextual_embeddings = credential_service._cache.get("USE_CONTEXTUAL_EMBEDDINGS") if isinstance(use_contextual_embeddings, str): use_contextual_embeddings = use_contextual_embeddings.lower() == "true" From 9a7a34d660bb8cf9719157ce0ad3ffc14d01f977 Mon Sep 17 00:00:00 2001 From: Chillbruhhh Date: Fri, 19 Sep 2025 00:11:04 -0500 Subject: [PATCH 03/28] fully working model providers, addressing securtiy and code related concerns, throughly hardening our code --- .../contextual_embedding_service.py | 32 ++-- .../server/services/llm_provider_service.py | 2 +- .../services/storage/code_storage_service.py | 160 +++++++++++++----- 3 files changed, 135 insertions(+), 59 deletions(-) diff --git a/python/src/server/services/embeddings/contextual_embedding_service.py b/python/src/server/services/embeddings/contextual_embedding_service.py index 559b7f11b7..f9aec36617 100644 --- a/python/src/server/services/embeddings/contextual_embedding_service.py +++ b/python/src/server/services/embeddings/contextual_embedding_service.py @@ -10,7 +10,7 @@ import openai from ...config.logfire_config import search_logger -from 
..llm_provider_service import get_llm_client +from ..llm_provider_service import get_llm_client, prepare_chat_completion_params, requires_max_completion_tokens from ..threading_service import get_threading_service from ..credential_service import credential_service @@ -64,18 +64,21 @@ async def generate_contextual_embedding( # Get model from provider configuration model = await _get_model_choice(provider) - response = await client.chat.completions.create( - model=model, - messages=[ + # Prepare parameters and convert max_tokens for GPT-5/reasoning models + params = { + "model": model, + "messages": [ { "role": "system", "content": "You are a helpful assistant that provides concise contextual information.", }, {"role": "user", "content": prompt}, ], - temperature=0.3, - max_tokens=200, - ) + "temperature": 0.3, + "max_tokens": 1200 if requires_max_completion_tokens(model) else 200, # Much more tokens for reasoning models (GPT-5 needs extra for reasoning process) + } + final_params = prepare_chat_completion_params(model, params) + response = await client.chat.completions.create(**final_params) context = response.choices[0].message.content.strip() contextual_text = f"{context}\n---\n{chunk}" @@ -192,18 +195,21 @@ async def generate_contextual_embeddings_batch( batch_prompt += "For each chunk, provide a short succinct context to situate it within the overall document for improving search retrieval. Format your response as:\\nCHUNK 1: [context]\\nCHUNK 2: [context]\\netc." # Make single API call for ALL chunks - response = await client.chat.completions.create( - model=model_choice, - messages=[ + # Prepare parameters and convert max_tokens for GPT-5/reasoning models + batch_params = { + "model": model_choice, + "messages": [ { "role": "system", "content": "You are a helpful assistant that generates contextual information for document chunks.", }, {"role": "user", "content": batch_prompt}, ], - temperature=0, - max_tokens=100 * len(chunks), # Limit response size - ) + "temperature": 0, + "max_tokens": (600 if requires_max_completion_tokens(model_choice) else 100) * len(chunks), # Much more tokens for reasoning models (GPT-5 needs extra reasoning space) + } + final_batch_params = prepare_chat_completion_params(model_choice, batch_params) + response = await client.chat.completions.create(**final_batch_params) # Parse response response_text = response.choices[0].message.content diff --git a/python/src/server/services/llm_provider_service.py b/python/src/server/services/llm_provider_service.py index 1161939f41..88cfd2219a 100644 --- a/python/src/server/services/llm_provider_service.py +++ b/python/src/server/services/llm_provider_service.py @@ -500,6 +500,7 @@ async def get_llm_client(provider: str | None = None, use_embedding_provider: bo base_url=base_url or "https://api.x.ai/v1", ) logger.info("Grok client created successfully") + else: raise ValueError(f"Unsupported LLM provider: {provider_name}") @@ -774,7 +775,6 @@ def requires_max_completion_tokens(model_name: str) -> bool: "o1", # o1, o1-mini, o1-preview, etc. "o3", # o3, o3-mini, etc. "gpt-5", # gpt-5, gpt-5-nano, gpt-5-mini, etc. 
- "grok-3", # grok-3 series are reasoning models ] # Check for reasoning models (including OpenRouter prefixed models) diff --git a/python/src/server/services/storage/code_storage_service.py b/python/src/server/services/storage/code_storage_service.py index a8518c151f..6b988c3dee 100644 --- a/python/src/server/services/storage/code_storage_service.py +++ b/python/src/server/services/storage/code_storage_service.py @@ -6,6 +6,7 @@ import asyncio import json +import time import os import re from collections import defaultdict, deque @@ -757,14 +758,18 @@ async def _generate_code_example_summary_async( }, {"role": "user", "content": prompt}, ], - response_format={"type": "json_object"}, - max_tokens=500, - temperature=0.3, - ) + "max_tokens": 2000 if (_is_reasoning_model(model_choice) or provider == "grok") else 500, # 2000 tokens for both reasoning models (GPT-5) and Grok for complex reasoning + "temperature": 0.3, + } # Try to use response_format, but handle gracefully if not supported - # Note: Grok reasoning models don't work well with response_format - if provider in ["openai", "google", "anthropic"] or (provider == "openrouter" and model_choice.startswith("openai/")): + # Note: Grok and reasoning models (GPT-5, o1, o3) don't work well with response_format + supports_response_format = ( + provider in ["openai", "google", "anthropic"] or + (provider == "openrouter" and model_choice.startswith("openai/")) + ) + # Exclude reasoning models from using response_format + if supports_response_format and not _is_reasoning_model(model_choice): request_params["response_format"] = {"type": "json_object"} # Grok-specific parameter validation and filtering @@ -783,48 +788,71 @@ async def _generate_code_example_summary_async( if param not in supported_params: search_logger.warning(f"Parameter '{param}' may not be supported by Grok reasoning models") - start_time = time.time() # Initialize for all models + # Enhanced debugging for Grok provider + # Implement retry logic for Grok and reasoning models (GPT-5, o1, o3) empty responses + is_reasoning = _is_reasoning_model(model_choice) + start_time = time.time() # Initialize for all models if provider == "grok" or is_reasoning: model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" search_logger.debug(f"{model_type} request params: {request_params}") search_logger.debug(f"{model_type} prompt length: {len(prompt)} characters") search_logger.debug(f"{model_type} prompt preview: {prompt[:200]}...") - # Simplified retry logic - reduced from 3 to 2 retries to surface issues faster - max_retries = 2 if (provider == "grok" or is_reasoning) else 1 + max_retries = 3 if (provider == "grok" or is_reasoning) else 1 retry_delay = 1.0 # Start with 1 second delay failure_reasons = [] # Track failure reasons for circuit breaker analysis for attempt in range(max_retries): try: - if provider == "grok" and attempt > 0: - search_logger.info(f"Grok retry attempt {attempt + 1}/{max_retries} after {retry_delay:.1f}s delay") + if (provider == "grok" or is_reasoning) and attempt > 0: + model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" + search_logger.info(f"{model_type} retry attempt {attempt + 1}/{max_retries} after {retry_delay:.1f}s delay") await asyncio.sleep(retry_delay) + elif is_reasoning and attempt == 0: + # Small delay for reasoning models on first attempt to help with cold start + search_logger.debug(f"reasoning model ({model_choice}) first attempt - adding 0.5s delay for cold start") + await asyncio.sleep(0.5) 
- response = await client.chat.completions.create(**request_params) + # Convert max_tokens to max_completion_tokens for GPT-5/reasoning models + final_params = prepare_chat_completion_params(model_choice, request_params) + response = await client.chat.completions.create(**final_params) # Check for empty response - handle Grok reasoning models message = response.choices[0].message if response.choices else None response_content = None - # Enhanced Grok debugging - log both content fields - if provider == "grok" and message: + # Enhanced debugging for Grok and reasoning models - log both content fields + if (provider == "grok" or is_reasoning) and message: content_preview = message.content[:100] if message.content else "None" - reasoning_preview = getattr(message, 'reasoning_content', 'N/A')[:100] if hasattr(message, 'reasoning_content') and getattr(message, 'reasoning_content') else "None" - search_logger.debug(f"Grok response fields - content: '{content_preview}', reasoning_content: '{reasoning_preview}'") + reasoning_preview = getattr(message, 'reasoning_content', 'N/A')[:100] if hasattr(message, 'reasoning_content') and message.reasoning_content else "None" + model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" + + # Additional debugging for first attempt failures + finish_reason = getattr(response.choices[0], 'finish_reason', 'unknown') if response.choices else 'no_choices' + usage_info = getattr(response, 'usage', None) + if usage_info: + completion_tokens = getattr(usage_info, 'completion_tokens', 0) + reasoning_tokens = getattr(getattr(usage_info, 'completion_tokens_details', None), 'reasoning_tokens', 0) if hasattr(usage_info, 'completion_tokens_details') else 0 + search_logger.debug(f"{model_type} attempt {attempt + 1} - finish_reason: {finish_reason}, completion_tokens: {completion_tokens}, reasoning_tokens: {reasoning_tokens}") + else: + search_logger.debug(f"{model_type} attempt {attempt + 1} - finish_reason: {finish_reason}, no usage info") + + search_logger.debug(f"{model_type} response fields - content: '{content_preview}', reasoning_content: '{reasoning_preview}'") if message: - # For Grok reasoning models, check content first, then reasoning_content - if provider == "grok": + # For Grok and reasoning models, check content first, then reasoning_content + if provider == "grok" or is_reasoning: # First try content (where final answer should be) if message.content and message.content.strip(): response_content = message.content.strip() - search_logger.debug(f"Grok using content field: {len(response_content)} chars") + model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" + search_logger.debug(f"{model_type} using content field: {len(response_content)} chars") # Fallback to reasoning_content if content is empty elif hasattr(message, 'reasoning_content') and message.reasoning_content: response_content = message.reasoning_content.strip() - search_logger.debug(f"Grok fallback to reasoning_content: {len(response_content)} chars") + model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" + search_logger.debug(f"{model_type} fallback to reasoning_content: {len(response_content)} chars") else: search_logger.debug(f"Grok no content in either field: content='{message.content}', reasoning_content='{getattr(message, 'reasoning_content', 'N/A')}'") elif message.content: @@ -834,50 +862,35 @@ async def _generate_code_example_summary_async( if response_content and response_content.strip(): # Success - break 
out of retry loop - if provider == "grok" and attempt > 0: - search_logger.info(f"Grok request succeeded on attempt {attempt + 1}") + if (provider == "grok" or is_reasoning) and attempt > 0: + model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" + search_logger.info(f"{model_type} request succeeded on attempt {attempt + 1}") break elif (provider == "grok" or is_reasoning) and attempt < max_retries - 1: - # Empty response from Grok or reasoning models - track failure and retry + # Empty response from Grok or reasoning models - retry with exponential backoff model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" - failure_reason = f"empty_response_attempt_{attempt + 1}" - failure_reasons.append(failure_reason) - search_logger.warning(f"{model_type} empty response on attempt {attempt + 1}, retrying...") - retry_delay *= 2 # Exponential backoff continue else: - # Final attempt failed or not Grok - handle below + # Final attempt failed or not Grok/reasoning model - handle below break except Exception as e: if (provider == "grok" or is_reasoning) and attempt < max_retries - 1: model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" - failure_reason = f"exception_attempt_{attempt + 1}_{type(e).__name__}" - failure_reasons.append(failure_reason) - search_logger.error(f"{model_type} request failed on attempt {attempt + 1}: {e}, retrying...") retry_delay *= 2 continue else: # Re-raise on final attempt or non-Grok/reasoning providers - if failure_reasons: - # Add structured failure analysis for circuit breaker pattern - failure_analysis = { - "total_attempts": attempt + 1, - "failure_pattern": failure_reasons, - "final_error": str(e), - "model": model_choice, - "provider": provider - } - search_logger.error(f"Circuit breaker analysis: {failure_analysis}") raise - # Log timing for Grok requests - if provider == "grok": + # Log timing for Grok and reasoning model requests + if provider == "grok" or is_reasoning: elapsed_time = time.time() - start_time - search_logger.debug(f"Grok total response time: {elapsed_time:.2f}s") + model_type = "Grok" if provider == "grok" else f"reasoning model ({model_choice})" + search_logger.debug(f"{model_type} total response time: {elapsed_time:.2f}s") # Handle empty response with streamlined fallback logic if not response_content: @@ -916,8 +929,65 @@ async def _generate_code_example_summary_async( failure_reason = f"empty_response_attempt_{attempt + 1}" failure_reasons.append(failure_reason) - search_logger.warning(f"{model_type} empty response on attempt {attempt + 1}, retrying...") + async with get_llm_client(provider="openai") as fallback_client: + search_logger.info("Using OpenAI fallback for Grok failure") + # Convert max_tokens to max_completion_tokens for GPT-5/reasoning models + final_fallback_params = prepare_chat_completion_params(fallback_params["model"], fallback_params) + fallback_response = await fallback_client.chat.completions.create(**final_fallback_params) + fallback_content = fallback_response.choices[0].message.content + + if fallback_content and fallback_content.strip(): + search_logger.info("OpenAI fallback succeeded") + response_content = fallback_content.strip() + else: + search_logger.error("OpenAI fallback also returned empty response") + raise ValueError("Both Grok and OpenAI fallback failed") + + except Exception as fallback_error: + search_logger.error(f"OpenAI fallback failed: {fallback_error}") + raise ValueError(f"Grok failed and fallback to OpenAI also 
failed: {fallback_error}") from fallback_error
+                    elif is_reasoning:
+                        # Implement fallback for reasoning model (GPT-5, o1, o3) failures
+                        search_logger.error("Reasoning model empty response debugging:")
+                        search_logger.error(f" - Model: {model_choice}")
+                        search_logger.error(f" - Provider: {provider}")
+                        search_logger.error(f" - Request took: {elapsed_time:.2f}s")
+                        search_logger.error(f" - Full response: {response}")
+                        search_logger.error(f" - Response choices length: {len(response.choices) if response.choices else 0}")
+                        if response.choices:
+                            search_logger.error(f" - First choice: {response.choices[0]}")
+                            search_logger.error(f" - Message content: '{response.choices[0].message.content}'")
+                            search_logger.error(f" - Message role: {response.choices[0].message.role}")
+                        search_logger.error("Check: 1) API key validity, 2) rate limits, 3) model availability")
+
+                        # Implement fallback to non-reasoning model for reasoning model failures
+                        search_logger.warning(f"Attempting fallback to gpt-4o-mini due to {model_choice} failure...")
+                        try:
+                            # Use a reliable non-reasoning model as fallback
+                            fallback_params = {
+                                "model": "gpt-4o-mini",
+                                "messages": request_params["messages"],
+                                "max_tokens": request_params.get("max_tokens", 500),
+                                "temperature": request_params.get("temperature", 0.3),
+                                "response_format": {"type": "json_object"}
+                            }
+
+                            async with get_llm_client(provider="openai") as fallback_client:
+                                search_logger.info(f"Using gpt-4o-mini fallback for {model_choice} failure")
+                                # No parameter conversion needed for non-reasoning model
+                                fallback_response = await fallback_client.chat.completions.create(**fallback_params)
+                                fallback_content = fallback_response.choices[0].message.content
+
+                                if fallback_content and fallback_content.strip():
+                                    search_logger.info(f"gpt-4o-mini fallback succeeded for {model_choice}")
+                                    response_content = fallback_content.strip()
+                                else:
+                                    search_logger.error("gpt-4o-mini fallback also returned empty response")
+                                    raise ValueError(f"Both {model_choice} and gpt-4o-mini fallback failed")
+                        except Exception as fallback_error:
+                            search_logger.error(f"gpt-4o-mini fallback failed: {fallback_error}")
+                            raise ValueError(f"{model_choice} failed and fallback to gpt-4o-mini also failed: {fallback_error}") from fallback_error
                    else:
                        # No fallback attempted - fail fast with detailed context
                        search_logger.error(f"No fallback configured for {provider}/{model_choice} - failing fast")

From e9a78e8a6e01a7342060f1cb4feba83ee89525a2 Mon Sep 17 00:00:00 2001
From: Chillbruhhh
Date: Sat, 20 Sep 2025 06:29:51 -0500
Subject: [PATCH 04/28] added multiprovider support, embeddings model support,
 cleaned the pr, need to fix health check, asyncio tasks errors, and
 contextual embeddings error

---
 .../src/components/settings/RAGSettings.tsx   | 371 +++++++++---------
 python/src/server/api_routes/knowledge_api.py |  76 +++-
 .../crawling/code_extraction_service.py       |  11 +-
 .../services/crawling/crawling_service.py     |  12 +
 .../crawling/document_storage_operations.py   |   4 +-
 .../src/server/services/credential_service.py |  67 +++-
 .../services/embeddings/embedding_service.py  |  23 +-
 .../server/services/llm_provider_service.py   | 195 ++++++++-
 .../services/provider_discovery_service.py    |  54 ++-
 .../services/storage/code_storage_service.py  | 261 ++----------
 10 files changed, 639 insertions(+), 435 deletions(-)

diff --git a/archon-ui-main/src/components/settings/RAGSettings.tsx b/archon-ui-main/src/components/settings/RAGSettings.tsx
index d54cd51062..e472d701dc 100644
--- 
a/archon-ui-main/src/components/settings/RAGSettings.tsx +++ b/archon-ui-main/src/components/settings/RAGSettings.tsx @@ -9,6 +9,82 @@ import { credentialsService } from '../../services/credentialsService'; import OllamaModelDiscoveryModal from './OllamaModelDiscoveryModal'; import OllamaModelSelectionModal from './OllamaModelSelectionModal'; +type ProviderKey = 'openai' | 'google' | 'ollama' | 'anthropic' | 'grok' | 'openrouter'; + +interface ProviderModels { + chatModel: string; + embeddingModel: string; +} + +type ProviderModelMap = Record; + +// Provider model persistence helpers +const PROVIDER_MODELS_KEY = 'archon_provider_models'; + +const getDefaultModels = (provider: ProviderKey): ProviderModels => { + const chatDefaults: Record = { + openai: 'gpt-4o-mini', + anthropic: 'claude-3-5-sonnet-20241022', + google: 'gemini-1.5-flash', + grok: 'grok-3-mini', // Updated to use grok-3-mini as default + openrouter: 'openai/gpt-4o-mini', + ollama: 'llama3:8b' + }; + + const embeddingDefaults: Record = { + openai: 'text-embedding-3-small', + anthropic: 'text-embedding-3-small', // Fallback to OpenAI + google: 'text-embedding-004', + grok: 'text-embedding-3-small', // Fallback to OpenAI + openrouter: 'text-embedding-3-small', + ollama: 'nomic-embed-text' + }; + + return { + chatModel: chatDefaults[provider], + embeddingModel: embeddingDefaults[provider] + }; +}; + +const saveProviderModels = (providerModels: ProviderModelMap): void => { + try { + localStorage.setItem(PROVIDER_MODELS_KEY, JSON.stringify(providerModels)); + } catch (error) { + console.error('Failed to save provider models:', error); + } +}; + +const loadProviderModels = (): ProviderModelMap => { + try { + const saved = localStorage.getItem(PROVIDER_MODELS_KEY); + if (saved) { + return JSON.parse(saved); + } + } catch (error) { + console.error('Failed to load provider models:', error); + } + + // Return defaults for all providers if nothing saved + const providers: ProviderKey[] = ['openai', 'google', 'openrouter', 'ollama', 'anthropic', 'grok']; + const defaultModels: ProviderModelMap = {} as ProviderModelMap; + + providers.forEach(provider => { + defaultModels[provider] = getDefaultModels(provider); + }); + + return defaultModels; +}; + +// Static color styles mapping (prevents Tailwind JIT purging) +const colorStyles: Record = { + openai: 'border-green-500 bg-green-500/10', + google: 'border-blue-500 bg-blue-500/10', + openrouter: 'border-cyan-500 bg-cyan-500/10', + ollama: 'border-purple-500 bg-purple-500/10', + anthropic: 'border-orange-500 bg-orange-500/10', + grok: 'border-yellow-500 bg-yellow-500/10', +}; + interface RAGSettingsProps { ragSettings: { MODEL_CHOICE: string; @@ -57,7 +133,10 @@ export const RAGSettings = ({ // Model selection modals state const [showLLMModelSelectionModal, setShowLLMModelSelectionModal] = useState(false); const [showEmbeddingModelSelectionModal, setShowEmbeddingModelSelectionModal] = useState(false); - + + // Provider-specific model persistence state + const [providerModels, setProviderModels] = useState(() => loadProviderModels()); + // Instance configurations const [llmInstanceConfig, setLLMInstanceConfig] = useState({ name: '', @@ -113,6 +192,25 @@ export const RAGSettings = ({ } }, [ragSettings.OLLAMA_EMBEDDING_URL, ragSettings.OLLAMA_EMBEDDING_INSTANCE_NAME]); + // Provider model persistence effects + useEffect(() => { + // Update provider models when current models change + const currentProvider = ragSettings.LLM_PROVIDER as ProviderKey; + if (currentProvider && 
ragSettings.MODEL_CHOICE && ragSettings.EMBEDDING_MODEL) { + setProviderModels(prev => { + const updated = { + ...prev, + [currentProvider]: { + chatModel: ragSettings.MODEL_CHOICE, + embeddingModel: ragSettings.EMBEDDING_MODEL + } + }; + saveProviderModels(updated); + return updated; + }); + } + }, [ragSettings.MODEL_CHOICE, ragSettings.EMBEDDING_MODEL, ragSettings.LLM_PROVIDER]); + // Load API credentials for status checking useEffect(() => { const loadApiCredentials = async () => { @@ -197,58 +295,27 @@ export const RAGSettings = ({ }>({}); // Test connection to external providers - const testProviderConnection = async (provider: string, apiKey: string): Promise => { + const testProviderConnection = async (provider: string): Promise => { setProviderConnectionStatus(prev => ({ ...prev, [provider]: { ...prev[provider], checking: true } })); try { - switch (provider) { - case 'openai': - // Test OpenAI connection with a simple completion request - const openaiResponse = await fetch('https://api.openai.com/v1/models', { - method: 'GET', - headers: { - 'Authorization': `Bearer ${apiKey}`, - 'Content-Type': 'application/json' - } - }); - - if (openaiResponse.ok) { - setProviderConnectionStatus(prev => ({ - ...prev, - openai: { connected: true, checking: false, lastChecked: new Date() } - })); - return true; - } else { - throw new Error(`OpenAI API returned ${openaiResponse.status}`); - } + // Use server-side API endpoint for secure connectivity testing + const response = await fetch(`/api/providers/${provider}/status`); + const result = await response.json(); - case 'google': - // Test Google Gemini connection - const googleResponse = await fetch(`https://generativelanguage.googleapis.com/v1/models?key=${apiKey}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json' - } - }); - - if (googleResponse.ok) { - setProviderConnectionStatus(prev => ({ - ...prev, - google: { connected: true, checking: false, lastChecked: new Date() } - })); - return true; - } else { - throw new Error(`Google API returned ${googleResponse.status}`); - } + const isConnected = result.ok && result.reason === 'connected'; - default: - return false; - } + setProviderConnectionStatus(prev => ({ + ...prev, + [provider]: { connected: isConnected, checking: false, lastChecked: new Date() } + })); + + return isConnected; } catch (error) { - console.error(`Failed to test ${provider} connection:`, error); + console.error(`Error testing ${provider} connection:`, error); setProviderConnectionStatus(prev => ({ ...prev, [provider]: { connected: false, checking: false, lastChecked: new Date() } @@ -260,37 +327,27 @@ export const RAGSettings = ({ // Test provider connections when API credentials change useEffect(() => { const testConnections = async () => { - const providers = ['openai', 'google']; - + // Test all supported providers + const providers = ['openai', 'google', 'anthropic', 'openrouter', 'grok']; + for (const provider of providers) { - const keyName = provider === 'openai' ? 'OPENAI_API_KEY' : 'GOOGLE_API_KEY'; - const apiKey = Object.keys(apiCredentials).find(key => key.toUpperCase() === keyName); - const keyValue = apiKey ? apiCredentials[apiKey] : undefined; - - if (keyValue && keyValue.trim().length > 0) { - // Don't test if we've already checked recently (within last 30 seconds) - const lastChecked = providerConnectionStatus[provider]?.lastChecked; - const now = new Date(); - const timeSinceLastCheck = lastChecked ? 
now.getTime() - lastChecked.getTime() : Infinity;
-
-          if (timeSinceLastCheck > 30000) { // 30 seconds
-            console.log(`🔄 Testing ${provider} connection...`);
-            await testProviderConnection(provider, keyValue);
-          }
-        } else {
-          // No API key, mark as disconnected
-          setProviderConnectionStatus(prev => ({
-            ...prev,
-            [provider]: { connected: false, checking: false, lastChecked: new Date() }
-          }));
+        // Don't test if we've already checked recently (within last 30 seconds)
+        const lastChecked = providerConnectionStatus[provider]?.lastChecked;
+        const now = new Date();
+        const timeSinceLastCheck = lastChecked ? now.getTime() - lastChecked.getTime() : Infinity;
+
+        if (timeSinceLastCheck > 30000) { // 30 seconds
+          console.log(`🔄 Testing ${provider} connection...`);
+          await testProviderConnection(provider);
+        }
      }
    };

-    // Only test if we have credentials loaded
-    if (Object.keys(apiCredentials).length > 0) {
-      testConnections();
-    }
+    // Test connections periodically (every 60 seconds)
+    testConnections();
+    const interval = setInterval(testConnections, 60000);
+
+    return () => clearInterval(interval);
  }, [apiCredentials]); // Test when credentials change

  // Ref to track if initial test has been run (will be used after function definitions)
@@ -662,20 +719,23 @@
        if (llmStatus.online || embeddingStatus.online) return 'partial';
        return 'missing';
      case 'anthropic':
-        // Check if Anthropic API key is configured (case insensitive)
-        const anthropicKey = Object.keys(apiCredentials).find(key => key.toUpperCase() === 'ANTHROPIC_API_KEY');
-        const hasAnthropicKey = anthropicKey && apiCredentials[anthropicKey] && apiCredentials[anthropicKey].trim().length > 0;
-        return hasAnthropicKey ? 'configured' : 'missing';
+        // Use server-side connection status
+        const anthropicConnected = providerConnectionStatus['anthropic']?.connected || false;
+        const anthropicChecking = providerConnectionStatus['anthropic']?.checking || false;
+        if (anthropicChecking) return 'partial';
+        return anthropicConnected ? 'configured' : 'missing';
      case 'grok':
-        // Check if Grok API key is configured (case insensitive)
-        const grokKey = Object.keys(apiCredentials).find(key => key.toUpperCase() === 'GROK_API_KEY');
-        const hasGrokKey = grokKey && apiCredentials[grokKey] && apiCredentials[grokKey].trim().length > 0;
-        return hasGrokKey ? 'configured' : 'missing';
+        // Use server-side connection status
+        const grokConnected = providerConnectionStatus['grok']?.connected || false;
+        const grokChecking = providerConnectionStatus['grok']?.checking || false;
+        if (grokChecking) return 'partial';
+        return grokConnected ? 'configured' : 'missing';
      case 'openrouter':
-        // Check if OpenRouter API key is configured (case insensitive)
-        const openRouterKey = Object.keys(apiCredentials).find(key => key.toUpperCase() === 'OPENROUTER_API_KEY');
-        const hasOpenRouterKey = openRouterKey && apiCredentials[openRouterKey] && apiCredentials[openRouterKey].trim().length > 0;
-        return hasOpenRouterKey ? 'configured' : 'missing';
+        // Use server-side connection status
+        const openRouterConnected = providerConnectionStatus['openrouter']?.connected || false;
+        const openRouterChecking = providerConnectionStatus['openrouter']?.checking || false;
+        if (openRouterChecking) return 'partial';
+        return openRouterConnected ? 
'configured' : 'missing'; default: return 'missing'; } @@ -750,55 +810,32 @@ export const RAGSettings = ({ {[ { key: 'openai', name: 'OpenAI', logo: '/img/OpenAI.png', color: 'green' }, { key: 'google', name: 'Google', logo: '/img/google-logo.svg', color: 'blue' }, + { key: 'openrouter', name: 'OpenRouter', logo: '/img/OpenRouter.png', color: 'cyan' }, { key: 'ollama', name: 'Ollama', logo: '/img/Ollama.png', color: 'purple' }, { key: 'anthropic', name: 'Anthropic', logo: '/img/claude-logo.svg', color: 'orange' }, - { key: 'grok', name: 'Grok', logo: '/img/Grok.png', color: 'yellow' }, - { key: 'openrouter', name: 'OpenRouter', logo: '/img/OpenRouter.png', color: 'cyan' } + { key: 'grok', name: 'Grok', logo: '/img/Grok.png', color: 'yellow' } ].map(provider => ( ))} + + {/* API Key Validation Warnings */} + {(() => { + const chatStatus = getProviderStatus(chatProvider); + const embeddingStatus = getProviderStatus(embeddingProvider); + const missingProviders = []; + + if (chatStatus === 'missing') { + missingProviders.push({ name: chatProvider, type: 'Chat', color: 'green' }); + } + if (embeddingStatus === 'missing' && embeddingProvider !== chatProvider) { + missingProviders.push({ name: embeddingProvider, type: 'Embedding', color: 'purple' }); + } + + if (missingProviders.length > 0) { + return ( +
+
+ + + + + Missing API Key Configuration + +
+

+ Please configure API keys for: {missingProviders.map(p => `${p.name} (${p.type})`).join(', ')} +

+
+ ); + } + return null; + })()} + + + {shouldShowProviderAlert && ( +
+

{providerAlertMessage}

+
+ )} - {/* Provider-specific configuration */} - {ragSettings.LLM_PROVIDER === 'ollama' && ( -
+
+ {/* Context-Aware Model Input */} +
+ {activeSelection === 'chat' ? ( + chatProvider !== 'ollama' ? ( + setRagSettings({ + ...ragSettings, + MODEL_CHOICE: e.target.value + })} + placeholder={getModelPlaceholder(chatProvider)} + accentColor="green" + /> + ) : ( +
+ +
+ Configured via Ollama instance +
+
+ Current: {getDisplayedChatModel(ragSettings) || 'Not selected'} +
+
+ ) + ) : ( + embeddingProvider !== 'ollama' ? ( + setRagSettings({ + ...ragSettings, + EMBEDDING_MODEL: e.target.value + })} + placeholder={getEmbeddingPlaceholder(embeddingProvider)} + accentColor="purple" + /> + ) : ( +
+ +
+ Configured via Ollama instance +
+
+ Current: {getDisplayedEmbeddingModel(ragSettings) || 'Not selected'} +
+
+ ) + )} +
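Note on the server-side status route: this patch moves provider connectivity checks out of the browser, so testProviderConnection() earlier in this file now simply issues GET /api/providers/{provider}/status and treats result.ok && result.reason === 'connected' as healthy. The route's implementation is not shown anywhere in this series; the FastAPI-style Python sketch below only pins down the response contract the frontend assumes, and the environment lookup is a hypothetical stand-in for the real credential_service call.

import os

from fastapi import APIRouter

router = APIRouter(prefix="/api/providers")

@router.get("/{provider}/status")
async def provider_status(provider: str) -> dict:
    # Hypothetical stand-in for the credential lookup; the backend maps
    # providers to *_API_KEY names (OPENAI_API_KEY, GROK_API_KEY, ...).
    api_key = os.environ.get(f"{provider.upper()}_API_KEY")
    if not api_key:
        return {"ok": False, "reason": "missing_api_key"}
    # A production handler would also make a lightweight authenticated probe
    # against the provider before reporting success; omitted in this sketch.
    return {"ok": True, "reason": "connected"}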
+ + {/* Ollama Configuration Gear Icon */} + {((activeSelection === 'chat' && chatProvider === 'ollama') || + (activeSelection === 'embedding' && embeddingProvider === 'ollama')) && ( + + )} + + {/* Save Settings Button */} + +
+ + {/* Expandable Ollama Configuration Container */} + {showOllamaConfig && ((activeSelection === 'chat' && chatProvider === 'ollama') || + (activeSelection === 'embedding' && embeddingProvider === 'ollama')) && ( +
-

Ollama Configuration

-

Configure separate Ollama instances for LLM and embedding models

+

+ {activeSelection === 'chat' ? 'LLM Chat Configuration' : 'Embedding Configuration'} +

+

+ {activeSelection === 'chat' + ? 'Configure Ollama instance for chat completions' + : 'Configure Ollama instance for text embeddings'} +

- {(llmStatus.online && embeddingStatus.online) ? "2 / 2 Online" : - (llmStatus.online || embeddingStatus.online) ? "1 / 2 Online" : "0 / 2 Online"} + {(activeSelection === 'chat' ? llmStatus.online : embeddingStatus.online) + ? "Online" : "Offline"}
- {/* LLM Instance Card */} -
-
+ {/* Configuration Content */} +
+ {activeSelection === 'chat' ? ( + // Chat Model Configuration
-

LLM Instance

-

For chat completions and text generation

-
-
- {llmStatus.checking ? ( - Checking... - ) : llmStatus.online ? ( - Online ({llmStatus.responseTime}ms) - ) : ( - Offline - )} - {llmInstanceConfig.name && llmInstanceConfig.url && ( - - )} -
-
- -
-
{llmInstanceConfig.name && llmInstanceConfig.url ? ( <>
@@ -977,45 +1371,53 @@ export const RAGSettings = ({
Model:
{getDisplayedChatModel(ragSettings)}
- -
+ +
{llmStatus.checking ? ( ) : null} - {ollamaMetrics.loading ? 'Loading...' : `${ollamaMetrics.llmInstanceModels.total} models available`} + {ollamaMetrics.loading ? 'Loading...' : `${ollamaMetrics.llmInstanceModels?.chat || 0} chat models available`} +
+ +
+ + +
) : (
No LLM instance configured
-
Configure an instance to use LLM features
- - {/* Quick setup for single host users */} - {!embeddingInstanceConfig.url && ( -
- -
Sets up both LLM and Embedding for one host
-
- )} - -
+
)}
- - {llmInstanceConfig.name && llmInstanceConfig.url && ( -
- - - -
- )} -
-
- - {/* Embedding Instance Card */} -
-
+ ) : ( + // Embedding Model Configuration
-

Embedding Instance

-

For generating text embeddings and vector search

-
-
- {embeddingStatus.checking ? ( - Checking... - ) : embeddingStatus.online ? ( - Online ({embeddingStatus.responseTime}ms) - ) : ( - Offline - )} - {embeddingInstanceConfig.name && embeddingInstanceConfig.url && ( - - )} -
-
- -
-
{embeddingInstanceConfig.name && embeddingInstanceConfig.url ? ( <>
@@ -1096,22 +1439,50 @@ export const RAGSettings = ({
Model:
{getDisplayedEmbeddingModel(ragSettings)}
- -
+ +
{embeddingStatus.checking ? ( ) : null} - {ollamaMetrics.loading ? 'Loading...' : `${ollamaMetrics.embeddingInstanceModels.total} models available`} + {ollamaMetrics.loading ? 'Loading...' : `${ollamaMetrics.embeddingInstanceModels?.embedding || 0} embedding models available`} +
+ +
+ + +
) : (
No Embedding instance configured
Configure an instance to use embedding features
-
)}
- - {embeddingInstanceConfig.name && embeddingInstanceConfig.url && ( -
- - - -
- )} -
+ )}
- {/* Single Host Indicator */} - {llmInstanceConfig.url && embeddingInstanceConfig.url && - llmInstanceConfig.url === embeddingInstanceConfig.url && ( -
-
- - - - Single Host Setup -
-

- Both LLM and Embedding instances are using the same Ollama host ({llmInstanceConfig.name}) -

-
- )} + {/* Context-Aware Configuration Summary */} +
+

+ {activeSelection === 'chat' ? 'LLM Instance Summary' : 'Embedding Instance Summary'} +

- {/* Configuration Summary */} -
-

Configuration Summary

- - {/* Instance Comparison Table */}
- - + - + + + - - @@ -1219,67 +1556,54 @@ export const RAGSettings = ({ -
Configuration
LLM Instance
Embedding Instance
+                        {activeSelection === 'chat' ? 'LLM Instance' : 'Embedding Instance'}
+
Instance Name - {llmInstanceConfig.name || Not configured} + {activeSelection === 'chat' + ? (llmInstanceConfig.name || Not configured) + : (embeddingInstanceConfig.name || Not configured) + } - {embeddingInstanceConfig.name || Not configured} +
Instance URL + {activeSelection === 'chat' + ? (llmInstanceConfig.url || Not configured) + : (embeddingInstanceConfig.url || Not configured) + }
Status - - {llmStatus.checking ? "Checking..." : llmStatus.online ? `Online (${llmStatus.responseTime}ms)` : "Offline"} - - - - {embeddingStatus.checking ? "Checking..." : embeddingStatus.online ? `Online (${embeddingStatus.responseTime}ms)` : "Offline"} - + {activeSelection === 'chat' ? ( + + {llmStatus.checking ? "Checking..." : llmStatus.online ? `Online (${llmStatus.responseTime}ms)` : "Offline"} + + ) : ( + + {embeddingStatus.checking ? "Checking..." : embeddingStatus.online ? `Online (${embeddingStatus.responseTime}ms)` : "Offline"} + + )}
Selected Model - {getDisplayedChatModel(ragSettings) || No model selected} - - {getDisplayedEmbeddingModel(ragSettings) || No model selected} + {activeSelection === 'chat' + ? (getDisplayedChatModel(ragSettings) || No model selected) + : (getDisplayedEmbeddingModel(ragSettings) || No model selected) + }
{ollamaMetrics.loading ? ( - ) : ( + ) : activeSelection === 'chat' ? (
-
{ollamaMetrics.llmInstanceModels.total} Total Models
- {ollamaMetrics.llmInstanceModels.total > 0 && ( -
- - {ollamaMetrics.llmInstanceModels.chat} Chat - - - {ollamaMetrics.llmInstanceModels.embedding} Embedding - -
- )} + {ollamaMetrics.llmInstanceModels?.chat || 0} + chat models
- )} -
- {ollamaMetrics.loading ? ( - ) : (
-
{ollamaMetrics.embeddingInstanceModels.total} Total Models
- {ollamaMetrics.embeddingInstanceModels.total > 0 && ( -
- - {ollamaMetrics.embeddingInstanceModels.chat} Chat - - - {ollamaMetrics.embeddingInstanceModels.embedding} Embedding - -
- )} + {ollamaMetrics.embeddingInstanceModels?.embedding || 0} + embedding models
)}
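The per-instance counts rendered above (llmInstanceModels?.chat, embeddingInstanceModels?.embedding) come from the backend's Ollama metrics fetch. How models get classified as chat versus embedding is not visible in this series; the sketch below shows one plausible way to derive such counts from Ollama's /api/tags listing, where the name-based heuristic is an assumption rather than Archon's actual classifier.

import httpx

EMBEDDING_HINTS = ("embed", "bge", "minilm")  # assumed name patterns, not Archon's real rule

async def count_models(instance_url: str) -> dict[str, int]:
    """Split an Ollama instance's model list into chat vs. embedding counts."""
    async with httpx.AsyncClient(timeout=10.0) as client:
        resp = await client.get(f"{instance_url.rstrip('/')}/api/tags")
        resp.raise_for_status()
        names = [m["name"] for m in resp.json().get("models", [])]
    embedding = sum(1 for n in names if any(h in n.lower() for h in EMBEDDING_HINTS))
    return {"total": len(names), "chat": len(names) - embedding, "embedding": embedding}

# e.g. asyncio.run(count_models("http://localhost:11434"))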
- - {/* System Readiness Summary */} + + {/* Instance-Specific Readiness */}
-                    System Readiness:
-
-                    {(llmStatus.online && embeddingStatus.online) ? "✓ Ready (Both Instances Online)" :
-                     (llmStatus.online || embeddingStatus.online) ? "⚠ Partial (1 of 2 Online)" : "✗ Not Ready (No Instances Online)"}
+
+                    {activeSelection === 'chat' ? 'LLM Instance Status:' : 'Embedding Instance Status:'}
+
+
+                    {activeSelection === 'chat'
+                      ? (llmStatus.online ? "✓ Ready" : "✗ Not Ready")
+                      : (embeddingStatus.online ? "✓ Ready" : "✗ Not Ready")
+                    }
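The readiness line above, like the Online/Offline badges earlier in this section, ultimately reflects the backend health proxy (/api/ollama/instances/health, queried later in this series). The component only ever reads instance_status[url].is_healthy and .error_message from that payload; the minimal Python sketch below encodes just that contract, with any further fields left out as unknown.

from typing import TypedDict

class InstanceStatus(TypedDict, total=False):
    is_healthy: bool
    error_message: str | None

def is_instance_online(payload: dict, instance_url: str) -> bool:
    """Mirror of the frontend check: data.instance_status?.[url]?.is_healthy."""
    status: InstanceStatus | None = payload.get("instance_status", {}).get(instance_url)
    return bool(status and status.get("is_healthy"))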
- - {/* Overall Model Metrics */} + + {/* Instance-Specific Model Metrics */}
- Overall Available: + Available on this instance: {ollamaMetrics.loading ? ( + ) : activeSelection === 'chat' ? ( + `${ollamaMetrics.llmInstanceModels?.chat || 0} chat models` ) : ( - `${ollamaMetrics.totalModels} total (${ollamaMetrics.chatModels} chat, ${ollamaMetrics.embeddingModels} embedding)` + `${ollamaMetrics.embeddingInstanceModels?.embedding || 0} embedding models` )}
@@ -1289,83 +1613,9 @@ export const RAGSettings = ({
)} - - {shouldShowProviderAlert && ( -
-

{providerAlertMessage}

-
- )} - -
- -
- {/* Model Settings Row - Only show for non-Ollama providers */} - {ragSettings.LLM_PROVIDER !== 'ollama' && ( -
-
- setRagSettings({ - ...ragSettings, - MODEL_CHOICE: e.target.value - })} - placeholder={getModelPlaceholder(ragSettings.LLM_PROVIDER || 'openai')} - accentColor="green" - /> -
-
- setRagSettings({ - ...ragSettings, - EMBEDDING_MODEL: e.target.value - })} - placeholder={getEmbeddingPlaceholder(ragSettings.LLM_PROVIDER || 'openai')} - accentColor="green" - /> -
-
- )} - + {/* Second row: Contextual Embeddings, Max Workers, and description */}
@@ -1778,7 +2028,7 @@ export const RAGSettings = ({ showToast('LLM instance updated successfully', 'success'); // Wait 1 second then automatically test connection and refresh models setTimeout(() => { - manualTestConnection(llmInstanceConfig.url, setLLMStatus, llmInstanceConfig.name); + manualTestConnection(llmInstanceConfig.url, setLLMStatus, llmInstanceConfig.name, 'chat'); fetchOllamaMetrics(); // Refresh model metrics after saving }, 1000); }} @@ -1829,7 +2079,7 @@ export const RAGSettings = ({ showToast('Embedding instance updated successfully', 'success'); // Wait 1 second then automatically test connection and refresh models setTimeout(() => { - manualTestConnection(embeddingInstanceConfig.url, setEmbeddingStatus, embeddingInstanceConfig.name); + manualTestConnection(embeddingInstanceConfig.url, setEmbeddingStatus, embeddingInstanceConfig.name, 'embedding'); fetchOllamaMetrics(); // Refresh model metrics after saving }, 1000); }} diff --git a/archon-ui-main/src/services/credentialsService.ts b/archon-ui-main/src/services/credentialsService.ts index f52d96790e..b2d2da52fa 100644 --- a/archon-ui-main/src/services/credentialsService.ts +++ b/archon-ui-main/src/services/credentialsService.ts @@ -23,6 +23,7 @@ export interface RagSettings { OLLAMA_EMBEDDING_URL?: string; OLLAMA_EMBEDDING_INSTANCE_NAME?: string; EMBEDDING_MODEL?: string; + EMBEDDING_PROVIDER?: string; // Crawling Performance Settings CRAWL_BATCH_SIZE?: number; CRAWL_MAX_CONCURRENT?: number; @@ -75,6 +76,16 @@ import { getApiUrl } from "../config/api"; class CredentialsService { private baseUrl = getApiUrl(); + private notifyCredentialUpdate(keys: string[]): void { + if (typeof window === "undefined") { + return; + } + + window.dispatchEvent( + new CustomEvent("archon:credentials-updated", { detail: { keys } }) + ); + } + private handleCredentialError(error: any, context: string): Error { const errorMessage = error instanceof Error ? 
error.message : String(error); @@ -182,15 +193,16 @@ class CredentialsService { USE_CONTEXTUAL_EMBEDDINGS: false, CONTEXTUAL_EMBEDDINGS_MAX_WORKERS: 3, USE_HYBRID_SEARCH: true, - USE_AGENTIC_RAG: true, - USE_RERANKING: true, - MODEL_CHOICE: "gpt-4.1-nano", - LLM_PROVIDER: "openai", - LLM_BASE_URL: "", - LLM_INSTANCE_NAME: "", - OLLAMA_EMBEDDING_URL: "", - OLLAMA_EMBEDDING_INSTANCE_NAME: "", - EMBEDDING_MODEL: "", + USE_AGENTIC_RAG: true, + USE_RERANKING: true, + MODEL_CHOICE: "gpt-4.1-nano", + LLM_PROVIDER: "openai", + LLM_BASE_URL: "", + LLM_INSTANCE_NAME: "", + OLLAMA_EMBEDDING_URL: "", + OLLAMA_EMBEDDING_INSTANCE_NAME: "", + EMBEDDING_PROVIDER: "openai", + EMBEDDING_MODEL: "", // Crawling Performance Settings defaults CRAWL_BATCH_SIZE: 50, CRAWL_MAX_CONCURRENT: 10, @@ -221,6 +233,7 @@ class CredentialsService { "LLM_INSTANCE_NAME", "OLLAMA_EMBEDDING_URL", "OLLAMA_EMBEDDING_INSTANCE_NAME", + "EMBEDDING_PROVIDER", "EMBEDDING_MODEL", "CRAWL_WAIT_STRATEGY", ].includes(cred.key) @@ -278,7 +291,9 @@ class CredentialsService { throw new Error(`HTTP ${response.status}: ${errorText}`); } - return response.json(); + const updated = await response.json(); + this.notifyCredentialUpdate([credential.key]); + return updated; } catch (error) { throw this.handleCredentialError( error, @@ -302,7 +317,9 @@ class CredentialsService { throw new Error(`HTTP ${response.status}: ${errorText}`); } - return response.json(); + const created = await response.json(); + this.notifyCredentialUpdate([credential.key]); + return created; } catch (error) { throw this.handleCredentialError( error, @@ -321,6 +338,8 @@ class CredentialsService { const errorText = await response.text(); throw new Error(`HTTP ${response.status}: ${errorText}`); } + + this.notifyCredentialUpdate([key]); } catch (error) { throw this.handleCredentialError(error, `Deleting credential '${key}'`); } diff --git a/python/src/server/services/crawling/code_extraction_service.py b/python/src/server/services/crawling/code_extraction_service.py index 1a540f5732..21c11b1aaf 100644 --- a/python/src/server/services/crawling/code_extraction_service.py +++ b/python/src/server/services/crawling/code_extraction_service.py @@ -159,7 +159,7 @@ async def extract_and_store_code_examples( if progress_callback: async def extraction_progress(data: dict): # Scale progress to 0-20% range - raw_progress = data.get("progress", 0) + raw_progress = data.get("progress", data.get("percentage", 0)) scaled_progress = int(raw_progress * 0.2) # 0-20% data["progress"] = scaled_progress await progress_callback(data) @@ -197,7 +197,7 @@ async def extraction_progress(data: dict): if progress_callback: async def summary_progress(data: dict): # Scale progress to 20-90% range - raw_progress = data.get("progress", 0) + raw_progress = data.get("progress", data.get("percentage", 0)) scaled_progress = 20 + int(raw_progress * 0.7) # 20-90% data["progress"] = scaled_progress await progress_callback(data) @@ -216,7 +216,7 @@ async def summary_progress(data: dict): if progress_callback: async def storage_progress(data: dict): # Scale progress to 90-100% range - raw_progress = data.get("progress", 0) + raw_progress = data.get("progress", data.get("percentage", 0)) scaled_progress = 90 + int(raw_progress * 0.1) # 90-100% data["progress"] = scaled_progress await progress_callback(data) diff --git a/python/src/server/services/credential_service.py b/python/src/server/services/credential_service.py index 62fbb47ac8..e39f793062 100644 --- a/python/src/server/services/credential_service.py +++ 
b/python/src/server/services/credential_service.py @@ -36,42 +36,6 @@ class CredentialItem: description: str | None = None -def _detect_embedding_provider_from_model(embedding_model: str) -> str: - """ - Detect the appropriate embedding provider based on model name. - - Args: - embedding_model: The embedding model name - - Returns: - Provider name: 'google', 'openai', or 'openai' (default) - """ - if not embedding_model: - return "openai" # Default - - model_lower = embedding_model.lower() - - # Google embedding models - google_patterns = [ - "text-embedding-004", - "text-embedding-005", - "text-multilingual-embedding", - "gemini-embedding", - "multimodalembedding" - ] - - if any(pattern in model_lower for pattern in google_patterns): - return "google" - - # OpenAI embedding models (and default for unknown) - openai_patterns = [ - "text-embedding-ada-002", - "text-embedding-3-small", - "text-embedding-3-large" - ] - - # Default to OpenAI for OpenAI models or unknown models - return "openai" class CredentialService: @@ -475,26 +439,17 @@ async def get_active_provider(self, service_type: str = "llm") -> dict[str, Any] # Get the selected provider based on service type if service_type == "embedding": - # Get the LLM provider setting to determine embedding provider - llm_provider = rag_settings.get("LLM_PROVIDER", "openai") - embedding_model = rag_settings.get("EMBEDDING_MODEL", "text-embedding-3-small") - - # Determine embedding provider based on LLM provider - if llm_provider == "google": - provider = "google" - elif llm_provider == "ollama": - provider = "ollama" - elif llm_provider == "openrouter": - # OpenRouter supports both OpenAI and Google embedding models - provider = _detect_embedding_provider_from_model(embedding_model) - elif llm_provider in ["anthropic", "grok"]: - # Anthropic and Grok support both OpenAI and Google embedding models - provider = _detect_embedding_provider_from_model(embedding_model) + # First check for explicit EMBEDDING_PROVIDER setting (new split provider approach) + explicit_embedding_provider = rag_settings.get("EMBEDDING_PROVIDER") + + if explicit_embedding_provider and explicit_embedding_provider != "": + # Use the explicitly set embedding provider + provider = explicit_embedding_provider + logger.debug(f"Using explicit embedding provider: '{provider}'") else: - # Default case (openai, or unknown providers) + # Fall back to OpenAI as default embedding provider for backward compatibility provider = "openai" - - logger.debug(f"Determined embedding provider '{provider}' from LLM provider '{llm_provider}' and embedding model '{embedding_model}'") + logger.debug(f"No explicit embedding provider set, defaulting to OpenAI for backward compatibility") else: provider = rag_settings.get("LLM_PROVIDER", "openai") # Ensure provider is a valid string, not a boolean or other type diff --git a/python/src/server/services/embeddings/embedding_service.py b/python/src/server/services/embeddings/embedding_service.py index 4f825f1dc9..b5dc578f95 100644 --- a/python/src/server/services/embeddings/embedding_service.py +++ b/python/src/server/services/embeddings/embedding_service.py @@ -6,14 +6,16 @@ import asyncio import os +from abc import ABC, abstractmethod from dataclasses import dataclass, field from typing import Any +import httpx import openai from ...config.logfire_config import safe_span, search_logger from ..credential_service import credential_service -from ..llm_provider_service import get_embedding_model, get_llm_client, is_google_embedding_model, 
is_openai_embedding_model +from ..llm_provider_service import get_embedding_model, get_llm_client from ..threading_service import get_threading_service from .embedding_exceptions import ( EmbeddingAPIError, @@ -64,6 +66,120 @@ def total_requested(self) -> int: return self.success_count + self.failure_count +class EmbeddingProviderAdapter(ABC): + """Adapter interface for embedding providers.""" + + @abstractmethod + async def create_embeddings( + self, + texts: list[str], + model: str, + dimensions: int | None = None, + ) -> list[list[float]]: + """Create embeddings for the given texts.""" + + +class OpenAICompatibleEmbeddingAdapter(EmbeddingProviderAdapter): + """Adapter for providers using the OpenAI embeddings API shape.""" + + def __init__(self, client: Any): + self._client = client + + async def create_embeddings( + self, + texts: list[str], + model: str, + dimensions: int | None = None, + ) -> list[list[float]]: + request_args: dict[str, Any] = { + "model": model, + "input": texts, + } + if dimensions is not None: + request_args["dimensions"] = dimensions + + response = await self._client.embeddings.create(**request_args) + return [item.embedding for item in response.data] + + +class GoogleEmbeddingAdapter(EmbeddingProviderAdapter): + """Adapter for Google's native embedding endpoint.""" + + async def create_embeddings( + self, + texts: list[str], + model: str, + dimensions: int | None = None, + ) -> list[list[float]]: + try: + if dimensions is not None: + _ = dimensions # Maintains adapter signature; Google controls dimensions server-side. + + google_api_key = await credential_service.get_credential("GOOGLE_API_KEY") + if not google_api_key: + raise EmbeddingAPIError("Google API key not found") + + async with httpx.AsyncClient(timeout=30.0) as http_client: + embeddings = await asyncio.gather( + *( + self._fetch_single_embedding(http_client, google_api_key, model, text) + for text in texts + ) + ) + + return embeddings + + except httpx.HTTPStatusError as error: + error_content = error.response.text + search_logger.error( + f"Google embedding API returned {error.response.status_code} - {error_content}", + exc_info=True, + ) + raise EmbeddingAPIError( + f"Google embedding API error: {error.response.status_code} - {error_content}", + original_error=error, + ) from error + except Exception as error: + search_logger.error(f"Error calling Google embedding API: {error}", exc_info=True) + raise EmbeddingAPIError( + f"Google embedding error: {str(error)}", original_error=error + ) from error + + async def _fetch_single_embedding( + self, + http_client: httpx.AsyncClient, + api_key: str, + model: str, + text: str, + ) -> list[float]: + url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:embedContent" + headers = { + "x-goog-api-key": api_key, + "Content-Type": "application/json", + } + payload = { + "model": f"models/{model}", + "content": {"parts": [{"text": text}]}, + } + + response = await http_client.post(url, headers=headers, json=payload) + response.raise_for_status() + + result = response.json() + embedding = result.get("embedding", {}) + values = embedding.get("values") if isinstance(embedding, dict) else None + if not isinstance(values, list): + raise EmbeddingAPIError(f"Invalid embedding payload from Google: {result}") + + return values + + +def _get_embedding_adapter(provider: str, client: Any) -> EmbeddingProviderAdapter: + provider_name = (provider or "").lower() + if provider_name == "google": + return GoogleEmbeddingAdapter() + return 
OpenAICompatibleEmbeddingAdapter(client) + # Provider-aware client factory get_openai_client = get_llm_client @@ -185,22 +301,14 @@ async def create_embeddings_batch( "create_embeddings_batch", text_count=len(texts), total_chars=sum(len(t) for t in texts) ) as span: try: - # Intelligent embedding provider routing based on model type - # Get the embedding model first to determine the correct provider - embedding_model = await get_embedding_model(provider=provider) - - # Route to correct provider based on model type - if is_google_embedding_model(embedding_model): - embedding_provider = "google" - search_logger.info(f"Routing to Google for embedding model: {embedding_model}") - elif is_openai_embedding_model(embedding_model) or "openai/" in embedding_model.lower(): - embedding_provider = "openai" - search_logger.info(f"Routing to OpenAI for embedding model: {embedding_model}") - else: - # Keep original provider for ollama and other providers - embedding_provider = provider - search_logger.info(f"Using original provider '{provider}' for embedding model: {embedding_model}") + embedding_config = await credential_service.get_active_provider(service_type="embedding") + embedding_provider = embedding_config.get("provider") + + if not embedding_provider: + search_logger.error("No embedding provider configured") + raise ValueError("No embedding provider configured. Please set EMBEDDING_PROVIDER environment variable.") + search_logger.info(f"Using embedding provider: '{embedding_provider}' (from EMBEDDING_PROVIDER setting)") async with get_llm_client(provider=embedding_provider, use_embedding_provider=True) as client: # Load batch size and dimensions from settings try: @@ -215,6 +323,8 @@ async def create_embeddings_batch( embedding_dimensions = 1536 total_tokens_used = 0 + adapter = _get_embedding_adapter(embedding_provider, client) + dimensions_to_use = embedding_dimensions if embedding_dimensions > 0 else None for i in range(0, len(texts), batch_size): batch = texts[i : i + batch_size] @@ -243,16 +353,14 @@ async def rate_limit_callback(data: dict): try: # Create embeddings for this batch embedding_model = await get_embedding_model(provider=embedding_provider) - - response = await client.embeddings.create( - model=embedding_model, - input=batch, - dimensions=embedding_dimensions, + embeddings = await adapter.create_embeddings( + batch, + embedding_model, + dimensions=dimensions_to_use, ) - # Add successful embeddings - for text, item in zip(batch, response.data, strict=False): - result.add_success(item.embedding, text) + for text, vector in zip(batch, embeddings, strict=False): + result.add_success(vector, text) break # Success, exit retry loop From 94e323077d2dfd2e8106f495ca3e3ed10a5ed612 Mon Sep 17 00:00:00 2001 From: Chillbruhhh Date: Thu, 25 Sep 2025 07:50:48 -0500 Subject: [PATCH 16/28] added warning labels and updated ollama health checks --- .../src/components/settings/RAGSettings.tsx | 338 +++++++++++++++--- 1 file changed, 295 insertions(+), 43 deletions(-) diff --git a/archon-ui-main/src/components/settings/RAGSettings.tsx b/archon-ui-main/src/components/settings/RAGSettings.tsx index 432040ccdb..f9d3ce5301 100644 --- a/archon-ui-main/src/components/settings/RAGSettings.tsx +++ b/archon-ui-main/src/components/settings/RAGSettings.tsx @@ -100,7 +100,10 @@ const providerAlertStyles: Record = { grok: 'bg-yellow-50 dark:bg-yellow-900/20 border-yellow-200 dark:border-yellow-800 text-yellow-800 dark:text-yellow-300', }; -const providerAlertMessages: Record = { +const providerWarningAlertStyle 
= 'bg-yellow-50 dark:bg-yellow-900/20 border-yellow-200 dark:border-yellow-800 text-yellow-800 dark:text-yellow-300'; +const providerErrorAlertStyle = 'bg-red-50 dark:bg-red-900/20 border-red-200 dark:border-red-800 text-red-800 dark:text-red-300'; + +const defaultProviderAlertMessages: Record = { openai: 'Configure your OpenAI API key in the credentials section to use GPT models.', google: 'Configure your Google API key in the credentials section to use Gemini models.', openrouter: 'Configure your OpenRouter API key in the credentials section to use models.', @@ -343,6 +346,55 @@ export const RAGSettings = ({ return () => clearInterval(interval); }, [ragSettings.LLM_PROVIDER]); // Only restart interval if provider changes + useEffect(() => { + const needsDetection = chatProvider === 'ollama' || embeddingProvider === 'ollama'; + + if (!needsDetection) { + setOllamaServerStatus('unknown'); + return; + } + + const baseUrl = ( + ragSettings.LLM_BASE_URL?.trim() || + llmInstanceConfig.url?.trim() || + ragSettings.OLLAMA_EMBEDDING_URL?.trim() || + embeddingInstanceConfig.url?.trim() || + DEFAULT_OLLAMA_URL + ); + + const normalizedUrl = baseUrl.replace('/v1', '').replace(/\/$/, ''); + + let cancelled = false; + + (async () => { + try { + const response = await fetch( + `/api/ollama/instances/health?instance_urls=${encodeURIComponent(normalizedUrl)}`, + { method: 'GET', headers: { Accept: 'application/json' } } + ); + + if (cancelled) return; + + if (!response.ok) { + setOllamaServerStatus('offline'); + return; + } + + const data = await response.json(); + const instanceStatus = data.instance_status?.[normalizedUrl]; + setOllamaServerStatus(instanceStatus?.is_healthy ? 'online' : 'offline'); + } catch (error) { + if (!cancelled) { + setOllamaServerStatus('offline'); + } + } + })(); + + return () => { + cancelled = true; + }; + }, [chatProvider, embeddingProvider, ragSettings.LLM_BASE_URL, ragSettings.OLLAMA_EMBEDDING_URL, llmInstanceConfig.url, embeddingInstanceConfig.url]); + // Sync independent provider states with ragSettings (one-way: ragSettings -> local state) useEffect(() => { if (ragSettings.LLM_PROVIDER && ragSettings.LLM_PROVIDER !== chatProvider) { @@ -356,6 +408,11 @@ export const RAGSettings = ({ } }, [ragSettings.EMBEDDING_PROVIDER]); // Remove embeddingProvider dependency to avoid loops + useEffect(() => { + setOllamaManualConfirmed(false); + setOllamaServerStatus('unknown'); + }, [ragSettings.LLM_BASE_URL, ragSettings.OLLAMA_EMBEDDING_URL, chatProvider, embeddingProvider]); + // Update ragSettings when independent providers change (one-way: local state -> ragSettings) // Split the β€œfirst‐run” guard into two refs so chat and embedding effects don’t interfere. 
const updateChatRagSettingsRef = useRef(false); @@ -396,6 +453,8 @@ export const RAGSettings = ({ const [providerConnectionStatus, setProviderConnectionStatus] = useState<{ [key: string]: { connected: boolean; checking: boolean; lastChecked?: Date } }>({}); + const [ollamaServerStatus, setOllamaServerStatus] = useState<'unknown' | 'online' | 'offline'>('unknown'); + const [ollamaManualConfirmed, setOllamaManualConfirmed] = useState(false); useEffect(() => { return () => { @@ -537,12 +596,14 @@ export const RAGSettings = ({ }; // Manual test function with user feedback using backend proxy - const manualTestConnection = async ( +const manualTestConnection = async ( url: string, setStatus: React.Dispatch>, instanceName: string, - context?: 'chat' | 'embedding' + context?: 'chat' | 'embedding', + options?: { suppressToast?: boolean } ): Promise => { + const suppressToast = options?.suppressToast ?? false; setStatus(prev => ({ ...prev, checking: true })); const startTime = Date.now(); @@ -582,7 +643,9 @@ export const RAGSettings = ({ modelType = 'embedding models'; } - showToast(`${instanceName} connection successful: ${modelCount} ${modelType} available (${responseTime}ms)`, 'success'); + if (!suppressToast) { + showToast(`${instanceName} connection successful: ${modelCount} ${modelType} available (${responseTime}ms)`, 'success'); + } // Scenario 2: Manual "Test Connection" button - refresh Ollama metrics if Ollama provider is selected if (ragSettings.LLM_PROVIDER === 'ollama') { @@ -593,21 +656,27 @@ export const RAGSettings = ({ return true; } else { setStatus({ online: false, responseTime: null, checking: false }); - showToast(`${instanceName} connection failed: ${instanceStatus?.error_message || 'Instance is not healthy'}`, 'error'); + if (!suppressToast) { + showToast(`${instanceName} connection failed: ${instanceStatus?.error_message || 'Instance is not healthy'}`, 'error'); + } return false; } } else { setStatus({ online: false, responseTime: null, checking: false }); - showToast(`${instanceName} connection failed: Backend proxy error (HTTP ${response.status})`, 'error'); + if (!suppressToast) { + showToast(`${instanceName} connection failed: Backend proxy error (HTTP ${response.status})`, 'error'); + } return false; } } catch (error: any) { setStatus({ online: false, responseTime: null, checking: false }); - if (error.name === 'AbortError') { - showToast(`${instanceName} connection failed: Request timeout (>15s)`, 'error'); - } else { - showToast(`${instanceName} connection failed: ${error.message || 'Unknown error'}`, 'error'); + if (!suppressToast) { + if (error.name === 'AbortError') { + showToast(`${instanceName} connection failed: Request timeout (>15s)`, 'error'); + } else { + showToast(`${instanceName} connection failed: ${error.message || 'Unknown error'}`, 'error'); + } } return false; @@ -854,10 +923,21 @@ export const RAGSettings = ({ return googleConnected ? 
'configured' : 'missing'; case 'ollama': - if (llmStatus.checking || embeddingStatus.checking) return 'partial'; - if (llmStatus.online && embeddingStatus.online) return 'configured'; - if (llmStatus.online || embeddingStatus.online) return 'partial'; - return 'missing'; + { + if (ollamaManualConfirmed || llmStatus.online || embeddingStatus.online) { + return 'configured'; + } + + if (ollamaServerStatus === 'online') { + return 'partial'; + } + + if (ollamaServerStatus === 'offline') { + return 'missing'; + } + + return 'missing'; + } case 'anthropic': // Use server-side connection status const anthropicConnected = providerConnectionStatus['anthropic']?.connected || false; @@ -885,18 +965,29 @@ export const RAGSettings = ({ ? (ragSettings.LLM_PROVIDER as ProviderKey) : undefined; const selectedProviderStatus = selectedProviderKey ? getProviderStatus(selectedProviderKey) : undefined; - const shouldShowProviderAlert = Boolean( - selectedProviderKey && selectedProviderStatus === 'missing' - ); - const providerAlertClassName = shouldShowProviderAlert && selectedProviderKey - ? providerAlertStyles[selectedProviderKey] - : ''; - const providerAlertMessage = shouldShowProviderAlert && selectedProviderKey - ? providerAlertMessages[selectedProviderKey] - : ''; + + let providerAlertMessage: string | null = null; + let providerAlertClassName = ''; + + if (selectedProviderKey === 'ollama') { + if (selectedProviderStatus === 'missing' || ollamaServerStatus === 'offline' || ollamaServerStatus === 'unknown') { + providerAlertMessage = 'Local Ollama service is not running. Start the Ollama server and ensure it is reachable at the configured URL.'; + providerAlertClassName = providerErrorAlertStyle; + } else if (selectedProviderStatus === 'partial') { + providerAlertMessage = 'Local Ollama service detected. Click "Test Connection" to confirm model availability.'; + providerAlertClassName = providerWarningAlertStyle; + } + } else if (selectedProviderKey && selectedProviderStatus === 'missing') { + providerAlertMessage = defaultProviderAlertMessages[selectedProviderKey] ?? null; + providerAlertClassName = providerAlertStyles[selectedProviderKey] ?? ''; + } + + const shouldShowProviderAlert = Boolean(providerAlertMessage); + const initialChatOllamaTestRef = useRef(false); useEffect(() => { if (chatProvider !== 'ollama') { + initialChatOllamaTestRef.current = false; return; } @@ -918,7 +1009,13 @@ export const RAGSettings = ({ } const runTest = async () => { - const success = await manualTestConnection(baseUrl, setLLMStatus, instanceName, 'chat'); + const success = await manualTestConnection( + baseUrl, + setLLMStatus, + instanceName, + 'chat', + { suppressToast: true } + ); if (!success && chatProvider === 'ollama') { llmRetryTimeoutRef.current = window.setTimeout(runTest, 5000); @@ -937,8 +1034,44 @@ export const RAGSettings = ({ // eslint-disable-next-line react-hooks/exhaustive-deps }, [chatProvider, ragSettings.LLM_BASE_URL, ragSettings.LLM_INSTANCE_NAME]); + useEffect(() => { + if (chatProvider !== 'ollama') { + initialChatOllamaTestRef.current = false; + return; + } + + if (initialChatOllamaTestRef.current) { + return; + } + + initialChatOllamaTestRef.current = true; + + const baseUrl = (ragSettings.LLM_BASE_URL && ragSettings.LLM_BASE_URL.trim().length > 0) + ? ragSettings.LLM_BASE_URL.trim() + : (llmInstanceConfig.url && llmInstanceConfig.url.trim().length > 0) + ? llmInstanceConfig.url.trim() + : DEFAULT_OLLAMA_URL; + + const instanceName = llmInstanceConfig.name?.trim().length + ? 
llmInstanceConfig.name + : 'LLM Instance'; + + setLLMStatus(prev => ({ ...prev, checking: true })); + setTimeout(() => { + manualTestConnection( + baseUrl, + setLLMStatus, + instanceName, + 'chat', + { suppressToast: true } + ); + }, 200); + }, [chatProvider, ragSettings.LLM_BASE_URL, llmInstanceConfig.url, llmInstanceConfig.name]); + + const initialEmbeddingOllamaTestRef = useRef(false); useEffect(() => { if (embeddingProvider !== 'ollama') { + initialEmbeddingOllamaTestRef.current = false; return; } @@ -960,7 +1093,13 @@ export const RAGSettings = ({ } const runTest = async () => { - const success = await manualTestConnection(baseUrl, setEmbeddingStatus, instanceName, 'embedding'); + const success = await manualTestConnection( + baseUrl, + setEmbeddingStatus, + instanceName, + 'embedding', + { suppressToast: true } + ); if (!success && embeddingProvider === 'ollama') { embeddingRetryTimeoutRef.current = window.setTimeout(runTest, 5000); @@ -979,6 +1118,40 @@ export const RAGSettings = ({ // eslint-disable-next-line react-hooks/exhaustive-deps }, [embeddingProvider, ragSettings.OLLAMA_EMBEDDING_URL, ragSettings.OLLAMA_EMBEDDING_INSTANCE_NAME]); + useEffect(() => { + if (embeddingProvider !== 'ollama') { + initialEmbeddingOllamaTestRef.current = false; + return; + } + + if (initialEmbeddingOllamaTestRef.current) { + return; + } + + initialEmbeddingOllamaTestRef.current = true; + + const baseUrl = (ragSettings.OLLAMA_EMBEDDING_URL && ragSettings.OLLAMA_EMBEDDING_URL.trim().length > 0) + ? ragSettings.OLLAMA_EMBEDDING_URL.trim() + : (embeddingInstanceConfig.url && embeddingInstanceConfig.url.trim().length > 0) + ? embeddingInstanceConfig.url.trim() + : DEFAULT_OLLAMA_URL; + + const instanceName = embeddingInstanceConfig.name?.trim().length + ? embeddingInstanceConfig.name + : 'Embedding Instance'; + + setEmbeddingStatus(prev => ({ ...prev, checking: true })); + setTimeout(() => { + manualTestConnection( + baseUrl, + setEmbeddingStatus, + instanceName, + 'embedding', + { suppressToast: true } + ); + }, 250); + }, [embeddingProvider, ragSettings.OLLAMA_EMBEDDING_URL, embeddingInstanceConfig.url, embeddingInstanceConfig.name]); + // Test Ollama connectivity when Settings page loads (scenario 4: page load) // This useEffect is placed after function definitions to ensure access to manualTestConnection useEffect(() => { @@ -995,33 +1168,70 @@ export const RAGSettings = ({ // Only run once when data is properly loaded and not run before if (!hasRunInitialTestRef.current && ragSettings.LLM_PROVIDER === 'ollama' && - Object.keys(ragSettings).length > 0 && - (llmInstanceConfig.url || embeddingInstanceConfig.url)) { + Object.keys(ragSettings).length > 0) { hasRunInitialTestRef.current = true; console.log('πŸ”„ Settings page loaded with Ollama - Testing connectivity'); - - // Test LLM instance if configured (use URL presence as the key indicator) - // Only test if URL is explicitly set in ragSettings, not just using the default - if (llmInstanceConfig.url && ragSettings.LLM_BASE_URL) { + + // Test LLM instance if a URL is available (either saved or default) + if (llmInstanceConfig.url) { setTimeout(() => { const instanceName = llmInstanceConfig.name || 'LLM Instance'; console.log('πŸ” Testing LLM instance on page load:', instanceName, llmInstanceConfig.url); - manualTestConnection(llmInstanceConfig.url, setLLMStatus, instanceName, 'chat'); + manualTestConnection( + llmInstanceConfig.url, + setLLMStatus, + instanceName, + 'chat', + { suppressToast: true } + ); }, 1000); // Increased delay to ensure 
component is fully ready } - + // If no saved URL, run tests against default endpoint + else { + setTimeout(() => { + const defaultInstanceName = 'Local Ollama (Default)'; + console.log('πŸ” Testing default Ollama chat instance on page load:', DEFAULT_OLLAMA_URL); + manualTestConnection( + DEFAULT_OLLAMA_URL, + setLLMStatus, + defaultInstanceName, + 'chat', + { suppressToast: true } + ); + }, 1000); + } + // Test Embedding instance if configured and different from LLM instance - // Only test if URL is explicitly set in ragSettings, not just using the default - if (embeddingInstanceConfig.url && ragSettings.OLLAMA_EMBEDDING_URL && + if (embeddingInstanceConfig.url && embeddingInstanceConfig.url !== llmInstanceConfig.url) { setTimeout(() => { const instanceName = embeddingInstanceConfig.name || 'Embedding Instance'; console.log('πŸ” Testing Embedding instance on page load:', instanceName, embeddingInstanceConfig.url); - manualTestConnection(embeddingInstanceConfig.url, setEmbeddingStatus, instanceName, 'embedding'); + manualTestConnection( + embeddingInstanceConfig.url, + setEmbeddingStatus, + instanceName, + 'embedding', + { suppressToast: true } + ); }, 1500); // Stagger the tests } - + // If embedding provider is also Ollama but no specific URL is set, test default as fallback + else if (embeddingProvider === 'ollama' && !embeddingInstanceConfig.url) { + setTimeout(() => { + const defaultEmbeddingName = 'Local Ollama (Default)'; + console.log('πŸ” Testing default Ollama embedding instance on page load:', DEFAULT_OLLAMA_URL); + manualTestConnection( + DEFAULT_OLLAMA_URL, + setEmbeddingStatus, + defaultEmbeddingName, + 'embedding', + { suppressToast: true } + ); + }, 1500); + } + // Fetch Ollama metrics after testing connections setTimeout(() => { console.log('πŸ“Š Fetching Ollama metrics on page load'); @@ -1188,10 +1398,14 @@ export const RAGSettings = ({ const embeddingStatus = getProviderStatus(embeddingProvider); const missingProviders = []; - if (chatStatus === 'missing') { + if (chatStatus === 'missing' && chatProvider !== selectedProviderKey) { missingProviders.push({ name: chatProvider, type: 'Chat', color: 'green' }); } - if (embeddingStatus === 'missing' && embeddingProvider !== chatProvider) { + if ( + embeddingStatus === 'missing' && + embeddingProvider !== chatProvider && + embeddingProvider !== selectedProviderKey + ) { missingProviders.push({ name: embeddingProvider, type: 'Embedding', color: 'purple' }); } @@ -1394,7 +1608,17 @@ export const RAGSettings = ({ size="sm" accentColor="green" className="text-white border-emerald-400 hover:bg-emerald-500/10" - onClick={() => manualTestConnection(llmInstanceConfig.url, setLLMStatus, llmInstanceConfig.name, 'chat')} + onClick={async () => { + const success = await manualTestConnection( + llmInstanceConfig.url, + setLLMStatus, + llmInstanceConfig.name, + 'chat' + ); + + setOllamaManualConfirmed(success); + setOllamaServerStatus(success ? 'online' : 'offline'); + }} disabled={llmStatus.checking} > {llmStatus.checking ? 'Testing...' 
: 'Test Connection'}
@@ -1460,7 +1684,17 @@ export const RAGSettings = ({
                   variant="outline"
                   size="sm"
                   className="text-purple-300 border-purple-400 hover:bg-purple-500/10"
-                  onClick={() => manualTestConnection(embeddingInstanceConfig.url, setEmbeddingStatus, embeddingInstanceConfig.name, 'embedding')}
+                  onClick={async () => {
+                    const success = await manualTestConnection(
+                      embeddingInstanceConfig.url,
+                      setEmbeddingStatus,
+                      embeddingInstanceConfig.name,
+                      'embedding'
+                    );
+
+                    setOllamaManualConfirmed(success);
+                    setOllamaServerStatus(success ? 'online' : 'offline');
+                  }}
                   disabled={embeddingStatus.checking}
                 >
                   {embeddingStatus.checking ? 'Testing...' : 'Test Connection'}
@@ -2028,7 +2262,16 @@ export const RAGSettings = ({
                     showToast('LLM instance updated successfully', 'success');
                     // Wait 1 second then automatically test connection and refresh models
                     setTimeout(() => {
-                      manualTestConnection(llmInstanceConfig.url, setLLMStatus, llmInstanceConfig.name, 'chat');
+                      manualTestConnection(
+                        llmInstanceConfig.url,
+                        setLLMStatus,
+                        llmInstanceConfig.name,
+                        'chat',
+                        { suppressToast: true }
+                      ).then((success) => {
+                        setOllamaManualConfirmed(success);
+                        setOllamaServerStatus(success ? 'online' : 'offline');
+                      });
                       fetchOllamaMetrics(); // Refresh model metrics after saving
                     }, 1000);
                   }}
@@ -2079,7 +2322,16 @@ export const RAGSettings = ({
                     showToast('Embedding instance updated successfully', 'success');
                     // Wait 1 second then automatically test connection and refresh models
                     setTimeout(() => {
-                      manualTestConnection(embeddingInstanceConfig.url, setEmbeddingStatus, embeddingInstanceConfig.name, 'embedding');
+                      manualTestConnection(
+                        embeddingInstanceConfig.url,
+                        setEmbeddingStatus,
+                        embeddingInstanceConfig.name,
+                        'embedding',
+                        { suppressToast: true }
+                      ).then((success) => {
+                        setOllamaManualConfirmed(success);
+                        setOllamaServerStatus(success ? 'online' : 'offline');
+                      });
                       fetchOllamaMetrics(); // Refresh model metrics after saving
                     }, 1000);
                   }}

From 810c80f761dbffd5b2715f8b12157a4b645e5ab1 Mon Sep 17 00:00:00 2001
From: Chillbruhhh
Date: Thu, 25 Sep 2025 08:01:08 -0500
Subject: [PATCH 17/28] ready for review, fixed some error warnings and consolidated Ollama status health checks

---
 .../src/components/settings/RAGSettings.tsx | 148 ++++++------
 1 file changed, 48 insertions(+), 100 deletions(-)

diff --git a/archon-ui-main/src/components/settings/RAGSettings.tsx b/archon-ui-main/src/components/settings/RAGSettings.tsx
index f9d3ce5301..9b9c1b2f2a 100644
--- a/archon-ui-main/src/components/settings/RAGSettings.tsx
+++ b/archon-ui-main/src/components/settings/RAGSettings.tsx
@@ -970,10 +970,10 @@ const manualTestConnection = async (
   let providerAlertClassName = '';

   if (selectedProviderKey === 'ollama') {
-    if (selectedProviderStatus === 'missing' || ollamaServerStatus === 'offline' || ollamaServerStatus === 'unknown') {
+    if (ollamaServerStatus === 'offline') {
       providerAlertMessage = 'Local Ollama service is not running. Start the Ollama server and ensure it is reachable at the configured URL.';
       providerAlertClassName = providerErrorAlertStyle;
-    } else if (selectedProviderStatus === 'partial') {
+    } else if (selectedProviderStatus === 'partial' && ollamaServerStatus === 'online') {
       providerAlertMessage = 'Local Ollama service detected. 
Click "Test Connection" to confirm model availability.'; providerAlertClassName = providerWarningAlertStyle; } @@ -984,31 +984,34 @@ const manualTestConnection = async ( const shouldShowProviderAlert = Boolean(providerAlertMessage); - const initialChatOllamaTestRef = useRef(false); useEffect(() => { if (chatProvider !== 'ollama') { - initialChatOllamaTestRef.current = false; + if (llmRetryTimeoutRef.current) { + clearTimeout(llmRetryTimeoutRef.current); + llmRetryTimeoutRef.current = null; + } return; } - const baseUrl = (ragSettings.LLM_BASE_URL && ragSettings.LLM_BASE_URL.trim().length > 0) - ? ragSettings.LLM_BASE_URL.trim() - : DEFAULT_OLLAMA_URL; + const baseUrl = ( + ragSettings.LLM_BASE_URL?.trim() || + llmInstanceConfig.url?.trim() || + DEFAULT_OLLAMA_URL + ); if (!baseUrl) { return; } - const instanceName = (ragSettings.LLM_INSTANCE_NAME && ragSettings.LLM_INSTANCE_NAME.trim().length > 0) - ? ragSettings.LLM_INSTANCE_NAME.trim() + const instanceName = llmInstanceConfig.name?.trim().length + ? llmInstanceConfig.name : 'LLM Instance'; - if (llmRetryTimeoutRef.current) { - clearTimeout(llmRetryTimeoutRef.current); - llmRetryTimeoutRef.current = null; - } + let cancelled = false; const runTest = async () => { + if (cancelled) return; + const success = await manualTestConnection( baseUrl, setLLMStatus, @@ -1017,82 +1020,56 @@ const manualTestConnection = async ( { suppressToast: true } ); - if (!success && chatProvider === 'ollama') { + if (!success && chatProvider === 'ollama' && !cancelled) { llmRetryTimeoutRef.current = window.setTimeout(runTest, 5000); } }; + if (llmRetryTimeoutRef.current) { + clearTimeout(llmRetryTimeoutRef.current); + llmRetryTimeoutRef.current = null; + } + setLLMStatus(prev => ({ ...prev, checking: true })); - llmRetryTimeoutRef.current = window.setTimeout(runTest, 100); + runTest(); return () => { + cancelled = true; if (llmRetryTimeoutRef.current) { clearTimeout(llmRetryTimeoutRef.current); llmRetryTimeoutRef.current = null; } }; - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [chatProvider, ragSettings.LLM_BASE_URL, ragSettings.LLM_INSTANCE_NAME]); - - useEffect(() => { - if (chatProvider !== 'ollama') { - initialChatOllamaTestRef.current = false; - return; - } - - if (initialChatOllamaTestRef.current) { - return; - } - - initialChatOllamaTestRef.current = true; - - const baseUrl = (ragSettings.LLM_BASE_URL && ragSettings.LLM_BASE_URL.trim().length > 0) - ? ragSettings.LLM_BASE_URL.trim() - : (llmInstanceConfig.url && llmInstanceConfig.url.trim().length > 0) - ? llmInstanceConfig.url.trim() - : DEFAULT_OLLAMA_URL; - - const instanceName = llmInstanceConfig.name?.trim().length - ? 
llmInstanceConfig.name - : 'LLM Instance'; - - setLLMStatus(prev => ({ ...prev, checking: true })); - setTimeout(() => { - manualTestConnection( - baseUrl, - setLLMStatus, - instanceName, - 'chat', - { suppressToast: true } - ); - }, 200); - }, [chatProvider, ragSettings.LLM_BASE_URL, llmInstanceConfig.url, llmInstanceConfig.name]); + }, [chatProvider, ragSettings.LLM_BASE_URL, ragSettings.LLM_INSTANCE_NAME, llmInstanceConfig.url, llmInstanceConfig.name]); - const initialEmbeddingOllamaTestRef = useRef(false); useEffect(() => { if (embeddingProvider !== 'ollama') { - initialEmbeddingOllamaTestRef.current = false; + if (embeddingRetryTimeoutRef.current) { + clearTimeout(embeddingRetryTimeoutRef.current); + embeddingRetryTimeoutRef.current = null; + } return; } - const baseUrl = (ragSettings.OLLAMA_EMBEDDING_URL && ragSettings.OLLAMA_EMBEDDING_URL.trim().length > 0) - ? ragSettings.OLLAMA_EMBEDDING_URL.trim() - : DEFAULT_OLLAMA_URL; + const baseUrl = ( + ragSettings.OLLAMA_EMBEDDING_URL?.trim() || + embeddingInstanceConfig.url?.trim() || + DEFAULT_OLLAMA_URL + ); if (!baseUrl) { return; } - const instanceName = (ragSettings.OLLAMA_EMBEDDING_INSTANCE_NAME && ragSettings.OLLAMA_EMBEDDING_INSTANCE_NAME.trim().length > 0) - ? ragSettings.OLLAMA_EMBEDDING_INSTANCE_NAME.trim() + const instanceName = embeddingInstanceConfig.name?.trim().length + ? embeddingInstanceConfig.name : 'Embedding Instance'; - if (embeddingRetryTimeoutRef.current) { - clearTimeout(embeddingRetryTimeoutRef.current); - embeddingRetryTimeoutRef.current = null; - } + let cancelled = false; const runTest = async () => { + if (cancelled) return; + const success = await manualTestConnection( baseUrl, setEmbeddingStatus, @@ -1101,56 +1078,27 @@ const manualTestConnection = async ( { suppressToast: true } ); - if (!success && embeddingProvider === 'ollama') { + if (!success && embeddingProvider === 'ollama' && !cancelled) { embeddingRetryTimeoutRef.current = window.setTimeout(runTest, 5000); } }; + if (embeddingRetryTimeoutRef.current) { + clearTimeout(embeddingRetryTimeoutRef.current); + embeddingRetryTimeoutRef.current = null; + } + setEmbeddingStatus(prev => ({ ...prev, checking: true })); - embeddingRetryTimeoutRef.current = window.setTimeout(runTest, 100); + runTest(); return () => { + cancelled = true; if (embeddingRetryTimeoutRef.current) { clearTimeout(embeddingRetryTimeoutRef.current); embeddingRetryTimeoutRef.current = null; } }; - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [embeddingProvider, ragSettings.OLLAMA_EMBEDDING_URL, ragSettings.OLLAMA_EMBEDDING_INSTANCE_NAME]); - - useEffect(() => { - if (embeddingProvider !== 'ollama') { - initialEmbeddingOllamaTestRef.current = false; - return; - } - - if (initialEmbeddingOllamaTestRef.current) { - return; - } - - initialEmbeddingOllamaTestRef.current = true; - - const baseUrl = (ragSettings.OLLAMA_EMBEDDING_URL && ragSettings.OLLAMA_EMBEDDING_URL.trim().length > 0) - ? ragSettings.OLLAMA_EMBEDDING_URL.trim() - : (embeddingInstanceConfig.url && embeddingInstanceConfig.url.trim().length > 0) - ? embeddingInstanceConfig.url.trim() - : DEFAULT_OLLAMA_URL; - - const instanceName = embeddingInstanceConfig.name?.trim().length - ? 
embeddingInstanceConfig.name - : 'Embedding Instance'; - - setEmbeddingStatus(prev => ({ ...prev, checking: true })); - setTimeout(() => { - manualTestConnection( - baseUrl, - setEmbeddingStatus, - instanceName, - 'embedding', - { suppressToast: true } - ); - }, 250); - }, [embeddingProvider, ragSettings.OLLAMA_EMBEDDING_URL, embeddingInstanceConfig.url, embeddingInstanceConfig.name]); + }, [embeddingProvider, ragSettings.OLLAMA_EMBEDDING_URL, ragSettings.OLLAMA_EMBEDDING_INSTANCE_NAME, embeddingInstanceConfig.url, embeddingInstanceConfig.name]); // Test Ollama connectivity when Settings page loads (scenario 4: page load) // This useEffect is placed after function definitions to ensure access to manualTestConnection From 11388d6db82645a360b1cbd68938934448c649b0 Mon Sep 17 00:00:00 2001 From: Chillbruhhh Date: Thu, 25 Sep 2025 08:18:50 -0500 Subject: [PATCH 18/28] fixed FAILED test_async_embedding_service.py --- .../services/embeddings/embedding_service.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/python/src/server/services/embeddings/embedding_service.py b/python/src/server/services/embeddings/embedding_service.py index b5dc578f95..bbd0788270 100644 --- a/python/src/server/services/embeddings/embedding_service.py +++ b/python/src/server/services/embeddings/embedding_service.py @@ -5,6 +5,7 @@ """ import asyncio +import inspect import os from abc import ABC, abstractmethod from dataclasses import dataclass, field @@ -180,6 +181,12 @@ def _get_embedding_adapter(provider: str, client: Any) -> EmbeddingProviderAdapt return GoogleEmbeddingAdapter() return OpenAICompatibleEmbeddingAdapter(client) + +async def _maybe_await(value: Any) -> Any: + """Await the value if it is awaitable, otherwise return as-is.""" + + return await value if inspect.isawaitable(value) else value + # Provider-aware client factory get_openai_client = get_llm_client @@ -301,8 +308,14 @@ async def create_embeddings_batch( "create_embeddings_batch", text_count=len(texts), total_chars=sum(len(t) for t in texts) ) as span: try: - embedding_config = await credential_service.get_active_provider(service_type="embedding") - embedding_provider = embedding_config.get("provider") + embedding_config = await _maybe_await( + credential_service.get_active_provider(service_type="embedding") + ) + + embedding_provider = provider or embedding_config.get("provider") + + if not isinstance(embedding_provider, str) or not embedding_provider.strip(): + embedding_provider = "openai" if not embedding_provider: search_logger.error("No embedding provider configured") @@ -312,8 +325,8 @@ async def create_embeddings_batch( async with get_llm_client(provider=embedding_provider, use_embedding_provider=True) as client: # Load batch size and dimensions from settings try: - rag_settings = await credential_service.get_credentials_by_category( - "rag_strategy" + rag_settings = await _maybe_await( + credential_service.get_credentials_by_category("rag_strategy") ) batch_size = int(rag_settings.get("EMBEDDING_BATCH_SIZE", "100")) embedding_dimensions = int(rag_settings.get("EMBEDDING_DIMENSIONS", "1536")) From aa354caf06667c2161781435c4d264d475d839b3 Mon Sep 17 00:00:00 2001 From: Chillbruhhh Date: Thu, 25 Sep 2025 09:04:58 -0500 Subject: [PATCH 19/28] code rabbit fixes --- .../src/components/settings/RAGSettings.tsx | 23 ++++++++----------- .../src/services/credentialsService.ts | 2 +- .../crawling/code_extraction_service.py | 22 ++++++++++++++---- .../src/server/services/credential_service.py | 9 +++++++- 
.../services/embeddings/embedding_service.py | 10 ++++++-- .../services/storage/code_storage_service.py | 4 ++-- 6 files changed, 46 insertions(+), 24 deletions(-) diff --git a/archon-ui-main/src/components/settings/RAGSettings.tsx b/archon-ui-main/src/components/settings/RAGSettings.tsx index 9b9c1b2f2a..db195bcec2 100644 --- a/archon-ui-main/src/components/settings/RAGSettings.tsx +++ b/archon-ui-main/src/components/settings/RAGSettings.tsx @@ -860,12 +860,14 @@ const manualTestConnection = async ( // Fetch Ollama metrics only when Ollama provider is initially selected (not on URL changes during typing) React.useEffect(() => { - if (ragSettings.LLM_PROVIDER === 'ollama') { + if ( + ragSettings.LLM_PROVIDER === 'ollama' || embeddingProvider === 'ollama' + ) { const currentProvider = ragSettings.LLM_PROVIDER; const lastProvider = lastMetricsFetchRef.current.provider; - + // Only fetch if provider changed to Ollama (scenario 1: user clicks on Ollama Provider) - if (currentProvider !== lastProvider) { + if (currentProvider !== lastProvider || embeddingProvider === 'ollama') { lastMetricsFetchRef.current = { provider: currentProvider, llmUrl: llmInstanceConfig.url, @@ -877,7 +879,8 @@ const manualTestConnection = async ( fetchOllamaMetrics(); } } - }, [ragSettings.LLM_PROVIDER]); // Only watch provider changes, not URL changes + }, [ragSettings.LLM_PROVIDER, embeddingProvider, llmInstanceConfig.url, llmInstanceConfig.name, + embeddingInstanceConfig.url, embeddingInstanceConfig.name]); // Include embeddingProvider in deps // Function to check if a provider is properly configured const getProviderStatus = (providerKey: string): 'configured' | 'missing' | 'partial' => { @@ -893,15 +896,7 @@ const manualTestConnection = async ( const openAIConnected = providerConnectionStatus['openai']?.connected || false; const isChecking = providerConnectionStatus['openai']?.checking || false; - console.log('πŸ” OpenAI status check:', { - openAIKey, - keyValue: keyValue ? `${keyValue.substring(0, 10)}...` : keyValue, - hasValue: !!keyValue, - hasOpenAIKey, - openAIConnected, - isChecking, - allCredentials: Object.keys(apiCredentials) - }); + // Intentionally avoid logging API key material. 
if (!hasOpenAIKey) return 'missing'; if (isChecking) return 'partial'; @@ -2386,7 +2381,7 @@ function getDisplayedChatModel(ragSettings: any): string { } function getDisplayedEmbeddingModel(ragSettings: any): string { - const provider = ragSettings.LLM_PROVIDER || 'openai'; + const provider = ragSettings.EMBEDDING_PROVIDER || ragSettings.LLM_PROVIDER || 'openai'; const embeddingModel = ragSettings.EMBEDDING_MODEL; // Always prioritize user input to allow editing diff --git a/archon-ui-main/src/services/credentialsService.ts b/archon-ui-main/src/services/credentialsService.ts index b2d2da52fa..8287be76f8 100644 --- a/archon-ui-main/src/services/credentialsService.ts +++ b/archon-ui-main/src/services/credentialsService.ts @@ -195,7 +195,7 @@ class CredentialsService { USE_HYBRID_SEARCH: true, USE_AGENTIC_RAG: true, USE_RERANKING: true, - MODEL_CHOICE: "gpt-4.1-nano", + MODEL_CHOICE: "gpt-4o-mini", LLM_PROVIDER: "openai", LLM_BASE_URL: "", LLM_INSTANCE_NAME: "", diff --git a/python/src/server/services/crawling/code_extraction_service.py b/python/src/server/services/crawling/code_extraction_service.py index 21c11b1aaf..f21ee3eee7 100644 --- a/python/src/server/services/crawling/code_extraction_service.py +++ b/python/src/server/services/crawling/code_extraction_service.py @@ -197,8 +197,15 @@ async def extraction_progress(data: dict): if progress_callback: async def summary_progress(data: dict): # Scale progress to 20-90% range - raw_progress = data.get("progress", data.get("percentage", 0)) - scaled_progress = 20 + int(raw_progress * 0.7) # 20-90% + raw = data.get("progress", data.get("percentage", 0)) + try: + raw_num = float(raw) + except (TypeError, ValueError): + raw_num = 0.0 + if 0.0 <= raw_num <= 1.0: + raw_num *= 100.0 + # 20-90% with clamping + scaled_progress = min(90, max(20, 20 + int(raw_num * 0.7))) data["progress"] = scaled_progress await progress_callback(data) summary_callback = summary_progress @@ -216,8 +223,15 @@ async def summary_progress(data: dict): if progress_callback: async def storage_progress(data: dict): # Scale progress to 90-100% range - raw_progress = data.get("progress", data.get("percentage", 0)) - scaled_progress = 90 + int(raw_progress * 0.1) # 90-100% + raw = data.get("progress", data.get("percentage", 0)) + try: + raw_num = float(raw) + except (TypeError, ValueError): + raw_num = 0.0 + if 0.0 <= raw_num <= 1.0: + raw_num *= 100.0 + # 90-100% with clamping + scaled_progress = min(100, max(90, 90 + int(raw_num * 0.1))) data["progress"] = scaled_progress await progress_callback(data) storage_callback = storage_progress diff --git a/python/src/server/services/credential_service.py b/python/src/server/services/credential_service.py index e39f793062..a8aee8491d 100644 --- a/python/src/server/services/credential_service.py +++ b/python/src/server/services/credential_service.py @@ -442,12 +442,19 @@ async def get_active_provider(self, service_type: str = "llm") -> dict[str, Any] # First check for explicit EMBEDDING_PROVIDER setting (new split provider approach) explicit_embedding_provider = rag_settings.get("EMBEDDING_PROVIDER") - if explicit_embedding_provider and explicit_embedding_provider != "": + # Validate that embedding provider actually supports embeddings + embedding_capable_providers = {"openai", "google", "ollama"} + + if (explicit_embedding_provider and + explicit_embedding_provider != "" and + explicit_embedding_provider in embedding_capable_providers): # Use the explicitly set embedding provider provider = explicit_embedding_provider 
logger.debug(f"Using explicit embedding provider: '{provider}'") else: # Fall back to OpenAI as default embedding provider for backward compatibility + if explicit_embedding_provider and explicit_embedding_provider not in embedding_capable_providers: + logger.warning(f"Invalid embedding provider '{explicit_embedding_provider}' doesn't support embeddings, defaulting to OpenAI") provider = "openai" logger.debug(f"No explicit embedding provider set, defaulting to OpenAI for backward compatibility") else: diff --git a/python/src/server/services/embeddings/embedding_service.py b/python/src/server/services/embeddings/embedding_service.py index bbd0788270..1f1837d865 100644 --- a/python/src/server/services/embeddings/embedding_service.py +++ b/python/src/server/services/embeddings/embedding_service.py @@ -153,13 +153,19 @@ async def _fetch_single_embedding( model: str, text: str, ) -> list[float]: - url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:embedContent" + if model.startswith("models/"): + url_model = model[len("models/") :] + payload_model = model + else: + url_model = model + payload_model = f"models/{model}" + url = f"https://generativelanguage.googleapis.com/v1beta/models/{url_model}:embedContent" headers = { "x-goog-api-key": api_key, "Content-Type": "application/json", } payload = { - "model": f"models/{model}", + "model": payload_model, "content": {"parts": [{"text": text}]}, } diff --git a/python/src/server/services/storage/code_storage_service.py b/python/src/server/services/storage/code_storage_service.py index a993bc70b8..2bcbfbdcd4 100644 --- a/python/src/server/services/storage/code_storage_service.py +++ b/python/src/server/services/storage/code_storage_service.py @@ -1183,8 +1183,8 @@ async def add_code_examples_to_supabase( # Use original combined texts batch_texts = combined_texts - # Create embeddings for the batch - result = await create_embeddings_batch(batch_texts, provider=provider) + # Create embeddings for the batch (let credential service determine provider) + result = await create_embeddings_batch(batch_texts) # Log any failures if result.has_failures: From 64835ec4e690e68f2274c5168fded8fe6a67714f Mon Sep 17 00:00:00 2001 From: Chillbruhhh Date: Thu, 25 Sep 2025 10:35:33 -0500 Subject: [PATCH 20/28] Separated the code-summary LLM provider from the embedding provider, so code example storage now forwards a dedicated embedding provider override end-to-end without hijacking the embedding pipeline. 
This addresses CodeRabbit's "Preserve provider override in create_embeddings_batch" suggestion.
---
 .../crawling/code_extraction_service.py | 18 +++++++++++++++++-
 .../services/crawling/crawling_service.py | 18 ++++++++++++++++--
 .../crawling/document_storage_operations.py | 10 +++++++++-
 .../services/storage/code_storage_service.py | 9 ++++++---
 python/tests/test_code_extraction_source_id.py | 4 ++--
 5 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/python/src/server/services/crawling/code_extraction_service.py b/python/src/server/services/crawling/code_extraction_service.py
index f21ee3eee7..d3fa2f07f8 100644
--- a/python/src/server/services/crawling/code_extraction_service.py
+++ b/python/src/server/services/crawling/code_extraction_service.py
@@ -140,6 +140,7 @@ async def extract_and_store_code_examples(
         progress_callback: Callable | None = None,
         cancellation_check: Callable[[], None] | None = None,
         provider: str | None = None,
+        embedding_provider: str | None = None,
     ) -> int:
         """
         Extract code examples from crawled documents and store them.
@@ -150,6 +151,8 @@ async def extract_and_store_code_examples(
             source_id: The unique source_id for all documents
             progress_callback: Optional async callback for progress updates
             cancellation_check: Optional function to check for cancellation
+            provider: Optional LLM provider identifier for summary generation
+            embedding_provider: Optional embedding provider override for vector creation

         Returns:
             Number of code examples stored
         """
@@ -238,7 +241,11 @@ async def storage_progress(data: dict):

         # Store code examples in database
         return await self._store_code_examples(
-            storage_data, url_to_full_document, storage_callback, provider
+            storage_data,
+            url_to_full_document,
+            storage_callback,
+            provider,
+            embedding_provider,
         )

     async def _extract_code_blocks_from_documents(
@@ -1684,12 +1691,20 @@ async def _store_code_examples(
         url_to_full_document: dict[str, str],
         progress_callback: Callable | None = None,
         provider: str | None = None,
+        embedding_provider: str | None = None,
     ) -> int:
         """
         Store code examples in the database. 
Returns: Number of code examples stored + + Args: + storage_data: Prepared code example payloads + url_to_full_document: Mapping of URLs to their full document content + progress_callback: Optional callback for progress updates + provider: Optional LLM provider identifier for summaries + embedding_provider: Optional embedding provider override for vector storage """ # Create progress callback for storage phase storage_progress_callback = None @@ -1727,6 +1742,7 @@ async def storage_callback(data: dict): url_to_full_document=url_to_full_document, progress_callback=storage_progress_callback, provider=provider, + embedding_provider=embedding_provider, ) # Report completion of code extraction/storage phase diff --git a/python/src/server/services/crawling/crawling_service.py b/python/src/server/services/crawling/crawling_service.py index 69c6571909..acafc0a50e 100644 --- a/python/src/server/services/crawling/crawling_service.py +++ b/python/src/server/services/crawling/crawling_service.py @@ -14,6 +14,7 @@ from ...config.logfire_config import get_logger, safe_logfire_error, safe_logfire_info from ...utils import get_supabase_client from ...utils.progress.progress_tracker import ProgressTracker +from ..credential_service import credential_service # Import strategies # Import operations @@ -477,15 +478,27 @@ async def code_progress_callback(data: dict): try: # Extract provider from request or use credential service default provider = request.get("provider") + embedding_provider = None + if not provider: try: - from ..credential_service import credential_service provider_config = await credential_service.get_active_provider("llm") provider = provider_config.get("provider", "openai") except Exception as e: - logger.warning(f"Failed to get provider from credential service: {e}, defaulting to openai") + logger.warning( + f"Failed to get provider from credential service: {e}, defaulting to openai" + ) provider = "openai" + try: + embedding_config = await credential_service.get_active_provider("embedding") + embedding_provider = embedding_config.get("provider") + except Exception as e: + logger.warning( + f"Failed to get embedding provider from credential service: {e}. Using configured default." + ) + embedding_provider = None + code_examples_count = await self.doc_storage_ops.extract_and_store_code_examples( crawl_results, storage_results["url_to_full_document"], @@ -493,6 +506,7 @@ async def code_progress_callback(data: dict): code_progress_callback, self._check_cancellation, provider, + embedding_provider, ) except RuntimeError as e: # Code extraction failed, continue crawl with warning diff --git a/python/src/server/services/crawling/document_storage_operations.py b/python/src/server/services/crawling/document_storage_operations.py index 88ed8e80df..8bfa4560a3 100644 --- a/python/src/server/services/crawling/document_storage_operations.py +++ b/python/src/server/services/crawling/document_storage_operations.py @@ -352,6 +352,7 @@ async def extract_and_store_code_examples( progress_callback: Callable | None = None, cancellation_check: Callable[[], None] | None = None, provider: str | None = None, + embedding_provider: str | None = None, ) -> int: """ Extract code examples from crawled documents and store them. 
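The plumbing in these hunks follows one precedence rule for the embedding provider: an explicit override argument wins, otherwise the credential service's active embedding provider is used, with "openai" as the final fallback. A minimal runnable sketch of that rule, assuming illustrative names (resolve_embedding_provider and get_active_embedding_provider are stand-ins, not the actual Archon helpers):

import asyncio


async def get_active_embedding_provider() -> str:
    # Stand-in for the async credential lookup; the real service reads
    # EMBEDDING_PROVIDER from stored settings and validates it.
    return "openai"


async def resolve_embedding_provider(override: str | None = None) -> str:
    # An explicit, non-blank override wins; otherwise fall back to config.
    if override and override.strip():
        return override
    return await get_active_embedding_provider()


async def main() -> None:
    assert await resolve_embedding_provider("google") == "google"  # override wins
    assert await resolve_embedding_provider() == "openai"  # configured fallback


asyncio.run(main())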
@@ -363,12 +364,19 @@ async def extract_and_store_code_examples( progress_callback: Optional callback for progress updates cancellation_check: Optional function to check for cancellation provider: Optional LLM provider to use for code summaries + embedding_provider: Optional embedding provider override for code example embeddings Returns: Number of code examples stored """ result = await self.code_extraction_service.extract_and_store_code_examples( - crawl_results, url_to_full_document, source_id, progress_callback, cancellation_check, provider + crawl_results, + url_to_full_document, + source_id, + progress_callback, + cancellation_check, + provider, + embedding_provider, ) return result diff --git a/python/src/server/services/storage/code_storage_service.py b/python/src/server/services/storage/code_storage_service.py index 2bcbfbdcd4..8e237f7ea0 100644 --- a/python/src/server/services/storage/code_storage_service.py +++ b/python/src/server/services/storage/code_storage_service.py @@ -1091,6 +1091,7 @@ async def add_code_examples_to_supabase( url_to_full_document: dict[str, str] | None = None, progress_callback: Callable | None = None, provider: str | None = None, + embedding_provider: str | None = None, ): """ Add code examples to the Supabase code_examples table in batches. @@ -1105,6 +1106,8 @@ async def add_code_examples_to_supabase( batch_size: Size of each batch for insertion url_to_full_document: Optional mapping of URLs to full document content progress_callback: Optional async callback for progress updates + provider: Optional LLM provider used for summary generation tracking + embedding_provider: Optional embedding provider override for vector generation """ if not urls: return @@ -1183,8 +1186,8 @@ async def add_code_examples_to_supabase( # Use original combined texts batch_texts = combined_texts - # Create embeddings for the batch (let credential service determine provider) - result = await create_embeddings_batch(batch_texts) + # Create embeddings for the batch (optionally overriding the embedding provider) + result = await create_embeddings_batch(batch_texts, provider=embedding_provider) # Log any failures if result.has_failures: @@ -1201,7 +1204,7 @@ async def add_code_examples_to_supabase( from ..llm_provider_service import get_embedding_model # Get embedding model name - embedding_model_name = await get_embedding_model(provider=provider) + embedding_model_name = await get_embedding_model(provider=embedding_provider) # Get LLM chat model (used for code summaries and contextual embeddings if enabled) llm_chat_model = None diff --git a/python/tests/test_code_extraction_source_id.py b/python/tests/test_code_extraction_source_id.py index 05405ee790..7899c7fc58 100644 --- a/python/tests/test_code_extraction_source_id.py +++ b/python/tests/test_code_extraction_source_id.py @@ -111,8 +111,8 @@ async def test_document_storage_passes_source_id(self): assert args[2] == source_id assert args[3] is None assert args[4] is None - if len(args) > 5: - assert args[5] is None + assert args[5] is None + assert args[6] is None assert result == 5 @pytest.mark.asyncio From fedf5957f68b053b5a5a93940718f261a7bf5e5b Mon Sep 17 00:00:00 2001 From: Chillbruhhh Date: Thu, 25 Sep 2025 11:21:18 -0500 Subject: [PATCH 21/28] - Swapped API credential storage to booleans so decrypted keys never sit in React state (archon-ui-main/src/components/ settings/RAGSettings.tsx). 
- Normalized Ollama instance URLs and gated the metrics effect on real state changes to avoid mis-counts and duplicate fetches (RAGSettings.tsx). - Tightened crawl progress scaling and indented-block parsing to handle min_length=None safely (python/src/server/ services/crawling/code_extraction_service.py:160, python/src/server/services/crawling/code_extraction_service.py:911). - Added provider-agnostic embedding rate-limit retries so Google and friends back off gracefully (python/src/server/ services/embeddings/embedding_service.py:427). - Made the orchestration registry async + thread-safe and updated every caller to await it (python/src/server/services/ crawling/crawling_service.py:34, python/src/server/api_routes/knowledge_api.py:1291). --- .../src/components/settings/RAGSettings.tsx | 105 ++++++++++-------- python/src/server/api_routes/knowledge_api.py | 4 +- .../crawling/code_extraction_service.py | 28 ++++- .../services/crawling/crawling_service.py | 25 +++-- .../services/embeddings/embedding_service.py | 11 ++ 5 files changed, 106 insertions(+), 67 deletions(-) diff --git a/archon-ui-main/src/components/settings/RAGSettings.tsx b/archon-ui-main/src/components/settings/RAGSettings.tsx index db195bcec2..618c1ccfa2 100644 --- a/archon-ui-main/src/components/settings/RAGSettings.tsx +++ b/archon-ui-main/src/components/settings/RAGSettings.tsx @@ -118,6 +118,16 @@ const isProviderKey = (value: unknown): value is ProviderKey => // Default base URL for Ollama instances when not explicitly configured const DEFAULT_OLLAMA_URL = 'http://host.docker.internal:11434/v1'; +const normalizeBaseUrl = (url?: string | null): string | null => { + if (!url) return null; + const trimmed = url.trim(); + if (!trimmed) return null; + + let normalized = trimmed.replace(/\/+$/, ''); + normalized = normalized.replace(/\/v1$/i, ''); + return normalized || null; +}; + interface RAGSettingsProps { ragSettings: { MODEL_CHOICE: string; @@ -282,12 +292,10 @@ export const RAGSettings = ({ const keyNames = ['OPENAI_API_KEY', 'GOOGLE_API_KEY', 'ANTHROPIC_API_KEY']; const statusResults = await credentialsService.checkCredentialStatus(keyNames); - const credentials: {[key: string]: string} = {}; - + const credentials: {[key: string]: boolean} = {}; + for (const [key, result] of Object.entries(statusResults)) { - if (result.has_value && result.value && result.value.trim().length > 0) { - credentials[key] = result.value; - } + credentials[key] = !!result.has_value; } console.log('πŸ”‘ Loaded API credentials for status checking:', Object.keys(credentials)); @@ -311,12 +319,10 @@ export const RAGSettings = ({ const keyNames = ['OPENAI_API_KEY', 'GOOGLE_API_KEY', 'ANTHROPIC_API_KEY']; const statusResults = await credentialsService.checkCredentialStatus(keyNames); - const credentials: {[key: string]: string} = {}; - + const credentials: {[key: string]: boolean} = {}; + for (const [key, result] of Object.entries(statusResults)) { - if (result.has_value && result.value && result.value.trim().length > 0) { - credentials[key] = result.value; - } + credentials[key] = !!result.has_value; } console.log('πŸ”„ Reloaded API credentials for status checking:', Object.keys(credentials)); @@ -448,7 +454,7 @@ export const RAGSettings = ({ const embeddingRetryTimeoutRef = useRef(null); // API key credentials for status checking - const [apiCredentials, setApiCredentials] = useState<{[key: string]: string}>({}); + const [apiCredentials, setApiCredentials] = useState<{[key: string]: boolean}>({}); // Provider connection status tracking const 
[providerConnectionStatus, setProviderConnectionStatus] = useState<{ [key: string]: { connected: boolean; checking: boolean; lastChecked?: Date } @@ -732,11 +738,14 @@ const manualTestConnection = async ( try { setOllamaMetrics(prev => ({ ...prev, loading: true })); - // Prepare instance URLs for the API call - const instanceUrls = []; - if (llmInstanceConfig.url) instanceUrls.push(llmInstanceConfig.url); - if (embeddingInstanceConfig.url && embeddingInstanceConfig.url !== llmInstanceConfig.url) { - instanceUrls.push(embeddingInstanceConfig.url); + // Prepare normalized instance URLs for the API call + const instanceUrls: string[] = []; + const llmUrlBase = normalizeBaseUrl(llmInstanceConfig.url); + const embUrlBase = normalizeBaseUrl(embeddingInstanceConfig.url); + + if (llmUrlBase) instanceUrls.push(llmUrlBase); + if (embUrlBase && embUrlBase !== llmUrlBase) { + instanceUrls.push(embUrlBase); } if (instanceUrls.length === 0) { @@ -760,18 +769,18 @@ const manualTestConnection = async ( // Count models for LLM instance const llmChatModels = allChatModels.filter((model: any) => - model.instance_url === llmInstanceConfig.url + normalizeBaseUrl(model.instance_url) === llmUrlBase ); const llmEmbeddingModels = allEmbeddingModels.filter((model: any) => - model.instance_url === llmInstanceConfig.url + normalizeBaseUrl(model.instance_url) === llmUrlBase ); - + // Count models for Embedding instance const embChatModels = allChatModels.filter((model: any) => - model.instance_url === embeddingInstanceConfig.url + normalizeBaseUrl(model.instance_url) === embUrlBase ); const embEmbeddingModels = allEmbeddingModels.filter((model: any) => - model.instance_url === embeddingInstanceConfig.url + normalizeBaseUrl(model.instance_url) === embUrlBase ); // Calculate totals @@ -810,7 +819,7 @@ const manualTestConnection = async ( // Use refs to prevent infinite connection testing const lastTestedLLMConfigRef = useRef({ url: '', name: '', provider: '' }); const lastTestedEmbeddingConfigRef = useRef({ url: '', name: '', provider: '' }); - const lastMetricsFetchRef = useRef({ provider: '', llmUrl: '', embUrl: '', llmOnline: false, embOnline: false }); + const lastMetricsFetchRef = useRef({ provider: '', embProvider: '', llmUrl: '', embUrl: '', llmOnline: false, embOnline: false }); // Auto-testing disabled to prevent API calls on every keystroke per user request // Connection testing should only happen on manual "Test Connection" or "Save Changes" button clicks @@ -858,29 +867,31 @@ const manualTestConnection = async ( // } // }, [embeddingInstanceConfig.url, embeddingInstanceConfig.name, ragSettings.LLM_PROVIDER]); - // Fetch Ollama metrics only when Ollama provider is initially selected (not on URL changes during typing) React.useEffect(() => { - if ( - ragSettings.LLM_PROVIDER === 'ollama' || embeddingProvider === 'ollama' - ) { - const currentProvider = ragSettings.LLM_PROVIDER; - const lastProvider = lastMetricsFetchRef.current.provider; - - // Only fetch if provider changed to Ollama (scenario 1: user clicks on Ollama Provider) - if (currentProvider !== lastProvider || embeddingProvider === 'ollama') { - lastMetricsFetchRef.current = { - provider: currentProvider, - llmUrl: llmInstanceConfig.url, - embUrl: embeddingInstanceConfig.url, - llmOnline: llmStatus.online, - embOnline: embeddingStatus.online - }; - console.log('πŸ”„ Fetching Ollama metrics - Provider selected'); - fetchOllamaMetrics(); - } + const current = { + provider: ragSettings.LLM_PROVIDER, + embProvider: embeddingProvider, + llmUrl: 
normalizeBaseUrl(llmInstanceConfig.url) ?? '', + embUrl: normalizeBaseUrl(embeddingInstanceConfig.url) ?? '', + llmOnline: llmStatus.online, + embOnline: embeddingStatus.online, + }; + const last = lastMetricsFetchRef.current; + + const meaningfulChange = + current.provider !== last.provider || + current.embProvider !== last.embProvider || + current.llmUrl !== last.llmUrl || + current.embUrl !== last.embUrl || + current.llmOnline !== last.llmOnline || + current.embOnline !== last.embOnline; + + if ((current.provider === 'ollama' || current.embProvider === 'ollama') && meaningfulChange) { + lastMetricsFetchRef.current = current; + console.log('πŸ”„ Fetching Ollama metrics - state changed'); + fetchOllamaMetrics(); } - }, [ragSettings.LLM_PROVIDER, embeddingProvider, llmInstanceConfig.url, llmInstanceConfig.name, - embeddingInstanceConfig.url, embeddingInstanceConfig.name]); // Include embeddingProvider in deps + }, [ragSettings.LLM_PROVIDER, embeddingProvider, llmStatus.online, embeddingStatus.online]); // Function to check if a provider is properly configured const getProviderStatus = (providerKey: string): 'configured' | 'missing' | 'partial' => { @@ -888,9 +899,7 @@ const manualTestConnection = async ( case 'openai': // Check if OpenAI API key is configured (case insensitive) const openAIKey = Object.keys(apiCredentials).find(key => key.toUpperCase() === 'OPENAI_API_KEY'); - const keyValue = openAIKey ? apiCredentials[openAIKey] : undefined; - // Don't consider encrypted placeholders as valid API keys for connection testing - const hasOpenAIKey = openAIKey && keyValue && keyValue.trim().length > 0 && !keyValue.includes('[ENCRYPTED]'); + const hasOpenAIKey = openAIKey ? !!apiCredentials[openAIKey] : false; // Only show configured if we have both API key AND confirmed connection const openAIConnected = providerConnectionStatus['openai']?.connected || false; @@ -905,9 +914,7 @@ const manualTestConnection = async ( case 'google': // Check if Google API key is configured (case insensitive) const googleKey = Object.keys(apiCredentials).find(key => key.toUpperCase() === 'GOOGLE_API_KEY'); - const googleKeyValue = googleKey ? apiCredentials[googleKey] : undefined; - // Don't consider encrypted placeholders as valid API keys for connection testing - const hasGoogleKey = googleKey && googleKeyValue && googleKeyValue.trim().length > 0 && !googleKeyValue.includes('[ENCRYPTED]'); + const hasGoogleKey = googleKey ? 
!!apiCredentials[googleKey] : false; // Only show configured if we have both API key AND confirmed connection const googleConnected = providerConnectionStatus['google']?.connected || false; diff --git a/python/src/server/api_routes/knowledge_api.py b/python/src/server/api_routes/knowledge_api.py index 1f26dace8f..47a3d9db2e 100644 --- a/python/src/server/api_routes/knowledge_api.py +++ b/python/src/server/api_routes/knowledge_api.py @@ -1288,7 +1288,7 @@ async def stop_crawl_task(progress_id: str): found = False # Step 1: Cancel the orchestration service - orchestration = get_active_orchestration(progress_id) + orchestration = await get_active_orchestration(progress_id) if orchestration: orchestration.cancel() found = True @@ -1306,7 +1306,7 @@ async def stop_crawl_task(progress_id: str): found = True # Step 3: Remove from active orchestrations registry - unregister_orchestration(progress_id) + await unregister_orchestration(progress_id) # Step 4: Update progress tracker to reflect cancellation (only if we found and cancelled something) if found: diff --git a/python/src/server/services/crawling/code_extraction_service.py b/python/src/server/services/crawling/code_extraction_service.py index d3fa2f07f8..b1705b029e 100644 --- a/python/src/server/services/crawling/code_extraction_service.py +++ b/python/src/server/services/crawling/code_extraction_service.py @@ -161,9 +161,16 @@ async def extract_and_store_code_examples( extraction_callback = None if progress_callback: async def extraction_progress(data: dict): - # Scale progress to 0-20% range - raw_progress = data.get("progress", data.get("percentage", 0)) - scaled_progress = int(raw_progress * 0.2) # 0-20% + # Scale progress to 0-20% range with normalization similar to later phases + raw = data.get("progress", data.get("percentage", 0)) + try: + raw_num = float(raw) + except (TypeError, ValueError): + raw_num = 0.0 + if 0.0 <= raw_num <= 1.0: + raw_num *= 100.0 + # 0-20% with clamping + scaled_progress = min(20, max(0, int(raw_num * 0.2))) data["progress"] = scaled_progress await progress_callback(data) extraction_callback = extraction_progress @@ -901,9 +908,20 @@ async def _extract_text_file_code_blocks( current_indent = indent block_start_idx = i current_block.append(line) - elif current_block and len("\n".join(current_block)) >= min_length: + elif current_block: + block_text = "\n".join(current_block) + threshold = ( + min_length + if min_length is not None + else await self._get_min_code_length() + ) + if len(block_text) < threshold: + current_block = [] + current_indent = None + continue + # End of indented block, check if it's code - code_content = "\n".join(current_block) + code_content = block_text # Try to detect language from content language = self._detect_language_from_content(code_content) diff --git a/python/src/server/services/crawling/crawling_service.py b/python/src/server/services/crawling/crawling_service.py index acafc0a50e..e02f43388d 100644 --- a/python/src/server/services/crawling/crawling_service.py +++ b/python/src/server/services/crawling/crawling_service.py @@ -33,22 +33,25 @@ # Global registry to track active orchestration services for cancellation support _active_orchestrations: dict[str, "CrawlingService"] = {} +_orchestration_lock = asyncio.Lock() -def get_active_orchestration(progress_id: str) -> Optional["CrawlingService"]: +async def get_active_orchestration(progress_id: str) -> Optional["CrawlingService"]: """Get an active orchestration service by progress ID.""" - return 
_active_orchestrations.get(progress_id) + async with _orchestration_lock: + return _active_orchestrations.get(progress_id) -def register_orchestration(progress_id: str, orchestration: "CrawlingService"): +async def register_orchestration(progress_id: str, orchestration: "CrawlingService"): """Register an active orchestration service.""" - _active_orchestrations[progress_id] = orchestration + async with _orchestration_lock: + _active_orchestrations[progress_id] = orchestration -def unregister_orchestration(progress_id: str): +async def unregister_orchestration(progress_id: str): """Unregister an orchestration service.""" - if progress_id in _active_orchestrations: - del _active_orchestrations[progress_id] + async with _orchestration_lock: + _active_orchestrations.pop(progress_id, None) class CrawlingService: @@ -247,7 +250,7 @@ async def orchestrate_crawl(self, request: dict[str, Any]) -> dict[str, Any]: # Register this orchestration service for cancellation support if self.progress_id: - register_orchestration(self.progress_id, self) + await register_orchestration(self.progress_id, self) # Start the crawl as an async task in the main event loop # Store the task reference for proper cancellation @@ -562,7 +565,7 @@ async def code_progress_callback(data: dict): # Unregister after successful completion if self.progress_id: - unregister_orchestration(self.progress_id) + await unregister_orchestration(self.progress_id) safe_logfire_info( f"Unregistered orchestration service after completion | progress_id={self.progress_id}" ) @@ -581,7 +584,7 @@ async def code_progress_callback(data: dict): ) # Unregister on cancellation if self.progress_id: - unregister_orchestration(self.progress_id) + await unregister_orchestration(self.progress_id) safe_logfire_info( f"Unregistered orchestration service on cancellation | progress_id={self.progress_id}" ) @@ -605,7 +608,7 @@ async def code_progress_callback(data: dict): await self.progress_tracker.error(error_message) # Unregister on error if self.progress_id: - unregister_orchestration(self.progress_id) + await unregister_orchestration(self.progress_id) safe_logfire_info( f"Unregistered orchestration service on error | progress_id={self.progress_id}" ) diff --git a/python/src/server/services/embeddings/embedding_service.py b/python/src/server/services/embeddings/embedding_service.py index 1f1837d865..1a71cfdcef 100644 --- a/python/src/server/services/embeddings/embedding_service.py +++ b/python/src/server/services/embeddings/embedding_service.py @@ -424,6 +424,17 @@ async def rate_limit_callback(data: dict): await asyncio.sleep(wait_time) else: raise # Will be caught by outer try + except EmbeddingRateLimitError as e: + retry_count += 1 + if retry_count < max_retries: + wait_time = 2**retry_count + search_logger.warning( + f"Embedding rate limit for batch {batch_index}: {e}. 
" + f"Waiting {wait_time}s before retry {retry_count}/{max_retries}" + ) + await asyncio.sleep(wait_time) + else: + raise except Exception as e: # This batch failed - track failures but continue with next batch From ec4ceee1044c99dea57a83dec31ffbfc92cdc87f Mon Sep 17 00:00:00 2001 From: Josh Date: Thu, 25 Sep 2025 11:33:11 -0500 Subject: [PATCH 22/28] Update RAGSettings.tsx - header for 'LLM Settings' is now 'LLM Provider Settings' --- archon-ui-main/src/components/settings/RAGSettings.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/archon-ui-main/src/components/settings/RAGSettings.tsx b/archon-ui-main/src/components/settings/RAGSettings.tsx index 618c1ccfa2..73f6fd057c 100644 --- a/archon-ui-main/src/components/settings/RAGSettings.tsx +++ b/archon-ui-main/src/components/settings/RAGSettings.tsx @@ -1199,10 +1199,10 @@ const manualTestConnection = async ( knowledge retrieval.

- {/* LLM Settings Header */} + {/* LLM Provider Settings Header */}

- LLM Settings + LLM Provider Settings

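Several hunks in this series (extraction progress scaled to 0-20%, summaries to 20-90%, storage to 90-100%) repeat the same normalize-then-clamp arithmetic inline. A minimal sketch of that shared rule, assuming an illustrative helper name (scale_progress is not a function in the codebase; each patched callback inlines this logic):

def scale_progress(raw, lower: int, upper: int) -> int:
    """Normalize a raw progress value and clamp it into [lower, upper].

    Accepts fractions (0.0-1.0) or percentages (0-100); unparsable input
    is treated as 0, mirroring the defensive casts in the patched callbacks.
    """
    try:
        value = float(raw)
    except (TypeError, ValueError):
        value = 0.0
    if 0.0 <= value <= 1.0:
        value *= 100.0  # promote fractions to percentages
    span = (upper - lower) / 100.0
    return min(upper, max(lower, lower + int(value * span)))


assert scale_progress(0.5, 20, 90) == 55    # summary phase, fractional input
assert scale_progress(250, 90, 100) == 100  # out-of-range input clamps to the cap
assert scale_progress(None, 0, 20) == 0     # unparsable input degrades to the floor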
From 7b8c834410c644d0a11fb27f565cb1450e85c82d Mon Sep 17 00:00:00 2001 From: Chillbruhhh Date: Thu, 25 Sep 2025 12:02:26 -0500 Subject: [PATCH 23/28] (RAG Settings) - Ollama Health Checks & Metrics - Added a 10-second timeout to the health fetch so it doesn't hang. - Adjusted logic so metric refreshes run for embedding-only Ollama setups too. - Initial page load now checks Ollama if either chat or embedding provider uses it. - Metrics and alerts now respect which provider (chat/embedding) is currently selected. - Provider Sync & Alerts - Fixed a sync bug so the very first provider change updates settings as expected. - Alerts now track the active provider (chat vs embedding) rather than only the LLM provider. - Warnings about missing credentials now skip whichever provider is currently selected. - Modals & Types - Normalize URLs before handing them to selection modals to keep consistent data. - Strengthened helper function types (getDisplayedChatModel, getModelPlaceholder, etc.). (Crawling Service) - Made the orchestration registry lock lazy-initialized to avoid issues in Python 3.12 and wrapped registry commands (register, unregister) in async calls. This keeps things thread-safe even during concurrent crawling and cancellation. --- .../src/components/settings/RAGSettings.tsx | 52 ++++++++++--------- .../services/crawling/crawling_service.py | 18 +++++-- 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/archon-ui-main/src/components/settings/RAGSettings.tsx b/archon-ui-main/src/components/settings/RAGSettings.tsx index 618c1ccfa2..9998e4fd8f 100644 --- a/archon-ui-main/src/components/settings/RAGSettings.tsx +++ b/archon-ui-main/src/components/settings/RAGSettings.tsx @@ -376,7 +376,7 @@ export const RAGSettings = ({ try { const response = await fetch( `/api/ollama/instances/health?instance_urls=${encodeURIComponent(normalizedUrl)}`, - { method: 'GET', headers: { Accept: 'application/json' } } + { method: 'GET', headers: { Accept: 'application/json' }, signal: AbortSignal.timeout(10000) } ); if (cancelled) return; @@ -421,8 +421,8 @@ export const RAGSettings = ({ // Update ragSettings when independent providers change (one-way: local state -> ragSettings) // Split the β€œfirst‐run” guard into two refs so chat and embedding effects don’t interfere. - const updateChatRagSettingsRef = useRef(false); - const updateEmbeddingRagSettingsRef = useRef(false); + const updateChatRagSettingsRef = useRef(true); + const updateEmbeddingRagSettingsRef = useRef(true); useEffect(() => { // Only update if this is a user‐initiated change, not a sync from ragSettings @@ -654,7 +654,7 @@ const manualTestConnection = async ( } // Scenario 2: Manual "Test Connection" button - refresh Ollama metrics if Ollama provider is selected - if (ragSettings.LLM_PROVIDER === 'ollama') { + if (ragSettings.LLM_PROVIDER === 'ollama' || embeddingProvider === 'ollama' || context === 'embedding') { console.log('πŸ”„ Fetching Ollama metrics - Test Connection button clicked'); fetchOllamaMetrics(); } @@ -963,15 +963,16 @@ const manualTestConnection = async ( } }; - const selectedProviderKey = isProviderKey(ragSettings.LLM_PROVIDER) - ? (ragSettings.LLM_PROVIDER as ProviderKey) + const resolvedProviderForAlert = activeSelection === 'chat' ? chatProvider : embeddingProvider; + const activeProviderKey = isProviderKey(resolvedProviderForAlert) + ? (resolvedProviderForAlert as ProviderKey) : undefined; - const selectedProviderStatus = selectedProviderKey ? 
getProviderStatus(selectedProviderKey) : undefined; + const selectedProviderStatus = activeProviderKey ? getProviderStatus(activeProviderKey) : undefined; let providerAlertMessage: string | null = null; let providerAlertClassName = ''; - if (selectedProviderKey === 'ollama') { + if (activeProviderKey === 'ollama') { if (ollamaServerStatus === 'offline') { providerAlertMessage = 'Local Ollama service is not running. Start the Ollama server and ensure it is reachable at the configured URL.'; providerAlertClassName = providerErrorAlertStyle; @@ -979,9 +980,9 @@ const manualTestConnection = async ( providerAlertMessage = 'Local Ollama service detected. Click "Test Connection" to confirm model availability.'; providerAlertClassName = providerWarningAlertStyle; } - } else if (selectedProviderKey && selectedProviderStatus === 'missing') { - providerAlertMessage = defaultProviderAlertMessages[selectedProviderKey] ?? null; - providerAlertClassName = providerAlertStyles[selectedProviderKey] ?? ''; + } else if (activeProviderKey && selectedProviderStatus === 'missing') { + providerAlertMessage = defaultProviderAlertMessages[activeProviderKey] ?? null; + providerAlertClassName = providerAlertStyles[activeProviderKey] ?? ''; } const shouldShowProviderAlert = Boolean(providerAlertMessage); @@ -1116,9 +1117,11 @@ const manualTestConnection = async ( }); // Only run once when data is properly loaded and not run before - if (!hasRunInitialTestRef.current && - ragSettings.LLM_PROVIDER === 'ollama' && - Object.keys(ragSettings).length > 0) { + if ( + !hasRunInitialTestRef.current && + (ragSettings.LLM_PROVIDER === 'ollama' || embeddingProvider === 'ollama') && + Object.keys(ragSettings).length > 0 + ) { hasRunInitialTestRef.current = true; console.log('πŸ”„ Settings page loaded with Ollama - Testing connectivity'); @@ -1199,10 +1202,10 @@ const manualTestConnection = async ( knowledge retrieval.

-          {/* LLM Settings Header */}
+          {/* LLM Provider Settings Header */}

-              LLM Settings
+              LLM Provider Settings

@@ -1347,14 +1350,15 @@ const manualTestConnection = async ( const chatStatus = getProviderStatus(chatProvider); const embeddingStatus = getProviderStatus(embeddingProvider); const missingProviders = []; + const providerToIgnore = activeProviderKey; - if (chatStatus === 'missing' && chatProvider !== selectedProviderKey) { + if (chatStatus === 'missing' && (!providerToIgnore || chatProvider !== providerToIgnore)) { missingProviders.push({ name: chatProvider, type: 'Chat', color: 'green' }); } if ( embeddingStatus === 'missing' && embeddingProvider !== chatProvider && - embeddingProvider !== selectedProviderKey + (!providerToIgnore || embeddingProvider !== providerToIgnore) ) { missingProviders.push({ name: embeddingProvider, type: 'Embedding', color: 'purple' }); } @@ -2306,7 +2310,7 @@ const manualTestConnection = async ( ]} currentModel={ragSettings.MODEL_CHOICE} modelType="chat" - selectedInstanceUrl={llmInstanceConfig.url.replace('/v1', '')} + selectedInstanceUrl={normalizeBaseUrl(llmInstanceConfig.url) ?? ''} onSelectModel={(modelName: string) => { setRagSettings({ ...ragSettings, MODEL_CHOICE: modelName }); showToast(`Selected LLM model: ${modelName}`, 'success'); @@ -2325,7 +2329,7 @@ const manualTestConnection = async ( ]} currentModel={ragSettings.EMBEDDING_MODEL} modelType="embedding" - selectedInstanceUrl={embeddingInstanceConfig.url.replace('/v1', '')} + selectedInstanceUrl={normalizeBaseUrl(embeddingInstanceConfig.url) ?? ''} onSelectModel={(modelName: string) => { setRagSettings({ ...ragSettings, EMBEDDING_MODEL: modelName }); showToast(`Selected embedding model: ${modelName}`, 'success'); @@ -2359,7 +2363,7 @@ const manualTestConnection = async ( }; // Helper functions to get provider-specific model display -function getDisplayedChatModel(ragSettings: any): string { +function getDisplayedChatModel(ragSettings: RAGSettingsProps["ragSettings"]): string { const provider = ragSettings.LLM_PROVIDER || 'openai'; const modelChoice = ragSettings.MODEL_CHOICE; @@ -2387,7 +2391,7 @@ function getDisplayedChatModel(ragSettings: any): string { } } -function getDisplayedEmbeddingModel(ragSettings: any): string { +function getDisplayedEmbeddingModel(ragSettings: RAGSettingsProps["ragSettings"]): string { const provider = ragSettings.EMBEDDING_PROVIDER || ragSettings.LLM_PROVIDER || 'openai'; const embeddingModel = ragSettings.EMBEDDING_MODEL; @@ -2416,7 +2420,7 @@ function getDisplayedEmbeddingModel(ragSettings: any): string { } // Helper functions for model placeholders -function getModelPlaceholder(provider: string): string { +function getModelPlaceholder(provider: ProviderKey): string { switch (provider) { case 'openai': return 'e.g., gpt-4o-mini'; @@ -2435,7 +2439,7 @@ function getModelPlaceholder(provider: string): string { } } -function getEmbeddingPlaceholder(provider: string): string { +function getEmbeddingPlaceholder(provider: ProviderKey): string { switch (provider) { case 'openai': return 'Default: text-embedding-3-small'; diff --git a/python/src/server/services/crawling/crawling_service.py b/python/src/server/services/crawling/crawling_service.py index e02f43388d..82a98c0c83 100644 --- a/python/src/server/services/crawling/crawling_service.py +++ b/python/src/server/services/crawling/crawling_service.py @@ -33,24 +33,34 @@ # Global registry to track active orchestration services for cancellation support _active_orchestrations: dict[str, "CrawlingService"] = {} -_orchestration_lock = asyncio.Lock() +_orchestration_lock: asyncio.Lock | None = None + + +def 
_ensure_orchestration_lock() -> asyncio.Lock:
+    global _orchestration_lock
+    if _orchestration_lock is None:
+        _orchestration_lock = asyncio.Lock()
+    return _orchestration_lock


 async def get_active_orchestration(progress_id: str) -> Optional["CrawlingService"]:
     """Get an active orchestration service by progress ID."""
-    async with _orchestration_lock:
+    lock = _ensure_orchestration_lock()
+    async with lock:
         return _active_orchestrations.get(progress_id)


 async def register_orchestration(progress_id: str, orchestration: "CrawlingService"):
     """Register an active orchestration service."""
-    async with _orchestration_lock:
+    lock = _ensure_orchestration_lock()
+    async with lock:
         _active_orchestrations[progress_id] = orchestration


 async def unregister_orchestration(progress_id: str):
     """Unregister an orchestration service."""
-    async with _orchestration_lock:
+    lock = _ensure_orchestration_lock()
+    async with lock:
         _active_orchestrations.pop(progress_id, None)

From 21cf54c8204281f61b3e5f77f6999a2227ef649a Mon Sep 17 00:00:00 2001
From: Chillbruhhh
Date: Thu, 25 Sep 2025 17:11:02 -0500
Subject: [PATCH 24/28] - migration/complete_setup.sql:101 seeds
 Google/OpenRouter/Anthropic/Grok API key rows so fresh databases expose every
 provider by default.
- migration/0.1.0/009_add_provider_placeholders.sql:1 backfills the same rows
  for existing Supabase instances and records the migration.
- archon-ui-main/src/components/settings/RAGSettings.tsx:121 introduces a
  shared credential-to-provider map, reloadApiCredentials now runs through all
  five providers, and the status poller includes the new keys.
- archon-ui-main/src/components/settings/RAGSettings.tsx:353 subscribes to the
  archon:credentials-updated browser event so adding or removing a key
  immediately refetches credential status and re-runs the corresponding
  connectivity test.
- archon-ui-main/src/components/settings/RAGSettings.tsx:926 now reports
  Anthropic/OpenRouter/Grok as missing when their API keys are absent,
  preventing stale "connected" badges after a key is removed.
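For reference, a minimal sketch of the emitting side of this contract. The
helper name and dispatch site below are hypothetical; only the event name
and the detail shape match what RAGSettings.tsx actually subscribes to:

    // Hypothetical helper, living wherever credentials are saved or deleted.
    // Dispatches the browser event RAGSettings listens for, carrying the
    // credential keys that changed so only those providers get retested.
    export function notifyCredentialsUpdated(keys: string[]): void {
      window.dispatchEvent(
        new CustomEvent<{ keys?: string[] }>('archon:credentials-updated', {
          detail: { keys },
        }),
      );
    }

    // Example: after persisting a key, ping the settings panel.
    // notifyCredentialsUpdated(['ANTHROPIC_API_KEY']);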
--- .../src/components/settings/RAGSettings.tsx | 177 +++++++++++------- .../0.1.0/009_add_provider_placeholders.sql | 18 ++ migration/complete_setup.sql | 5 +- 3 files changed, 131 insertions(+), 69 deletions(-) create mode 100644 migration/0.1.0/009_add_provider_placeholders.sql diff --git a/archon-ui-main/src/components/settings/RAGSettings.tsx b/archon-ui-main/src/components/settings/RAGSettings.tsx index 9998e4fd8f..9e42f9ce28 100644 --- a/archon-ui-main/src/components/settings/RAGSettings.tsx +++ b/archon-ui-main/src/components/settings/RAGSettings.tsx @@ -1,4 +1,4 @@ -import React, { useState, useEffect, useRef } from 'react'; +import React, { useState, useEffect, useRef, useCallback } from 'react'; import { Settings, Check, Save, Loader, ChevronDown, ChevronUp, Zap, Database, Trash2, Cog } from 'lucide-react'; import { Card } from '../ui/Card'; import { Input } from '../ui/Input'; @@ -118,6 +118,24 @@ const isProviderKey = (value: unknown): value is ProviderKey => // Default base URL for Ollama instances when not explicitly configured const DEFAULT_OLLAMA_URL = 'http://host.docker.internal:11434/v1'; +const PROVIDER_CREDENTIAL_KEYS = [ + 'OPENAI_API_KEY', + 'GOOGLE_API_KEY', + 'ANTHROPIC_API_KEY', + 'OPENROUTER_API_KEY', + 'GROK_API_KEY', +] as const; + +type ProviderCredentialKey = typeof PROVIDER_CREDENTIAL_KEYS[number]; + +const CREDENTIAL_PROVIDER_MAP: Record = { + OPENAI_API_KEY: 'openai', + GOOGLE_API_KEY: 'google', + ANTHROPIC_API_KEY: 'anthropic', + OPENROUTER_API_KEY: 'openrouter', + GROK_API_KEY: 'grok', +}; + const normalizeBaseUrl = (url?: string | null): string | null => { if (!url) return null; const trimmed = url.trim(); @@ -284,73 +302,53 @@ export const RAGSettings = ({ } }, [ragSettings.EMBEDDING_MODEL, embeddingProvider]); - // Load API credentials for status checking - useEffect(() => { - const loadApiCredentials = async () => { - try { - // Get decrypted values for the API keys we need for status checking - const keyNames = ['OPENAI_API_KEY', 'GOOGLE_API_KEY', 'ANTHROPIC_API_KEY']; - const statusResults = await credentialsService.checkCredentialStatus(keyNames); - - const credentials: {[key: string]: boolean} = {}; - - for (const [key, result] of Object.entries(statusResults)) { - credentials[key] = !!result.has_value; - } - - console.log('πŸ”‘ Loaded API credentials for status checking:', Object.keys(credentials)); - setApiCredentials(credentials); - } catch (error) { - console.error('Failed to load API credentials for status checking:', error); - } - }; - - loadApiCredentials(); - }, []); - - // Reload API credentials when ragSettings change (e.g., after saving) - // Use a ref to track if we've loaded credentials to prevent infinite loops const hasLoadedCredentialsRef = useRef(false); - - // Manual reload function for external calls - const reloadApiCredentials = async () => { + + const reloadApiCredentials = useCallback(async () => { try { - // Get decrypted values for the API keys we need for status checking - const keyNames = ['OPENAI_API_KEY', 'GOOGLE_API_KEY', 'ANTHROPIC_API_KEY']; - const statusResults = await credentialsService.checkCredentialStatus(keyNames); - - const credentials: {[key: string]: boolean} = {}; + const statusResults = await credentialsService.checkCredentialStatus( + Array.from(PROVIDER_CREDENTIAL_KEYS), + ); + + const credentials: { [key: string]: boolean } = {}; - for (const [key, result] of Object.entries(statusResults)) { - credentials[key] = !!result.has_value; + for (const key of PROVIDER_CREDENTIAL_KEYS) { + const result = 
statusResults[key]; + credentials[key] = !!result?.has_value; } - - console.log('πŸ”„ Reloaded API credentials for status checking:', Object.keys(credentials)); + + console.log( + 'πŸ”‘ Updated API credential status snapshot:', + Object.keys(credentials), + ); setApiCredentials(credentials); hasLoadedCredentialsRef.current = true; } catch (error) { - console.error('Failed to reload API credentials:', error); + console.error('Failed to load API credentials for status checking:', error); } - }; - + }, []); + + useEffect(() => { + void reloadApiCredentials(); + }, [reloadApiCredentials]); + useEffect(() => { - // Only reload if we have ragSettings and haven't loaded yet, or if LLM_PROVIDER changed - if (Object.keys(ragSettings).length > 0 && (!hasLoadedCredentialsRef.current || ragSettings.LLM_PROVIDER)) { - reloadApiCredentials(); + if (!hasLoadedCredentialsRef.current) { + return; } - }, [ragSettings.LLM_PROVIDER]); // Only depend on LLM_PROVIDER changes - - // Reload credentials periodically to catch updates from other components (like onboarding) + + void reloadApiCredentials(); + }, [ragSettings.LLM_PROVIDER, reloadApiCredentials]); + useEffect(() => { - // Set up periodic reload every 30 seconds when component is active (reduced from 2s) const interval = setInterval(() => { if (Object.keys(ragSettings).length > 0) { - reloadApiCredentials(); + void reloadApiCredentials(); } - }, 30000); // Changed from 2000ms to 30000ms (30 seconds) + }, 30000); return () => clearInterval(interval); - }, [ragSettings.LLM_PROVIDER]); // Only restart interval if provider changes + }, [ragSettings.LLM_PROVIDER, reloadApiCredentials]); useEffect(() => { const needsDetection = chatProvider === 'ollama' || embeddingProvider === 'ollama'; @@ -476,7 +474,7 @@ export const RAGSettings = ({ }, []); // Test connection to external providers - const testProviderConnection = async (provider: string): Promise => { + const testProviderConnection = useCallback(async (provider: string): Promise => { setProviderConnectionStatus(prev => ({ ...prev, [provider]: { ...prev[provider], checking: true } @@ -503,7 +501,7 @@ export const RAGSettings = ({ })); return false; } - }; + }, []); // Test provider connections when API credentials change useEffect(() => { @@ -529,7 +527,39 @@ export const RAGSettings = ({ const interval = setInterval(testConnections, 60000); return () => clearInterval(interval); - }, [apiCredentials]); // Test when credentials change + }, [apiCredentials, testProviderConnection]); // Test when credentials change + + useEffect(() => { + const handleCredentialUpdate = (event: Event) => { + const detail = (event as CustomEvent<{ keys?: string[] }>).detail; + const updatedKeys = (detail?.keys ?? 
[]).map(key => key.toUpperCase()); + + if (updatedKeys.length === 0) { + void reloadApiCredentials(); + return; + } + + const touchedProviderKeys = updatedKeys.filter(key => key in CREDENTIAL_PROVIDER_MAP); + if (touchedProviderKeys.length === 0) { + return; + } + + void reloadApiCredentials(); + + touchedProviderKeys.forEach(key => { + const provider = CREDENTIAL_PROVIDER_MAP[key as ProviderCredentialKey]; + if (provider) { + void testProviderConnection(provider); + } + }); + }; + + window.addEventListener('archon:credentials-updated', handleCredentialUpdate); + + return () => { + window.removeEventListener('archon:credentials-updated', handleCredentialUpdate); + }; + }, [reloadApiCredentials, testProviderConnection]); // Ref to track if initial test has been run (will be used after function definitions) const hasRunInitialTestRef = useRef(false); @@ -893,33 +923,41 @@ const manualTestConnection = async ( } }, [ragSettings.LLM_PROVIDER, embeddingProvider, llmStatus.online, embeddingStatus.online]); + const hasApiCredential = (credentialKey: ProviderCredentialKey): boolean => { + if (credentialKey in apiCredentials) { + return Boolean(apiCredentials[credentialKey]); + } + + const fallbackKey = Object.keys(apiCredentials).find( + key => key.toUpperCase() === credentialKey, + ); + + return fallbackKey ? Boolean(apiCredentials[fallbackKey]) : false; + }; + // Function to check if a provider is properly configured const getProviderStatus = (providerKey: string): 'configured' | 'missing' | 'partial' => { switch (providerKey) { case 'openai': - // Check if OpenAI API key is configured (case insensitive) - const openAIKey = Object.keys(apiCredentials).find(key => key.toUpperCase() === 'OPENAI_API_KEY'); - const hasOpenAIKey = openAIKey ? !!apiCredentials[openAIKey] : false; - + const hasOpenAIKey = hasApiCredential('OPENAI_API_KEY'); + // Only show configured if we have both API key AND confirmed connection const openAIConnected = providerConnectionStatus['openai']?.connected || false; const isChecking = providerConnectionStatus['openai']?.checking || false; - + // Intentionally avoid logging API key material. - + if (!hasOpenAIKey) return 'missing'; if (isChecking) return 'partial'; return openAIConnected ? 'configured' : 'missing'; case 'google': - // Check if Google API key is configured (case insensitive) - const googleKey = Object.keys(apiCredentials).find(key => key.toUpperCase() === 'GOOGLE_API_KEY'); - const hasGoogleKey = googleKey ? !!apiCredentials[googleKey] : false; + const hasGoogleKey = hasApiCredential('GOOGLE_API_KEY'); // Only show configured if we have both API key AND confirmed connection const googleConnected = providerConnectionStatus['google']?.connected || false; const googleChecking = providerConnectionStatus['google']?.checking || false; - + if (!hasGoogleKey) return 'missing'; if (googleChecking) return 'partial'; return googleConnected ? 'configured' : 'missing'; @@ -941,21 +979,24 @@ const manualTestConnection = async ( return 'missing'; } case 'anthropic': - // Use server-side connection status + const hasAnthropicKey = hasApiCredential('ANTHROPIC_API_KEY'); const anthropicConnected = providerConnectionStatus['anthropic']?.connected || false; const anthropicChecking = providerConnectionStatus['anthropic']?.checking || false; + if (!hasAnthropicKey) return 'missing'; if (anthropicChecking) return 'partial'; return anthropicConnected ? 
'configured' : 'missing'; case 'grok': - // Use server-side connection status + const hasGrokKey = hasApiCredential('GROK_API_KEY'); const grokConnected = providerConnectionStatus['grok']?.connected || false; const grokChecking = providerConnectionStatus['grok']?.checking || false; + if (!hasGrokKey) return 'missing'; if (grokChecking) return 'partial'; return grokConnected ? 'configured' : 'missing'; case 'openrouter': - // Use server-side connection status + const hasOpenRouterKey = hasApiCredential('OPENROUTER_API_KEY'); const openRouterConnected = providerConnectionStatus['openrouter']?.connected || false; const openRouterChecking = providerConnectionStatus['openrouter']?.checking || false; + if (!hasOpenRouterKey) return 'missing'; if (openRouterChecking) return 'partial'; return openRouterConnected ? 'configured' : 'missing'; default: diff --git a/migration/0.1.0/009_add_provider_placeholders.sql b/migration/0.1.0/009_add_provider_placeholders.sql new file mode 100644 index 0000000000..85d526e6c0 --- /dev/null +++ b/migration/0.1.0/009_add_provider_placeholders.sql @@ -0,0 +1,18 @@ +-- Migration: 009_add_provider_placeholders.sql +-- Description: Add placeholder API key rows for OpenRouter, Anthropic, and Grok +-- Version: 0.1.0 +-- Author: Archon Team +-- Date: 2025 + +-- Insert provider API key placeholders (idempotent) +INSERT INTO archon_settings (key, encrypted_value, is_encrypted, category, description) +VALUES + ('OPENROUTER_API_KEY', NULL, true, 'api_keys', 'OpenRouter API key for hosted community models. Get from: https://openrouter.ai/keys'), + ('ANTHROPIC_API_KEY', NULL, true, 'api_keys', 'Anthropic API key for Claude models. Get from: https://console.anthropic.com/account/keys'), + ('GROK_API_KEY', NULL, true, 'api_keys', 'Grok API key for xAI models. Get from: https://console.x.ai/') +ON CONFLICT (key) DO NOTHING; + +-- Record migration application for tracking +INSERT INTO archon_migrations (version, migration_name) +VALUES ('0.1.0', '009_add_provider_placeholders') +ON CONFLICT (version, migration_name) DO NOTHING; diff --git a/migration/complete_setup.sql b/migration/complete_setup.sql index 1609060cf3..801b07b423 100644 --- a/migration/complete_setup.sql +++ b/migration/complete_setup.sql @@ -100,7 +100,10 @@ ON CONFLICT (key) DO NOTHING; -- Add provider API key placeholders INSERT INTO archon_settings (key, encrypted_value, is_encrypted, category, description) VALUES -('GOOGLE_API_KEY', NULL, true, 'api_keys', 'Google API Key for Gemini models. Get from: https://aistudio.google.com/apikey') +('GOOGLE_API_KEY', NULL, true, 'api_keys', 'Google API key for Gemini models. Get from: https://aistudio.google.com/apikey'), +('OPENROUTER_API_KEY', NULL, true, 'api_keys', 'OpenRouter API key for hosted community models. Get from: https://openrouter.ai/keys'), +('ANTHROPIC_API_KEY', NULL, true, 'api_keys', 'Anthropic API key for Claude models. Get from: https://console.anthropic.com/account/keys'), +('GROK_API_KEY', NULL, true, 'api_keys', 'Grok API key for xAI models. Get from: https://console.x.ai/') ON CONFLICT (key) DO NOTHING; -- Code Extraction Settings Migration From b6906959afbf70897b033276c2ed902867a5e19e Mon Sep 17 00:00:00 2001 From: Chillbruhhh Date: Thu, 25 Sep 2025 17:29:33 -0500 Subject: [PATCH 25/28] - archon-ui-main/src/components/settings/RAGSettings.tsx:90 adds a simple display-name map and reuses one red alert style. 
- archon-ui-main/src/components/settings/RAGSettings.tsx:1016 now shows exactly one red banner when the active provider's API key is missing.
- Removed the old duplicate Missing API Key Configuration block, so the panel no longer stacks two warnings.
---
 .../src/components/settings/RAGSettings.tsx   | 71 ++++---------------
 1 file changed, 12 insertions(+), 59 deletions(-)

diff --git a/archon-ui-main/src/components/settings/RAGSettings.tsx b/archon-ui-main/src/components/settings/RAGSettings.tsx
index 9e42f9ce28..62739fc77a 100644
--- a/archon-ui-main/src/components/settings/RAGSettings.tsx
+++ b/archon-ui-main/src/components/settings/RAGSettings.tsx
@@ -91,25 +91,17 @@ const colorStyles: Record<ProviderKey, string> = {
   grok: 'border-yellow-500 bg-yellow-500/10',
 };
 
-const providerAlertStyles: Record<ProviderKey, string> = {
-  openai: 'bg-green-50 dark:bg-green-900/20 border-green-200 dark:border-green-800 text-green-800 dark:text-green-300',
-  google: 'bg-blue-50 dark:bg-blue-900/20 border-blue-200 dark:border-blue-800 text-blue-800 dark:text-blue-300',
-  openrouter: 'bg-cyan-50 dark:bg-cyan-900/20 border-cyan-200 dark:border-cyan-800 text-cyan-800 dark:text-cyan-300',
-  ollama: 'bg-purple-50 dark:bg-purple-900/20 border-purple-200 dark:border-purple-800 text-purple-800 dark:text-purple-300',
-  anthropic: 'bg-orange-50 dark:bg-orange-900/20 border-orange-200 dark:border-orange-800 text-orange-800 dark:text-orange-300',
-  grok: 'bg-yellow-50 dark:bg-yellow-900/20 border-yellow-200 dark:border-yellow-800 text-yellow-800 dark:text-yellow-300',
-};
-
 const providerWarningAlertStyle = 'bg-yellow-50 dark:bg-yellow-900/20 border-yellow-200 dark:border-yellow-800 text-yellow-800 dark:text-yellow-300';
 const providerErrorAlertStyle = 'bg-red-50 dark:bg-red-900/20 border-red-200 dark:border-red-800 text-red-800 dark:text-red-300';
-
-const defaultProviderAlertMessages: Record<ProviderKey, string> = {
-  openai: 'Configure your OpenAI API key in the credentials section to use GPT models.',
-  google: 'Configure your Google API key in the credentials section to use Gemini models.',
-  openrouter: 'Configure your OpenRouter API key in the credentials section to use models.',
-  ollama: 'Configure your Ollama instances in this panel to connect local models.',
-  anthropic: 'Configure your Anthropic API key in the credentials section to use Claude models.',
-  grok: 'Configure your Grok API key in the credentials section to use Grok models.',
+const providerMissingAlertStyle = providerErrorAlertStyle;
+
+const providerDisplayNames: Record<ProviderKey, string> = {
+  openai: 'OpenAI',
+  google: 'Google',
+  openrouter: 'OpenRouter',
+  ollama: 'Ollama',
+  anthropic: 'Anthropic',
+  grok: 'Grok',
 };
 
 const isProviderKey = (value: unknown): value is ProviderKey =>
@@ -1022,8 +1014,9 @@ const manualTestConnection = async (
       providerAlertClassName = providerWarningAlertStyle;
     }
   } else if (activeProviderKey && selectedProviderStatus === 'missing') {
-    providerAlertMessage = defaultProviderAlertMessages[activeProviderKey] ?? null;
-    providerAlertClassName = providerAlertStyles[activeProviderKey] ?? '';
+    const providerName = providerDisplayNames[activeProviderKey] ?? activeProviderKey;
+    providerAlertMessage = `${providerName} API key is not configured. Add it in Settings > API Keys.`;
+    providerAlertClassName = providerMissingAlertStyle;
   }
 
   const shouldShowProviderAlert = Boolean(providerAlertMessage);
@@ -1385,46 +1378,6 @@ const manualTestConnection = async (
             ))}
- - {/* API Key Validation Warnings */} - {(() => { - const chatStatus = getProviderStatus(chatProvider); - const embeddingStatus = getProviderStatus(embeddingProvider); - const missingProviders = []; - const providerToIgnore = activeProviderKey; - - if (chatStatus === 'missing' && (!providerToIgnore || chatProvider !== providerToIgnore)) { - missingProviders.push({ name: chatProvider, type: 'Chat', color: 'green' }); - } - if ( - embeddingStatus === 'missing' && - embeddingProvider !== chatProvider && - (!providerToIgnore || embeddingProvider !== providerToIgnore) - ) { - missingProviders.push({ name: embeddingProvider, type: 'Embedding', color: 'purple' }); - } - - if (missingProviders.length > 0) { - return ( -
-
- - - - - Missing API Key Configuration - -
-

- Please configure API keys for: {missingProviders.map(p => `${p.name} (${p.type})`).join(', ')} -

-
- ); - } - return null; - })()} - - {shouldShowProviderAlert && (

{providerAlertMessage}

From c07beeb052980ff518060b9ba87eb3a8c951a14b Mon Sep 17 00:00:00 2001
From: Josh
Date: Fri, 26 Sep 2025 06:32:36 -0500
Subject: [PATCH 26/28] Update credentialsService.ts default model

---
 archon-ui-main/src/services/credentialsService.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/archon-ui-main/src/services/credentialsService.ts b/archon-ui-main/src/services/credentialsService.ts
index 8287be76f8..b2d2da52fa 100644
--- a/archon-ui-main/src/services/credentialsService.ts
+++ b/archon-ui-main/src/services/credentialsService.ts
@@ -195,7 +195,7 @@ class CredentialsService {
       USE_HYBRID_SEARCH: true,
       USE_AGENTIC_RAG: true,
       USE_RERANKING: true,
-      MODEL_CHOICE: "gpt-4o-mini",
+      MODEL_CHOICE: "gpt-4.1-nano",
       LLM_PROVIDER: "openai",
       LLM_BASE_URL: "",
       LLM_INSTANCE_NAME: "",

From e438b7109dff10bb0a4671327d70d175297ac159 Mon Sep 17 00:00:00 2001
From: Chillbruhhh
Date: Sat, 27 Sep 2025 20:30:41 -0500
Subject: [PATCH 27/28] Update the Google embedding adapter for
 multi-dimensional RAG querying

---
 .../services/embeddings/embedding_service.py | 37 +++++++++++++++++--
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/python/src/server/services/embeddings/embedding_service.py b/python/src/server/services/embeddings/embedding_service.py
index 1a71cfdcef..095ef27872 100644
--- a/python/src/server/services/embeddings/embedding_service.py
+++ b/python/src/server/services/embeddings/embedding_service.py
@@ -12,6 +12,7 @@
 from typing import Any
 
 import httpx
+import numpy as np
 import openai
 
 from ...config.logfire_config import safe_span, search_logger
@@ -113,9 +114,6 @@ async def create_embeddings(
         dimensions: int | None = None,
     ) -> list[list[float]]:
         try:
-            if dimensions is not None:
-                _ = dimensions  # Maintains adapter signature; Google controls dimensions server-side.
-
             google_api_key = await credential_service.get_credential("GOOGLE_API_KEY")
             if not google_api_key:
                 raise EmbeddingAPIError("Google API key not found")
 
             async with httpx.AsyncClient(timeout=30.0) as http_client:
                 embeddings = await asyncio.gather(
                     *(
-                        self._fetch_single_embedding(http_client, google_api_key, model, text)
+                        self._fetch_single_embedding(http_client, google_api_key, model, text, dimensions)
                         for text in texts
                     )
                 )
@@ -152,6 +150,7 @@ async def _fetch_single_embedding(
         api_key: str,
         model: str,
         text: str,
+        dimensions: int | None = None,
     ) -> list[float]:
         if model.startswith("models/"):
             url_model = model[len("models/") :]
 
             "content": {"parts": [{"text": text}]},
         }
 
+        # Add output_dimensionality parameter if dimensions are specified
+        if dimensions is not None and dimensions > 0:
+            # Validate that the requested dimension is supported by Google
+            if dimensions not in [128, 256, 512, 768, 1024, 1536, 2048, 3072]:
+                search_logger.warning(
+                    f"Requested dimension {dimensions} may not be supported by Google. 
" + f"Supported dimensions: 128, 256, 512, 768, 1024, 1536, 2048, 3072" + ) + payload["outputDimensionality"] = dimensions + response = await http_client.post(url, headers=headers, json=payload) response.raise_for_status() @@ -178,8 +187,28 @@ async def _fetch_single_embedding( if not isinstance(values, list): raise EmbeddingAPIError(f"Invalid embedding payload from Google: {result}") + # Normalize embeddings for dimensions < 3072 as per Google's documentation + if dimensions is not None and dimensions < 3072 and len(values) > 0: + values = self._normalize_embedding(values) + return values + def _normalize_embedding(self, embedding: list[float]) -> list[float]: + """Normalize embedding vector for dimensions < 3072.""" + try: + embedding_array = np.array(embedding, dtype=np.float32) + norm = np.linalg.norm(embedding_array) + if norm > 0: + normalized = embedding_array / norm + return normalized.tolist() + else: + search_logger.warning("Zero-norm embedding detected, returning unnormalized") + return embedding + except Exception as e: + search_logger.error(f"Failed to normalize embedding: {e}") + # Return original embedding if normalization fails + return embedding + def _get_embedding_adapter(provider: str, client: Any) -> EmbeddingProviderAdapter: provider_name = (provider or "").lower() From b5930ba52b46cc10db41cd90e104e545214a76c3 Mon Sep 17 00:00:00 2001 From: Chillbruhhh Date: Mon, 29 Sep 2025 23:00:42 -0500 Subject: [PATCH 28/28] thought this micro fix in the google embedding pushed with the embedding update the other day, it didnt. pushing now --- .../services/embeddings/embedding_service.py | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/python/src/server/services/embeddings/embedding_service.py b/python/src/server/services/embeddings/embedding_service.py index 095ef27872..87ce390b67 100644 --- a/python/src/server/services/embeddings/embedding_service.py +++ b/python/src/server/services/embeddings/embedding_service.py @@ -83,10 +83,10 @@ async def create_embeddings( class OpenAICompatibleEmbeddingAdapter(EmbeddingProviderAdapter): """Adapter for providers using the OpenAI embeddings API shape.""" - + def __init__(self, client: Any): self._client = client - + async def create_embeddings( self, texts: list[str], @@ -99,7 +99,7 @@ async def create_embeddings( } if dimensions is not None: request_args["dimensions"] = dimensions - + response = await self._client.embeddings.create(**request_args) return [item.embedding for item in response.data] @@ -168,15 +168,21 @@ async def _fetch_single_embedding( "content": {"parts": [{"text": text}]}, } - # Add output_dimensionality parameter if dimensions are specified + # Add output_dimensionality parameter if dimensions are specified and supported if dimensions is not None and dimensions > 0: - # Validate that the requested dimension is supported by Google - if dimensions not in [128, 256, 512, 768, 1024, 1536, 2048, 3072]: + model_name = payload_model.removeprefix("models/") + if model_name.startswith("textembedding-gecko"): + supported_dimensions = {128, 256, 512, 768} + else: + supported_dimensions = {128, 256, 512, 768, 1024, 1536, 2048, 3072} + + if dimensions in supported_dimensions: + payload["outputDimensionality"] = dimensions + else: search_logger.warning( - f"Requested dimension {dimensions} may not be supported by Google. " - f"Supported dimensions: 128, 256, 512, 768, 1024, 1536, 2048, 3072" + f"Requested dimension {dimensions} is not supported by Google model '{model_name}'. 
" + "Falling back to the provider default." ) - payload["outputDimensionality"] = dimensions response = await http_client.post(url, headers=headers, json=payload) response.raise_for_status() @@ -188,7 +194,8 @@ async def _fetch_single_embedding( raise EmbeddingAPIError(f"Invalid embedding payload from Google: {result}") # Normalize embeddings for dimensions < 3072 as per Google's documentation - if dimensions is not None and dimensions < 3072 and len(values) > 0: + actual_dimension = len(values) + if actual_dimension > 0 and actual_dimension < 3072: values = self._normalize_embedding(values) return values