diff --git a/archon-ui-main/src/components/ui/Alert.tsx b/archon-ui-main/src/components/ui/Alert.tsx new file mode 100644 index 0000000000..2d1a0c0603 --- /dev/null +++ b/archon-ui-main/src/components/ui/Alert.tsx @@ -0,0 +1,46 @@ +/** + * Basic Alert component for error display + * + * Provides consistent styling for alerts throughout the application. + */ + +import React from 'react'; + +interface AlertProps { + children: React.ReactNode; + variant?: 'default' | 'destructive'; + className?: string; +} + +export const Alert: React.FC = ({ + children, + variant = 'default', + className = '' +}) => { + const baseClasses = 'p-4 border rounded-lg'; + const variantClasses = variant === 'destructive' + ? 'border-red-300 bg-red-50 text-red-800' + : 'border-blue-300 bg-blue-50 text-blue-800'; + + return ( +
+ {children} +
+ ); +}; + +interface AlertDescriptionProps { + children: React.ReactNode; + className?: string; +} + +export const AlertDescription: React.FC = ({ + children, + className = '' +}) => { + return ( +
+ {children} +
+ ); +}; \ No newline at end of file diff --git a/archon-ui-main/src/components/ui/ErrorAlert.tsx b/archon-ui-main/src/components/ui/ErrorAlert.tsx new file mode 100644 index 0000000000..01808c3d2e --- /dev/null +++ b/archon-ui-main/src/components/ui/ErrorAlert.tsx @@ -0,0 +1,162 @@ +/** + * Enhanced error alert component for displaying OpenAI API errors + * + * Provides specialized UI feedback for different error types including + * quota exhaustion, rate limiting, and API errors. + * + * Related to GitHub issue #362 - improves user experience by showing + * clear, actionable error messages instead of generic failures. + */ + +import React from 'react'; +import { Alert, AlertDescription } from './Alert'; +import { EnhancedError, getDisplayErrorMessage, getErrorSeverity, getErrorAction } from '../../services/knowledgeBaseErrorHandler'; + +interface ErrorAlertProps { + error: EnhancedError | null; + onDismiss?: () => void; + className?: string; +} + +/** + * Validate error object structure and properties + */ +function isValidError(error: any): error is EnhancedError { + if (!error || typeof error !== 'object') return false; + + // Check that it has basic Error properties + if (typeof error.message !== 'string') return false; + + // If it claims to be an OpenAI error, validate the structure + if (error.isOpenAIError === true) { + if (!error.errorDetails || typeof error.errorDetails !== 'object') return false; + + const { error_type, message, error: errorField } = error.errorDetails; + if (typeof error_type !== 'string' || typeof message !== 'string') return false; + } + + // If statusCode is present, it should be a number + if (error.statusCode !== undefined && typeof error.statusCode !== 'number') return false; + + return true; +} + +export const ErrorAlert: React.FC = ({ error, onDismiss, className = '' }) => { + if (!error) return null; + + // Validate error object structure + if (!isValidError(error)) { + console.warn('Invalid error object passed to ErrorAlert:', error); + // Create a fallback error object + const fallbackError: EnhancedError = Object.assign(new Error('Invalid error object received'), { + errorDetails: { + error: 'validation_error', + message: 'An error occurred but the error details could not be parsed properly.', + error_type: 'api_error' as const + } + }); + error = fallbackError; + } + + const severity = getErrorSeverity(error); + const displayMessage = getDisplayErrorMessage(error); + const suggestedAction = getErrorAction(error); + + // Determine alert styling based on severity + const alertVariant = severity === 'error' ? 'destructive' : 'default'; + + // Special styling for OpenAI errors + const isOpenAIError = error.isOpenAIError; + + return ( + +
+
+ {/* Error icon based on type */} +
+ {isOpenAIError ? ( + ⚠️ + ) : ( + + )} + + {isOpenAIError ? 'OpenAI API Error' : 'Error'} + +
+ + {/* Main error message */} + + {displayMessage} + + + {/* Suggested action for OpenAI errors */} + {suggestedAction && ( +
+

+ 💡 Suggested action: +

+

+ {suggestedAction} +

+
+ )} + + {/* Token usage info for quota errors */} + {error.errorDetails?.tokens_used && ( +
+ Tokens used: {error.errorDetails.tokens_used.toLocaleString()} +
+ )} +
+ + {/* Dismiss button */} + {onDismiss && ( + + )} +
+
+ ); +}; + +/** + * Hook for handling knowledge base operation errors + * + * Usage example: + * ```tsx + * const { error, setError, clearError } = useErrorHandler(); + * + * const handleSearch = async () => { + * try { + * await knowledgeBaseService.searchKnowledgeBase(query); + * } catch (err) { + * setError(err as EnhancedError); + * } + * }; + * + * return ( + * <> + * + * + * + * ); + * ``` + */ +export function useErrorHandler() { + const [error, setError] = React.useState(null); + + const clearError = React.useCallback(() => { + setError(null); + }, []); + + return { + error, + setError, + clearError + }; +} \ No newline at end of file diff --git a/archon-ui-main/src/pages/KnowledgeBasePage.tsx b/archon-ui-main/src/pages/KnowledgeBasePage.tsx index 9b1c96d7a7..8806b0c6a8 100644 --- a/archon-ui-main/src/pages/KnowledgeBasePage.tsx +++ b/archon-ui-main/src/pages/KnowledgeBasePage.tsx @@ -17,6 +17,7 @@ import { GroupCreationModal } from '../components/knowledge-base/GroupCreationMo import { AddKnowledgeModal } from '../components/knowledge-base/AddKnowledgeModal'; import { CrawlingTab } from '../components/knowledge-base/CrawlingTab'; import { DocumentBrowser } from '../components/knowledge-base/DocumentBrowser'; +import { parseKnowledgeBaseError, getDisplayErrorMessage, getErrorAction } from '../services/knowledgeBaseErrorHandler'; interface GroupedKnowledgeItem { id: string; @@ -72,7 +73,11 @@ export const KnowledgeBasePage = () => { setTotalItems(response.total); } catch (error) { console.error('Failed to load knowledge items:', error); - showToast('Failed to load knowledge items', 'error'); + + // Parse the error using enhanced error handler + const enhancedError = parseKnowledgeBaseError(error); + const displayMessage = getDisplayErrorMessage(enhancedError); + showToast(displayMessage, 'error'); setKnowledgeItems([]); } finally { setLoading(false); @@ -409,7 +414,17 @@ export const KnowledgeBasePage = () => { } } catch (error) { console.error('Failed to refresh knowledge item:', error); - showToast('Failed to refresh knowledge item', 'error'); + + // Parse the error using enhanced error handler + const enhancedError = parseKnowledgeBaseError(error); + const displayMessage = getDisplayErrorMessage(enhancedError); + const suggestedAction = getErrorAction(enhancedError); + + const fullMessage = suggestedAction + ? `${displayMessage} ${suggestedAction}` + : displayMessage; + + showToast(fullMessage, 'error'); } }; @@ -801,7 +816,6 @@ export const KnowledgeBasePage = () => { }} /> )} - {/* Document Browser Modal */} {isDocumentBrowserOpen && documentBrowserSourceId && ( { )} ); -}; \ No newline at end of file +}; diff --git a/archon-ui-main/src/services/knowledgeBaseErrorHandler.ts b/archon-ui-main/src/services/knowledgeBaseErrorHandler.ts new file mode 100644 index 0000000000..ddbd8d0f02 --- /dev/null +++ b/archon-ui-main/src/services/knowledgeBaseErrorHandler.ts @@ -0,0 +1,327 @@ +/** + * Error handling utilities for knowledge base operations + * + * Provides specialized error handling for OpenAI API errors, + * rate limiting, and quota exhaustion scenarios. + * + * Related to GitHub issue #362 - improves user experience + * by displaying clear error messages when OpenAI API fails. + */ + +export interface OpenAIErrorDetails { + error: string; + message: string; + error_type: 'authentication_required' | 'authentication_failed' | 'quota_exhausted' | 'rate_limit' | 'api_error'; + tokens_used?: number; + retry_after?: number; +} + +// Valid OpenAI error types +const VALID_ERROR_TYPES = [ + 'authentication_required', + 'authentication_failed', + 'quota_exhausted', + 'rate_limit', + 'api_error' +] as const; + +/** + * Validate and normalize error type from API response + */ +function validateErrorType(errorType: any): OpenAIErrorDetails['error_type'] { + if (typeof errorType === 'string' && VALID_ERROR_TYPES.includes(errorType as any)) { + return errorType as OpenAIErrorDetails['error_type']; + } + return 'api_error'; // Default fallback +} + +export interface EnhancedError extends Error { + statusCode?: number; + errorDetails?: OpenAIErrorDetails; + isOpenAIError?: boolean; +} + +/** + * Create a fallback error for cases where input is invalid or unparseable + */ +function createFallbackError(reason: string): EnhancedError { + return Object.assign(new Error('Unknown error occurred'), { + errorDetails: { + error: 'unknown', + message: sanitizeMessage(`${reason}. Please try again or contact support if the problem persists.`), + error_type: 'api_error' as const + } + }) as EnhancedError; +} + +// Constants for validation +const MAX_OBJECT_KEYS = 100; +const MAX_RECURSION_DEPTH = 10; + +// Sanitization patterns for frontend error messages +const REDACTION_PATTERNS: ReadonlyArray<[RegExp, string]> = [ + [/\bsk-[A-Za-z0-9]{10,}\b/g, 'sk-REDACTED'], + [/\borg-[A-Za-z0-9]{6,}\b/g, 'org-REDACTED'], + [/\bproj_[A-Za-z0-9]{10,}\b/g, 'proj_REDACTED'], + [/\bBearer\s+[A-Za-z0-9._-]{10,}\b/gi, 'Bearer REDACTED'], + [/\bhttps?:\/\/[^\s]{1,200}/gi, '[REDACTED_URL]'], +]; + +function sanitizeMessage(msg: unknown): string { + if (typeof msg !== 'string' || msg.length === 0) return 'Unknown error occurred'; + let out = msg.slice(0, 10_000); // cap length + for (const [re, replacement] of REDACTION_PATTERNS) { + out = out.replace(re, replacement); + } + return out; +} + +/** + * Check if an object can be safely serialized (no circular references) + */ +function isSafeObject(obj: any, visited = new WeakSet(), depth = 0): boolean { + if (typeof obj !== 'object' || obj === null) return true; + if (depth > MAX_RECURSION_DEPTH) return false; + + // Quick size check to prevent expensive operations on large objects + if (Object.keys(obj).length > MAX_OBJECT_KEYS) return false; + + // Check for circular references + if (visited.has(obj)) return false; + visited.add(obj); + + // Check each property recursively + try { + for (const key in obj) { + if (Object.prototype.hasOwnProperty.call(obj, key)) { + if (!isSafeObject(obj[key], visited, depth + 1)) { + return false; + } + } + } + return true; + } catch { + // Error during traversal indicates unsafe object + return false; + } +} + +/** + * Parse and enhance API errors from knowledge base operations + */ +export function parseKnowledgeBaseError(error: any): EnhancedError { + if (process.env.NODE_ENV !== 'production') { + const safeMsg = sanitizeMessage((error && (error.message || error?.response?.data?.message)) ?? ''); + console.debug('parseKnowledgeBaseError', { status: error?.status ?? error?.response?.status, message: safeMsg }); + } + + // Enhanced input validation + if (!error) { + return createFallbackError('No error information provided'); + } + + if (typeof error === 'string') { + const sanitizedMessage = sanitizeMessage(error); + return Object.assign(new Error(sanitizedMessage), { + errorDetails: { + error: 'api_error', + message: sanitizedMessage, + error_type: 'api_error' as const + } + }) as EnhancedError; + } + + if (typeof error !== 'object' || error === null) { + return createFallbackError('Invalid error format'); + } + + // Check for empty objects or objects with no useful properties + if (error.constructor === Object && Object.keys(error).length === 0) { + return createFallbackError('Empty error object received'); + } + + // Check for circular references and object safety + if (!isSafeObject(error)) { + return createFallbackError('Error object contains circular references'); + } + + // Handle Error instances that might have been serialized/deserialized + if (error instanceof Error || (error.name && error.message && error.stack)) { + // This is likely an Error object, proceed with parsing + } else if (!error.message && !error.error && !error.detail && !error.status) { + // Object doesn't have any recognizable error properties + return createFallbackError('Unrecognized error object structure'); + } + + const baseMessage = + (typeof error?.message === 'string' && error.message) || + (typeof error?.response?.data?.message === 'string' && error.response.data.message) || + 'Unknown error'; + const enhancedError: EnhancedError = new Error(sanitizeMessage(baseMessage)); + + // Check if this is an HTTP response error with JSON details + if (error && typeof error === 'object') { + // Handle fetch Response errors + if (typeof error.status === 'number') enhancedError.statusCode = error.status; + if (typeof error.statusCode === 'number') enhancedError.statusCode = error.statusCode; + if (typeof error.response?.status === 'number') enhancedError.statusCode = error.response.status; + if (typeof error.response?.statusCode === 'number') enhancedError.statusCode = error.response.statusCode; + + // Parse error details from API response + if (error.error || error.detail || error.response?.data) { + const errorData = error.error || error.detail || error.response?.data?.error || error.response?.data; + + // Check if it's an OpenAI-specific error + if (typeof errorData === 'object' && errorData.error_type) { + enhancedError.isOpenAIError = true; + + // Validate and normalize the error details + const validatedErrorType = validateErrorType(errorData.error_type); + enhancedError.errorDetails = { + error: errorData.error || 'unknown', + message: errorData.message || 'Unknown error occurred', + error_type: validatedErrorType, + tokens_used: typeof errorData.tokens_used === 'number' ? errorData.tokens_used : undefined, + retry_after: typeof errorData.retry_after === 'number' ? errorData.retry_after : undefined + }; + + // Set a more descriptive message based on error type + switch (validatedErrorType) { + case 'authentication_required': + case 'authentication_failed': + enhancedError.message = '401 Unauthorized - Invalid OpenAI API key. Please verify your OpenAI API key in Settings before starting a crawl.'; + break; + case 'quota_exhausted': + enhancedError.message = 'OpenAI API quota exhausted. Please add credits to your OpenAI account or check your billing settings.'; + break; + case 'rate_limit': + enhancedError.message = 'OpenAI API rate limit exceeded. Please wait a moment and try again.'; + break; + case 'api_error': + enhancedError.message = sanitizeMessage( + `OpenAI API error: ${typeof errorData.message === 'string' ? errorData.message : 'Unknown error'}. Please check your API key configuration.` + ); + break; + default: + enhancedError.message = sanitizeMessage( + (typeof errorData.message === 'string' && errorData.message) || + (typeof errorData.error === 'string' && errorData.error) || + enhancedError.message + ); + } + } + } + } + + return enhancedError; +} + +/** + * Get user-friendly error message for display in UI + */ +export function getDisplayErrorMessage(error: EnhancedError): string { + if (error.isOpenAIError && error.errorDetails) { + switch (error.errorDetails.error_type) { + case 'authentication_required': + case 'authentication_failed': + return `401 Unauthorized - Invalid OpenAI API key. Please verify your OpenAI API key in Settings before starting a crawl.`; + + case 'quota_exhausted': + return `OpenAI API quota exhausted. Please add credits to your OpenAI account or check your billing settings.`; + + case 'rate_limit': + return `OpenAI API rate limit exceeded. Please wait a moment and try again.`; + + case 'api_error': + return sanitizeMessage(`OpenAI API error: ${error.errorDetails.message}. Please check your API key configuration.`); + + default: + return sanitizeMessage(error.errorDetails.message || error.message); + } + } + + // Handle HTTP status codes + if (error.statusCode) { + switch (error.statusCode) { + case 401: + return '401 Unauthorized - Invalid or missing OpenAI credentials. Verify your API key in Settings.'; + case 403: + return '403 Forbidden - Your OpenAI key/org/project lacks access to this operation.'; + case 429: + return 'API rate limit exceeded. Please wait a moment and try again.'; + case 502: + return 'API service unavailable. Please try again in a few minutes.'; + case 503: + return 'Service temporarily unavailable. Please try again later.'; + default: + return sanitizeMessage(error.message); + } + } + + return sanitizeMessage(error.message || 'An unexpected error occurred.'); +} + +/** + * Get error severity level for UI styling + */ +export function getErrorSeverity(error: EnhancedError): 'error' | 'warning' | 'info' { + if (error.isOpenAIError && error.errorDetails) { + switch (error.errorDetails.error_type) { + case 'quota_exhausted': + return 'error'; // Critical - user action required + case 'rate_limit': + return 'warning'; // Temporary - retry may work + case 'api_error': + return 'error'; // Likely configuration issue + default: + return 'error'; + } + } + + if (error.statusCode && error.statusCode >= 500) { + return 'error'; // Server errors + } + if (error.statusCode === 401 || error.statusCode === 403) return 'error'; + if (error.statusCode === 429) return 'warning'; + return 'warning'; // Default for other 4xx/unknown +} + +/** + * Get suggested action for the user based on error type + */ +export function getErrorAction(error: EnhancedError): string | null { + if (error.isOpenAIError && error.errorDetails) { + switch (error.errorDetails.error_type) { + case 'authentication_required': + case 'authentication_failed': + return 'Go to Settings and verify your OpenAI API key'; + case 'quota_exhausted': + return 'Check your OpenAI billing dashboard and add credits'; + case 'rate_limit': { + const retryAfter = error.errorDetails.retry_after ?? 30; + return `Wait ${retryAfter} seconds and try again`; + } + case 'api_error': + return 'Verify your OpenAI API key in Settings'; + default: + return null; + } + } + + // HTTP fallback when no OpenAI-specific details are present + if (error.statusCode) { + switch (error.statusCode) { + case 401: + return 'Go to Settings and verify your OpenAI API key'; + case 403: + return 'Check your OpenAI org/project permissions'; + case 429: { + const retryAfter = error.errorDetails?.retry_after ?? 30; + return `Wait ${retryAfter} seconds and try again`; + } + default: + return null; + } + } + return null; +} \ No newline at end of file diff --git a/archon-ui-main/src/services/knowledgeBaseService.ts b/archon-ui-main/src/services/knowledgeBaseService.ts index 10ab75274e..a0f3dd4c8b 100644 --- a/archon-ui-main/src/services/knowledgeBaseService.ts +++ b/archon-ui-main/src/services/knowledgeBaseService.ts @@ -72,6 +72,7 @@ export interface SearchOptions { // Use relative URL to go through Vite proxy import { API_BASE_URL } from '../config/api'; +import { parseKnowledgeBaseError } from './knowledgeBaseErrorHandler'; // const API_BASE_URL = '/api'; // Now imported from config // Helper function for API requests with timeout @@ -108,9 +109,24 @@ async function apiRequest( if (!response.ok) { console.error(`❌ [KnowledgeBase] Response not OK: ${response.status} ${response.statusText}`); - const error = await response.json(); + + // Use enhanced error handling for better user experience + let error: any; + try { + error = await response.json(); + } catch { + const text = await response.text(); + error = { status: response.status, error: text }; + } console.error(`❌ [KnowledgeBase] API error response:`, error); - throw new Error(error.error || `HTTP ${response.status}`); + + // Parse the error structure correctly for OpenAI errors + const enhancedError = parseKnowledgeBaseError({ + status: response.status, + error: error.detail || error.error || error, + detail: error.detail + }); + throw enhancedError; } const data = await response.json(); @@ -125,10 +141,16 @@ async function apiRequest( // Check if it's a timeout error if (error instanceof Error && error.name === 'AbortError') { - throw new Error('Request timed out after 10 seconds'); + throw parseKnowledgeBaseError(new Error('Request timed out after 10 seconds')); } - throw error; + // If it's already an enhanced error, re-throw it + if (error && typeof error === 'object' && 'isOpenAIError' in error) { + throw error; + } + + // Parse other errors through the error handler for consistency + throw parseKnowledgeBaseError(error); } } diff --git a/polling-analysis.md b/polling-analysis.md new file mode 100644 index 0000000000..c12b629285 --- /dev/null +++ b/polling-analysis.md @@ -0,0 +1,116 @@ +# Polling Mechanism Analysis & Recommendation + +## Issue Analysis: Slow UI Refresh Times + +### Problem Statement +The UI sometimes takes over 10 seconds to refresh and show changes, when users expect updates within 1-2 seconds. + +### Root Cause Analysis + +**VERIFIED FINDINGS:** + +#### Multiple Polling Intervals +- **Tasks**: 5-second base interval +- **Projects**: 20-second base interval +- **MCP Status**: 5-second interval +- **Backend Health**: 30-second interval + +#### Smart Polling Behavior +```typescript +// From useSmartPolling.ts lines 45-58 +const getSmartInterval = (): number | false => { + if (!isVisible) { + return false; // Page hidden - disable polling + } + if (!hasFocus) { + return 60000; // Window unfocused - poll every 60 seconds + } + return baseInterval; // Active state - use base interval +}; +``` + +#### Stale Time Configurations +- **Tasks**: 10-second stale time +- **Projects**: 15-second stale time +- **MCP Status**: 3-second stale time + +#### Current Task Configuration +```typescript +// From useTaskQueries.ts lines 14-27 +export function useProjectTasks(projectId: string | undefined, enabled = true) { + const { refetchInterval } = useSmartPolling(5000); // 5 second base interval + + return useQuery({ + refetchInterval, + staleTime: 10000, // Consider data stale after 10 seconds + // ... + }); +} +``` + +### Why Delays Occur + +1. **Focus-based throttling**: Window loses focus → polling slows from 5s to 60s +2. **Stale time conflicts**: 5s polling + 10s stale time = potential 15s delays +3. **Variable polling rates**: Different components update at different speeds +4. **ETag cache misses**: Rare scenarios require full refetch cycles + +--- + +## Recommendation: Reduce Task Polling to 2-3 Seconds + +### Why This Makes Sense + +#### ✅ Technical Support +- **ETag caching** provides 70-90% bandwidth savings via HTTP 304 responses +- **Smart polling** already optimizes (disables when hidden, slows when unfocused) +- **Local deployment** eliminates scaling concerns +- **Existing infrastructure** handles frequent requests efficiently + +#### ✅ User Experience Benefits +- Task management is highly interactive (status changes, creation, updates) +- Users expect immediate feedback for task operations +- Current 5s feels sluggish for interactive workflows +- MCP integration benefits from responsive task updates + +#### ✅ Minimal Technical Cost +- Most requests return HTTP 304 (cached) with existing ETag system +- Smart polling prevents battery drain on mobile/inactive tabs +- Single-user deployment avoids server load concerns + +### Recommended Implementation + +```typescript +// In useTaskQueries.ts +export function useProjectTasks(projectId: string | undefined, enabled = true) { + const { refetchInterval } = useSmartPolling(2000); // 2s instead of 5s + + return useQuery({ + queryKey: projectId ? taskKeys.all(projectId) : ["tasks-undefined"], + queryFn: async () => { + if (!projectId) throw new Error("No project ID"); + return taskService.getTasksByProject(projectId); + }, + enabled: !!projectId && enabled, + refetchInterval, + refetchOnWindowFocus: true, + staleTime: 5000, // 5s instead of 10s for consistency + }); +} +``` + +### Expected Impact +- **Perceived responsiveness**: Updates appear within 2-5 seconds instead of 5-15 seconds +- **Bandwidth**: Minimal increase due to ETag caching (most requests = 304 responses) +- **Battery**: No impact due to smart polling (stops when tab inactive) +- **User satisfaction**: Significantly improved for interactive task workflows + +--- + +## Files to Modify + +1. **`/src/features/projects/tasks/hooks/useTaskQueries.ts`** + - Line 15: Change `useSmartPolling(5000)` to `useSmartPolling(2000)` + - Line 26: Change `staleTime: 10000` to `staleTime: 5000` + +This change maintains the existing bandwidth-efficient architecture while providing much better user experience for the most interactive part of the application. \ No newline at end of file diff --git a/python/src/server/api_routes/knowledge_api.py b/python/src/server/api_routes/knowledge_api.py index a443b89b2a..7ac88f2c0b 100644 --- a/python/src/server/api_routes/knowledge_api.py +++ b/python/src/server/api_routes/knowledge_api.py @@ -26,6 +26,12 @@ from ..services.storage import DocumentStorageService from ..utils import get_supabase_client from ..utils.document_processing import extract_text_from_document +from ..services.embeddings.embedding_exceptions import ( + EmbeddingAuthenticationError, + EmbeddingQuotaExhaustedError, + EmbeddingRateLimitError, + EmbeddingAPIError, +) # Get logger for this module logger = get_logger(__name__) @@ -52,6 +58,85 @@ active_crawl_tasks: dict[str, asyncio.Task] = {} +def _sanitize_openai_error(error_message: str) -> str: + """Sanitize OpenAI API error messages to prevent information disclosure.""" + import re + + # Input validation + if not isinstance(error_message, str): + return "OpenAI API encountered an error. Please verify your API key and quota." + if not error_message.strip(): + return "OpenAI API encountered an error. Please verify your API key and quota." + + # Common patterns to sanitize + sanitized_patterns = { + r'https?://[^\s]+': '[REDACTED_URL]', # Remove URLs + r'sk-[a-zA-Z0-9]{48}': '[REDACTED_KEY]', # Remove API keys (OpenAI format) + r'"[^"]*auth[^"]*?"': '[REDACTED_AUTH]', # Remove auth details (non-greedy) + r'org-[a-zA-Z0-9]{24}': '[REDACTED_ORG]', # Remove OpenAI organization IDs + r'proj_[a-zA-Z0-9]{10,}': '[REDACTED_PROJ]', # Remove OpenAI project IDs (adjusted length) + r'req_[a-zA-Z0-9]{6,}': '[REDACTED_REQ]', # Remove OpenAI request IDs (adjusted length) + r'user-[a-zA-Z0-9]{10,}': '[REDACTED_USER]', # Remove OpenAI user IDs + r'sess_[a-zA-Z0-9]{10,}': '[REDACTED_SESS]', # Remove session IDs + r'Bearer\s+[^\s]{1,200}': 'Bearer [REDACTED_AUTH_TOKEN]', # Remove bearer tokens (bounded to prevent ReDoS) + } + + sanitized = error_message + for pattern, replacement in sanitized_patterns.items(): + sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE) + + # Check for sensitive words after pattern replacement, but exclude our redacted patterns + sensitive_words = ['internal', 'server', 'token'] + # Only check for 'endpoint' if it's not part of our redacted URL pattern + if 'endpoint' in sanitized.lower() and '[REDACTED_URL]' not in sanitized: + sensitive_words.append('endpoint') + + # Return generic message if still contains sensitive info + if any(word in sanitized.lower() for word in sensitive_words): + return "OpenAI API encountered an error. Please verify your API key and quota." + + return sanitized + + +async def _validate_openai_api_key() -> None: + """ + Validate OpenAI API key is present and working before starting operations. + + Raises: + HTTPException: 401 if API key is invalid/missing, 429 if quota exhausted + """ + try: + # Test the API key with a minimal embedding request + from ..services.embeddings.embedding_service import create_embedding + + # Try to create a test embedding with minimal content + await create_embedding(text="test") + + except EmbeddingAuthenticationError as e: + raise HTTPException( + status_code=401, + detail={ + "error": "Invalid OpenAI API key", + "message": "Please verify your OpenAI API key in Settings before starting a crawl.", + "error_type": "authentication_failed" + } + ) + except EmbeddingQuotaExhaustedError as e: + raise HTTPException( + status_code=429, + detail={ + "error": "OpenAI quota exhausted", + "message": "Your OpenAI API key has no remaining credits. Please add credits to your account.", + "error_type": "quota_exhausted" + } + ) + except Exception as e: + # For any other errors, allow the crawl to continue + # The error will be caught later during actual processing + logger.warning(f"API key validation failed with unexpected error: {e}") + pass + + # Request Models class KnowledgeItemRequest(BaseModel): url: str @@ -322,6 +407,9 @@ async def get_knowledge_item_code_examples(source_id: str): @router.post("/knowledge-items/{source_id}/refresh") async def refresh_knowledge_item(source_id: str): """Refresh a knowledge item by re-crawling its URL with the same metadata.""" + # CRITICAL: Validate OpenAI API key before starting refresh + await _validate_openai_api_key() + try: safe_logfire_info(f"Starting knowledge item refresh | source_id={source_id}") @@ -433,6 +521,9 @@ async def crawl_knowledge_item(request: KnowledgeItemRequest): if not request.url.startswith(("http://", "https://")): raise HTTPException(status_code=422, detail="URL must start with http:// or https://") + # CRITICAL: Validate OpenAI API key before starting crawl + await _validate_openai_api_key() + try: safe_logfire_info( f"Starting knowledge item crawl | url={str(request.url)} | knowledge_type={request.knowledge_type} | tags={request.tags}" @@ -586,6 +677,9 @@ async def upload_document( knowledge_type: str = Form("technical"), ): """Upload and process a document with progress tracking.""" + # CRITICAL: Validate OpenAI API key before starting upload + await _validate_openai_api_key() + try: # DETAILED LOGGING: Track knowledge_type parameter flow safe_logfire_info( @@ -802,10 +896,66 @@ async def perform_rag_query(request: RagQueryRequest): except HTTPException: raise except Exception as e: - safe_logfire_error( - f"RAG query failed | error={str(e)} | query={request.query[:50]} | source={request.source}" - ) - raise HTTPException(status_code=500, detail={"error": f"RAG query failed: {str(e)}"}) + # Handle specific OpenAI/embedding errors with detailed messages + if isinstance(e, EmbeddingAuthenticationError): + safe_logfire_error( + f"OpenAI authentication failed during RAG query | query={request.query[:50]} | source={request.source}" + ) + sanitized_message = _sanitize_openai_error(str(e)) + raise HTTPException( + status_code=401, + detail={ + "error": "OpenAI API authentication failed", + "message": "Invalid or expired OpenAI API key. Please check your API key in settings.", + "error_type": "authentication_failed", + "api_key_prefix": getattr(e, "api_key_prefix", None), + } + ) + elif isinstance(e, EmbeddingQuotaExhaustedError): + safe_logfire_error( + f"OpenAI quota exhausted during RAG query | query={request.query[:50]} | source={request.source}" + ) + raise HTTPException( + status_code=429, + detail={ + "error": "OpenAI API quota exhausted", + "message": "Your OpenAI API key has no remaining credits. Please add credits to your OpenAI account or check your billing settings.", + "error_type": "quota_exhausted", + "tokens_used": getattr(e, "tokens_used", None), + } + ) + elif isinstance(e, EmbeddingRateLimitError): + safe_logfire_error( + f"OpenAI rate limit hit during RAG query | query={request.query[:50]} | source={request.source}" + ) + raise HTTPException( + status_code=429, + detail={ + "error": "OpenAI API rate limit exceeded", + "message": "Too many requests to OpenAI API. Please wait a moment and try again.", + "error_type": "rate_limit", + "retry_after": 30, # Suggest 30 second wait + } + ) + elif isinstance(e, EmbeddingAPIError): + safe_logfire_error( + f"OpenAI API error during RAG query | error={str(e)} | query={request.query[:50]} | source={request.source}" + ) + sanitized_message = _sanitize_openai_error(str(e)) + raise HTTPException( + status_code=502, + detail={ + "error": "OpenAI API error", + "message": f"OpenAI API error: {sanitized_message}", + "error_type": "api_error", + } + ) + else: + # Generic error handling for other exceptions + safe_logfire_error( + f"RAG query failed | error={str(e)} | query={request.query[:50]} | source={request.source}" + ) + raise HTTPException(status_code=500, detail={"error": f"RAG query failed: {str(e)}"}) @router.post("/rag/code-examples") diff --git a/python/src/server/services/credential_service.py b/python/src/server/services/credential_service.py index 017c3b2af1..24580bbf1d 100644 --- a/python/src/server/services/credential_service.py +++ b/python/src/server/services/credential_service.py @@ -239,6 +239,17 @@ async def set_credential( self._rag_cache_timestamp = None logger.debug(f"Invalidated RAG settings cache due to update of {key}") + # Invalidate LLM provider cache when API keys are updated + # This fixes the issue where OpenAI API key changes weren't reflected immediately + if key in ["OPENAI_API_KEY", "GOOGLE_API_KEY"] or category == "rag_strategy": + try: + # Use late import to avoid circular dependency + from .llm_provider_service import invalidate_provider_cache + invalidate_provider_cache() + logger.debug(f"Invalidated LLM provider cache due to update of {key}") + except ImportError as import_err: + logger.warning(f"Could not invalidate LLM provider cache: {import_err}") + logger.info( f"Successfully {'encrypted and ' if is_encrypted else ''}stored credential: {key}" ) diff --git a/python/src/server/services/embeddings/embedding_exceptions.py b/python/src/server/services/embeddings/embedding_exceptions.py index 7a2ae6f985..21d775841f 100644 --- a/python/src/server/services/embeddings/embedding_exceptions.py +++ b/python/src/server/services/embeddings/embedding_exceptions.py @@ -72,6 +72,22 @@ def __init__(self, message: str, retry_count: int = 0, **kwargs): self.metadata["retry_count"] = retry_count +class EmbeddingAuthenticationError(EmbeddingError): + """ + Raised when API authentication fails (invalid API key, expired key, etc). + + This is a CRITICAL error that should stop the entire process + as continuing would be pointless without valid authentication. + """ + + def __init__(self, message: str, api_key_prefix: str | None = None, **kwargs): + super().__init__(message, **kwargs) + masked = f"{api_key_prefix[:4]}…" if api_key_prefix else None + self.api_key_prefix = masked + if masked: + self.metadata["api_key_prefix"] = masked + + class EmbeddingAsyncContextError(EmbeddingError): """ Raised when sync embedding function is called from async context. diff --git a/python/src/server/services/embeddings/embedding_service.py b/python/src/server/services/embeddings/embedding_service.py index d697abf933..98723fa47d 100644 --- a/python/src/server/services/embeddings/embedding_service.py +++ b/python/src/server/services/embeddings/embedding_service.py @@ -17,6 +17,7 @@ from ..threading_service import get_threading_service from .embedding_exceptions import ( EmbeddingAPIError, + EmbeddingAuthenticationError, EmbeddingError, EmbeddingQuotaExhaustedError, EmbeddingRateLimitError, @@ -107,6 +108,9 @@ async def create_embedding(text: str, provider: str | None = None) -> list[float "No embeddings returned from batch creation", text_preview=text ) return result.embeddings[0] + except EmbeddingAuthenticationError: + # Let auth errors bubble so the HTTP layer can return 401 + raise except EmbeddingError: # Re-raise our custom exceptions raise @@ -233,6 +237,11 @@ async def rate_limit_callback(data: dict): break # Success, exit retry loop + except openai.AuthenticationError as e: + # Invalid API key - critical error, stop everything + search_logger.error("Authentication failed: Invalid API key", exc_info=True) + raise EmbeddingAuthenticationError("Invalid API key") from e + except openai.RateLimitError as e: error_message = str(e) if "insufficient_quota" in error_message: @@ -275,6 +284,9 @@ async def rate_limit_callback(data: dict): else: raise # Will be caught by outer try + except EmbeddingAuthenticationError: + # Auth errors must bubble up immediately for HTTP 401 + raise except Exception as e: # This batch failed - track failures but continue with next batch search_logger.error(f"Batch {batch_index} failed: {e}", exc_info=True) @@ -312,6 +324,9 @@ async def rate_limit_callback(data: dict): return result + except EmbeddingAuthenticationError: + # Auth errors must bubble up immediately for HTTP 401 + raise except Exception as e: # Catastrophic failure - return what we have span.set_attribute("catastrophic_failure", True) diff --git a/python/src/server/services/llm_provider_service.py b/python/src/server/services/llm_provider_service.py index d7c834f9f2..2ddbaeeeec 100644 --- a/python/src/server/services/llm_provider_service.py +++ b/python/src/server/services/llm_provider_service.py @@ -38,6 +38,23 @@ def _set_cached_settings(key: str, value: Any) -> None: _settings_cache[key] = (value, time.time()) +def invalidate_provider_cache() -> None: + """Invalidate all cached provider configurations.""" + global _settings_cache + _settings_cache.clear() + logger.debug("Provider configuration cache cleared") + + +def invalidate_specific_cache(key_pattern: str) -> None: + """Invalidate cache entries matching a specific pattern.""" + global _settings_cache + keys_to_remove = [key for key in _settings_cache.keys() if key_pattern in key] + for key in keys_to_remove: + del _settings_cache[key] + if keys_to_remove: + logger.debug(f"Invalidated cache entries: {keys_to_remove}") + + @asynccontextmanager async def get_llm_client(provider: str | None = None, use_embedding_provider: bool = False): """ diff --git a/python/src/server/services/search/rag_service.py b/python/src/server/services/search/rag_service.py index cdc89c237f..d55cc4cf31 100644 --- a/python/src/server/services/search/rag_service.py +++ b/python/src/server/services/search/rag_service.py @@ -15,9 +15,15 @@ import os from typing import Any +from ..embeddings.embedding_exceptions import EmbeddingAuthenticationError from ...config.logfire_config import get_logger, safe_span from ...utils import get_supabase_client from ..embeddings.embedding_service import create_embedding +from ..embeddings.embedding_exceptions import ( + EmbeddingAPIError, + EmbeddingQuotaExhaustedError, + EmbeddingRateLimitError, +) from .agentic_rag_strategy import AgenticRAGStrategy # Import all strategies @@ -104,6 +110,11 @@ async def search_documents( Returns: List of matching documents + + Raises: + EmbeddingQuotaExhaustedError: When OpenAI quota is exhausted + EmbeddingRateLimitError: When rate limited + EmbeddingAPIError: For other embedding API errors """ with safe_span( "rag_search_documents", @@ -112,12 +123,13 @@ async def search_documents( hybrid_enabled=use_hybrid_search, ) as span: try: - # Create embedding for the query - query_embedding = await create_embedding(query) + # Create embedding for the query using single-vector API + query_embedding = await create_embedding(text=query) if not query_embedding: logger.error("Failed to create embedding for query") - return [] + # Follow alpha "fail fast" principle - embedding failure should not return empty results + raise RuntimeError("Failed to create embedding for query - this indicates a configuration or API issue") if use_hybrid_search: # Use hybrid strategy @@ -140,10 +152,17 @@ async def search_documents( span.set_attribute("results_found", len(results)) return results + except EmbeddingAuthenticationError: + # Let auth failures bubble to API layer -> 401 + raise + except (EmbeddingQuotaExhaustedError, EmbeddingRateLimitError, EmbeddingAPIError): + # Re-raise embedding errors so they propagate to the API layer with specific error info + raise except Exception as e: logger.error(f"Document search failed: {e}") span.set_attribute("error", str(e)) - return [] + # Follow alpha "fail fast" principle - don't return empty results for legitimate failures + raise RuntimeError(f"Document search failed: {str(e)}") from e async def search_code_examples( self, @@ -202,7 +221,6 @@ async def perform_rag_query( # Check which strategies are enabled use_hybrid_search = self.get_bool_setting("USE_HYBRID_SEARCH", False) - use_reranking = self.get_bool_setting("USE_RERANKING", False) # Step 1 & 2: Get results (with hybrid search if enabled) results = await self.search_documents( @@ -230,9 +248,9 @@ async def perform_rag_query( logger.warning(f"Failed to format result {i}: {format_error}") continue - # Step 3: Apply reranking if we have a strategy or if enabled + # Step 3: Apply reranking if we have a strategy reranking_applied = False - if self.reranking_strategy and formatted_results: + if self.reranking_strategy is not None and formatted_results: try: formatted_results = await self.reranking_strategy.rerank_results( query, formatted_results, content_key="content" @@ -262,6 +280,9 @@ async def perform_rag_query( logger.info(f"RAG query completed - {len(formatted_results)} results found") return True, response_data + except EmbeddingAuthenticationError: + # Let 401 bubble to the API layer + raise except Exception as e: logger.error(f"RAG query failed: {e}") span.set_attribute("error", str(e)) @@ -311,7 +332,6 @@ async def search_code_examples_service( # Check which strategies are enabled use_hybrid_search = self.get_bool_setting("USE_HYBRID_SEARCH", False) - use_reranking = self.get_bool_setting("USE_RERANKING", False) # Prepare filter filter_metadata = {"source": source_id} if source_id and source_id.strip() else None @@ -334,11 +354,13 @@ async def search_code_examples_service( ) # Apply reranking if we have a strategy - if self.reranking_strategy and results: + reranking_applied = False + if self.reranking_strategy is not None and results: try: results = await self.reranking_strategy.rerank_results( query, results, content_key="content" ) + reranking_applied = True except Exception as e: logger.warning(f"Code reranking failed: {e}") @@ -362,17 +384,20 @@ async def search_code_examples_service( "query": query, "source_filter": source_id, "search_mode": "hybrid" if use_hybrid_search else "vector", - "reranking_applied": self.reranking_strategy is not None, + "reranking_applied": reranking_applied, "results": formatted_results, "count": len(formatted_results), } span.set_attribute("results_found", len(formatted_results)) span.set_attribute("hybrid_used", use_hybrid_search) - span.set_attribute("reranking_used", use_reranking) + span.set_attribute("reranking_used", reranking_applied) return True, response_data + except EmbeddingAuthenticationError: + # Let 401 bubble to the API layer + raise except Exception as e: logger.error(f"Code example search failed: {e}") span.set_attribute("error", str(e)) diff --git a/python/src/server/services/threading_service.py b/python/src/server/services/threading_service.py index cc768418b4..dac4cfb835 100644 --- a/python/src/server/services/threading_service.py +++ b/python/src/server/services/threading_service.py @@ -109,11 +109,7 @@ async def acquire(self, estimated_tokens: int = 8000, progress_callback: Callabl wait_time = self._calculate_wait_time(estimated_tokens) if wait_time > 0: logfire_logger.info( - f"Rate limiting: waiting {wait_time:.1f}s", - extra={ - "tokens": estimated_tokens, - "current_usage": self._get_current_usage(), - } + f"Rate limiting: waiting {wait_time:.1f}s (tokens: {estimated_tokens}, current usage: {self._get_current_usage()})" ) wait_time_to_sleep = wait_time else: diff --git a/python/tests/test_document_storage_metrics.py b/python/tests/test_document_storage_metrics.py index 66b3d3d4ef..bfb8843d0f 100644 --- a/python/tests/test_document_storage_metrics.py +++ b/python/tests/test_document_storage_metrics.py @@ -34,7 +34,13 @@ async def test_avg_chunks_calculation_with_empty_docs(self): with patch('src.server.services.crawling.document_storage_operations.safe_logfire_info') as mock_log: mock_log.side_effect = lambda msg: logged_messages.append(msg) - with patch('src.server.services.crawling.document_storage_operations.add_documents_to_supabase'): + with patch('src.server.services.crawling.document_storage_operations.add_documents_to_supabase') as mock_add_docs: + mock_add_docs.return_value = { + 'chunks_stored': 6, + 'embedding_failures': 0, + 'total_chunks': 6, + 'success': True + } # Test data with mix of empty and non-empty documents crawl_results = [ {"url": "https://example.com/page1", "markdown": "Content 1"}, @@ -83,7 +89,13 @@ async def test_avg_chunks_all_empty_docs(self): with patch('src.server.services.crawling.document_storage_operations.safe_logfire_info') as mock_log: mock_log.side_effect = lambda msg: logged_messages.append(msg) - with patch('src.server.services.crawling.document_storage_operations.add_documents_to_supabase'): + with patch('src.server.services.crawling.document_storage_operations.add_documents_to_supabase') as mock_add_docs: + mock_add_docs.return_value = { + 'chunks_stored': 0, + 'embedding_failures': 0, + 'total_chunks': 0, + 'success': True + } # All documents are empty crawl_results = [ {"url": "https://example.com/page1", "markdown": ""}, @@ -131,7 +143,13 @@ async def test_avg_chunks_single_doc(self): with patch('src.server.services.crawling.document_storage_operations.safe_logfire_info') as mock_log: mock_log.side_effect = lambda msg: logged_messages.append(msg) - with patch('src.server.services.crawling.document_storage_operations.add_documents_to_supabase'): + with patch('src.server.services.crawling.document_storage_operations.add_documents_to_supabase') as mock_add_docs: + mock_add_docs.return_value = { + 'chunks_stored': 10, + 'embedding_failures': 0, + 'total_chunks': 10, + 'success': True + } crawl_results = [ {"url": "https://example.com/page", "markdown": "Long content here..."}, ] @@ -175,7 +193,13 @@ def mock_chunk(text, chunk_size): doc_storage._create_source_records = AsyncMock() with patch('src.server.services.crawling.document_storage_operations.safe_logfire_info'): - with patch('src.server.services.crawling.document_storage_operations.add_documents_to_supabase'): + with patch('src.server.services.crawling.document_storage_operations.add_documents_to_supabase') as mock_add_docs: + mock_add_docs.return_value = { + 'chunks_stored': 3, + 'embedding_failures': 0, + 'total_chunks': 3, + 'success': True + } # Mix of documents with various content states crawl_results = [ {"url": "https://example.com/1", "markdown": "Content"}, diff --git a/python/tests/test_openai_quota_error_handling.py b/python/tests/test_openai_quota_error_handling.py new file mode 100644 index 0000000000..66fe5a2bf0 --- /dev/null +++ b/python/tests/test_openai_quota_error_handling.py @@ -0,0 +1,458 @@ +""" +Test OpenAI quota error handling in RAG queries. + +This test verifies that when OpenAI API quota is exhausted, +the error is properly propagated through the service layer +and returned to API clients with detailed error information. + +Related to GitHub issue #362. +""" + +import pytest +from unittest.mock import Mock, AsyncMock, patch +from fastapi.testclient import TestClient +from fastapi import HTTPException + +from src.server.services.embeddings.embedding_exceptions import ( + EmbeddingAuthenticationError, + EmbeddingQuotaExhaustedError, + EmbeddingRateLimitError, + EmbeddingAPIError, +) +from src.server.services.search.rag_service import RAGService +from src.server.api_routes.knowledge_api import perform_rag_query, RagQueryRequest, _sanitize_openai_error + + +class TestOpenAIQuotaErrorHandling: + """Test suite for OpenAI quota error handling in RAG queries.""" + + @pytest.mark.asyncio + async def test_quota_exhausted_error_propagation(self): + """Test that quota exhausted errors propagate correctly through service layer.""" + + # Mock the create_embedding function to raise quota error + with patch("src.server.services.search.rag_service.create_embedding") as mock_create_embedding: + mock_create_embedding.side_effect = EmbeddingQuotaExhaustedError( + "OpenAI quota exhausted", tokens_used=1000 + ) + + # Create RAG service and test search_documents method + with patch("src.server.services.search.rag_service.get_supabase_client"): + rag_service = RAGService() + + # Should propagate the quota error, not return empty list + with pytest.raises(EmbeddingQuotaExhaustedError) as exc_info: + await rag_service.search_documents("test query") + + assert "quota exhausted" in str(exc_info.value).lower() + assert exc_info.value.tokens_used == 1000 + + @pytest.mark.asyncio + async def test_authentication_error_propagation(self): + """Test that authentication errors propagate correctly through service layer.""" + + # Mock the create_embedding function to raise authentication error + with patch("src.server.services.search.rag_service.create_embedding") as mock_create_embedding: + mock_create_embedding.side_effect = EmbeddingAuthenticationError( + "Invalid API key", api_key_prefix="sk-1234" + ) + + # Create RAG service and test search_documents method + with patch("src.server.services.search.rag_service.get_supabase_client"): + rag_service = RAGService() + + # Should propagate the authentication error + with pytest.raises(EmbeddingAuthenticationError) as exc_info: + await rag_service.search_documents("test query") + + assert "invalid api key" in str(exc_info.value).lower() + assert exc_info.value.api_key_prefix == "sk-1…" + + @pytest.mark.asyncio + async def test_rate_limit_error_propagation(self): + """Test that rate limit errors propagate correctly through service layer.""" + + # Mock the create_embedding function to raise rate limit error + with patch("src.server.services.search.rag_service.create_embedding") as mock_create_embedding: + mock_create_embedding.side_effect = EmbeddingRateLimitError( + "Rate limit exceeded" + ) + + # Create RAG service and test search_documents method + with patch("src.server.services.search.rag_service.get_supabase_client"): + rag_service = RAGService() + + # Should propagate the rate limit error, not return empty list + with pytest.raises(EmbeddingRateLimitError) as exc_info: + await rag_service.search_documents("test query") + + assert "rate limit" in str(exc_info.value).lower() + + @pytest.mark.asyncio + async def test_api_quota_error_in_rag_endpoint(self): + """Test that quota errors are properly handled in RAG API endpoint.""" + + request = RagQueryRequest(query="test query", match_count=5) + + # Mock RAGService to raise quota exhausted error + with patch("src.server.api_routes.knowledge_api.RAGService") as mock_rag_service_class, \ + patch("src.server.api_routes.knowledge_api.get_supabase_client"): + + mock_service = Mock() + mock_service.perform_rag_query = AsyncMock( + side_effect=EmbeddingQuotaExhaustedError( + "OpenAI quota exhausted", tokens_used=500 + ) + ) + mock_rag_service_class.return_value = mock_service + + # Should raise HTTPException with status 429 and detailed error info + with pytest.raises(HTTPException) as exc_info: + await perform_rag_query(request) + + # Verify error details + assert exc_info.value.status_code == 429 + assert "quota exhausted" in exc_info.value.detail["error"].lower() + assert "no remaining credits" in exc_info.value.detail["message"].lower() + assert exc_info.value.detail["error_type"] == "quota_exhausted" + assert exc_info.value.detail["tokens_used"] == 500 + + @pytest.mark.asyncio + async def test_api_rate_limit_error_in_rag_endpoint(self): + """Test that rate limit errors are properly handled in RAG API endpoint.""" + + request = RagQueryRequest(query="test query", match_count=5) + + # Mock RAGService to raise rate limit error + with patch("src.server.api_routes.knowledge_api.RAGService") as mock_rag_service_class, \ + patch("src.server.api_routes.knowledge_api.get_supabase_client"): + + mock_service = Mock() + mock_service.perform_rag_query = AsyncMock( + side_effect=EmbeddingRateLimitError("Rate limit exceeded") + ) + mock_rag_service_class.return_value = mock_service + + # Should raise HTTPException with status 429 and detailed error info + with pytest.raises(HTTPException) as exc_info: + await perform_rag_query(request) + + # Verify error details + assert exc_info.value.status_code == 429 + assert "rate limit" in exc_info.value.detail["error"].lower() + assert "too many requests" in exc_info.value.detail["message"].lower() + assert exc_info.value.detail["error_type"] == "rate_limit" + + @pytest.mark.asyncio + async def test_api_authentication_error_in_rag_endpoint(self): + """Test that authentication errors are properly handled in RAG API endpoint.""" + + request = RagQueryRequest(query="test query", match_count=5) + + # Mock RAGService to raise authentication error + with patch("src.server.api_routes.knowledge_api.RAGService") as mock_rag_service_class, \ + patch("src.server.api_routes.knowledge_api.get_supabase_client"): + + mock_service = Mock() + mock_service.perform_rag_query = AsyncMock( + side_effect=EmbeddingAuthenticationError( + "Invalid API key", api_key_prefix="sk-1234" + ) + ) + mock_rag_service_class.return_value = mock_service + + # Should raise HTTPException with status 401 and detailed error info + with pytest.raises(HTTPException) as exc_info: + await perform_rag_query(request) + + # Verify error details + assert exc_info.value.status_code == 401 + assert "authentication failed" in exc_info.value.detail["error"].lower() + assert "invalid or expired" in exc_info.value.detail["message"].lower() + assert exc_info.value.detail["error_type"] == "authentication_failed" + assert exc_info.value.detail["api_key_prefix"] == "sk-1…" + + @pytest.mark.asyncio + async def test_api_generic_error_in_rag_endpoint(self): + """Test that generic API errors are properly handled in RAG API endpoint.""" + + request = RagQueryRequest(query="test query", match_count=5) + + # Mock RAGService to raise generic API error + with patch("src.server.api_routes.knowledge_api.RAGService") as mock_rag_service_class, \ + patch("src.server.api_routes.knowledge_api.get_supabase_client"): + + mock_service = Mock() + mock_service.perform_rag_query = AsyncMock( + side_effect=EmbeddingAPIError("Invalid API key") + ) + mock_rag_service_class.return_value = mock_service + + # Should raise HTTPException with status 502 and detailed error info + with pytest.raises(HTTPException) as exc_info: + await perform_rag_query(request) + + # Verify error details + assert exc_info.value.status_code == 502 + assert "api error" in exc_info.value.detail["error"].lower() + assert "invalid api key" in exc_info.value.detail["message"].lower() + assert exc_info.value.detail["error_type"] == "api_error" + + @pytest.mark.asyncio + async def test_successful_rag_query_still_works(self): + """Test that successful RAG queries still work after error handling changes.""" + + request = RagQueryRequest(query="test query", match_count=5) + + # Mock RAGService to return successful result + mock_result = { + "results": [{"content": "test result"}], + "query": "test query", + "total_found": 1, + } + + with patch("src.server.api_routes.knowledge_api.RAGService") as mock_rag_service_class, \ + patch("src.server.api_routes.knowledge_api.get_supabase_client"): + + mock_service = Mock() + mock_service.perform_rag_query = AsyncMock(return_value=(True, mock_result)) + mock_rag_service_class.return_value = mock_service + + # Should return successful result + result = await perform_rag_query(request) + + # Verify successful response + assert result["success"] is True + assert result["results"] == [{"content": "test result"}] + assert result["total_found"] == 1 + + def test_sanitize_openai_error_removes_urls(self): + """Test that sanitization function removes URLs from error messages.""" + error_message = "Connection failed to https://api.openai.com/v1/embeddings with status 400" + sanitized = _sanitize_openai_error(error_message) + + assert "https://api.openai.com" not in sanitized + assert "[REDACTED_URL]" in sanitized + assert "Connection failed" in sanitized + + def test_sanitize_openai_error_removes_api_keys(self): + """Test that sanitization function removes API keys from error messages.""" + error_message = "Authentication failed with key sk-FAKE00000000000000000000000000000000000000000000" + sanitized = _sanitize_openai_error(error_message) + + assert "sk-FAKE00000000000000000000000000000000000000000000" not in sanitized + assert "[REDACTED_KEY]" in sanitized + assert "Authentication failed" in sanitized + + def test_sanitize_openai_error_removes_auth_info(self): + """Test that sanitization function removes auth details from error messages.""" + error_message = 'Failed to authenticate: "auth_bearer_xyz123"' + sanitized = _sanitize_openai_error(error_message) + + assert "auth_bearer_xyz123" not in sanitized + assert "[REDACTED_AUTH]" in sanitized + + def test_sanitize_openai_error_returns_generic_for_sensitive_words(self): + """Test that sanitization returns generic message for sensitive internal details.""" + error_message = "Internal server error on endpoint /v1/embeddings" + sanitized = _sanitize_openai_error(error_message) + + # Should return generic message due to 'internal' and 'endpoint' keywords + assert sanitized == "OpenAI API encountered an error. Please verify your API key and quota." + + def test_sanitize_openai_error_preserves_safe_messages(self): + """Test that sanitization preserves safe error messages.""" + error_message = "Model not found: text-embedding-ada-002" + sanitized = _sanitize_openai_error(error_message) + + # Should preserve the message since it contains no sensitive info + assert sanitized == error_message + + @pytest.mark.asyncio + async def test_api_error_sanitization_in_endpoint(self): + """Test that API errors are sanitized in the RAG endpoint response.""" + + request = RagQueryRequest(query="test query", match_count=5) + + # Mock RAGService to raise API error with sensitive information + with patch("src.server.api_routes.knowledge_api.RAGService") as mock_rag_service_class, \ + patch("src.server.api_routes.knowledge_api.get_supabase_client"): + + mock_service = Mock() + mock_service.perform_rag_query = AsyncMock( + side_effect=EmbeddingAPIError("Request failed to https://api.openai.com/v1/embeddings with key sk-FAKE00000000000000000000000000000000000000000000") + ) + mock_rag_service_class.return_value = mock_service + + # Should raise HTTPException with sanitized error message + with pytest.raises(HTTPException) as exc_info: + await perform_rag_query(request) + + # Verify error is sanitized + error_message = exc_info.value.detail["message"] + assert "sk-FAKE00000000000000000000000000000000000000000000" not in error_message + assert "https://api.openai.com" not in error_message + assert "[REDACTED_KEY]" in error_message + assert "[REDACTED_URL]" in error_message + + @pytest.mark.asyncio + async def test_fail_fast_pattern_embedding_failure(self): + """Test that embedding failures now fail fast instead of returning empty results.""" + + # Mock the create_embedding function to return None (failure) + with patch("src.server.services.search.rag_service.create_embedding") as mock_create_embedding: + mock_create_embedding.return_value = None + + # Create RAG service and test search_documents method + with patch("src.server.services.search.rag_service.get_supabase_client"): + rag_service = RAGService() + + # Should raise RuntimeError instead of returning empty list + with pytest.raises(RuntimeError) as exc_info: + await rag_service.search_documents("test query") + + assert "Failed to create embedding" in str(exc_info.value) + assert "configuration or API issue" in str(exc_info.value) + + @pytest.mark.asyncio + async def test_fail_fast_pattern_search_failure(self): + """Test that search failures now fail fast instead of returning empty results.""" + + # Mock the create_embedding to succeed but vector search to fail + with patch("src.server.services.search.rag_service.create_embedding") as mock_create_embedding: + mock_create_embedding.return_value = [0.1] * 1536 # Mock embedding vector + + with patch("src.server.services.search.rag_service.get_supabase_client"): + rag_service = RAGService() + + # Mock the base strategy to raise an exception + with patch.object(rag_service.base_strategy, 'vector_search', side_effect=Exception("Database connection failed")): + + # Should raise RuntimeError instead of returning empty list + with pytest.raises(RuntimeError) as exc_info: + await rag_service.search_documents("test query") + + assert "Document search failed" in str(exc_info.value) + assert "Database connection failed" in str(exc_info.value) + + @pytest.mark.asyncio + async def test_integration_error_flow_rag_to_api(self): + """Test complete error flow from RAG service through API endpoint.""" + + request = RagQueryRequest(query="test query", match_count=5) + + # Mock both RAGService and get_supabase_client to avoid real connections + with patch("src.server.api_routes.knowledge_api.RAGService") as mock_rag_service_class, \ + patch("src.server.api_routes.knowledge_api.get_supabase_client"): + + mock_service = Mock() + mock_service.perform_rag_query = AsyncMock( + side_effect=RuntimeError("Document search failed: Database connection failed") + ) + mock_rag_service_class.return_value = mock_service + + # Should raise HTTPException with generic error (not OpenAI specific) + with pytest.raises(HTTPException) as exc_info: + await perform_rag_query(request) + + # Verify error details + assert exc_info.value.status_code == 500 + assert "RAG query failed" in exc_info.value.detail["error"] + assert "Database connection failed" in exc_info.value.detail["error"] + + def test_sanitize_openai_error_removes_organization_ids(self): + """Test that sanitization function removes OpenAI organization IDs.""" + error_message = "Permission denied for org-FAKE00000000000000000000 with model access" + sanitized = _sanitize_openai_error(error_message) + + assert "org-FAKE00000000000000000000" not in sanitized + assert "[REDACTED_ORG]" in sanitized + assert "Permission denied" in sanitized + + def test_sanitize_openai_error_removes_project_ids(self): + """Test that sanitization function removes OpenAI project IDs.""" + error_message = "Project proj_abcdef1234567890xyz not found" + sanitized = _sanitize_openai_error(error_message) + + assert "proj_abcdef1234567890xyz" not in sanitized + assert "[REDACTED_PROJ]" in sanitized + assert "Project" in sanitized and "not found" in sanitized + + def test_sanitize_openai_error_removes_request_ids(self): + """Test that sanitization function removes OpenAI request IDs.""" + error_message = "Request req_1234567890abcdefghij failed with timeout" + sanitized = _sanitize_openai_error(error_message) + + assert "req_1234567890abcdefghij" not in sanitized + assert "[REDACTED_REQ]" in sanitized + assert "Request" in sanitized and "failed with timeout" in sanitized + + def test_sanitize_openai_error_removes_bearer_tokens(self): + """Test that sanitization function removes Bearer tokens.""" + error_message = "Authorization failed: Bearer sk-FAKE00000000000000000000000000000000000000000000 invalid" + sanitized = _sanitize_openai_error(error_message) + + # This message should be fully sanitized due to potential sensitive content + assert "sk-FAKE00000000000000000000000000000000000000000000" not in sanitized + assert sanitized == "OpenAI API encountered an error. Please verify your API key and quota." + + def test_sanitize_openai_error_handles_multiple_patterns(self): + """Test that sanitization handles multiple sensitive patterns in one message.""" + error_message = "Request req_abc123 to https://api.openai.com failed for org-FAKE00000000000000000000 with key sk-FAKE00000000000000000000000000000000000000000000" + sanitized = _sanitize_openai_error(error_message) + + # Verify all patterns are redacted + assert "req_abc123" not in sanitized + assert "https://api.openai.com" not in sanitized + assert "org-FAKE00000000000000000000" not in sanitized + assert "sk-FAKE00000000000000000000000000000000000000000000" not in sanitized + + # Verify redacted placeholders are present + assert "[REDACTED_REQ]" in sanitized + assert "[REDACTED_URL]" in sanitized + assert "[REDACTED_ORG]" in sanitized + assert "[REDACTED_KEY]" in sanitized + + @pytest.mark.asyncio + async def test_rate_limit_error_includes_retry_after(self): + """Test that rate limit errors now include retry_after information.""" + + request = RagQueryRequest(query="test query", match_count=5) + + with patch("src.server.api_routes.knowledge_api.RAGService") as mock_rag_service_class, \ + patch("src.server.api_routes.knowledge_api.get_supabase_client"): + + mock_service = Mock() + mock_service.perform_rag_query = AsyncMock( + side_effect=EmbeddingRateLimitError("Rate limit exceeded") + ) + mock_rag_service_class.return_value = mock_service + + # Should raise HTTPException with retry_after field + with pytest.raises(HTTPException) as exc_info: + await perform_rag_query(request) + + # Verify rate limit error with retry_after + assert exc_info.value.status_code == 429 + error_detail = exc_info.value.detail + assert error_detail["error_type"] == "rate_limit" + assert "retry_after" in error_detail + assert error_detail["retry_after"] == 30 + + def test_sanitize_openai_error_input_validation(self): + """Test that sanitization function handles invalid input gracefully.""" + # Test None input + result = _sanitize_openai_error(None) + assert result == "OpenAI API encountered an error. Please verify your API key and quota." + + # Test non-string input + result = _sanitize_openai_error(123) + assert result == "OpenAI API encountered an error. Please verify your API key and quota." + + # Test empty string + result = _sanitize_openai_error("") + assert result == "OpenAI API encountered an error. Please verify your API key and quota." + + # Test whitespace-only string + result = _sanitize_openai_error(" ") + assert result == "OpenAI API encountered an error. Please verify your API key and quota." \ No newline at end of file diff --git a/python/tests/test_rag_simple.py b/python/tests/test_rag_simple.py index e8322e29c0..95550e5edd 100644 --- a/python/tests/test_rag_simple.py +++ b/python/tests/test_rag_simple.py @@ -113,7 +113,7 @@ async def test_search_documents_with_embedding(self, rag_service): assert isinstance(results, list) assert len(results) == 1 - mock_embed.assert_called_once_with("test query") + mock_embed.assert_called_once_with(text="test query") mock_search.assert_called_once() @pytest.mark.asyncio diff --git a/python/tests/test_rag_strategies.py b/python/tests/test_rag_strategies.py index ff9dc90e9b..abdc3253cb 100644 --- a/python/tests/test_rag_strategies.py +++ b/python/tests/test_rag_strategies.py @@ -11,6 +11,25 @@ import pytest +# Global mock to prevent any real API calls during testing +@pytest.fixture(autouse=True) +def mock_embedding_service(): + """Auto-use fixture to mock embedding service globally""" + # Only mock create_embeddings_batch (batch function), not create_embedding + # This allows individual tests to mock create_embedding as needed + with patch("src.server.services.embeddings.embedding_service.create_embeddings_batch") as mock_batch: + # Create a mock result object with the expected attributes + mock_result = type('EmbeddingResult', (), { + 'embeddings': [[0.1] * 1536], + 'texts_processed': ["test"], + 'success_count': 1, + 'failure_count': 0, + 'has_failures': False, + 'failed_items': [] + })() + mock_batch.return_value = mock_result + yield + # Mock problematic imports at module level with patch.dict( os.environ, @@ -31,11 +50,9 @@ mock_client = MagicMock() mock_supabase.return_value = mock_client - # Mock embedding service to prevent API calls - with patch( - "src.server.services.embeddings.embedding_service.create_embedding" - ) as mock_embed: - mock_embed.return_value = [0.1] * 1536 + # Import the modules we need but don't mock create_embedding at module level + # This allows individual tests to properly mock create_embedding as needed + pass # Test RAGService core functionality @@ -352,18 +369,18 @@ async def test_full_rag_pipeline(self, rag_service): @pytest.mark.asyncio async def test_error_handling_in_rag_pipeline(self, rag_service): """Test error handling when strategies fail""" - with patch( - "src.server.services.embeddings.embedding_service.create_embedding" - ) as mock_embedding: - # Simulate embedding failure (returns None) - mock_embedding.return_value = None - + # Mock the search_documents method to raise an error directly + with patch.object( + rag_service, 'search_documents', + side_effect=RuntimeError("Failed to create embedding for query - this indicates a configuration or API issue") + ): + # Should now fail fast and return error result instead of empty results success, result = await rag_service.perform_rag_query(query="test query", match_count=5) - # Should handle gracefully by returning empty results - assert success is True - assert "results" in result - assert len(result["results"]) == 0 # Empty results due to embedding failure + # Should return failure result due to "fail fast" principle + assert success is False + assert "error" in result + assert "Failed to create embedding" in result["error"] or "configuration or API issue" in result["error"] @pytest.mark.asyncio async def test_empty_results_handling(self, rag_service):