diff --git a/archon-ui-main/src/components/settings/OllamaConfigurationPanel.tsx b/archon-ui-main/src/components/settings/OllamaConfigurationPanel.tsx index 55f2519d88..c4a9e267fd 100644 --- a/archon-ui-main/src/components/settings/OllamaConfigurationPanel.tsx +++ b/archon-ui-main/src/components/settings/OllamaConfigurationPanel.tsx @@ -595,7 +595,7 @@ const OllamaConfigurationPanel: React.FC = ({ value={tempUrls[instance.id] !== undefined ? tempUrls[instance.id] : instance.baseUrl} onChange={(e) => handleUrlChange(instance.id, e.target.value)} onBlur={() => handleUrlBlur(instance.id)} - placeholder="http://localhost:11434" + placeholder="http://host.docker.internal:11434" className={cn( "text-sm", tempUrls[instance.id] !== undefined && tempUrls[instance.id] !== instance.baseUrl @@ -686,7 +686,7 @@ const OllamaConfigurationPanel: React.FC = ({ /> setNewInstanceUrl(e.target.value)} /> diff --git a/archon-ui-main/src/components/settings/RAGSettings.tsx b/archon-ui-main/src/components/settings/RAGSettings.tsx index 83766b6c3a..8cb721a73a 100644 --- a/archon-ui-main/src/components/settings/RAGSettings.tsx +++ b/archon-ui-main/src/components/settings/RAGSettings.tsx @@ -61,11 +61,11 @@ export const RAGSettings = ({ // Instance configurations const [llmInstanceConfig, setLLMInstanceConfig] = useState({ name: '', - url: ragSettings.LLM_BASE_URL || 'http://localhost:11434/v1' + url: ragSettings.LLM_BASE_URL || 'http://host.docker.internal:11434/v1' }); const [embeddingInstanceConfig, setEmbeddingInstanceConfig] = useState({ name: '', - url: ragSettings.OLLAMA_EMBEDDING_URL || 'http://localhost:11434/v1' + url: ragSettings.OLLAMA_EMBEDDING_URL || 'http://host.docker.internal:11434/v1' }); // Update instance configs when ragSettings change (after loading from database) @@ -932,7 +932,7 @@ export const RAGSettings = ({ className="text-green-400 border-green-400 mb-1" onClick={() => { // Quick setup: configure both instances with default values - const defaultUrl = 'http://localhost:11434/v1'; + const defaultUrl = 'http://host.docker.internal:11434/v1'; const defaultName = 'Default Ollama'; setLLMInstanceConfig({ name: defaultName, url: defaultUrl }); setEmbeddingInstanceConfig({ name: defaultName, url: defaultUrl }); @@ -1680,7 +1680,7 @@ export const RAGSettings = ({ }); } }} - placeholder="http://localhost:11434/v1" + placeholder="http://host.docker.internal:11434/v1" /> {/* Convenience checkbox for single host setup */} @@ -1753,7 +1753,7 @@ export const RAGSettings = ({ label="Instance URL" value={embeddingInstanceConfig.url} onChange={(e) => setEmbeddingInstanceConfig({...embeddingInstanceConfig, url: e.target.value})} - placeholder="http://localhost:11434/v1" + placeholder="http://host.docker.internal:11434/v1" /> diff --git a/docs/docs/rag.mdx b/docs/docs/rag.mdx index a77167c12e..82866e2a0f 100644 --- a/docs/docs/rag.mdx +++ b/docs/docs/rag.mdx @@ -320,7 +320,7 @@ EMBEDDING_MODEL=text-embedding-004 ### Ollama (Local/Private) ```bash LLM_PROVIDER=ollama -LLM_BASE_URL=http://localhost:11434/v1 +LLM_BASE_URL=http://host.docker.internal:11434/v1 MODEL_CHOICE=llama2 EMBEDDING_MODEL=nomic-embed-text # Pros: Privacy, no API costs diff --git a/migration/complete_setup.sql b/migration/complete_setup.sql index bd9ebd88ce..322e0b2f1f 100644 --- a/migration/complete_setup.sql +++ b/migration/complete_setup.sql @@ -94,7 +94,7 @@ INSERT INTO archon_settings (key, encrypted_value, is_encrypted, category, descr -- LLM Provider configuration settings INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES ('LLM_PROVIDER', 'openai', false, 'rag_strategy', 'LLM provider to use: openai, ollama, or google'), -('LLM_BASE_URL', NULL, false, 'rag_strategy', 'Custom base URL for LLM provider (mainly for Ollama, e.g., http://localhost:11434/v1)'), +('LLM_BASE_URL', NULL, false, 'rag_strategy', 'Custom base URL for LLM provider (mainly for Ollama, e.g., http://host.docker.internal:11434/v1)'), ('EMBEDDING_MODEL', 'text-embedding-3-small', false, 'rag_strategy', 'Embedding model for vector search and similarity matching (required for all embedding operations)') ON CONFLICT (key) DO NOTHING; diff --git a/python/src/server/services/credential_service.py b/python/src/server/services/credential_service.py index a57c1abbbd..e72ca8a4ff 100644 --- a/python/src/server/services/credential_service.py +++ b/python/src/server/services/credential_service.py @@ -475,7 +475,7 @@ async def _get_provider_api_key(self, provider: str) -> str | None: def _get_provider_base_url(self, provider: str, rag_settings: dict) -> str | None: """Get base URL for provider.""" if provider == "ollama": - return rag_settings.get("LLM_BASE_URL", "http://localhost:11434/v1") + return rag_settings.get("LLM_BASE_URL", "http://host.docker.internal:11434/v1") elif provider == "google": return "https://generativelanguage.googleapis.com/v1beta/openai/" return None # Use default for OpenAI diff --git a/python/src/server/services/llm_provider_service.py b/python/src/server/services/llm_provider_service.py index f04f0741ba..10655b229d 100644 --- a/python/src/server/services/llm_provider_service.py +++ b/python/src/server/services/llm_provider_service.py @@ -203,7 +203,7 @@ async def _get_optimal_ollama_instance(instance_type: str | None = None, return embedding_url if embedding_url.endswith('/v1') else f"{embedding_url}/v1" # Default to LLM base URL for chat operations - fallback_url = rag_settings.get("LLM_BASE_URL", "http://localhost:11434") + fallback_url = rag_settings.get("LLM_BASE_URL", "http://host.docker.internal:11434") return fallback_url if fallback_url.endswith('/v1') else f"{fallback_url}/v1" except Exception as e: @@ -211,11 +211,11 @@ async def _get_optimal_ollama_instance(instance_type: str | None = None, # Final fallback to localhost only if we can't get RAG settings try: rag_settings = await credential_service.get_credentials_by_category("rag_strategy") - fallback_url = rag_settings.get("LLM_BASE_URL", "http://localhost:11434") + fallback_url = rag_settings.get("LLM_BASE_URL", "http://host.docker.internal:11434") return fallback_url if fallback_url.endswith('/v1') else f"{fallback_url}/v1" except Exception as fallback_error: logger.error(f"Could not retrieve fallback configuration: {fallback_error}") - return "http://localhost:11434/v1" + return "http://host.docker.internal:11434/v1" async def get_embedding_model(provider: str | None = None) -> str: diff --git a/python/src/server/services/provider_discovery_service.py b/python/src/server/services/provider_discovery_service.py index e49341cf77..ccd811dd6f 100644 --- a/python/src/server/services/provider_discovery_service.py +++ b/python/src/server/services/provider_discovery_service.py @@ -23,7 +23,7 @@ _CACHE_TTL_SECONDS = 300 # 5 minutes # Default Ollama instance URL (configurable via environment/settings) -DEFAULT_OLLAMA_URL = "http://localhost:11434" +DEFAULT_OLLAMA_URL = "http://host.docker.internal:11434" # Model pattern detection for dynamic capabilities (no hardcoded model names) CHAT_MODEL_PATTERNS = ["llama", "qwen", "mistral", "codellama", "phi", "gemma", "vicuna", "orca"] diff --git a/python/tests/test_async_llm_provider_service.py b/python/tests/test_async_llm_provider_service.py index 6c0128972f..e52c224250 100644 --- a/python/tests/test_async_llm_provider_service.py +++ b/python/tests/test_async_llm_provider_service.py @@ -69,7 +69,7 @@ def ollama_provider_config(self): return { "provider": "ollama", "api_key": "ollama", - "base_url": "http://localhost:11434/v1", + "base_url": "http://host.docker.internal:11434/v1", "chat_model": "llama2", "embedding_model": "nomic-embed-text", } @@ -127,7 +127,7 @@ async def test_get_llm_client_ollama_success( async with get_llm_client() as client: assert client == mock_client mock_openai.assert_called_once_with( - api_key="ollama", base_url="http://localhost:11434/v1" + api_key="ollama", base_url="http://host.docker.internal:11434/v1" ) @pytest.mark.asyncio @@ -216,7 +216,7 @@ async def test_get_llm_client_missing_openai_key_with_ollama_fallback(self, mock } mock_credential_service.get_active_provider.return_value = config_without_key mock_credential_service.get_credentials_by_category = AsyncMock(return_value={ - "LLM_BASE_URL": "http://localhost:11434" + "LLM_BASE_URL": "http://host.docker.internal:11434" }) with patch( @@ -234,7 +234,7 @@ async def test_get_llm_client_missing_openai_key_with_ollama_fallback(self, mock # Verify it created an Ollama client with correct params mock_openai.assert_called_once_with( api_key="ollama", - base_url="http://localhost:11434/v1" + base_url="http://host.docker.internal:11434/v1" ) @pytest.mark.asyncio @@ -480,7 +480,7 @@ async def test_multiple_providers_in_sequence(self, mock_credential_service): """Test creating clients for different providers in sequence""" configs = [ {"provider": "openai", "api_key": "openai-key", "base_url": None}, - {"provider": "ollama", "api_key": "ollama", "base_url": "http://localhost:11434/v1"}, + {"provider": "ollama", "api_key": "ollama", "base_url": "http://host.docker.internal:11434/v1"}, { "provider": "google", "api_key": "google-key",