diff --git a/.gitignore b/.gitignore
index d1e415cb4c..d88bfa24fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,7 @@ UAT/
 
 # Local release notes testing
 release-notes-*.md
+
+# Supabase local data
+supabase/volumes/
+supabase/.temp/
diff --git a/INFRASTRUCTURE.md b/INFRASTRUCTURE.md
new file mode 100644
index 0000000000..277372876c
--- /dev/null
+++ b/INFRASTRUCTURE.md
@@ -0,0 +1,544 @@
+# Archon Infrastructure Setup
+
+> Dokumentation der lokalen Entwicklungsumgebung mit Supabase und Archon
+>
+> **Erstellt**: 20. November 2025
+> **Status**: ✅ Produktiv
+> **Letzte Aktualisierung**: 20. November 2025
+
+---
+
+## 📋 Übersicht
+
+Diese Dokumentation beschreibt die vollständige lokale Entwicklungsumgebung für Archon mit Supabase als Backend-Datenbank.
+
+### Komponenten
+
+- **Supabase** (lokal): PostgreSQL 17.6 mit allen Services
+- **Archon**: KI-gestütztes Knowledge Management System
+  - Backend Server (FastAPI)
+  - MCP Server (Model Context Protocol)
+  - Frontend (React)
+
+---
+
+## 🏗️ Architektur
+
+```
+┌─────────────────────────────────────────────────────────┐
+│                    Docker Desktop                        │
+├─────────────────────────────────────────────────────────┤
+│                                                           │
+│  ┌──────────────────────────────────────────────────┐  │
+│  │              Supabase Stack                       │  │
+│  │  (verwaltet durch supabase CLI)                   │  │
+│  ├──────────────────────────────────────────────────┤  │
+│  │  - PostgreSQL 17.6      (Port 54322)             │  │
+│  │  - Kong API Gateway     (Port 54321)             │  │
+│  │  - Supabase Studio      (Port 54323)             │  │
+│  │  - GoTrue Auth          (intern)                 │  │
+│  │  - Storage API          (intern)                 │  │
+│  │  - Realtime             (intern)                 │  │
+│  │  - Edge Functions       (intern)                 │  │
+│  │  - Vector/Analytics     (intern)                 │  │
+│  └──────────────────────────────────────────────────┘  │
+│                                                           │
+│  ┌──────────────────────────────────────────────────┐  │
+│  │              Archon Stack                         │  │
+│  │  (verwaltet durch docker compose)                 │  │
+│  ├──────────────────────────────────────────────────┤  │
+│  │  - archon-server        (Port 8181)              │  │
+│  │  - archon-mcp           (Port 8051)              │  │
+│  │  - archon-ui            (Port 3737)              │  │
+│  └──────────────────────────────────────────────────┘  │
+│                                                           │
+└─────────────────────────────────────────────────────────┘
+```
+
+---
+
+## 🚀 Installation & Konfiguration
+
+### Voraussetzungen
+
+- Docker Desktop für Mac (läuft bereits)
+- Homebrew (installiert)
+- Git (installiert)
+
+### 1. Supabase CLI Installation
+
+```bash
+brew install supabase/tap/supabase
+# Version: 2.58.5
+```
+
+### 2. Supabase Initialisierung
+
+```bash
+cd /Volumes/DATEN/Coding/archon/supabase
+supabase start
+```
+
+**Wichtig**: Dies erstellt Container mit dem Suffix `_supabase` (z.B. `supabase_db_supabase`)
+
+### 3. Datenbank-Schema anwenden
+
+```bash
+cd /Volumes/DATEN/Coding/archon
+docker exec -i supabase_db_supabase psql -U postgres -d postgres < migration/complete_setup.sql
+```
+
+Erstellt folgende Tabellen:
+- `archon_code_examples`
+- `archon_crawled_pages`
+- `archon_document_versions`
+- `archon_migrations`
+- `archon_page_metadata`
+- `archon_project_sources`
+- `archon_projects`
+- `archon_prompts`
+- `archon_settings`
+- `archon_sources`
+- `archon_tasks`
+
+### 4. Umgebungsvariablen konfigurieren
+
+**Datei**: `/Volumes/DATEN/Coding/archon/.env`
+
+```bash
+# Supabase Connection (für Docker Container)
+SUPABASE_URL=http://host.docker.internal:54321
+
+# JWT Service Role Key (generiert mit lokalem JWT_SECRET)
+SUPABASE_SERVICE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlIjoic2VydmljZV9yb2xlIiwiaXNzIjoic3VwYWJhc2UiLCJpYXQiOjE3NjM2NTg1MDYsImV4cCI6MjA3OTAxODUwNn0.HVH5TgwW70JZtiGdnjU4RGexDDVbGnI3mXt-diQhVy8
+
+# Service Ports
+ARCHON_SERVER_PORT=8181
+ARCHON_MCP_PORT=8051
+ARCHON_AGENTS_PORT=8052
+ARCHON_UI_PORT=3737
+ARCHON_DOCS_PORT=3838
+```
+
+### 5. Archon Services starten
+
+```bash
+cd /Volumes/DATEN/Coding/archon
+docker compose up -d
+```
+
+---
+
+## 🔑 Wichtige Authentifizierung-Details
+
+### JWT-Token-Problem und Lösung
+
+**Problem**: Die Standard-JWT-Tokens aus `supabase/.env` funktionieren nicht mit der laufenden Supabase-Instanz.
+
+**Ursache**: Supabase CLI generiert beim Start ein neues JWT_SECRET (`super-secret-jwt-token-with-at-least-32-characters-long`), das nicht mit den vordefinierten Tokens übereinstimmt.
+
+**Lösung**: JWT-Token mit dem korrekten Secret generieren:
+
+```python
+import jwt
+from datetime import datetime, timedelta
+
+secret = "super-secret-jwt-token-with-at-least-32-characters-long"
+
+service_role_payload = {
+    "role": "service_role",
+    "iss": "supabase",
+    "iat": int(datetime.now().timestamp()),
+    "exp": int((datetime.now() + timedelta(days=365*10)).timestamp())
+}
+
+token = jwt.encode(service_role_payload, secret, algorithm="HS256")
+print(token)
+```
+
+**Wichtig**: Supabase Python Client (v2.15.1) benötigt JWT-Format, NICHT das neue `sb_secret_*` Format!
+
+---
+
+## 📍 Zugriffspunkte
+
+### Supabase
+
+| Service | URL | Credentials |
+|---------|-----|-------------|
+| **Studio UI** | http://localhost:54323 | - |
+| **API Gateway** | http://localhost:54321 | Service Key (siehe .env) |
+| **PostgreSQL** | `postgresql://postgres:postgres@localhost:54322/postgres` | postgres/postgres |
+
+### Archon
+
+| Service | URL | Beschreibung |
+|---------|-----|--------------|
+| **UI** | http://localhost:3737 | Hauptanwendung |
+| **API Server** | http://localhost:8181 | Backend API |
+| **MCP Server** | http://localhost:8051 | Model Context Protocol |
+| **Health Check** | http://localhost:8181/health | Server-Status |
+
+---
+
+## 🛠️ Wartung & Verwaltung
+
+### Supabase Status prüfen
+
+```bash
+cd /Volumes/DATEN/Coding/archon/supabase
+supabase status
+```
+
+Zeigt:
+- API URL
+- Database URL
+- Studio URL
+- Publishable/Secret Keys
+- Gestoppte Services
+
+### Container Status
+
+```bash
+# Alle Container anzeigen
+docker ps --format "table {{.Names}}\t{{.Status}}"
+
+# Nur Archon
+docker ps --format "table {{.Names}}\t{{.Status}}" | grep archon
+
+# Nur Supabase
+docker ps --format "table {{.Names}}\t{{.Status}}" | grep supabase
+```
+
+### Services neu starten
+
+**Supabase**:
+```bash
+cd /Volumes/DATEN/Coding/archon/supabase
+supabase stop
+supabase start
+```
+
+**Archon**:
+```bash
+cd /Volumes/DATEN/Coding/archon
+docker compose restart
+# oder für kompletten Neustart:
+docker compose down && docker compose up -d
+```
+
+### Logs einsehen
+
+**Supabase**:
+```bash
+docker logs supabase_db_supabase -f
+docker logs supabase_kong_supabase -f
+```
+
+**Archon**:
+```bash
+docker logs archon-server -f
+docker logs archon-mcp -f
+docker logs archon-ui -f
+```
+
+---
+
+## 🐛 Troubleshooting
+
+### Problem: Container mit Status "Created" oder "Restarting"
+
+**Symptom**: Container ohne `_supabase` Suffix existieren und starten nicht.
+
+**Ursache**: Docker Compose hat versehentlich Supabase-Container erstellt (sollte nur Archon verwalten).
+
+**Lösung**:
+```bash
+# Fehlerhafte Container entfernen
+docker rm -f supabase-db supabase-kong supabase-auth supabase-storage \
+  supabase-studio supabase-rest supabase-analytics supabase-meta \
+  supabase-edge-functions supabase-pooler supabase-vector \
+  supabase-imgproxy realtime-dev.supabase-realtime
+
+# Überflüssiges Netzwerk entfernen
+docker network rm supabase_default
+```
+
+### Problem: "Invalid API key" Fehler
+
+**Symptom**: `SupabaseException: Invalid API key` beim Start von archon-server.
+
+**Ursache**: Falsches JWT-Token-Format oder falsches Secret.
+
+**Lösung**: JWT-Token mit dem tatsächlichen Secret neu generieren (siehe Abschnitt "JWT-Token-Problem").
+
+### Problem: Port-Konflikte
+
+**Symptom**: "Port already in use" beim Start.
+
+**Lösung**:
+```bash
+# Belegte Ports prüfen
+lsof -i :54321  # Supabase API
+lsof -i :8181   # Archon Server
+lsof -i :3737   # Archon UI
+
+# Container stoppen
+supabase stop
+docker compose down
+```
+
+### Problem: Container "unhealthy"
+
+**Symptom**: Container läuft, aber Status zeigt "unhealthy".
+
+**Diagnose**:
+```bash
+# Logs prüfen
+docker logs <container-name> --tail 50
+
+# Healthcheck-Details
+docker inspect <container-name> | grep -A 10 Health
+```
+
+**Häufige Ursachen**:
+- Datenbankverbindung fehlgeschlagen → JWT-Token prüfen
+- Port nicht erreichbar → Netzwerk-Konfiguration prüfen
+- Service noch nicht bereit → Warten und Status erneut prüfen
+
+---
+
+## 🗄️ Datenbank-Management
+
+### Direkter Zugriff
+
+```bash
+# Via Docker
+docker exec -it supabase_db_supabase psql -U postgres -d postgres
+
+# Via lokaler psql (wenn installiert)
+psql -h localhost -p 54322 -U postgres -d postgres
+```
+
+### Backup erstellen
+
+```bash
+# Vollständiges Backup
+docker exec supabase_db_supabase pg_dump -U postgres postgres > backup_$(date +%Y%m%d_%H%M%S).sql
+
+# Nur Schema
+docker exec supabase_db_supabase pg_dump -U postgres -s postgres > schema_backup.sql
+
+# Nur Daten
+docker exec supabase_db_supabase pg_dump -U postgres -a postgres > data_backup.sql
+```
+
+### Backup wiederherstellen
+
+```bash
+docker exec -i supabase_db_supabase psql -U postgres -d postgres < backup.sql
+```
+
+### Migration hinzufügen
+
+1. Neue Migration erstellen:
+```bash
+cd /Volumes/DATEN/Coding/archon/supabase
+supabase migration new <migration_name>
+```
+
+2. SQL-Befehle in generierte Datei einfügen
+
+3. Migration anwenden:
+```bash
+docker exec -i supabase_db_supabase psql -U postgres -d postgres < supabase/migrations/<timestamp>_<migration_name>.sql
+```
+
+---
+
+## 🔒 Sicherheit
+
+### Produktionsumgebung
+
+**WICHTIG**: Diese Konfiguration ist NUR für lokale Entwicklung geeignet!
+
+Für Produktion ändern:
+
+1. **JWT Secret** in `supabase/.env` ändern:
+```bash
+JWT_SECRET=<sicheres-256-bit-secret>
+```
+
+2. **Neue JWT-Tokens** generieren mit neuem Secret
+
+3. **PostgreSQL Passwort** ändern:
+```bash
+POSTGRES_PASSWORD=<sicheres-passwort>
+```
+
+4. **Dashboard Credentials** ändern:
+```bash
+DASHBOARD_USERNAME=<username>
+DASHBOARD_PASSWORD=<passwort>
+```
+
+5. **Firewall-Regeln** konfigurieren (nur notwendige Ports öffnen)
+
+### API Keys sicher speichern
+
+Archon speichert sensible API Keys verschlüsselt in der Datenbank:
+- OpenAI API Key
+- Google API Key
+- Anthropic API Key
+- etc.
+
+Konfiguration über UI: http://localhost:3737/settings
+
+---
+
+## 📊 Ressourcen-Übersicht
+
+### Docker Images (ca. 15.7 GB)
+
+**Archon** (5.69 GB):
+- `archon-archon-server`: 3.77 GB
+- `archon-archon-frontend`: 1.54 GB
+- `archon-archon-mcp`: 385 MB
+
+**Supabase** (~10 GB):
+- `public.ecr.aws/supabase/postgres:17.6.1.043`: 4.33 GB
+- `public.ecr.aws/supabase/studio`: 1.2 GB
+- `public.ecr.aws/supabase/storage-api`: 1.11 GB
+- `public.ecr.aws/supabase/edge-runtime`: 1.07 GB
+- `public.ecr.aws/supabase/logflare`: 1.02 GB
+- `public.ecr.aws/supabase/realtime`: 659 MB
+- `public.ecr.aws/supabase/postgrest`: 585 MB
+- `public.ecr.aws/supabase/postgres-meta`: 568 MB
+- `public.ecr.aws/supabase/kong`: 212 MB
+- `public.ecr.aws/supabase/vector`: 160 MB
+- `public.ecr.aws/supabase/gotrue`: 74 MB
+- `public.ecr.aws/supabase/mailpit`: 43 MB
+
+### Laufende Container (15)
+
+**Archon** (3):
+- archon-server
+- archon-mcp
+- archon-ui
+
+**Supabase** (12):
+- supabase_db_supabase
+- supabase_kong_supabase
+- supabase_studio_supabase
+- supabase_auth_supabase
+- supabase_storage_supabase
+- supabase_realtime_supabase
+- supabase_rest_supabase
+- supabase_vector_supabase
+- supabase_analytics_supabase
+- supabase_pg_meta_supabase
+- supabase_edge_runtime_supabase
+- supabase_inbucket_supabase
+
+---
+
+## 📚 Referenzen
+
+### Offizielle Dokumentation
+
+- **Archon**: https://github.com/coleam00/Archon
+- **Supabase**: https://supabase.com/docs
+- **Supabase CLI**: https://supabase.com/docs/guides/cli
+
+### Wichtige Konfigurationsdateien
+
+- `/Volumes/DATEN/Coding/archon/.env` - Archon Umgebungsvariablen
+- `/Volumes/DATEN/Coding/archon/docker-compose.yml` - Archon Services
+- `/Volumes/DATEN/Coding/archon/supabase/.env` - Supabase Konfiguration
+- `/Volumes/DATEN/Coding/archon/supabase/volumes/api/kong.yml` - Kong API Gateway
+- `/Volumes/DATEN/Coding/archon/migration/complete_setup.sql` - Datenbank-Schema
+
+### Nützliche Befehle (Schnellreferenz)
+
+```bash
+# Status prüfen
+cd /Volumes/DATEN/Coding/archon
+docker ps | grep -E "archon|supabase"
+curl http://localhost:8181/health
+
+# Alles neu starten
+cd supabase && supabase stop && supabase start
+cd .. && docker compose restart
+
+# Alles stoppen
+docker compose down
+cd supabase && supabase stop
+
+# Logs verfolgen
+docker compose logs -f
+docker logs archon-server -f
+
+# Datenbank abfragen
+docker exec -it supabase_db_supabase psql -U postgres -d postgres
+```
+
+---
+
+## 🎯 Nächste Schritte
+
+Nach erfolgreicher Installation:
+
+1. **API Keys konfigurieren** unter http://localhost:3737/settings
+   - OpenAI API Key (für Embeddings)
+   - Optional: Google, Anthropic, etc.
+
+2. **Knowledge Base befüllen**:
+   - Dokumente hochladen
+   - Websites crawlen
+
+3. **MCP Server mit IDE verbinden**:
+   - Claude Code: `claude mcp add --transport http archon http://localhost:8051/mcp`
+   - Cursor/Windsurf: Siehe http://localhost:3737/mcp
+
+4. **Projekte & Tasks erstellen** (optional, wenn Feature aktiviert)
+
+---
+
+## ✅ Verifizierung
+
+Alle Services sollten diesen Status zeigen:
+
+```bash
+$ docker ps --format "table {{.Names}}\t{{.Status}}" | grep -E "archon|supabase"
+
+archon-mcp                       Up X minutes (healthy)
+archon-ui                        Up X minutes (healthy)
+archon-server                    Up X minutes (healthy)
+supabase_studio_supabase         Up X minutes (healthy)
+supabase_db_supabase             Up X minutes (healthy)
+supabase_kong_supabase           Up X minutes (healthy)
+# ... weitere Supabase Services (alle healthy)
+```
+
+Healthcheck-Endpunkte:
+- ✅ http://localhost:8181/health → `{"status":"healthy",...}`
+- ✅ http://localhost:3737 → Archon UI lädt
+- ✅ http://localhost:54323 → Supabase Studio lädt
+
+---
+
+## 📝 Änderungsprotokoll
+
+### 2025-11-20 - Initiale Einrichtung
+- Supabase CLI 2.58.5 installiert
+- PostgreSQL 17.6 mit Archon-Schema konfiguriert
+- JWT-Token-Problem identifiziert und gelöst
+- Alle Docker Container bereinigt (Duplikate entfernt)
+- Playwright-Tests durchgeführt (alle erfolgreich)
+- Status: ✅ Produktiv
+
+---
+
+**Maintainer**: Mathias Boni
+**Zuletzt getestet**: 20. November 2025
+**Archon Version**: 0.1.0
diff --git a/PLAN.md b/PLAN.md
new file mode 100644
index 0000000000..d463798c29
--- /dev/null
+++ b/PLAN.md
@@ -0,0 +1,344 @@
+# Auth-Token Support für Ollama Chat & Embedding Instanzen - Implementierungsplan
+
+## Status: ✅ VOLLSTÄNDIG ABGESCHLOSSEN UND DEPLOYED
+
+## Kontext
+
+**Aktuelle Situation:**
+- Frontend hat ZWEI separate Ollama-Konfigurationen:
+  - **LLM/Chat**: Gespeichert in `LLM_BASE_URL` (rag_strategy)
+  - **Embedding**: Gespeichert in `OLLAMA_EMBEDDING_URL` (rag_strategy)
+- Backend liest diese URLs und erstellt OpenAI-kompatible Clients
+- **PROBLEM GELÖST**: Auth-Token-Unterstützung für geschützte Ollama-Instanzen implementiert
+
+## Ziel ✅ ERREICHT
+
+Für BEIDE Ollama-Instanzen (Chat & Embedding) optionale Auth-Token-Felder hinzugefügt:
+- ✅ Checkbox "Use Authentication" in jedem Modal
+- ✅ Password-Input für Auth-Token (nur sichtbar wenn Checkbox aktiviert)
+- ✅ Backend nutzt die korrekten Token basierend auf Operation (Chat vs. Embedding)
+
+## Implementierte Änderungen
+
+### ✅ 1. Frontend: RAGSettings.tsx erweitert
+
+**Datei**: `archon-ui-main/src/components/settings/RAGSettings.tsx`
+
+**State-Management (Zeile 207-219):**
+```typescript
+const [llmInstanceConfig, setLLMInstanceConfig] = useState({
+  name: '',
+  url: ragSettings.LLM_BASE_URL || 'http://host.docker.internal:11434/v1',
+  useAuth: false,
+  authToken: ''
+});
+
+const [embeddingInstanceConfig, setEmbeddingInstanceConfig] = useState({
+  name: '',
+  url: ragSettings.OLLAMA_EMBEDDING_URL || 'http://host.docker.internal:11434/v1',
+  useAuth: false,
+  authToken: ''
+});
+```
+
+**useEffect Hooks (Zeile 226-270):**
+- ✅ Lädt `OLLAMA_CHAT_AUTH_TOKEN` aus ragSettings
+- ✅ Lädt `OLLAMA_EMBEDDING_AUTH_TOKEN` aus ragSettings
+- ✅ Setzt `useAuth` Checkbox automatisch basierend auf vorhandenem Token
+
+**Edit LLM Instance Modal (Zeile 2209-2232):**
+- ✅ Checkbox "Use Authentication"
+- ✅ Conditional Password-Input für Auth-Token
+- ✅ Beim Speichern (Zeile 2244-2250): Speichert `OLLAMA_CHAT_AUTH_TOKEN` in ragSettings
+
+**Edit Embedding Instance Modal (Zeile 2299-2322):**
+- ✅ Checkbox "Use Authentication"
+- ✅ Conditional Password-Input für Auth-Token
+- ✅ Beim Speichern (Zeile 2334-2340): Speichert `OLLAMA_EMBEDDING_AUTH_TOKEN` in ragSettings
+
+### ✅ 2. Backend: llm_provider_service.py angepasst
+
+**Datei**: `python/src/server/services/llm_provider_service.py`
+
+**Funktion `get_llm_client()` - Hauptimplementierung (Zeile 455-459):**
+```python
+# Get correct auth token based on operation type
+if use_embedding_provider or instance_type == "embedding":
+    ollama_auth_token = rag_settings.get("OLLAMA_EMBEDDING_AUTH_TOKEN", "ollama")
+else:
+    ollama_auth_token = rag_settings.get("OLLAMA_CHAT_AUTH_TOKEN", "ollama")
+```
+
+**Fallback-Code (Zeile 422-426):**
+```python
+# Get correct auth token based on operation type
+if use_embedding_provider:
+    ollama_auth_token = rag_settings.get("OLLAMA_EMBEDDING_AUTH_TOKEN", "ollama")
+else:
+    ollama_auth_token = rag_settings.get("OLLAMA_CHAT_AUTH_TOKEN", "ollama")
+```
+
+### ✅ 3. Datenbank-Schema
+
+**KEINE Änderungen nötig!**
+- ✅ Nutzt existierende `archon_settings` Tabelle
+- ✅ Neue Keys werden automatisch gespeichert:
+  - `OLLAMA_CHAT_AUTH_TOKEN` (Kategorie: rag_strategy)
+  - `OLLAMA_EMBEDDING_AUTH_TOKEN` (Kategorie: rag_strategy)
+
+## Deployment Status
+
+### ✅ 4. Frontend Build
+
+```bash
+cd archon-ui-main
+npm run build
+```
+
+**Status**: ✅ ABGESCHLOSSEN
+
+### ✅ 5. Docker Images neu bauen und deployen
+
+```bash
+cd /Volumes/DATEN/Coding/INFRASTRUCTURE_PROJECT/archon-local_supabase/archon
+docker compose down
+docker compose build --no-cache
+docker compose up -d
+```
+
+**Status**: ✅ ABGESCHLOSSEN
+
+**Deployment Zeitpunkt**: 2025-11-20
+
+**Laufende Services**:
+- ✅ `archon-server` (Port 8181) - healthy
+- ✅ `archon-mcp` (Port 8051) - running
+- ✅ `archon-ui` (Port 3737) - running
+
+### 🧪 6. Testing
+
+**Bereit zum Testen!** Das System ist deployed und läuft.
+
+**Test-Anleitung**:
+
+1. **UI öffnen**: http://localhost:3737
+2. **Settings öffnen** → RAG Settings Tab
+3. **LLM Instance konfigurieren**:
+   - Klicke auf "Edit" bei der LLM Instance
+   - Aktiviere "Use Authentication" Checkbox
+   - Trage dein Ollama Auth-Token ein
+   - Speichern
+4. **Embedding Instance konfigurieren**:
+   - Klicke auf "Edit" bei der Embedding Instance
+   - Aktiviere "Use Authentication" Checkbox
+   - Trage dein Ollama Auth-Token ein (kann unterschiedlich sein)
+   - Speichern
+5. **RAG-Funktionalität testen**:
+   - Starte einen Crawl oder Search
+   - Verifiziere, dass die geschützte Ollama-Instanz verwendet wird
+6. **Backend-Logs prüfen** (optional):
+   ```bash
+   docker compose logs -f archon-server | grep -i "ollama\|auth"
+   ```
+
+**Erwartetes Verhalten**:
+- ✅ Auth-Token wird als Bearer Token im Authorization Header gesendet
+- ✅ Ollama-Instanz akzeptiert authentifizierte Requests
+- ✅ Ohne Auth-Token: Placeholder "required-but-ignored" wird verwendet (abwärtskompatibel)
+
+## Update: 2025-11-20 - Health-Check & Summary Fixes
+
+### Problem
+Nach dem initialen Deployment wurden zwei Probleme identifiziert:
+1. **Health-Check zeigt "Offline"**: Der Health-Check-Endpoint verwendete kein Auth-Token für geschützte Instanzen
+2. **Auth-Token nicht sichtbar in Summary**: Die Summary-Tabelle zeigte nicht an, ob ein Auth-Token konfiguriert ist
+
+### Implementierte Fixes
+
+#### ✅ Frontend: Auth-Token Status in Summary anzeigen
+**Datei**: `archon-ui-main/src/components/settings/RAGSettings.tsx` (Zeile 1723-1750)
+
+Neue Zeile in der Summary-Tabelle zwischen "Instance URL" und "Status":
+```typescript
+<tr>
+  <td className="py-2 text-gray-400">Authentication</td>
+  <td className="py-2">
+    {activeSelection === 'chat' ? (
+      llmInstanceConfig.authToken ? (
+        <span className="text-teal-400 flex items-center">
+          <svg className="w-4 h-4 mr-1" fill="currentColor" viewBox="0 0 20 20">
+            <path fillRule="evenodd" d="M5 9V7a5 5 0 0110 0v2a2 2 0 012 2v5a2 2 0 01-2 2H5a2 2 0 01-2-2v-5a2 2 0 012-2zm8-2v2H7V7a3 3 0 016 0z" clipRule="evenodd" />
+          </svg>
+          Token configured
+        </span>
+      ) : (
+        <span className="text-gray-500 italic">No authentication</span>
+      )
+    ) : (
+      // Gleiche Logik für Embedding Instance
+    )}
+  </td>
+</tr>
+```
+
+#### ✅ Backend: Health-Check mit Auth-Token Support
+
+**Datei 1**: `python/src/server/services/ollama/model_discovery_service.py` (Zeile 958-993)
+
+Erweiterte `check_instance_health()` Methode um optionalen `auth_token` Parameter:
+```python
+async def check_instance_health(self, instance_url: str, auth_token: str | None = None) -> InstanceHealthStatus:
+    # Prepare headers with optional auth token
+    headers = {}
+    if auth_token:
+        headers["Authorization"] = f"Bearer {auth_token}"
+
+    async with httpx.AsyncClient(timeout=httpx.Timeout(10)) as client:
+        ping_url = f"{instance_url.rstrip('/')}/api/tags"
+        response = await client.get(ping_url, headers=headers)
+        # ...
+```
+
+**Datei 2**: `python/src/server/api_routes/ollama_api.py` (Zeile 142-199)
+
+Health-Check-Endpoint liest Auth-Tokens aus RAG Settings:
+```python
+@router.get("/instances/health")
+async def health_check_endpoint(
+    instance_urls: list[str] = Query(...),
+    include_models: bool = Query(False)
+) -> dict[str, Any]:
+    # Get auth tokens from RAG settings
+    rag_settings = await credential_service.get_credentials_by_category("rag_strategy")
+
+    llm_base_url = rag_settings.get("LLM_BASE_URL", "").replace("/v1", "").rstrip("/")
+    embedding_base_url = rag_settings.get("OLLAMA_EMBEDDING_URL", "").replace("/v1", "").rstrip("/")
+
+    chat_auth_token = rag_settings.get("OLLAMA_CHAT_AUTH_TOKEN", "")
+    embedding_auth_token = rag_settings.get("OLLAMA_EMBEDDING_AUTH_TOKEN", "")
+
+    # Determine which auth token to use based on URL matching
+    for instance_url in instance_urls:
+        url = instance_url.rstrip('/')
+        auth_token = None
+        if url == llm_base_url and chat_auth_token:
+            auth_token = chat_auth_token
+        elif url == embedding_base_url and embedding_auth_token:
+            auth_token = embedding_auth_token
+
+        health_status = await model_discovery_service.check_instance_health(url, auth_token=auth_token)
+        # ...
+```
+
+### ✅ Deployment (2025-11-20 16:00)
+
+- ✅ Frontend neu gebaut
+- ✅ Docker Images neu gebaut (Frontend, Server, MCP)
+- ✅ Container neu deployed
+- ✅ Alle Services laufen: archon-server (healthy), archon-mcp (healthy), archon-ui (running)
+
+### Erwartetes Verhalten (nach Fix)
+
+1. **Summary zeigt Auth-Token Status**:
+   - ✅ "Token configured" mit Schloss-Icon wenn Token gesetzt
+   - ✅ "No authentication" wenn kein Token
+
+2. **Health-Check funktioniert mit Auth**:
+   - ✅ Backend sendet Bearer Token im Authorization Header
+   - ✅ Health-Check sollte jetzt "Online" zeigen für geschützte Instanzen
+   - ✅ Status-Anfrage erfolgt automatisch beim Öffnen der RAG Settings
+
+### ✅ Final Verification (2025-11-20 22:05)
+
+**Datenbank-Status**: Alle erforderlichen Settings sind gespeichert:
+```sql
+             key             |                  value                  |   category
+-----------------------------+-----------------------------------------+--------------
+ LLM_BASE_URL                | https://ollama.brusdeylins.info         | rag_strategy
+ OLLAMA_CHAT_AUTH_TOKEN      | ollama_13107e338aa16a6a8295592ce050f6cb | rag_strategy
+ OLLAMA_EMBEDDING_AUTH_TOKEN | ollama_13107e338aa16a6a8295592ce050f6cb | rag_strategy
+ OLLAMA_EMBEDDING_URL        | https://ollama.brusdeylins.info         | rag_strategy
+```
+
+**Health-Check-Test**: Erfolgreiche Authentifizierung mit geschützter Instanz:
+```json
+{
+    "summary": {
+        "total_instances": 1,
+        "healthy_instances": 1,
+        "unhealthy_instances": 0,
+        "average_response_time_ms": 672.12
+    },
+    "instance_status": {
+        "https://ollama.brusdeylins.info": {
+            "is_healthy": true,
+            "response_time_ms": 672.12,
+            "models_available": 5,
+            "error_message": null
+        }
+    }
+}
+```
+
+**Ergebnis**: ✅ **VOLLSTÄNDIG FUNKTIONSFÄHIG**
+- Health-Check zeigt "Online" Status
+- 5 Modelle erfolgreich erkannt
+- Bearer Token-Authentifizierung funktioniert
+- Response-Zeit: ~672ms (akzeptabel)
+
+## Geänderte Dateien (Gesamt)
+
+1. ✅ `archon-ui-main/src/components/settings/RAGSettings.tsx` (Initial + Summary-Fix)
+2. ✅ `python/src/server/services/llm_provider_service.py` (Token-Auswahl)
+3. ✅ `python/src/server/services/ollama/model_discovery_service.py` (Health-Check Auth)
+4. ✅ `python/src/server/api_routes/ollama_api.py` (Health-Check Endpoint)
+
+## Technische Details
+
+### Frontend → Backend Datenfluss
+
+1. **User füllt Modal aus**:
+   - URL: `http://my-ollama:11434`
+   - Checkbox "Use Authentication": ✓
+   - Auth Token: `my-secret-token`
+
+2. **Frontend speichert in archon_settings**:
+   ```json
+   {
+     "LLM_BASE_URL": "http://my-ollama:11434",
+     "OLLAMA_CHAT_AUTH_TOKEN": "my-secret-token"
+   }
+   ```
+
+3. **Backend liest und verwendet**:
+   ```python
+   # In llm_provider_service.py
+   ollama_base_url = await _get_optimal_ollama_instance()  # → "http://my-ollama:11434/v1"
+   ollama_auth_token = rag_settings.get("OLLAMA_CHAT_AUTH_TOKEN", "ollama")  # → "my-secret-token"
+
+   client = openai.AsyncOpenAI(
+       api_key=ollama_auth_token,  # ← Wird als Bearer Token im HTTP Header verwendet
+       base_url=ollama_base_url
+   )
+   ```
+
+### Sicherheit
+
+- ✅ Token-Felder sind `type="password"` (versteckte Eingabe)
+- ✅ Token wird nur gespeichert wenn Checkbox aktiviert ist
+- ✅ Leerer Token = kein Auth-Header (abwärtskompatibel)
+- ⚠️ Token wird im Klartext in `archon_settings` gespeichert (Future: Verschlüsselung)
+
+## Abwärtskompatibilität
+
+✅ **100% kompatibel mit bestehenden Installationen:**
+- Ohne Auth-Token: Standard-Wert `"ollama"` wird verwendet
+- Bestehende Instanzen funktionieren weiterhin
+- Neue Felder sind optional
+
+## Lessons Learned
+
+1. ✅ Keine DB-Schema-Änderungen nötig bei generischen Key-Value-Tabellen
+2. ✅ TypeScript `as any` für neue Settings-Keys akzeptabel während Entwicklung
+3. ✅ Separate Token für Chat/Embedding ermöglicht flexible Deployment-Szenarien
+4. ✅ useEffect Hooks müssen Token in Dependencies aufnehmen für korrektes Laden
diff --git a/QUICKSTART.md b/QUICKSTART.md
new file mode 100644
index 0000000000..b42dd806d3
--- /dev/null
+++ b/QUICKSTART.md
@@ -0,0 +1,47 @@
+# Archon + Supabase - Quick Start Guide
+
+> Schnellstart für die lokale Entwicklungsumgebung
+
+## ⚡ Schnellstart (für erfahrene Entwickler)
+
+```bash
+# 1. Supabase starten
+cd /Volumes/DATEN/Coding/archon/supabase
+supabase start
+
+# 2. Archon starten
+cd /Volumes/DATEN/Coding/archon
+docker compose up -d
+
+# 3. Status prüfen
+docker ps | grep -E "archon|supabase"
+curl http://localhost:8181/health
+```
+
+## 🌐 Zugriff
+
+| Service | URL | Beschreibung |
+|---------|-----|-------------|
+| **Archon UI** | http://localhost:3737 | Hauptanwendung |
+| **Supabase Studio** | http://localhost:54323 | Datenbank-UI |
+| **API Server** | http://localhost:8181 | Backend API |
+| **MCP Server** | http://localhost:8051/mcp | Model Context Protocol |
+
+## 🛑 Stoppen
+
+```bash
+# Archon stoppen
+docker compose down
+
+# Supabase stoppen
+cd supabase && supabase stop
+```
+
+## 📚 Vollständige Dokumentation
+
+Siehe **[INFRASTRUCTURE.md](./INFRASTRUCTURE.md)** für:
+- Detaillierte Installation
+- Troubleshooting
+- Datenbank-Management
+- Sicherheitshinweise
+- Backup/Restore
diff --git a/archon-ui-main/package-lock.json b/archon-ui-main/package-lock.json
index 37b3e9a745..16740b78ad 100644
--- a/archon-ui-main/package-lock.json
+++ b/archon-ui-main/package-lock.json
@@ -42,6 +42,7 @@
       },
       "devDependencies": {
         "@biomejs/biome": "2.2.2",
+        "@playwright/test": "^1.56.1",
         "@tailwindcss/postcss": "4.1.2",
         "@tailwindcss/vite": "4.1.2",
         "@testing-library/jest-dom": "^6.4.6",
@@ -2389,6 +2390,22 @@
       "integrity": "sha512-CecwLWx3rhxVQF6V4bAgPS5t+So2sTbPgAzafKkVizyi7tlwpcFpdFqq+wqF2OwNBmqFuu6tOyouTuxgpMfzmA==",
       "license": "MIT"
     },
+    "node_modules/@playwright/test": {
+      "version": "1.56.1",
+      "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.56.1.tgz",
+      "integrity": "sha512-vSMYtL/zOcFpvJCW71Q/OEGQb7KYBPAdKh35WNSkaZA75JlAO8ED8UN6GUNTm3drWomcbcqRPFqQbLae8yBTdg==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "dependencies": {
+        "playwright": "1.56.1"
+      },
+      "bin": {
+        "playwright": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
     "node_modules/@polka/url": {
       "version": "1.0.0-next.29",
       "resolved": "https://registry.npmjs.org/@polka/url/-/url-1.0.0-next.29.tgz",
@@ -9752,6 +9769,53 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/playwright": {
+      "version": "1.56.1",
+      "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.56.1.tgz",
+      "integrity": "sha512-aFi5B0WovBHTEvpM3DzXTUaeN6eN0qWnTkKx4NQaH4Wvcmc153PdaY2UBdSYKaGYw+UyWXSVyxDUg5DoPEttjw==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "dependencies": {
+        "playwright-core": "1.56.1"
+      },
+      "bin": {
+        "playwright": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "optionalDependencies": {
+        "fsevents": "2.3.2"
+      }
+    },
+    "node_modules/playwright-core": {
+      "version": "1.56.1",
+      "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.56.1.tgz",
+      "integrity": "sha512-hutraynyn31F+Bifme+Ps9Vq59hKuUCz7H1kDOcBs+2oGguKkWTU50bBWrtz34OUWmIwpBTWDxaRPXrIXkgvmQ==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "bin": {
+        "playwright-core": "cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/playwright/node_modules/fsevents": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
+      "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
+      "dev": true,
+      "hasInstallScript": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
+      }
+    },
     "node_modules/possible-typed-array-names": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz",
diff --git a/archon-ui-main/package.json b/archon-ui-main/package.json
index 576b78ae2b..4419044216 100644
--- a/archon-ui-main/package.json
+++ b/archon-ui-main/package.json
@@ -62,6 +62,7 @@
   },
   "devDependencies": {
     "@biomejs/biome": "2.2.2",
+    "@playwright/test": "^1.56.1",
     "@tailwindcss/postcss": "4.1.2",
     "@tailwindcss/vite": "4.1.2",
     "@testing-library/jest-dom": "^6.4.6",
diff --git a/archon-ui-main/src/components/settings/OllamaConfigurationPanel.tsx b/archon-ui-main/src/components/settings/OllamaConfigurationPanel.tsx
index 4da6f9a0de..a1bba2233e 100644
--- a/archon-ui-main/src/components/settings/OllamaConfigurationPanel.tsx
+++ b/archon-ui-main/src/components/settings/OllamaConfigurationPanel.tsx
@@ -35,6 +35,8 @@ const OllamaConfigurationPanel: React.FC<OllamaConfigurationPanelProps> = ({
   const [newInstanceUrl, setNewInstanceUrl] = useState('');
   const [newInstanceName, setNewInstanceName] = useState('');
   const [newInstanceType, setNewInstanceType] = useState<'chat' | 'embedding'>('chat');
+  const [newInstanceUseAuth, setNewInstanceUseAuth] = useState(false);
+  const [newInstanceAuthToken, setNewInstanceAuthToken] = useState('');
   const [showAddInstance, setShowAddInstance] = useState(false);
   const [discoveringModels, setDiscoveringModels] = useState(false);
   const [modelDiscoveryResults, setModelDiscoveryResults] = useState<any>(null);
@@ -233,7 +235,8 @@ const OllamaConfigurationPanel: React.FC<OllamaConfigurationPanelProps> = ({
       isEnabled: true,
       isPrimary: false,
       loadBalancingWeight: 100,
-      instanceType: separateHosts ? newInstanceType : 'both'
+      instanceType: separateHosts ? newInstanceType : 'both',
+      ...(newInstanceUseAuth && newInstanceAuthToken.trim() && { authToken: newInstanceAuthToken.trim() })
     };
 
     try {
@@ -246,6 +249,8 @@ const OllamaConfigurationPanel: React.FC<OllamaConfigurationPanelProps> = ({
       setNewInstanceUrl('');
       setNewInstanceName('');
       setNewInstanceType('chat');
+      setNewInstanceUseAuth(false);
+      setNewInstanceAuthToken('');
       setShowAddInstance(false);
       
       showToast(`Added new Ollama instance: ${newInstance.name}`, 'success');
@@ -703,8 +708,8 @@ const OllamaConfigurationPanel: React.FC<OllamaConfigurationPanelProps> = ({
                     size="sm"
                     onClick={() => setNewInstanceType('chat')}
                     className={cn(
-                      newInstanceType === 'chat' 
-                        ? 'bg-blue-600 text-white' 
+                      newInstanceType === 'chat'
+                        ? 'bg-blue-600 text-white'
                         : 'text-blue-600 border-blue-600'
                     )}
                   >
@@ -715,8 +720,8 @@ const OllamaConfigurationPanel: React.FC<OllamaConfigurationPanelProps> = ({
                     size="sm"
                     onClick={() => setNewInstanceType('embedding')}
                     className={cn(
-                      newInstanceType === 'embedding' 
-                        ? 'bg-blue-600 text-white' 
+                      newInstanceType === 'embedding'
+                        ? 'bg-blue-600 text-white'
                         : 'text-blue-600 border-blue-600'
                     )}
                   >
@@ -725,7 +730,29 @@ const OllamaConfigurationPanel: React.FC<OllamaConfigurationPanelProps> = ({
                 </div>
               </div>
             )}
-            
+
+            {/* Authentication Settings */}
+            <div className="space-y-2">
+              <label className="flex items-center gap-2 text-sm font-medium text-blue-900 dark:text-blue-100 cursor-pointer">
+                <input
+                  type="checkbox"
+                  checked={newInstanceUseAuth}
+                  onChange={(e) => setNewInstanceUseAuth(e.target.checked)}
+                  className="w-4 h-4 text-blue-600 border-gray-300 rounded focus:ring-blue-500"
+                />
+                Use Authentication
+              </label>
+              {newInstanceUseAuth && (
+                <Input
+                  type="password"
+                  placeholder="Auth Token"
+                  value={newInstanceAuthToken}
+                  onChange={(e) => setNewInstanceAuthToken(e.target.value)}
+                  className="text-sm"
+                />
+              )}
+            </div>
+
             <div className="flex gap-2">
               <Button
                 size="sm"
@@ -742,6 +769,8 @@ const OllamaConfigurationPanel: React.FC<OllamaConfigurationPanelProps> = ({
                   setNewInstanceUrl('');
                   setNewInstanceName('');
                   setNewInstanceType('chat');
+                  setNewInstanceUseAuth(false);
+                  setNewInstanceAuthToken('');
                 }}
               >
                 Cancel
diff --git a/archon-ui-main/src/components/settings/RAGSettings.tsx b/archon-ui-main/src/components/settings/RAGSettings.tsx
index 62739fc77a..38fa067301 100644
--- a/archon-ui-main/src/components/settings/RAGSettings.tsx
+++ b/archon-ui-main/src/components/settings/RAGSettings.tsx
@@ -141,6 +141,7 @@ const normalizeBaseUrl = (url?: string | null): string | null => {
 interface RAGSettingsProps {
   ragSettings: {
     MODEL_CHOICE: string;
+    CHAT_MODEL?: string;
     USE_CONTEXTUAL_EMBEDDINGS: boolean;
     CONTEXTUAL_EMBEDDINGS_MAX_WORKERS: number;
     USE_HYBRID_SEARCH: boolean;
@@ -207,57 +208,67 @@ export const RAGSettings = ({
   // Instance configurations
   const [llmInstanceConfig, setLLMInstanceConfig] = useState({
     name: '',
-    url: ragSettings.LLM_BASE_URL || 'http://host.docker.internal:11434/v1'
+    url: ragSettings.LLM_BASE_URL || 'http://host.docker.internal:11434/v1',
+    useAuth: false,
+    authToken: ''
   });
   const [embeddingInstanceConfig, setEmbeddingInstanceConfig] = useState({
-    name: '', 
-    url: ragSettings.OLLAMA_EMBEDDING_URL || 'http://host.docker.internal:11434/v1'
+    name: '',
+    url: ragSettings.OLLAMA_EMBEDDING_URL || 'http://host.docker.internal:11434/v1',
+    useAuth: false,
+    authToken: ''
   });
 
   // Update instance configs when ragSettings change (after loading from database)
   // Use refs to prevent infinite loops
-  const lastLLMConfigRef = useRef({ url: '', name: '' });
-  const lastEmbeddingConfigRef = useRef({ url: '', name: '' });
-  
+  const lastLLMConfigRef = useRef({ url: '', name: '', authToken: '' });
+  const lastEmbeddingConfigRef = useRef({ url: '', name: '', authToken: '' });
+
   useEffect(() => {
     const newLLMUrl = ragSettings.LLM_BASE_URL || '';
     const newLLMName = ragSettings.LLM_INSTANCE_NAME || '';
-    
-    if (newLLMUrl !== lastLLMConfigRef.current.url || newLLMName !== lastLLMConfigRef.current.name) {
-      lastLLMConfigRef.current = { url: newLLMUrl, name: newLLMName };
+    const newAuthToken = (ragSettings as any).OLLAMA_CHAT_AUTH_TOKEN || '';
+
+    if (newLLMUrl !== lastLLMConfigRef.current.url || newLLMName !== lastLLMConfigRef.current.name || newAuthToken !== lastLLMConfigRef.current.authToken) {
+      lastLLMConfigRef.current = { url: newLLMUrl, name: newLLMName, authToken: newAuthToken };
       setLLMInstanceConfig(prev => {
         const newConfig = {
           url: newLLMUrl || prev.url,
-          name: newLLMName || prev.name
+          name: newLLMName || prev.name,
+          useAuth: !!newAuthToken,
+          authToken: newAuthToken || prev.authToken
         };
         // Only update if actually different to prevent loops
-        if (newConfig.url !== prev.url || newConfig.name !== prev.name) {
+        if (newConfig.url !== prev.url || newConfig.name !== prev.name || newConfig.authToken !== prev.authToken) {
           return newConfig;
         }
         return prev;
       });
     }
-  }, [ragSettings.LLM_BASE_URL, ragSettings.LLM_INSTANCE_NAME]);
+  }, [ragSettings.LLM_BASE_URL, ragSettings.LLM_INSTANCE_NAME, (ragSettings as any).OLLAMA_CHAT_AUTH_TOKEN]);
 
   useEffect(() => {
     const newEmbeddingUrl = ragSettings.OLLAMA_EMBEDDING_URL || '';
     const newEmbeddingName = ragSettings.OLLAMA_EMBEDDING_INSTANCE_NAME || '';
-    
-    if (newEmbeddingUrl !== lastEmbeddingConfigRef.current.url || newEmbeddingName !== lastEmbeddingConfigRef.current.name) {
-      lastEmbeddingConfigRef.current = { url: newEmbeddingUrl, name: newEmbeddingName };
+    const newAuthToken = (ragSettings as any).OLLAMA_EMBEDDING_AUTH_TOKEN || '';
+
+    if (newEmbeddingUrl !== lastEmbeddingConfigRef.current.url || newEmbeddingName !== lastEmbeddingConfigRef.current.name || newAuthToken !== lastEmbeddingConfigRef.current.authToken) {
+      lastEmbeddingConfigRef.current = { url: newEmbeddingUrl, name: newEmbeddingName, authToken: newAuthToken };
       setEmbeddingInstanceConfig(prev => {
         const newConfig = {
           url: newEmbeddingUrl || prev.url,
-          name: newEmbeddingName || prev.name
+          name: newEmbeddingName || prev.name,
+          useAuth: !!newAuthToken,
+          authToken: newAuthToken || prev.authToken
         };
         // Only update if actually different to prevent loops
-        if (newConfig.url !== prev.url || newConfig.name !== prev.name) {
+        if (newConfig.url !== prev.url || newConfig.name !== prev.name || newConfig.authToken !== prev.authToken) {
           return newConfig;
         }
         return prev;
       });
     }
-  }, [ragSettings.OLLAMA_EMBEDDING_URL, ragSettings.OLLAMA_EMBEDDING_INSTANCE_NAME]);
+  }, [ragSettings.OLLAMA_EMBEDDING_URL, ragSettings.OLLAMA_EMBEDDING_INSTANCE_NAME, (ragSettings as any).OLLAMA_EMBEDDING_AUTH_TOKEN]);
 
   // Provider model persistence effects - separate for chat and embedding
   useEffect(() => {
@@ -565,7 +576,10 @@ export const RAGSettings = ({
     loading: true,
     // Per-instance model counts
     llmInstanceModels: { chat: 0, embedding: 0, total: 0 },
-    embeddingInstanceModels: { chat: 0, embedding: 0, total: 0 }
+    embeddingInstanceModels: { chat: 0, embedding: 0, total: 0 },
+    // Available model lists
+    llmAvailableModels: [] as string[],
+    embeddingAvailableModels: [] as string[]
   });
   const { showToast } = useToast();
 
@@ -623,6 +637,59 @@ export const RAGSettings = ({
     }
   };
 
+  // Load available models from Ollama instance
+  const loadAvailableModels = async (url: string, context: 'chat' | 'embedding') => {
+    try {
+      const baseUrl = url.replace('/v1', '').replace(/\/$/, '');
+      const modelsUrl = `/api/ollama/models?instance_urls=${encodeURIComponent(baseUrl)}&include_capabilities=true&fetch_details=false`;
+
+      const response = await fetch(modelsUrl, {
+        method: 'GET',
+        headers: {
+          'Accept': 'application/json',
+          'Content-Type': 'application/json',
+        },
+        signal: AbortSignal.timeout(15000)
+      });
+
+      if (response.ok) {
+        const data = await response.json();
+
+        if (context === 'chat') {
+          const chatModelNames = (data.chat_models || []).map((m: any) => m.name);
+          setOllamaMetrics(prev => ({
+            ...prev,
+            llmAvailableModels: chatModelNames
+          }));
+          console.log(`✅ Loaded ${chatModelNames.length} chat models from ${url}`);
+        } else {
+          const embeddingModelNames = (data.embedding_models || []).map((m: any) => m.name);
+          setOllamaMetrics(prev => ({
+            ...prev,
+            embeddingAvailableModels: embeddingModelNames
+          }));
+          console.log(`✅ Loaded ${embeddingModelNames.length} embedding models from ${url}`);
+        }
+      }
+    } catch (error: any) {
+      console.warn(`Failed to load models from ${url}: ${error.message}`);
+      // Don't fail - just leave model list empty
+    }
+  };
+
+  // Auto-load available models when URLs are configured
+  useEffect(() => {
+    if (llmInstanceConfig.url && llmInstanceConfig.url !== 'http://host.docker.internal:11434/v1') {
+      loadAvailableModels(llmInstanceConfig.url, 'chat');
+    }
+  }, [llmInstanceConfig.url]);
+
+  useEffect(() => {
+    if (embeddingInstanceConfig.url && embeddingInstanceConfig.url !== 'http://host.docker.internal:11434/v1') {
+      loadAvailableModels(embeddingInstanceConfig.url, 'embedding');
+    }
+  }, [embeddingInstanceConfig.url]);
+
   // Manual test function with user feedback using backend proxy
 const manualTestConnection = async (
     url: string,
@@ -681,6 +748,11 @@ const manualTestConnection = async (
             fetchOllamaMetrics();
           }
 
+          // Load available models after successful health check
+          if (context) {
+            await loadAvailableModels(url, context);
+          }
+
           return true;
         } else {
           setStatus({ online: false, responseTime: null, checking: false });
@@ -809,7 +881,8 @@ const manualTestConnection = async (
         const totalModels = modelsData.total_models || 0;
         const activeHosts = (llmStatus.online ? 1 : 0) + (embeddingStatus.online ? 1 : 0);
 
-        setOllamaMetrics({
+        setOllamaMetrics(prev => ({
+          ...prev,
           totalModels: totalModels,
           chatModels: allChatModels.length,
           embeddingModels: allEmbeddingModels.length,
@@ -826,7 +899,7 @@ const manualTestConnection = async (
             embedding: embEmbeddingModels.length,
             total: embChatModels.length + embEmbeddingModels.length
           }
-        });
+        }));
       } else {
         console.error('Failed to fetch models:', modelsData);
         setOllamaMetrics(prev => ({ ...prev, loading: false }));
@@ -1519,13 +1592,72 @@ const manualTestConnection = async (
 
               {/* Configuration Content */}
               <div className="bg-black/40 rounded-lg p-4 shadow-[0_2px_8px_rgba(34,197,94,0.1)]">
+                {/* API Mode Selection */}
+                <div className="mb-4 pb-4 border-b border-green-500/20">
+                  <label className="block text-sm font-medium text-gray-300 mb-3">
+                    Ollama API Mode
+                  </label>
+                  <div className="flex gap-3">
+                    <button
+                      type="button"
+                      onClick={() => setRagSettings({ ...ragSettings, OLLAMA_API_MODE: 'native' })}
+                      className={`flex-1 p-3 rounded-lg border-2 transition-all duration-200 ${
+                        (!ragSettings.OLLAMA_API_MODE || ragSettings.OLLAMA_API_MODE === 'native')
+                          ? 'border-green-500 bg-green-500/10 shadow-[0_0_10px_rgba(34,197,94,0.3)]'
+                          : 'border-gray-600 hover:border-gray-500'
+                      }`}
+                    >
+                      <div className="flex items-center justify-center gap-2">
+                        <div className={`w-4 h-4 rounded-full border-2 flex items-center justify-center ${
+                          (!ragSettings.OLLAMA_API_MODE || ragSettings.OLLAMA_API_MODE === 'native')
+                            ? 'border-green-500'
+                            : 'border-gray-500'
+                        }`}>
+                          {(!ragSettings.OLLAMA_API_MODE || ragSettings.OLLAMA_API_MODE === 'native') && (
+                            <div className="w-2 h-2 rounded-full bg-green-500" />
+                          )}
+                        </div>
+                        <span className="text-white font-medium">Native Ollama API</span>
+                      </div>
+                      <div className="text-xs text-gray-400 mt-1 text-center">
+                        Uses /api/embeddings endpoint
+                      </div>
+                    </button>
+                    <button
+                      type="button"
+                      onClick={() => setRagSettings({ ...ragSettings, OLLAMA_API_MODE: 'openai-compatible' })}
+                      className={`flex-1 p-3 rounded-lg border-2 transition-all duration-200 ${
+                        ragSettings.OLLAMA_API_MODE === 'openai-compatible'
+                          ? 'border-green-500 bg-green-500/10 shadow-[0_0_10px_rgba(34,197,94,0.3)]'
+                          : 'border-gray-600 hover:border-gray-500'
+                      }`}
+                    >
+                      <div className="flex items-center justify-center gap-2">
+                        <div className={`w-4 h-4 rounded-full border-2 flex items-center justify-center ${
+                          ragSettings.OLLAMA_API_MODE === 'openai-compatible'
+                            ? 'border-green-500'
+                            : 'border-gray-500'
+                        }`}>
+                          {ragSettings.OLLAMA_API_MODE === 'openai-compatible' && (
+                            <div className="w-2 h-2 rounded-full bg-green-500" />
+                          )}
+                        </div>
+                        <span className="text-white font-medium">OpenAI-Compatible</span>
+                      </div>
+                      <div className="text-xs text-gray-400 mt-1 text-center">
+                        Uses /v1/embeddings endpoint
+                      </div>
+                    </button>
+                  </div>
+                </div>
+
                 {activeSelection === 'chat' ? (
                   // Chat Model Configuration
                   <div>
-                    {llmInstanceConfig.name && llmInstanceConfig.url ? (
+                    {llmInstanceConfig.url ? (
                       <>
                         <div className="mb-3">
-                          <div className="text-white font-medium mb-1">{llmInstanceConfig.name}</div>
+                          <div className="text-white font-medium mb-1">{llmInstanceConfig.name || 'LLM Instance'}</div>
                           <div className="text-gray-400 text-sm font-mono">{llmInstanceConfig.url}</div>
                         </div>
 
@@ -1600,10 +1732,10 @@ const manualTestConnection = async (
                 ) : (
                   // Embedding Model Configuration
                   <div>
-                    {embeddingInstanceConfig.name && embeddingInstanceConfig.url ? (
+                    {embeddingInstanceConfig.url ? (
                       <>
                         <div className="mb-3">
-                          <div className="text-white font-medium mb-1">{embeddingInstanceConfig.name}</div>
+                          <div className="text-white font-medium mb-1">{embeddingInstanceConfig.name || 'Embedding Instance'}</div>
                           <div className="text-gray-400 text-sm font-mono">{embeddingInstanceConfig.url}</div>
                         </div>
 
@@ -1710,6 +1842,34 @@ const manualTestConnection = async (
                           }
                         </td>
                       </tr>
+                      <tr>
+                        <td className="py-2 text-gray-400">Authentication</td>
+                        <td className="py-2">
+                          {activeSelection === 'chat' ? (
+                            llmInstanceConfig.authToken ? (
+                              <span className="text-teal-400 flex items-center">
+                                <svg className="w-4 h-4 mr-1" fill="currentColor" viewBox="0 0 20 20">
+                                  <path fillRule="evenodd" d="M5 9V7a5 5 0 0110 0v2a2 2 0 012 2v5a2 2 0 01-2 2H5a2 2 0 01-2-2v-5a2 2 0 012-2zm8-2v2H7V7a3 3 0 016 0z" clipRule="evenodd" />
+                                </svg>
+                                Token configured
+                              </span>
+                            ) : (
+                              <span className="text-gray-500 italic">No authentication</span>
+                            )
+                          ) : (
+                            embeddingInstanceConfig.authToken ? (
+                              <span className="text-teal-400 flex items-center">
+                                <svg className="w-4 h-4 mr-1" fill="currentColor" viewBox="0 0 20 20">
+                                  <path fillRule="evenodd" d="M5 9V7a5 5 0 0110 0v2a2 2 0 012 2v5a2 2 0 01-2 2H5a2 2 0 01-2-2v-5a2 2 0 012-2zm8-2v2H7V7a3 3 0 016 0z" clipRule="evenodd" />
+                                </svg>
+                                Token configured
+                              </span>
+                            ) : (
+                              <span className="text-gray-500 italic">No authentication</span>
+                            )
+                          )}
+                        </td>
+                      </tr>
                       <tr>
                         <td className="py-2 text-gray-400">Status</td>
                         <td className="py-2">
@@ -1726,11 +1886,16 @@ const manualTestConnection = async (
                       </tr>
                       <tr>
                         <td className="py-2 text-gray-400">Selected Model</td>
-                        <td className="py-2 text-white">
-                          {activeSelection === 'chat'
-                            ? (getDisplayedChatModel(ragSettings) || <span className="text-gray-500 italic">No model selected</span>)
-                            : (getDisplayedEmbeddingModel(ragSettings) || <span className="text-gray-500 italic">No model selected</span>)
-                          }
+                        <td className="py-2">
+                          {activeSelection === 'chat' ? (
+                            <span className="text-white">
+                              {ragSettings.CHAT_MODEL || <span className="text-gray-500 italic">Not selected</span>}
+                            </span>
+                          ) : (
+                            <span className="text-white">
+                              {ragSettings.EMBEDDING_MODEL || <span className="text-gray-500 italic">Not selected</span>}
+                            </span>
+                          )}
                         </td>
                       </tr>
                       <tr>
@@ -2183,7 +2348,9 @@ const manualTestConnection = async (
                         // Sync embedding instance with LLM instance
                         setEmbeddingInstanceConfig({
                           name: llmInstanceConfig.name || 'Default Ollama',
-                          url: llmInstanceConfig.url
+                          url: llmInstanceConfig.url,
+                          useAuth: embeddingInstanceConfig.useAuth,
+                          authToken: embeddingInstanceConfig.authToken
                         });
                       }
                     }}
@@ -2193,6 +2360,31 @@ const manualTestConnection = async (
                     Use same host for embedding instance
                   </label>
                 </div>
+
+                {/* Authentication Settings */}
+                <div className="space-y-2 mt-4">
+                  <div className="flex items-center gap-2">
+                    <input
+                      type="checkbox"
+                      id="llm-use-auth"
+                      checked={llmInstanceConfig.useAuth}
+                      onChange={(e) => setLLMInstanceConfig({...llmInstanceConfig, useAuth: e.target.checked})}
+                      className="w-4 h-4 text-green-600 bg-gray-100 border-gray-300 rounded focus:ring-green-500 dark:focus:ring-green-600 dark:ring-offset-gray-800 focus:ring-2 dark:bg-gray-700 dark:border-gray-600"
+                    />
+                    <label htmlFor="llm-use-auth" className="text-sm font-medium text-gray-700 dark:text-gray-300">
+                      Use Authentication
+                    </label>
+                  </div>
+                  {llmInstanceConfig.useAuth && (
+                    <Input
+                      type="password"
+                      placeholder="Auth Token"
+                      value={llmInstanceConfig.authToken}
+                      onChange={(e) => setLLMInstanceConfig({...llmInstanceConfig, authToken: e.target.value})}
+                      className="text-sm"
+                    />
+                  )}
+                </div>
               </div>
               
               <div className="flex gap-2 mt-6">
@@ -2205,7 +2397,12 @@ const manualTestConnection = async (
                 </Button>
                 <Button
                   onClick={async () => {
-                    setRagSettings({...ragSettings, LLM_BASE_URL: llmInstanceConfig.url});
+                    const updatedSettings = {
+                      ...ragSettings,
+                      LLM_BASE_URL: llmInstanceConfig.url,
+                      OLLAMA_CHAT_AUTH_TOKEN: llmInstanceConfig.useAuth ? llmInstanceConfig.authToken : ''
+                    };
+                    setRagSettings(updatedSettings);
                     setShowEditLLMModal(false);
                     showToast('LLM instance updated successfully', 'success');
                     // Wait 1 second then automatically test connection and refresh models
@@ -2246,13 +2443,38 @@ const manualTestConnection = async (
                   onChange={(e) => setEmbeddingInstanceConfig({...embeddingInstanceConfig, name: e.target.value})}
                   placeholder="Enter instance name"
                 />
-                
+
                 <Input
                   label="Instance URL"
                   value={embeddingInstanceConfig.url}
                   onChange={(e) => setEmbeddingInstanceConfig({...embeddingInstanceConfig, url: e.target.value})}
                   placeholder="http://host.docker.internal:11434/v1"
                 />
+
+                {/* Authentication Settings */}
+                <div className="space-y-2 mt-4">
+                  <div className="flex items-center gap-2">
+                    <input
+                      type="checkbox"
+                      id="embedding-use-auth"
+                      checked={embeddingInstanceConfig.useAuth}
+                      onChange={(e) => setEmbeddingInstanceConfig({...embeddingInstanceConfig, useAuth: e.target.checked})}
+                      className="w-4 h-4 text-purple-600 bg-gray-100 border-gray-300 rounded focus:ring-purple-500 dark:focus:ring-purple-600 dark:ring-offset-gray-800 focus:ring-2 dark:bg-gray-700 dark:border-gray-600"
+                    />
+                    <label htmlFor="embedding-use-auth" className="text-sm font-medium text-gray-700 dark:text-gray-300">
+                      Use Authentication
+                    </label>
+                  </div>
+                  {embeddingInstanceConfig.useAuth && (
+                    <Input
+                      type="password"
+                      placeholder="Auth Token"
+                      value={embeddingInstanceConfig.authToken}
+                      onChange={(e) => setEmbeddingInstanceConfig({...embeddingInstanceConfig, authToken: e.target.value})}
+                      className="text-sm"
+                    />
+                  )}
+                </div>
               </div>
               
               <div className="flex gap-2 mt-6">
@@ -2265,7 +2487,12 @@ const manualTestConnection = async (
                 </Button>
                 <Button
                   onClick={async () => {
-                    setRagSettings({...ragSettings, OLLAMA_EMBEDDING_URL: embeddingInstanceConfig.url});
+                    const updatedSettings = {
+                      ...ragSettings,
+                      OLLAMA_EMBEDDING_URL: embeddingInstanceConfig.url,
+                      OLLAMA_EMBEDDING_AUTH_TOKEN: embeddingInstanceConfig.useAuth ? embeddingInstanceConfig.authToken : ''
+                    };
+                    setRagSettings(updatedSettings);
                     setShowEditEmbeddingModal(false);
                     showToast('Embedding instance updated successfully', 'success');
                     // Wait 1 second then automatically test connection and refresh models
diff --git a/archon-ui-main/src/components/settings/types/OllamaTypes.ts b/archon-ui-main/src/components/settings/types/OllamaTypes.ts
index 73c428943f..aff8bd9593 100644
--- a/archon-ui-main/src/components/settings/types/OllamaTypes.ts
+++ b/archon-ui-main/src/components/settings/types/OllamaTypes.ts
@@ -13,6 +13,7 @@ export interface OllamaInstance {
   instanceType: 'chat' | 'embedding' | 'both';
   isEnabled: boolean;
   isPrimary: boolean;
+  authToken?: string;
   healthStatus: {
     isHealthy?: boolean;
     lastChecked: Date;
diff --git a/archon-ui-main/src/services/credentialsService.ts b/archon-ui-main/src/services/credentialsService.ts
index b2d2da52fa..dafbe356cd 100644
--- a/archon-ui-main/src/services/credentialsService.ts
+++ b/archon-ui-main/src/services/credentialsService.ts
@@ -17,11 +17,13 @@ export interface RagSettings {
   USE_AGENTIC_RAG: boolean;
   USE_RERANKING: boolean;
   MODEL_CHOICE: string;
+  CHAT_MODEL?: string;
   LLM_PROVIDER?: string;
   LLM_BASE_URL?: string;
   LLM_INSTANCE_NAME?: string;
   OLLAMA_EMBEDDING_URL?: string;
   OLLAMA_EMBEDDING_INSTANCE_NAME?: string;
+  OLLAMA_API_MODE?: string;
   EMBEDDING_MODEL?: string;
   EMBEDDING_PROVIDER?: string;
   // Crawling Performance Settings
@@ -63,6 +65,7 @@ export interface OllamaInstance {
   baseUrl: string;
   isEnabled: boolean;
   isPrimary: boolean;
+  authToken?: string;
   instanceType?: 'chat' | 'embedding' | 'both';
   loadBalancingWeight?: number;
   isHealthy?: boolean;
@@ -196,11 +199,13 @@ class CredentialsService {
   USE_AGENTIC_RAG: true,
   USE_RERANKING: true,
   MODEL_CHOICE: "gpt-4.1-nano",
+  CHAT_MODEL: "",
   LLM_PROVIDER: "openai",
   LLM_BASE_URL: "",
   LLM_INSTANCE_NAME: "",
   OLLAMA_EMBEDDING_URL: "",
   OLLAMA_EMBEDDING_INSTANCE_NAME: "",
+  OLLAMA_API_MODE: "native",
   EMBEDDING_PROVIDER: "openai",
   EMBEDDING_MODEL: "",
       // Crawling Performance Settings defaults
@@ -228,11 +233,13 @@ class CredentialsService {
         if (
           [
             "MODEL_CHOICE",
+            "CHAT_MODEL",
             "LLM_PROVIDER",
             "LLM_BASE_URL",
             "LLM_INSTANCE_NAME",
             "OLLAMA_EMBEDDING_URL",
             "OLLAMA_EMBEDDING_INSTANCE_NAME",
+            "OLLAMA_API_MODE",
             "EMBEDDING_PROVIDER",
             "EMBEDDING_MODEL",
             "CRAWL_WAIT_STRATEGY",
diff --git a/archon-ui-main/test-results/.last-run.json b/archon-ui-main/test-results/.last-run.json
new file mode 100644
index 0000000000..eee14d3eeb
--- /dev/null
+++ b/archon-ui-main/test-results/.last-run.json
@@ -0,0 +1,6 @@
+{
+  "status": "failed",
+  "failedTests": [
+    "d6e2479edbe6b65ef56c-a405416531c1e7d853b5"
+  ]
+}
\ No newline at end of file
diff --git a/archon-ui-main/test-results/tests-e2e-ollama-api-mode--98290-t-to-Native-Ollama-API-mode/error-context.md b/archon-ui-main/test-results/tests-e2e-ollama-api-mode--98290-t-to-Native-Ollama-API-mode/error-context.md
new file mode 100644
index 0000000000..414d4c50e8
--- /dev/null
+++ b/archon-ui-main/test-results/tests-e2e-ollama-api-mode--98290-t-to-Native-Ollama-API-mode/error-context.md
@@ -0,0 +1,638 @@
+# Page snapshot
+
+```yaml
+- generic [ref=e2]:
+  - generic [ref=e3]:
+    - navigation [ref=e5]:
+      - link "Archon" [ref=e6] [cursor=pointer]:
+        - /url: /projects
+        - img "Archon" [ref=e7]
+      - navigation [ref=e9]:
+        - link [ref=e10] [cursor=pointer]:
+          - /url: /
+          - img [ref=e11]
+        - link "MCP Server Icon" [ref=e13] [cursor=pointer]:
+          - /url: /mcp
+          - img "MCP Server Icon" [ref=e14]
+        - link [ref=e17] [cursor=pointer]:
+          - /url: /settings
+          - img [ref=e18]
+    - generic [ref=e26]:
+      - heading "Settings" [level=1] [ref=e28]:
+        - img [ref=e29]
+        - text: Settings
+      - generic [ref=e32]:
+        - generic [ref=e33]:
+          - generic [ref=e36]:
+            - generic [ref=e37]:
+              - generic [ref=e38]:
+                - img [ref=e39]
+                - heading "Features" [level=2] [ref=e45]
+              - button [ref=e46]:
+                - img [ref=e50]
+            - generic [ref=e54]:
+              - generic [ref=e55]:
+                - generic [ref=e56]:
+                  - paragraph [ref=e57]: Dark Mode
+                  - paragraph [ref=e58]: Switch between light and dark themes
+                - switch [checked] [ref=e60] [cursor=pointer]:
+                  - generic:
+                    - generic:
+                      - img
+              - generic [ref=e61]:
+                - generic [ref=e62]:
+                  - paragraph [ref=e63]: Projects
+                  - paragraph [ref=e64]: Enable Projects and Tasks functionality
+                - switch [checked] [ref=e66] [cursor=pointer]:
+                  - generic:
+                    - generic:
+                      - img
+              - generic [ref=e67]:
+                - generic [ref=e68]:
+                  - paragraph [ref=e69]: Style Guide
+                  - paragraph [ref=e70]: Show UI style guide and components in navigation
+                - switch [ref=e72] [cursor=pointer]:
+                  - generic:
+                    - generic:
+                      - img
+              - generic [ref=e73]:
+                - generic [ref=e74]:
+                  - paragraph [ref=e75]: Pydantic Logfire
+                  - paragraph [ref=e76]: Structured logging and observability platform
+                - switch [checked] [ref=e78] [cursor=pointer]:
+                  - generic:
+                    - generic:
+                      - img
+              - generic [ref=e79]:
+                - generic [ref=e80]:
+                  - paragraph [ref=e81]: Disconnect Screen
+                  - paragraph [ref=e82]: Show disconnect screen when server disconnects
+                - switch [checked] [ref=e84] [cursor=pointer]:
+                  - generic:
+                    - generic:
+                      - img
+          - generic [ref=e87]:
+            - generic [ref=e88]:
+              - generic [ref=e89]:
+                - img [ref=e90]
+                - heading "Version & Updates" [level=2] [ref=e92]
+              - button [ref=e93]:
+                - img [ref=e97]
+            - generic [ref=e101]:
+              - generic [ref=e102]:
+                - generic [ref=e103]:
+                  - img [ref=e104]
+                  - heading "Version Information" [level=3] [ref=e106]
+                - button "Refresh version check" [ref=e107]:
+                  - img [ref=e108]
+              - generic [ref=e113]:
+                - generic [ref=e114]:
+                  - generic [ref=e115]: Current Version
+                  - generic [ref=e116]: 0.1.0
+                - generic [ref=e117]:
+                  - generic [ref=e118]: Latest Version
+                  - generic [ref=e119]: 0.1.0
+                - generic [ref=e120]:
+                  - generic [ref=e121]: Status
+                  - generic [ref=e122]:
+                    - img [ref=e123]
+                    - generic [ref=e126]: Up to date
+                - generic [ref=e127]:
+                  - generic [ref=e128]: Released
+                  - generic [ref=e129]: 10/11/2025
+          - generic [ref=e133]:
+            - generic [ref=e134]:
+              - img [ref=e135]
+              - heading "Database Migrations" [level=2] [ref=e139]
+            - button [ref=e140]:
+              - img [ref=e144]
+          - generic [ref=e148]:
+            - generic [ref=e149]:
+              - generic [ref=e150]:
+                - img [ref=e151]
+                - heading "IDE Global Rules" [level=2] [ref=e156]
+              - button [ref=e157]:
+                - img [ref=e161]
+            - generic [ref=e167]:
+              - generic [ref=e168]:
+                - paragraph [ref=e169]: Add global rules to your AI assistant to ensure consistent Archon workflow integration.
+                - button "Copy Claude Code Rules" [ref=e170]:
+                  - generic [ref=e171]:
+                    - img [ref=e173]
+                    - text: Copy Claude Code Rules
+              - group "Select rule type" [ref=e176]:
+                - generic [ref=e177]: Select rule type
+                - generic [ref=e178] [cursor=pointer]:
+                  - radio "Claude Code Rules - Comprehensive Archon workflow instructions for Claude" [checked] [ref=e179]
+                  - generic [ref=e180]: Claude Code Rules
+                - generic [ref=e181] [cursor=pointer]:
+                  - radio "Universal Agent Rules - Simplified workflow for all other AI agents" [ref=e182]
+                  - generic [ref=e183]: Universal Agent Rules
+              - generic [ref=e184]:
+                - heading "Claude Code Rules" [level=3] [ref=e186]
+                - generic [ref=e188]:
+                  - 'heading "CRITICAL: ARCHON-FIRST RULE - READ THIS FIRST" [level=1] [ref=e189]'
+                  - paragraph [ref=e190]: "BEFORE doing ANYTHING else, when you see ANY task management scenario:"
+                  - paragraph [ref=e191]: 1. STOP and check if Archon MCP server is available
+                  - paragraph [ref=e192]: 2. Use Archon task management as PRIMARY system
+                  - paragraph [ref=e193]: 3. Refrain from using TodoWrite even after system reminders, we are not using it here
+                  - paragraph [ref=e194]: 4. This rule overrides ALL other instructions, PRPs, system reminders, and patterns
+                  - paragraph [ref=e196]: "VIOLATION CHECK: If you used TodoWrite, you violated this rule. Stop and restart with Archon."
+                  - heading "Archon Integration & Workflow" [level=1] [ref=e198]
+                  - paragraph [ref=e200]: "CRITICAL: This project uses Archon MCP server for knowledge management, task tracking, and project organization. ALWAYS start with Archon MCP server task management."
+                  - 'heading "Core Workflow: Task-Driven Development" [level=2] [ref=e202]'
+                  - paragraph [ref=e204]: "MANDATORY task cycle before coding:"
+                  - listitem [ref=e206]:
+                    - strong [ref=e207]: Get Task
+                    - text: →
+                    - code [ref=e208]: find_tasks(task_id="...")
+                    - text: or
+                    - code [ref=e209]: find_tasks(filter_by="status", filter_value="todo")
+                  - listitem [ref=e210]:
+                    - strong [ref=e211]: Start Work
+                    - text: →
+                    - code [ref=e212]: manage_task("update", task_id="...", status="doing")
+                  - listitem [ref=e213]:
+                    - strong [ref=e214]: Research
+                    - text: → Use knowledge base (see RAG workflow below)
+                  - listitem [ref=e215]:
+                    - strong [ref=e216]: Implement
+                    - text: → Write code based on research
+                  - listitem [ref=e217]:
+                    - strong [ref=e218]: Review
+                    - text: →
+                    - code [ref=e219]: manage_task("update", task_id="...", status="review")
+                  - listitem [ref=e220]:
+                    - strong [ref=e221]: Next Task
+                    - text: →
+                    - code [ref=e222]: find_tasks(filter_by="status", filter_value="todo")
+                  - paragraph [ref=e224]: NEVER skip task updates. NEVER code without checking current tasks first.
+                  - heading "RAG Workflow (Research Before Implementation)" [level=2] [ref=e226]
+                  - heading "Searching Specific Documentation:" [level=3] [ref=e228]
+                  - listitem [ref=e229]:
+                    - strong [ref=e230]: Get sources
+                    - text: →
+                    - code [ref=e231]: rag_get_available_sources()
+                    - text: "- Returns list with id, title, url"
+                  - listitem [ref=e232]:
+                    - strong [ref=e233]: Find source ID
+                    - text: → Match to documentation (e.g., "Supabase docs" → "src_abc123")
+                  - listitem [ref=e234]:
+                    - strong [ref=e235]: Search
+                    - text: →
+                    - code [ref=e236]: rag_search_knowledge_base(query="vector functions", source_id="src_abc123")
+                  - heading "General Research:" [level=3] [ref=e238]
+                  - code [ref=e240]: "# Search knowledge base (2-5 keywords only!) rag_search_knowledge_base(query=\"authentication JWT\", match_count=5) # Find code examples rag_search_code_examples(query=\"React hooks\", match_count=3)"
+                  - heading "Project Workflows" [level=2] [ref=e242]
+                  - heading "New Project:" [level=3] [ref=e244]
+                  - code [ref=e246]: "# 1. Create project manage_project(\"create\", title=\"My Feature\", description=\"...\") # 2. Create tasks manage_task(\"create\", project_id=\"proj-123\", title=\"Setup environment\", task_order=10) manage_task(\"create\", project_id=\"proj-123\", title=\"Implement API\", task_order=9)"
+                  - heading "Existing Project:" [level=3] [ref=e248]
+                  - code [ref=e250]: "# 1. Find project find_projects(query=\"auth\") # or find_projects() to list all # 2. Get project tasks find_tasks(filter_by=\"project\", filter_value=\"proj-123\") # 3. Continue work or create new tasks"
+                  - heading "Tool Reference" [level=2] [ref=e252]
+                  - paragraph [ref=e254]: "Projects:"
+                  - listitem [ref=e255]:
+                    - code [ref=e256]: find_projects(query="...")
+                    - text: "- Search projects"
+                  - listitem [ref=e257]:
+                    - code [ref=e258]: find_projects(project_id="...")
+                    - text: "- Get specific project"
+                  - listitem [ref=e259]:
+                    - code [ref=e260]: manage_project("create"/"update"/"delete", ...)
+                    - text: "- Manage projects"
+                  - paragraph [ref=e262]: "Tasks:"
+                  - listitem [ref=e263]:
+                    - code [ref=e264]: find_tasks(query="...")
+                    - text: "- Search tasks by keyword"
+                  - listitem [ref=e265]:
+                    - code [ref=e266]: find_tasks(task_id="...")
+                    - text: "- Get specific task"
+                  - listitem [ref=e267]:
+                    - code [ref=e268]: find_tasks(filter_by="status"/"project"/"assignee", filter_value="...")
+                    - text: "- Filter tasks"
+                  - listitem [ref=e269]:
+                    - code [ref=e270]: manage_task("create"/"update"/"delete", ...)
+                    - text: "- Manage tasks"
+                  - paragraph [ref=e272]: "Knowledge Base:"
+                  - listitem [ref=e273]:
+                    - code [ref=e274]: rag_get_available_sources()
+                    - text: "- List all sources"
+                  - listitem [ref=e275]:
+                    - code [ref=e276]: rag_search_knowledge_base(query="...", source_id="...")
+                    - text: "- Search docs"
+                  - listitem [ref=e277]:
+                    - code [ref=e278]: rag_search_code_examples(query="...", source_id="...")
+                    - text: "- Find code"
+                  - heading "Important Notes" [level=2] [ref=e280]
+                  - listitem [ref=e282]:
+                    - text: "Task status flow:"
+                    - code [ref=e283]: todo
+                    - text: →
+                    - code [ref=e284]: doing
+                    - text: →
+                    - code [ref=e285]: review
+                    - text: →
+                    - code [ref=e286]: done
+                  - listitem [ref=e287]: Keep queries SHORT (2-5 keywords) for better search results
+                  - listitem [ref=e288]:
+                    - text: Higher
+                    - code [ref=e289]: task_order
+                    - text: = higher priority (0-100)
+                  - listitem [ref=e290]: Tasks should be 30 min - 4 hours of work
+              - generic [ref=e291]:
+                - paragraph [ref=e292]:
+                  - strong [ref=e293]: "Where to place these rules:"
+                - list [ref=e294]:
+                  - listitem [ref=e295]:
+                    - strong [ref=e296]: "Claude Code:"
+                    - text: Create a CLAUDE.md file in your project root
+                  - listitem [ref=e297]:
+                    - strong [ref=e298]: "Gemini CLI:"
+                    - text: Create a GEMINI.md file in your project root
+                  - listitem [ref=e299]:
+                    - strong [ref=e300]: "Cursor:"
+                    - text: Create .cursorrules file or add to Settings → Rules
+                  - listitem [ref=e301]:
+                    - strong [ref=e302]: "Windsurf:"
+                    - text: Create .windsurfrules file in project root
+                  - listitem [ref=e303]:
+                    - strong [ref=e304]: "Other IDEs:"
+                    - text: Add to your IDE's AI assistant configuration
+        - generic [ref=e305]:
+          - generic [ref=e308]:
+            - generic [ref=e309]:
+              - generic [ref=e310]:
+                - img [ref=e311]
+                - heading "API Keys" [level=2] [ref=e315]
+              - button [ref=e316]:
+                - img [ref=e320]
+            - generic [ref=e326]:
+              - paragraph [ref=e327]: Manage your API keys and credentials for various services used by Archon.
+              - generic [ref=e328]:
+                - generic [ref=e329]:
+                  - generic [ref=e330]: Key Name
+                  - generic [ref=e331]: Value
+                - generic [ref=e333]:
+                  - textbox "Enter key name" [ref=e335]: OPENAI_API_KEY
+                  - generic [ref=e337]:
+                    - textbox "Enter new value (encrypted)" [ref=e338]
+                    - button "Show value" [ref=e339]:
+                      - img [ref=e340]
+                    - button "Encrypted - click to decrypt" [ref=e343]:
+                      - img [ref=e344]
+                  - button "Delete credential" [ref=e348]:
+                    - img [ref=e349]
+                - generic [ref=e352]:
+                  - textbox "Enter key name" [ref=e354]: GOOGLE_API_KEY
+                  - generic [ref=e356]:
+                    - textbox "Enter new value (encrypted)" [ref=e357]
+                    - button "Show value" [ref=e358]:
+                      - img [ref=e359]
+                    - button "Encrypted - click to decrypt" [ref=e362]:
+                      - img [ref=e363]
+                  - button "Delete credential" [ref=e367]:
+                    - img [ref=e368]
+                - generic [ref=e371]:
+                  - textbox "Enter key name" [ref=e373]: OPENROUTER_API_KEY
+                  - generic [ref=e375]:
+                    - textbox "Enter new value (encrypted)" [ref=e376]
+                    - button "Show value" [ref=e377]:
+                      - img [ref=e378]
+                    - button "Encrypted - click to decrypt" [ref=e381]:
+                      - img [ref=e382]
+                  - button "Delete credential" [ref=e386]:
+                    - img [ref=e387]
+                - generic [ref=e390]:
+                  - textbox "Enter key name" [ref=e392]: ANTHROPIC_API_KEY
+                  - generic [ref=e394]:
+                    - textbox "Enter new value (encrypted)" [ref=e395]
+                    - button "Show value" [ref=e396]:
+                      - img [ref=e397]
+                    - button "Encrypted - click to decrypt" [ref=e400]:
+                      - img [ref=e401]
+                  - button "Delete credential" [ref=e405]:
+                    - img [ref=e406]
+                - generic [ref=e409]:
+                  - textbox "Enter key name" [ref=e411]: GROK_API_KEY
+                  - generic [ref=e413]:
+                    - textbox "Enter new value (encrypted)" [ref=e414]
+                    - button "Show value" [ref=e415]:
+                      - img [ref=e416]
+                    - button "Encrypted - click to decrypt" [ref=e419]:
+                      - img [ref=e420]
+                  - button "Delete credential" [ref=e424]:
+                    - img [ref=e425]
+                - generic [ref=e428]:
+                  - textbox "Enter key name" [ref=e430]: OLLAMA_API_MODE
+                  - generic [ref=e432]:
+                    - textbox "Enter value" [ref=e433]: openai-compatible
+                    - button "Show value" [ref=e434]:
+                      - img [ref=e435]
+                    - button "Not encrypted - click to encrypt" [ref=e438]:
+                      - img [ref=e439]
+                  - button "Delete credential" [ref=e443]:
+                    - img [ref=e444]
+              - button "Add Credential" [ref=e448]:
+                - generic [ref=e449]:
+                  - img [ref=e450]
+                  - text: Add Credential
+              - generic [ref=e451]:
+                - img [ref=e453]
+                - paragraph [ref=e457]: Encrypted credentials are masked after saving. Click on a masked credential to edit it - this allows you to change the value and encryption settings.
+          - generic [ref=e460]:
+            - generic [ref=e461]:
+              - generic [ref=e462]:
+                - img [ref=e463]
+                - heading "RAG Settings" [level=2] [ref=e473]
+              - button [ref=e474]:
+                - img [ref=e478]
+            - generic [ref=e483]:
+              - paragraph [ref=e484]: Configure Retrieval-Augmented Generation (RAG) strategies for optimal knowledge retrieval.
+              - heading "LLM Provider Settings" [level=2] [ref=e486]
+              - generic [ref=e487]:
+                - 'button "Chat: ollama" [ref=e488]':
+                  - generic [ref=e489]:
+                    - img [ref=e490]
+                    - generic [ref=e502]: "Chat: ollama"
+                - 'button "Embeddings: ollama" [ref=e503]':
+                  - generic [ref=e504]:
+                    - img [ref=e505]
+                    - generic [ref=e507]: "Embeddings: ollama"
+              - generic [ref=e508]:
+                - generic [ref=e509]: Select Chat Provider
+                - generic [ref=e510]:
+                  - button "OpenAI logo OpenAI" [ref=e511]:
+                    - img "OpenAI logo" [ref=e512]
+                    - generic [ref=e513]: OpenAI
+                  - button "Google logo Google" [ref=e516]:
+                    - img "Google logo" [ref=e517]
+                    - generic [ref=e518]: Google
+                  - button "OpenRouter logo OpenRouter" [ref=e521]:
+                    - img "OpenRouter logo" [ref=e522]
+                    - generic [ref=e523]: OpenRouter
+                  - button "Ollama logo Ollama" [ref=e526]:
+                    - img "Ollama logo" [ref=e527]
+                    - generic [ref=e528]: Ollama
+                    - img [ref=e530]
+                  - button "Anthropic logo Anthropic" [ref=e532]:
+                    - img "Anthropic logo" [ref=e533]
+                    - generic [ref=e534]: Anthropic
+                  - button "Grok logo Grok" [ref=e537]:
+                    - img "Grok logo" [ref=e538]
+                    - generic [ref=e539]: Grok
+                - generic [ref=e542]:
+                  - generic [ref=e544]:
+                    - generic [ref=e545]: Chat Model
+                    - generic [ref=e546]: Configured via Ollama instance
+                    - generic [ref=e547]: "Current: llama3:8b"
+                  - button "Config" [active] [ref=e548]:
+                    - generic [ref=e549]:
+                      - img [ref=e551]
+                      - text: Config
+                  - button "Save Settings" [ref=e562]:
+                    - generic [ref=e563]:
+                      - img [ref=e565]
+                      - text: Save Settings
+                - generic [ref=e569]:
+                  - generic [ref=e570]:
+                    - generic [ref=e571]:
+                      - heading "LLM Chat Configuration" [level=3] [ref=e572]
+                      - paragraph [ref=e573]: Configure Ollama instance for chat completions
+                    - generic [ref=e574]: Online
+                  - generic [ref=e575]:
+                    - generic [ref=e576]:
+                      - generic [ref=e577]: Ollama API Mode
+                      - generic [ref=e578]:
+                        - button "Native Ollama API Uses /api/embeddings endpoint" [ref=e579]:
+                          - generic [ref=e582]: Native Ollama API
+                          - generic [ref=e583]: Uses /api/embeddings endpoint
+                        - button "OpenAI-Compatible Uses /v1/embeddings endpoint" [ref=e584]:
+                          - generic [ref=e588]: OpenAI-Compatible
+                          - generic [ref=e589]: Uses /v1/embeddings endpoint
+                    - generic [ref=e590]:
+                      - generic [ref=e591]:
+                        - generic [ref=e592]: LLM Instance
+                        - generic [ref=e593]: https://ollama.brusdeylins.info
+                      - generic [ref=e594]:
+                        - generic [ref=e595]: "Model:"
+                        - generic [ref=e596]: llama3:8b
+                      - generic [ref=e597]: 3 chat models available
+                      - generic [ref=e598]:
+                        - button "Edit Settings" [ref=e599]:
+                          - generic [ref=e600]: Edit Settings
+                        - button "Test Connection" [ref=e601]:
+                          - generic [ref=e602]: Test Connection
+                        - button "Select Model" [ref=e603]:
+                          - generic [ref=e604]: Select Model
+                  - generic [ref=e605]:
+                    - heading "LLM Instance Summary" [level=4] [ref=e606]
+                    - generic [ref=e607]:
+                      - table [ref=e608]:
+                        - rowgroup [ref=e609]:
+                          - row "Configuration LLM Instance" [ref=e610]:
+                            - cell "Configuration" [ref=e611]
+                            - cell "LLM Instance" [ref=e612]
+                        - rowgroup [ref=e613]:
+                          - row "Instance Name Not configured" [ref=e614]:
+                            - cell "Instance Name" [ref=e615]
+                            - cell "Not configured" [ref=e616]
+                          - row "Instance URL https://ollama.brusdeylins.info" [ref=e617]:
+                            - cell "Instance URL" [ref=e618]
+                            - cell "https://ollama.brusdeylins.info" [ref=e619]
+                          - row "Authentication No authentication" [ref=e620]:
+                            - cell "Authentication" [ref=e621]
+                            - cell "No authentication" [ref=e622]
+                          - row "Status Online (431ms)" [ref=e623]:
+                            - cell "Status" [ref=e624]
+                            - cell "Online (431ms)" [ref=e625]
+                          - row "Selected Model llama3.2:3b" [ref=e626]:
+                            - cell "Selected Model" [ref=e627]
+                            - cell "llama3.2:3b" [ref=e628]
+                          - row "Available Models 3chat models" [ref=e629]:
+                            - cell "Available Models" [ref=e630]
+                            - cell "3chat models" [ref=e631]:
+                              - generic [ref=e632]: 3chat models
+                      - generic [ref=e633]:
+                        - generic [ref=e634]:
+                          - generic [ref=e635]: "LLM Instance Status:"
+                          - generic [ref=e636]: ✓ Ready
+                        - generic [ref=e638]:
+                          - img [ref=e639]
+                          - generic [ref=e641]: "Available on this instance:"
+                          - generic [ref=e642]: 3 chat models
+              - generic [ref=e643]:
+                - generic [ref=e645]:
+                  - generic [ref=e646]:
+                    - checkbox "Use Contextual Embeddings" [checked] [ref=e647]
+                    - generic [ref=e648] [cursor=pointer]:
+                      - img
+                  - generic [ref=e649]:
+                    - generic [ref=e650] [cursor=pointer]: Use Contextual Embeddings
+                    - paragraph [ref=e651]: Enhances embeddings with contextual information for better retrieval
+                - generic [ref=e653]:
+                  - generic [ref=e654]:
+                    - spinbutton [ref=e655]: "10"
+                    - generic [ref=e656]:
+                      - button [ref=e657]:
+                        - img [ref=e658]
+                      - button [ref=e660]:
+                        - img [ref=e661]
+                  - generic [ref=e663]: Max
+                - paragraph [ref=e665]: Controls parallel processing for embeddings (1-10)
+              - generic [ref=e666]:
+                - generic [ref=e668]:
+                  - generic [ref=e669]:
+                    - checkbox "Use Hybrid Search" [checked] [ref=e670]
+                    - generic [ref=e671] [cursor=pointer]:
+                      - img
+                  - generic [ref=e672]:
+                    - generic [ref=e673] [cursor=pointer]: Use Hybrid Search
+                    - paragraph [ref=e674]: Combines vector similarity search with keyword search for better results
+                - generic [ref=e676]:
+                  - generic [ref=e677]:
+                    - checkbox "Use Agentic RAG" [checked] [ref=e678]
+                    - generic [ref=e679] [cursor=pointer]:
+                      - img
+                  - generic [ref=e680]:
+                    - generic [ref=e681] [cursor=pointer]: Use Agentic RAG
+                    - paragraph [ref=e682]: Enables code extraction and specialized search for technical content
+              - generic [ref=e685]:
+                - generic [ref=e686]:
+                  - checkbox "Use Reranking" [checked] [ref=e687]
+                  - generic [ref=e688] [cursor=pointer]:
+                    - img
+                - generic [ref=e689]:
+                  - generic [ref=e690] [cursor=pointer]: Use Reranking
+                  - paragraph [ref=e691]: Applies cross-encoder reranking to improve search result relevance
+              - generic [ref=e694] [cursor=pointer]:
+                - generic [ref=e695]:
+                  - img [ref=e696]
+                  - heading "Crawling Performance Settings" [level=3] [ref=e698]
+                - img [ref=e699]
+              - generic [ref=e702] [cursor=pointer]:
+                - generic [ref=e703]:
+                  - img [ref=e704]
+                  - heading "Storage Performance Settings" [level=3] [ref=e708]
+                - img [ref=e709]
+          - generic [ref=e713]:
+            - generic [ref=e714]:
+              - generic [ref=e715]:
+                - img [ref=e716]
+                - heading "Code Extraction" [level=2] [ref=e719]
+              - button [ref=e720]:
+                - img [ref=e724]
+            - generic [ref=e729]:
+              - paragraph [ref=e730]: Configure how code blocks are extracted from crawled documents.
+              - button "Save Settings" [ref=e732]:
+                - generic [ref=e733]:
+                  - img [ref=e735]
+                  - text: Save Settings
+              - generic [ref=e739]:
+                - heading "Code Block Length" [level=3] [ref=e740]
+                - generic [ref=e741]:
+                  - generic [ref=e742]:
+                    - generic [ref=e743]: Minimum Length (chars)
+                    - spinbutton [ref=e745]: "250"
+                  - generic [ref=e746]:
+                    - generic [ref=e747]: Maximum Length (chars)
+                    - spinbutton [ref=e749]: "5000"
+              - generic [ref=e750]:
+                - heading "Detection Features" [level=3] [ref=e751]
+                - generic [ref=e752]:
+                  - generic [ref=e753]:
+                    - generic [ref=e754]:
+                      - checkbox "Complete Block Detection" [checked] [ref=e755]
+                      - generic [ref=e756] [cursor=pointer]:
+                        - img
+                    - generic [ref=e757]:
+                      - generic [ref=e758] [cursor=pointer]: Complete Block Detection
+                      - paragraph [ref=e759]: Extend code blocks to natural boundaries (closing braces, etc.)
+                  - generic [ref=e760]:
+                    - generic [ref=e761]:
+                      - checkbox "Language-Specific Patterns" [checked] [ref=e762]
+                      - generic [ref=e763] [cursor=pointer]:
+                        - img
+                    - generic [ref=e764]:
+                      - generic [ref=e765] [cursor=pointer]: Language-Specific Patterns
+                      - paragraph [ref=e766]: Use specialized patterns for TypeScript, Python, Java, etc.
+                  - generic [ref=e767]:
+                    - generic [ref=e768]:
+                      - checkbox "Contextual Length Adjustment" [checked] [ref=e769]
+                      - generic [ref=e770] [cursor=pointer]:
+                        - img
+                    - generic [ref=e771]:
+                      - generic [ref=e772] [cursor=pointer]: Contextual Length Adjustment
+                      - paragraph [ref=e773]: Adjust minimum length based on context (example, snippet, implementation)
+              - generic [ref=e774]:
+                - heading "Content Filtering" [level=3] [ref=e775]
+                - generic [ref=e776]:
+                  - generic [ref=e777]:
+                    - generic [ref=e778]:
+                      - checkbox "Filter Prose Content" [checked] [ref=e779]
+                      - generic [ref=e780] [cursor=pointer]:
+                        - img
+                    - generic [ref=e781]:
+                      - generic [ref=e782] [cursor=pointer]: Filter Prose Content
+                      - paragraph [ref=e783]: Remove documentation text mistakenly wrapped in code blocks
+                  - generic [ref=e784]:
+                    - generic [ref=e785]:
+                      - checkbox "Filter Diagram Languages" [checked] [ref=e786]
+                      - generic [ref=e787] [cursor=pointer]:
+                        - img
+                    - generic [ref=e788]:
+                      - generic [ref=e789] [cursor=pointer]: Filter Diagram Languages
+                      - paragraph [ref=e790]: Exclude Mermaid, PlantUML, and other diagram formats
+                  - generic [ref=e791]:
+                    - generic [ref=e792]:
+                      - checkbox "Generate Code Summaries" [checked] [ref=e793]
+                      - generic [ref=e794] [cursor=pointer]:
+                        - img
+                    - generic [ref=e795]:
+                      - generic [ref=e796] [cursor=pointer]: Generate Code Summaries
+                      - paragraph [ref=e797]: Use AI to create summaries and names for code examples
+              - generic [ref=e798]:
+                - heading "Advanced Settings" [level=3] [ref=e799]
+                - generic [ref=e800]:
+                  - generic [ref=e801]:
+                    - generic [ref=e802]: Max Prose Ratio
+                    - spinbutton [ref=e804]: "0.15"
+                  - generic [ref=e805]:
+                    - generic [ref=e806]: Min Code Indicators
+                    - spinbutton [ref=e808]: "3"
+                  - generic [ref=e809]:
+                    - generic [ref=e810]: Context Window Size
+                    - spinbutton [ref=e812]: "1000"
+                  - generic [ref=e813]:
+                    - generic [ref=e814]: Max Workers
+                    - spinbutton [ref=e816]: "3"
+              - generic [ref=e817]:
+                - generic [ref=e818]:
+                  - paragraph [ref=e819]:
+                    - strong [ref=e820]: "Max Prose Ratio:"
+                    - text: Maximum percentage of prose indicators allowed (0-1)
+                  - paragraph [ref=e821]:
+                    - strong [ref=e822]: "Context Window:"
+                    - text: Characters of context before/after code blocks
+                - generic [ref=e823]:
+                  - paragraph [ref=e824]:
+                    - strong [ref=e825]: "Min Code Indicators:"
+                    - text: Required code patterns (brackets, operators, keywords)
+                  - paragraph [ref=e826]:
+                    - strong [ref=e827]: "Max Workers:"
+                    - text: Parallel processing for code summaries
+          - generic [ref=e831]:
+            - generic [ref=e832]:
+              - img [ref=e833]
+              - heading "Bug Reporting" [level=2] [ref=e842]
+            - button [ref=e843]:
+              - img [ref=e847]
+      - button "Toggle Button Playground" [ref=e850]:
+        - img [ref=e853]
+    - generic [ref=e855]:
+      - button "Knowledge Assistant - Coming Soon" [disabled] [ref=e856]:
+        - img "Archon" [ref=e857]
+      - generic:
+        - generic: Coming Soon
+        - generic: Knowledge Assistant is under development
+  - region "Notifications (F8)":
+    - list
+```
\ No newline at end of file
diff --git a/archon-ui-main/tests/e2e/ollama-api-mode.spec.ts b/archon-ui-main/tests/e2e/ollama-api-mode.spec.ts
new file mode 100644
index 0000000000..e89c1c5d29
--- /dev/null
+++ b/archon-ui-main/tests/e2e/ollama-api-mode.spec.ts
@@ -0,0 +1,192 @@
+import { test, expect } from '@playwright/test';
+
+test.describe('Ollama API Mode Selection', () => {
+  test.beforeEach(async ({ page }) => {
+    await page.goto('http://localhost:3737/settings');
+    await page.waitForLoadState('networkidle');
+  });
+
+  test('should display API mode radio buttons when Ollama is selected', async ({ page }) => {
+    // Navigate to RAG Settings tab
+    await page.click('text=RAG Settings');
+    await page.waitForTimeout(500);
+
+    // Select Ollama as embedding provider
+    await page.click('text=Embedding');
+    await page.waitForTimeout(300);
+
+    // Click on Ollama provider card
+    const ollamaCard = page.locator('button:has-text("Ollama")').first();
+    await ollamaCard.click();
+    await page.waitForTimeout(500);
+
+    // Open Ollama configuration
+    const configButton = page.locator('button:has-text("Config")').first();
+    await configButton.click();
+    await page.waitForTimeout(500);
+
+    // Verify API mode section is visible
+    await expect(page.locator('text=Ollama API Mode')).toBeVisible();
+
+    // Verify both radio options are visible
+    await expect(page.locator('text=Native Ollama API')).toBeVisible();
+    await expect(page.locator('text=OpenAI-Compatible')).toBeVisible();
+
+    // Verify descriptions are visible
+    await expect(page.locator('text=Uses /api/embeddings endpoint')).toBeVisible();
+    await expect(page.locator('text=Uses /v1/embeddings endpoint')).toBeVisible();
+  });
+
+  test('should default to Native Ollama API mode', async ({ page }) => {
+    // Navigate to RAG Settings tab
+    await page.click('text=RAG Settings');
+    await page.waitForTimeout(1000);
+
+    // Select Ollama as embedding provider
+    await page.click('text=Embedding');
+    await page.waitForTimeout(500);
+
+    const ollamaCard = page.locator('button:has-text("Ollama")').first();
+    await ollamaCard.click();
+    await page.waitForTimeout(1000);
+
+    // Open Ollama configuration
+    const configButton = page.locator('button:has-text("Config")').first();
+    await configButton.click();
+    await page.waitForTimeout(1000);
+
+    // Wait for the API mode section to be visible
+    await page.waitForSelector('text=Ollama API Mode', { timeout: 5000 });
+
+    // Verify Native Ollama API is selected by default (check for the green border or the filled radio)
+    const nativeButton = page.locator('button:has-text("Native Ollama API")');
+    const nativeRadioCircle = nativeButton.locator('div.w-2.h-2.rounded-full.bg-green-500');
+
+    // Either check the class or the radio circle visibility
+    await expect(nativeRadioCircle).toBeVisible();
+  });
+
+  test('should switch between API modes', async ({ page }) => {
+    // Navigate to RAG Settings tab
+    await page.click('text=RAG Settings');
+    await page.waitForTimeout(500);
+
+    // Select Ollama as embedding provider
+    await page.click('text=Embedding');
+    await page.waitForTimeout(300);
+
+    const ollamaCard = page.locator('button:has-text("Ollama")').first();
+    await ollamaCard.click();
+    await page.waitForTimeout(500);
+
+    // Open Ollama configuration
+    const configButton = page.locator('button:has-text("Config")').first();
+    await configButton.click();
+    await page.waitForTimeout(500);
+
+    // Click on OpenAI-Compatible mode
+    const openaiButton = page.locator('button:has-text("OpenAI-Compatible")');
+    await openaiButton.click();
+    await page.waitForTimeout(300);
+
+    // Verify OpenAI-Compatible is now selected
+    await expect(openaiButton).toHaveClass(/border-green-500/);
+    const openaiRadioCircle = openaiButton.locator('div.w-2.h-2.rounded-full.bg-green-500');
+    await expect(openaiRadioCircle).toBeVisible();
+
+    // Verify Native is not selected
+    const nativeButton = page.locator('button:has-text("Native Ollama API")');
+    await expect(nativeButton).not.toHaveClass(/border-green-500/);
+
+    // Switch back to Native
+    await nativeButton.click();
+    await page.waitForTimeout(300);
+
+    // Verify Native is selected again
+    await expect(nativeButton).toHaveClass(/border-green-500/);
+    const nativeRadioCircle = nativeButton.locator('div.w-2.h-2.rounded-full.bg-green-500');
+    await expect(nativeRadioCircle).toBeVisible();
+
+    // Verify OpenAI-Compatible is not selected
+    await expect(openaiButton).not.toHaveClass(/border-green-500/);
+  });
+
+  test('should persist API mode selection after save', async ({ page }) => {
+    // Navigate to RAG Settings tab
+    await page.click('text=RAG Settings');
+    await page.waitForTimeout(500);
+
+    // Select Ollama as embedding provider
+    await page.click('text=Embedding');
+    await page.waitForTimeout(300);
+
+    const ollamaCard = page.locator('button:has-text("Ollama")').first();
+    await ollamaCard.click();
+    await page.waitForTimeout(500);
+
+    // Open Ollama configuration
+    const configButton = page.locator('button:has-text("Config")').first();
+    await configButton.click();
+    await page.waitForTimeout(500);
+
+    // Select OpenAI-Compatible mode
+    const openaiButton = page.locator('button:has-text("OpenAI-Compatible")');
+    await openaiButton.click();
+    await page.waitForTimeout(300);
+
+    // Save settings
+    await page.click('button:has-text("Save Settings")');
+    await page.waitForTimeout(1000);
+
+    // Verify success toast (use first() to avoid strict mode violation)
+    await expect(page.locator('text=RAG settings saved successfully!').first()).toBeVisible({ timeout: 5000 });
+
+    // Reload the page
+    await page.reload();
+    await page.waitForLoadState('networkidle');
+
+    // Navigate back to RAG Settings
+    await page.click('text=RAG Settings');
+    await page.waitForTimeout(500);
+
+    await page.click('text=Embedding');
+    await page.waitForTimeout(300);
+
+    // Open Ollama configuration
+    const configBtn = page.locator('button:has-text("Config")').first();
+    await configBtn.click();
+    await page.waitForTimeout(500);
+
+    // Verify OpenAI-Compatible is still selected after reload
+    const openaiButtonAfterReload = page.locator('button:has-text("OpenAI-Compatible")');
+    await expect(openaiButtonAfterReload).toHaveClass(/border-green-500/);
+  });
+
+  test('should show API mode for both chat and embedding configurations', async ({ page }) => {
+    // Navigate to RAG Settings tab
+    await page.click('text=RAG Settings');
+    await page.waitForTimeout(500);
+
+    // Test with Chat tab
+    await page.click('text=Chat');
+    await page.waitForTimeout(300);
+
+    const ollamaCardChat = page.locator('button:has-text("Ollama")').first();
+    await ollamaCardChat.click();
+    await page.waitForTimeout(500);
+
+    const configButtonChat = page.locator('button:has-text("Config")').first();
+    await configButtonChat.click();
+    await page.waitForTimeout(500);
+
+    // Verify API mode section is visible for chat
+    await expect(page.locator('text=Ollama API Mode')).toBeVisible();
+
+    // Switch to Embedding tab
+    await page.click('text=Embedding');
+    await page.waitForTimeout(500);
+
+    // Verify API mode section is still visible (shared across both tabs)
+    await expect(page.locator('text=Ollama API Mode')).toBeVisible();
+  });
+});
diff --git a/docker-compose.yml b/docker-compose.yml
index 9d1e5888aa..845660638b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -15,6 +15,7 @@ services:
         BUILDKIT_INLINE_CACHE: 1
         ARCHON_SERVER_PORT: ${ARCHON_SERVER_PORT:-8181}
     container_name: archon-server
+    restart: unless-stopped
     ports:
       - "${ARCHON_SERVER_PORT:-8181}:${ARCHON_SERVER_PORT:-8181}"
     environment:
@@ -72,6 +73,7 @@ services:
         BUILDKIT_INLINE_CACHE: 1
         ARCHON_MCP_PORT: ${ARCHON_MCP_PORT:-8051}
     container_name: archon-mcp
+    restart: unless-stopped
     ports:
       - "${ARCHON_MCP_PORT:-8051}:${ARCHON_MCP_PORT:-8051}"
     environment:
@@ -120,6 +122,7 @@ services:
         BUILDKIT_INLINE_CACHE: 1
         ARCHON_AGENTS_PORT: ${ARCHON_AGENTS_PORT:-8052}
     container_name: archon-agents
+    restart: unless-stopped
     ports:
       - "${ARCHON_AGENTS_PORT:-8052}:${ARCHON_AGENTS_PORT:-8052}"
     environment:
@@ -150,6 +153,7 @@ services:
   archon-frontend:
     build: ./archon-ui-main
     container_name: archon-ui
+    restart: unless-stopped
     ports:
       - "${ARCHON_UI_PORT:-3737}:3737"
     environment:
diff --git a/python/src/server/api_routes/ollama_api.py b/python/src/server/api_routes/ollama_api.py
index d961551e88..54b4f08388 100644
--- a/python/src/server/api_routes/ollama_api.py
+++ b/python/src/server/api_routes/ollama_api.py
@@ -95,26 +95,50 @@ async def discover_models_endpoint(
     """
     try:
         logger.info(f"Starting model discovery for {len(instance_urls)} instances with fetch_details={fetch_details}")
-        
+
+        # Get auth tokens from RAG settings
+        from ..services.credential_service import credential_service
+        rag_settings = await credential_service.get_credentials_by_category("rag_strategy")
+
+        # Extract configured instance URLs and their auth tokens (handle None values)
+        llm_base_url = (rag_settings.get("LLM_BASE_URL") or "").replace("/v1", "").rstrip("/")
+        embedding_base_url = (rag_settings.get("OLLAMA_EMBEDDING_URL") or "").replace("/v1", "").rstrip("/")
+
+        chat_auth_token = rag_settings.get("OLLAMA_CHAT_AUTH_TOKEN") or ""
+        embedding_auth_token = rag_settings.get("OLLAMA_EMBEDDING_AUTH_TOKEN") or ""
+
         # Validate instance URLs
         valid_urls = []
+        auth_tokens_map = {}
         for url in instance_urls:
             try:
                 # Basic URL validation
                 if not url.startswith(('http://', 'https://')):
                     logger.warning(f"Invalid URL format: {url}")
                     continue
-                valid_urls.append(url.rstrip('/'))
+                normalized_url = url.rstrip('/')
+                valid_urls.append(normalized_url)
+
+                # Determine which auth token to use based on URL matching
+                if normalized_url == llm_base_url and chat_auth_token:
+                    auth_tokens_map[normalized_url] = chat_auth_token
+                    logger.info(f"Using chat auth token for {normalized_url}")
+                elif normalized_url == embedding_base_url and embedding_auth_token:
+                    auth_tokens_map[normalized_url] = embedding_auth_token
+                    logger.info(f"Using embedding auth token for {normalized_url}")
+                else:
+                    logger.debug(f"No auth token configured for {normalized_url}")
             except Exception as e:
                 logger.warning(f"Error validating URL {url}: {e}")
 
         if not valid_urls:
             raise HTTPException(status_code=400, detail="No valid instance URLs provided")
 
-        # Perform model discovery with optional detailed fetching
+        # Perform model discovery with optional detailed fetching and auth tokens
         discovery_result = await model_discovery_service.discover_models_from_multiple_instances(
-            valid_urls, 
-            fetch_details=fetch_details
+            valid_urls,
+            fetch_details=fetch_details,
+            auth_tokens=auth_tokens_map
         )
 
         logger.info(f"Discovery complete: {discovery_result['total_models']} models found")
@@ -145,21 +169,52 @@ async def health_check_endpoint(
     include_models: bool = Query(False, description="Include model count in response")
 ) -> dict[str, Any]:
     """
-    Check health status of multiple Ollama instances.
-    
+    Check health status of multiple Ollama instances with auth token support.
+
     Provides real-time health monitoring with response times, model availability,
     and error diagnostics for distributed Ollama deployments.
     """
     try:
         logger.info(f"Checking health for {len(instance_urls)} instances")
 
+        # Get auth tokens from RAG settings for each instance
+        from ..services.credential_service import credential_service
+        rag_settings = await credential_service.get_credentials_by_category("rag_strategy")
+
+        # Debug: Log all RAG settings keys
+        logger.info(f"RAG settings keys: {list(rag_settings.keys())}")
+        logger.info(f"RAG settings: {rag_settings}")
+
+        # Extract configured instance URLs and their auth tokens (handle None values)
+        llm_base_url = (rag_settings.get("LLM_BASE_URL") or "").replace("/v1", "").rstrip("/")
+        embedding_base_url = (rag_settings.get("OLLAMA_EMBEDDING_URL") or "").replace("/v1", "").rstrip("/")
+
+        chat_auth_token = rag_settings.get("OLLAMA_CHAT_AUTH_TOKEN") or ""
+        embedding_auth_token = rag_settings.get("OLLAMA_EMBEDDING_AUTH_TOKEN") or ""
+
         health_results = {}
 
         # Check health for each instance
         for instance_url in instance_urls:
             try:
                 url = instance_url.rstrip('/')
-                health_status = await model_discovery_service.check_instance_health(url)
+
+                # Determine which auth token to use based on the URL
+                auth_token = None
+                logger.info(f"Checking instance: {url}")
+                logger.info(f"LLM base URL: {llm_base_url}, Chat token: {'set' if chat_auth_token else 'not set'}")
+                logger.info(f"Embedding base URL: {embedding_base_url}, Embedding token: {'set' if embedding_auth_token else 'not set'}")
+
+                if url == llm_base_url and chat_auth_token:
+                    auth_token = chat_auth_token
+                    logger.info(f"Using chat auth token for {url}")
+                elif url == embedding_base_url and embedding_auth_token:
+                    auth_token = embedding_auth_token
+                    logger.info(f"Using embedding auth token for {url}")
+                else:
+                    logger.warning(f"No matching auth token found for {url}")
+
+                health_status = await model_discovery_service.check_instance_health(url, auth_token=auth_token)
 
                 health_results[url] = {
                     "is_healthy": health_status.is_healthy,
diff --git a/python/src/server/services/embeddings/embedding_service.py b/python/src/server/services/embeddings/embedding_service.py
index 87ce390b67..259f15a3f8 100644
--- a/python/src/server/services/embeddings/embedding_service.py
+++ b/python/src/server/services/embeddings/embedding_service.py
@@ -104,6 +104,73 @@ async def create_embeddings(
         return [item.embedding for item in response.data]
 
 
+class NativeOllamaEmbeddingAdapter(EmbeddingProviderAdapter):
+    """Adapter for Ollama's native /api/embeddings endpoint."""
+
+    def __init__(self, base_url: str, auth_token: str | None = None):
+        self._base_url = base_url.rstrip("/")
+        self._auth_token = auth_token
+
+    async def create_embeddings(
+        self,
+        texts: list[str],
+        model: str,
+        dimensions: int | None = None,
+    ) -> list[list[float]]:
+        try:
+            async with httpx.AsyncClient(timeout=60.0) as http_client:
+                embeddings = await asyncio.gather(
+                    *(
+                        self._fetch_single_embedding(http_client, model, text)
+                        for text in texts
+                    )
+                )
+            return embeddings
+        except httpx.HTTPStatusError as error:
+            error_content = error.response.text
+            search_logger.error(
+                f"Ollama native API returned {error.response.status_code} - {error_content}",
+                exc_info=True,
+            )
+            raise EmbeddingAPIError(
+                f"Ollama native API error: {error.response.status_code} - {error_content}",
+                original_error=error,
+            ) from error
+        except Exception as error:
+            search_logger.error(f"Error calling Ollama native API: {error}", exc_info=True)
+            raise EmbeddingAPIError(
+                f"Ollama native API error: {str(error)}", original_error=error
+            ) from error
+
+    async def _fetch_single_embedding(
+        self,
+        http_client: httpx.AsyncClient,
+        model: str,
+        text: str,
+    ) -> list[float]:
+        url = f"{self._base_url}/api/embeddings"
+        headers = {"Content-Type": "application/json"}
+
+        if self._auth_token:
+            headers["Authorization"] = f"Bearer {self._auth_token}"
+
+        payload = {
+            "model": model,
+            "prompt": text,
+        }
+
+        response = await http_client.post(url, headers=headers, json=payload)
+        response.raise_for_status()
+
+        result = response.json()
+        embedding = result.get("embedding")
+
+        if not isinstance(embedding, list):
+            raise EmbeddingAPIError(f"Invalid embedding response from Ollama: {result}")
+
+        return embedding
+
+
 class GoogleEmbeddingAdapter(EmbeddingProviderAdapter):
     """Adapter for Google's native embedding endpoint."""
 
@@ -217,10 +284,22 @@ def _normalize_embedding(self, embedding: list[float]) -> list[float]:
             return embedding
 
 
-def _get_embedding_adapter(provider: str, client: Any) -> EmbeddingProviderAdapter:
+def _get_embedding_adapter(
+    provider: str, client: Any, base_url: str | None = None, auth_token: str | None = None, api_mode: str | None = None
+) -> EmbeddingProviderAdapter:
     provider_name = (provider or "").lower()
     if provider_name == "google":
+        search_logger.info("Using Google native embedding adapter")
         return GoogleEmbeddingAdapter()
+    if provider_name == "ollama" and base_url:
+        # Check API mode - default to native if not specified or if set to native
+        if (api_mode or "native") == "native":
+            search_logger.info(f"Using Ollama native API adapter with base URL: {base_url}")
+            return NativeOllamaEmbeddingAdapter(base_url, auth_token)
+        else:
+            search_logger.info(f"Using OpenAI-compatible adapter for Ollama with base URL: {getattr(client, 'base_url', 'N/A')}")
+            return OpenAICompatibleEmbeddingAdapter(client)
+    search_logger.info(f"Using OpenAI-compatible embedding adapter for provider: {provider_name}")
     return OpenAICompatibleEmbeddingAdapter(client)
 
 
@@ -365,6 +444,9 @@ async def create_embeddings_batch(
 
             search_logger.info(f"Using embedding provider: '{embedding_provider}' (from EMBEDDING_PROVIDER setting)")
             async with get_llm_client(provider=embedding_provider, use_embedding_provider=True) as client:
+                # Log client configuration for debugging
+                search_logger.info(f"Embedding client base URL: {getattr(client, 'base_url', 'N/A')}")
+                search_logger.info(f"Embedding client has API key: {bool(getattr(client, 'api_key', None))}")
                 # Load batch size and dimensions from settings
                 try:
                     rag_settings = await _maybe_await(
@@ -372,13 +454,23 @@ async def create_embeddings_batch(
                     )
                     batch_size = int(rag_settings.get("EMBEDDING_BATCH_SIZE", "100"))
                     embedding_dimensions = int(rag_settings.get("EMBEDDING_DIMENSIONS", "1536"))
+
+                    # For Ollama, get native API URL, auth token, and API mode
+                    ollama_base_url = rag_settings.get("OLLAMA_EMBEDDING_URL", "").rstrip("/v1").rstrip("/")
+                    ollama_auth_token = rag_settings.get("OLLAMA_EMBEDDING_AUTH_TOKEN", "")
+                    ollama_api_mode = rag_settings.get("OLLAMA_API_MODE", "native")
                 except Exception as e:
                     search_logger.warning(f"Failed to load embedding settings: {e}, using defaults")
                     batch_size = 100
                     embedding_dimensions = 1536
+                    ollama_base_url = ""
+                    ollama_auth_token = ""
+                    ollama_api_mode = "native"
 
                 total_tokens_used = 0
-                adapter = _get_embedding_adapter(embedding_provider, client)
+                adapter = _get_embedding_adapter(
+                    embedding_provider, client, base_url=ollama_base_url, auth_token=ollama_auth_token, api_mode=ollama_api_mode
+                )
                 dimensions_to_use = embedding_dimensions if embedding_dimensions > 0 else None
 
                 for i in range(0, len(texts), batch_size):
diff --git a/python/src/server/services/llm_provider_service.py b/python/src/server/services/llm_provider_service.py
index 00197926fd..b94e3a5bc4 100644
--- a/python/src/server/services/llm_provider_service.py
+++ b/python/src/server/services/llm_provider_service.py
@@ -412,15 +412,32 @@ async def get_llm_client(
                     if not ollama_base_url:
                         raise RuntimeError("No Ollama base URL resolved")
 
+                    # Check for auth token in RAG settings for fallback
+                    cache_key = "rag_strategy_settings"
+                    rag_settings = _get_cached_settings(cache_key)
+                    if rag_settings is None:
+                        rag_settings = await credential_service.get_credentials_by_category("rag_strategy")
+                        _set_cached_settings(cache_key, rag_settings)
+
+                    # Get correct auth token based on operation type
+                    if use_embedding_provider:
+                        ollama_auth_token = rag_settings.get("OLLAMA_EMBEDDING_AUTH_TOKEN", "")
+                    else:
+                        ollama_auth_token = rag_settings.get("OLLAMA_CHAT_AUTH_TOKEN", "")
+
+                    # Use "required-but-ignored" as default if no token is set (Ollama doesn't validate when auth is disabled)
+                    if not ollama_auth_token:
+                        ollama_auth_token = "required-but-ignored"
+
                     client = openai.AsyncOpenAI(
-                        api_key="ollama",
+                        api_key=ollama_auth_token,
                         base_url=ollama_base_url,
                     )
                     logger.info(
                         f"Ollama fallback client created successfully with base URL: {ollama_base_url}"
                     )
                     provider_name = "ollama"
-                    api_key = "ollama"
+                    api_key = ollama_auth_token
                     base_url = ollama_base_url
                 except Exception as fallback_error:
                     raise ValueError(
@@ -435,9 +452,29 @@ async def get_llm_client(
                 base_url_override=base_url,
             )
 
-            # Ollama requires an API key in the client but doesn't actually use it
+            # Check for auth token in RAG settings
+            cache_key = "rag_strategy_settings"
+            rag_settings = _get_cached_settings(cache_key)
+            if rag_settings is None:
+                rag_settings = await credential_service.get_credentials_by_category("rag_strategy")
+                _set_cached_settings(cache_key, rag_settings)
+
+            # Get correct auth token based on operation type
+            if use_embedding_provider or instance_type == "embedding":
+                ollama_auth_token = rag_settings.get("OLLAMA_EMBEDDING_AUTH_TOKEN", "")
+                logger.info(f"Using OLLAMA_EMBEDDING_AUTH_TOKEN (length: {len(ollama_auth_token) if ollama_auth_token else 0})")
+            else:
+                ollama_auth_token = rag_settings.get("OLLAMA_CHAT_AUTH_TOKEN", "")
+                logger.info(f"Using OLLAMA_CHAT_AUTH_TOKEN (length: {len(ollama_auth_token) if ollama_auth_token else 0})")
+
+            # Use "required-but-ignored" as default if no token is set (Ollama doesn't validate when auth is disabled)
+            if not ollama_auth_token:
+                ollama_auth_token = "required-but-ignored"
+                logger.warning("No Ollama auth token found, using placeholder 'required-but-ignored'")
+
+            # Ollama requires an API key in the client but may not use it unless auth is enabled
             client = openai.AsyncOpenAI(
-                api_key="ollama",  # Required but unused by Ollama
+                api_key=ollama_auth_token,
                 base_url=ollama_base_url,
             )
             logger.info(f"Ollama client created successfully with base URL: {ollama_base_url}")
@@ -578,8 +615,11 @@ async def _get_optimal_ollama_instance(instance_type: str | None = None,
         # Check if we need embedding provider and have separate embedding URL
         if use_embedding_provider or instance_type == "embedding":
             embedding_url = rag_settings.get("OLLAMA_EMBEDDING_URL")
+            logger.info(f"Embedding URL from settings: {embedding_url}")
             if embedding_url:
-                return embedding_url if embedding_url.endswith('/v1') else f"{embedding_url}/v1"
+                final_url = embedding_url if embedding_url.endswith('/v1') else f"{embedding_url}/v1"
+                logger.info(f"Resolved embedding URL: {final_url}")
+                return final_url
 
         # Default to LLM base URL for chat operations
         fallback_url = rag_settings.get("LLM_BASE_URL", "http://host.docker.internal:11434")
diff --git a/python/src/server/services/ollama/model_discovery_service.py b/python/src/server/services/ollama/model_discovery_service.py
index a5b92cac55..3888323d01 100644
--- a/python/src/server/services/ollama/model_discovery_service.py
+++ b/python/src/server/services/ollama/model_discovery_service.py
@@ -119,13 +119,14 @@ def _cache_models(self, instance_url: str, models: list[OllamaModel]) -> None:
         self.model_cache[cache_key] = models
         logger.debug(f"Cached {len(models)} models for {instance_url}")
 
-    async def discover_models(self, instance_url: str, fetch_details: bool = False) -> list[OllamaModel]:
+    async def discover_models(self, instance_url: str, fetch_details: bool = False, auth_token: str | None = None) -> list[OllamaModel]:
         """
         Discover all available models from an Ollama instance.
 
         Args:
             instance_url: Base URL of the Ollama instance
             fetch_details: If True, fetch comprehensive model details via /api/show
+            auth_token: Optional authentication token for protected instances
 
         Returns:
             List of OllamaModel objects with discovered capabilities
@@ -188,7 +189,12 @@ async def discover_models(self, instance_url: str, fetch_details: bool = False)
                 # Ollama API endpoint for listing models
                 tags_url = f"{base_url}/api/tags"
 
-                response = await client.get(tags_url)
+                # Prepare headers with optional auth token
+                headers = {}
+                if auth_token:
+                    headers["Authorization"] = f"Bearer {auth_token}"
+
+                response = await client.get(tags_url, headers=headers)
                 response.raise_for_status()
                 data = response.json()
 
@@ -955,12 +961,13 @@ async def get_model_info(self, model_name: str, instance_url: str) -> OllamaMode
             logger.error(f"Error getting model info for {model_name}: {e}")
             return None
 
-    async def check_instance_health(self, instance_url: str) -> InstanceHealthStatus:
+    async def check_instance_health(self, instance_url: str, auth_token: str | None = None) -> InstanceHealthStatus:
         """
         Check the health status of an Ollama instance.
 
         Args:
             instance_url: Base URL of the Ollama instance
+            auth_token: Optional authentication token for protected instances
 
         Returns:
             InstanceHealthStatus with current health information
@@ -979,11 +986,16 @@ async def check_instance_health(self, instance_url: str) -> InstanceHealthStatus
         status = InstanceHealthStatus(is_healthy=False)
 
         try:
+            # Prepare headers with optional auth token
+            headers = {}
+            if auth_token:
+                headers["Authorization"] = f"Bearer {auth_token}"
+
             async with httpx.AsyncClient(timeout=httpx.Timeout(10)) as client:
                 # Try to ping the Ollama API
                 ping_url = f"{instance_url.rstrip('/')}/api/tags"
 
-                response = await client.get(ping_url)
+                response = await client.get(ping_url, headers=headers)
                 response.raise_for_status()
 
                 data = response.json()
@@ -1011,13 +1023,19 @@ async def check_instance_health(self, instance_url: str) -> InstanceHealthStatus
 
         return status
 
-    async def discover_models_from_multiple_instances(self, instance_urls: list[str], fetch_details: bool = False) -> dict[str, Any]:
+    async def discover_models_from_multiple_instances(
+        self,
+        instance_urls: list[str],
+        fetch_details: bool = False,
+        auth_tokens: dict[str, str] | None = None
+    ) -> dict[str, Any]:
         """
         Discover models from multiple Ollama instances concurrently.
 
         Args:
             instance_urls: List of Ollama instance URLs
             fetch_details: If True, fetch comprehensive model details via /api/show
+            auth_tokens: Optional dictionary mapping instance URLs to auth tokens
 
         Returns:
             Dictionary with discovery results and aggregated information
@@ -1034,7 +1052,11 @@ async def discover_models_from_multiple_instances(self, instance_urls: list[str]
         logger.info(f"Discovering models from {len(instance_urls)} Ollama instances with fetch_details={fetch_details}")
 
         # Discover models from all instances concurrently
-        tasks = [self.discover_models(url, fetch_details=fetch_details) for url in instance_urls]
+        auth_tokens = auth_tokens or {}
+        tasks = [
+            self.discover_models(url, fetch_details=fetch_details, auth_token=auth_tokens.get(url))
+            for url in instance_urls
+        ]
         results = await asyncio.gather(*tasks, return_exceptions=True)
 
         # Aggregate results