Merged

Changes from all commits (31 commits)
9b2a17f
Add Anthropic and Grok provider support
Sep 18, 2025
791ecd1
feat: Add crucial GPT-5 and reasoning model support for OpenRouter
Sep 19, 2025
9a7a34d
fully working model providers, addressing security and code related c…
Sep 19, 2025
e9a78e8
added multiprovider support, embeddings model support, cleaned the pr…
Sep 20, 2025
4dc3e97
fixed contextual embeddings issue
Sep 20, 2025
b7b1167
- Added inspect-aware shutdown handling so get_llm_client always clos…
Sep 20, 2025
7cc29ac
- Restructured get_llm_client so client creation and usage live in …
Sep 20, 2025
6fdce7b
added provider connection support
Sep 20, 2025
3ed0281
added provider api key not being configured warning
Sep 20, 2025
c5d5342
Updated get_llm_client so missing OpenAI keys automatically fall bac…
Sep 20, 2025
2314be7
Resolved a few needed code rabbit suggestions - Updated the knowledg…
Sep 20, 2025
2878f5d
updated via code rabbit PR review, code rabbit in my IDE found no iss…
Sep 21, 2025
6f15225
test fix
Sep 21, 2025
dca839d
enhanced OpenRouter's parsing logic to automatically detect reasoning …
Sep 21, 2025
ba387a6
bringing to current branch
Sep 24, 2025
d15cc31
updated ui llm interface, added separate embeddings provider, made the…
Sep 25, 2025
94e3230
added warning labels and updated ollama health checks
Sep 25, 2025
810c80f
ready for review, fixed some error warnings and consolidated ollama sta…
Sep 25, 2025
11388d6
fixed FAILED test_async_embedding_service.py
Sep 25, 2025
aa354ca
code rabbit fixes
Sep 25, 2025
64835ec
Separated the code-summary LLM provider from the embedding provider, …
Sep 25, 2025
fedf595
- Swapped API credential storage to booleans so decrypted keys never…
Sep 25, 2025
ec4ceee
Update RAGSettings.tsx - header for 'LLM Settings' is now 'LLM Provid…
Chillbruhhh Sep 25, 2025
7b8c834
(RAG Settings)
Sep 25, 2025
dde9051
Merge branch 'feature/LLM-Providers-Ui-Polished' of https://github.co…
Sep 25, 2025
21cf54c
- migration/complete_setup.sql:101 seeds Google/OpenRouter/Anthropic…
Sep 25, 2025
b690695
- archon-ui-main/src/components/settings/RAGSettings.tsx:90 adds a …
Sep 25, 2025
c07beeb
Update credentialsService.ts default model
Chillbruhhh Sep 26, 2025
e438b71
updated the google embedding adapter for multi dimensional rag querying
Sep 28, 2025
4c00e79
Merge branch 'feature/LLM-Providers-Ui-Polished' of https://github.co…
Sep 28, 2025
b5930ba
thought this micro fix in the google embedding pushed with the embedd…
Sep 30, 2025
1,381 changes: 737 additions & 644 deletions archon-ui-main/package-lock.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions archon-ui-main/package.json
@@ -50,6 +50,7 @@
     "react-dnd": "^16.0.1",
     "react-dnd-html5-backend": "^16.0.1",
     "react-dom": "^18.3.1",
+    "react-icons": "^5.5.0",
     "react-markdown": "^10.1.0",
     "react-router-dom": "^6.26.2",
     "tailwind-merge": "latest",
1,508 changes: 979 additions & 529 deletions archon-ui-main/src/components/settings/RAGSettings.tsx

Large diffs are not rendered by default.

41 changes: 30 additions & 11 deletions archon-ui-main/src/services/credentialsService.ts
@@ -23,6 +23,7 @@ export interface RagSettings {
   OLLAMA_EMBEDDING_URL?: string;
   OLLAMA_EMBEDDING_INSTANCE_NAME?: string;
   EMBEDDING_MODEL?: string;
+  EMBEDDING_PROVIDER?: string;
   // Crawling Performance Settings
   CRAWL_BATCH_SIZE?: number;
   CRAWL_MAX_CONCURRENT?: number;

@@ -75,6 +76,16 @@ import { getApiUrl } from "../config/api";
 class CredentialsService {
   private baseUrl = getApiUrl();
 
+  private notifyCredentialUpdate(keys: string[]): void {
+    if (typeof window === "undefined") {
+      return;
+    }
+
+    window.dispatchEvent(
+      new CustomEvent("archon:credentials-updated", { detail: { keys } })
+    );
+  }
+
   private handleCredentialError(error: any, context: string): Error {
     const errorMessage = error instanceof Error ? error.message : String(error);

@@ -182,15 +193,16 @@ class CredentialsService {
       USE_CONTEXTUAL_EMBEDDINGS: false,
       CONTEXTUAL_EMBEDDINGS_MAX_WORKERS: 3,
       USE_HYBRID_SEARCH: true,
-      USE_AGENTIC_RAG: true,
-      USE_RERANKING: true,
-      MODEL_CHOICE: "gpt-4.1-nano",
-      LLM_PROVIDER: "openai",
-      LLM_BASE_URL: "",
-      LLM_INSTANCE_NAME: "",
-      OLLAMA_EMBEDDING_URL: "",
-      OLLAMA_EMBEDDING_INSTANCE_NAME: "",
-      EMBEDDING_MODEL: "",
+      USE_AGENTIC_RAG: true,
+      USE_RERANKING: true,
+      MODEL_CHOICE: "gpt-4.1-nano",
+      LLM_PROVIDER: "openai",
+      LLM_BASE_URL: "",
+      LLM_INSTANCE_NAME: "",
+      OLLAMA_EMBEDDING_URL: "",
+      OLLAMA_EMBEDDING_INSTANCE_NAME: "",
+      EMBEDDING_PROVIDER: "openai",
+      EMBEDDING_MODEL: "",
       // Crawling Performance Settings defaults
       CRAWL_BATCH_SIZE: 50,
       CRAWL_MAX_CONCURRENT: 10,

@@ -221,6 +233,7 @@ class CredentialsService {
       "LLM_INSTANCE_NAME",
       "OLLAMA_EMBEDDING_URL",
       "OLLAMA_EMBEDDING_INSTANCE_NAME",
+      "EMBEDDING_PROVIDER",
       "EMBEDDING_MODEL",
       "CRAWL_WAIT_STRATEGY",
     ].includes(cred.key)

@@ -278,7 +291,9 @@ class CredentialsService {
         throw new Error(`HTTP ${response.status}: ${errorText}`);
       }
 
-      return response.json();
+      const updated = await response.json();
+      this.notifyCredentialUpdate([credential.key]);
+      return updated;
     } catch (error) {
       throw this.handleCredentialError(
         error,

@@ -302,7 +317,9 @@ class CredentialsService {
         throw new Error(`HTTP ${response.status}: ${errorText}`);
       }
 
-      return response.json();
+      const created = await response.json();
+      this.notifyCredentialUpdate([credential.key]);
+      return created;
     } catch (error) {
       throw this.handleCredentialError(
         error,

@@ -321,6 +338,8 @@ class CredentialsService {
         const errorText = await response.text();
         throw new Error(`HTTP ${response.status}: ${errorText}`);
       }
+
+      this.notifyCredentialUpdate([key]);
     } catch (error) {
       throw this.handleCredentialError(error, `Deleting credential '${key}'`);
     }
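The service now broadcasts a DOM CustomEvent whenever a credential is created, updated, or deleted. A minimal consumer sketch (only the event name and the `{ keys }` detail shape come from this diff; the subscriber helper and the refresh callback are hypothetical):

```typescript
// Hypothetical subscriber for the "archon:credentials-updated" event
// dispatched by notifyCredentialUpdate above.
function onCredentialsUpdated(
  callback: (keys: string[]) => void
): () => void {
  const handler = (event: Event) => {
    const detail = (event as CustomEvent<{ keys: string[] }>).detail;
    if (detail?.keys?.length) {
      callback(detail.keys);
    }
  };
  window.addEventListener("archon:credentials-updated", handler);
  // Return an unsubscribe function, e.g. for a React effect cleanup.
  return () => window.removeEventListener("archon:credentials-updated", handler);
}

// Usage sketch: re-fetch RAG settings when the embedding provider changes.
const unsubscribe = onCredentialsUpdated((keys) => {
  if (keys.includes("EMBEDDING_PROVIDER")) {
    // e.g. credentialsService.getRagSettings() -- assumed method name
  }
});
```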
18 changes: 18 additions & 0 deletions migration/0.1.0/009_add_provider_placeholders.sql
@@ -0,0 +1,18 @@
+-- Migration: 009_add_provider_placeholders.sql
+-- Description: Add placeholder API key rows for OpenRouter, Anthropic, and Grok
+-- Version: 0.1.0
+-- Author: Archon Team
+-- Date: 2025
+
+-- Insert provider API key placeholders (idempotent)
+INSERT INTO archon_settings (key, encrypted_value, is_encrypted, category, description)
+VALUES
+('OPENROUTER_API_KEY', NULL, true, 'api_keys', 'OpenRouter API key for hosted community models. Get from: https://openrouter.ai/keys'),
+('ANTHROPIC_API_KEY', NULL, true, 'api_keys', 'Anthropic API key for Claude models. Get from: https://console.anthropic.com/account/keys'),
+('GROK_API_KEY', NULL, true, 'api_keys', 'Grok API key for xAI models. Get from: https://console.x.ai/')
+ON CONFLICT (key) DO NOTHING;
+
+-- Record migration application for tracking
+INSERT INTO archon_migrations (version, migration_name)
+VALUES ('0.1.0', '009_add_provider_placeholders')
+ON CONFLICT (version, migration_name) DO NOTHING;
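Both inserts are idempotent (`ON CONFLICT ... DO NOTHING`), so the migration is safe to re-run. A quick sanity check after applying it, sketched against the two tables seeded above:

```sql
-- Confirm the placeholder rows and the migration record exist (sketch).
SELECT key, category, is_encrypted
FROM archon_settings
WHERE key IN ('OPENROUTER_API_KEY', 'ANTHROPIC_API_KEY', 'GROK_API_KEY');

SELECT version, migration_name
FROM archon_migrations
WHERE migration_name = '009_add_provider_placeholders';
```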
5 changes: 4 additions & 1 deletion migration/complete_setup.sql
@@ -100,7 +100,10 @@ ON CONFLICT (key) DO NOTHING;
 
 -- Add provider API key placeholders
 INSERT INTO archon_settings (key, encrypted_value, is_encrypted, category, description) VALUES
-('GOOGLE_API_KEY', NULL, true, 'api_keys', 'Google API Key for Gemini models. Get from: https://aistudio.google.com/apikey')
+('GOOGLE_API_KEY', NULL, true, 'api_keys', 'Google API key for Gemini models. Get from: https://aistudio.google.com/apikey'),
+('OPENROUTER_API_KEY', NULL, true, 'api_keys', 'OpenRouter API key for hosted community models. Get from: https://openrouter.ai/keys'),
+('ANTHROPIC_API_KEY', NULL, true, 'api_keys', 'Anthropic API key for Claude models. Get from: https://console.anthropic.com/account/keys'),
+('GROK_API_KEY', NULL, true, 'api_keys', 'Grok API key for xAI models. Get from: https://console.x.ai/')
 ON CONFLICT (key) DO NOTHING;
 
 -- Code Extraction Settings Migration
4 changes: 2 additions & 2 deletions python/src/server/api_routes/knowledge_api.py
@@ -1288,7 +1288,7 @@ async def stop_crawl_task(progress_id: str):
 
     found = False
     # Step 1: Cancel the orchestration service
-    orchestration = get_active_orchestration(progress_id)
+    orchestration = await get_active_orchestration(progress_id)
     if orchestration:
         orchestration.cancel()
         found = True

@@ -1306,7 +1306,7 @@ async def stop_crawl_task(progress_id: str):
         found = True
 
     # Step 3: Remove from active orchestrations registry
-    unregister_orchestration(progress_id)
+    await unregister_orchestration(progress_id)
 
     # Step 4: Update progress tracker to reflect cancellation (only if we found and cancelled something)
     if found:
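Both registry helpers are now awaited, which suggests the active-orchestration registry is guarded by an async primitive. One plausible shape, assuming a module-level dict behind an `asyncio.Lock` (the real module may differ; this is a sketch, not the PR's implementation):

```python
import asyncio
from typing import Any

# Hypothetical registry module, consistent with the awaited calls above.
_orchestrations: dict[str, Any] = {}
_registry_lock = asyncio.Lock()


async def get_active_orchestration(progress_id: str) -> Any | None:
    """Return the active orchestration for progress_id, if any."""
    async with _registry_lock:
        return _orchestrations.get(progress_id)


async def unregister_orchestration(progress_id: str) -> None:
    """Remove the orchestration from the registry; a no-op if absent."""
    async with _registry_lock:
        _orchestrations.pop(progress_id, None)
```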
68 changes: 58 additions & 10 deletions python/src/server/services/crawling/code_extraction_service.py
@@ -140,6 +140,7 @@ async def extract_and_store_code_examples(
         progress_callback: Callable | None = None,
         cancellation_check: Callable[[], None] | None = None,
         provider: str | None = None,
+        embedding_provider: str | None = None,
     ) -> int:
         """
         Extract code examples from crawled documents and store them.
@@ -150,6 +151,8 @@ async def extract_and_store_code_examples(
             source_id: The unique source_id for all documents
             progress_callback: Optional async callback for progress updates
             cancellation_check: Optional function to check for cancellation
+            provider: Optional LLM provider identifier for summary generation
+            embedding_provider: Optional embedding provider override for vector creation
 
         Returns:
             Number of code examples stored
@@ -158,9 +161,16 @@ async def extract_and_store_code_examples(
         extraction_callback = None
         if progress_callback:
             async def extraction_progress(data: dict):
-                # Scale progress to 0-20% range
-                raw_progress = data.get("progress", 0)
-                scaled_progress = int(raw_progress * 0.2)  # 0-20%
+                # Scale progress to 0-20% range with normalization similar to later phases
+                raw = data.get("progress", data.get("percentage", 0))
+                try:
+                    raw_num = float(raw)
+                except (TypeError, ValueError):
+                    raw_num = 0.0
+                if 0.0 <= raw_num <= 1.0:
+                    raw_num *= 100.0
+                # 0-20% with clamping
+                scaled_progress = min(20, max(0, int(raw_num * 0.2)))
                 data["progress"] = scaled_progress
                 await progress_callback(data)
             extraction_callback = extraction_progress
@@ -197,8 +207,15 @@ async def extraction_progress(data: dict):
         if progress_callback:
             async def summary_progress(data: dict):
                 # Scale progress to 20-90% range
-                raw_progress = data.get("progress", 0)
-                scaled_progress = 20 + int(raw_progress * 0.7)  # 20-90%
+                raw = data.get("progress", data.get("percentage", 0))
+                try:
+                    raw_num = float(raw)
+                except (TypeError, ValueError):
+                    raw_num = 0.0
+                if 0.0 <= raw_num <= 1.0:
+                    raw_num *= 100.0
+                # 20-90% with clamping
+                scaled_progress = min(90, max(20, 20 + int(raw_num * 0.7)))
                 data["progress"] = scaled_progress
                 await progress_callback(data)
             summary_callback = summary_progress
Expand All @@ -216,15 +233,26 @@ async def summary_progress(data: dict):
if progress_callback:
async def storage_progress(data: dict):
# Scale progress to 90-100% range
raw_progress = data.get("progress", 0)
scaled_progress = 90 + int(raw_progress * 0.1) # 90-100%
raw = data.get("progress", data.get("percentage", 0))
try:
raw_num = float(raw)
except (TypeError, ValueError):
raw_num = 0.0
if 0.0 <= raw_num <= 1.0:
raw_num *= 100.0
# 90-100% with clamping
scaled_progress = min(100, max(90, 90 + int(raw_num * 0.1)))
data["progress"] = scaled_progress
await progress_callback(data)
storage_callback = storage_progress

# Store code examples in database
return await self._store_code_examples(
storage_data, url_to_full_document, storage_callback, provider
storage_data,
url_to_full_document,
storage_callback,
provider,
embedding_provider,
)

async def _extract_code_blocks_from_documents(
@@ -880,9 +908,20 @@ async def _extract_text_file_code_blocks(
                 current_indent = indent
                 block_start_idx = i
                 current_block.append(line)
-            elif current_block and len("\n".join(current_block)) >= min_length:
+            elif current_block:
+                block_text = "\n".join(current_block)
+                threshold = (
+                    min_length
+                    if min_length is not None
+                    else await self._get_min_code_length()
+                )
+                if len(block_text) < threshold:
+                    current_block = []
+                    current_indent = None
+                    continue
+
                 # End of indented block, check if it's code
-                code_content = "\n".join(current_block)
+                code_content = block_text
 
                 # Try to detect language from content
                 language = self._detect_language_from_content(code_content)
@@ -1670,12 +1709,20 @@ async def _store_code_examples(
         url_to_full_document: dict[str, str],
         progress_callback: Callable | None = None,
         provider: str | None = None,
+        embedding_provider: str | None = None,
     ) -> int:
         """
         Store code examples in the database.
 
         Returns:
             Number of code examples stored
+
+        Args:
+            storage_data: Prepared code example payloads
+            url_to_full_document: Mapping of URLs to their full document content
+            progress_callback: Optional callback for progress updates
+            provider: Optional LLM provider identifier for summaries
+            embedding_provider: Optional embedding provider override for vector storage
         """
         # Create progress callback for storage phase
         storage_progress_callback = None
@@ -1713,6 +1760,7 @@ async def storage_callback(data: dict):
             url_to_full_document=url_to_full_document,
             progress_callback=storage_progress_callback,
             provider=provider,
+            embedding_provider=embedding_provider,
         )
 
         # Report completion of code extraction/storage phase
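The same normalize-and-clamp pattern now appears in all three phase callbacks (0-20%, 20-90%, 90-100%), so it could be factored into one helper. A sketch of that shared logic (the helper name is illustrative, not part of this PR):

```python
def scale_progress(data: dict, lower: int, upper: int) -> int:
    """Normalize raw progress and map it into [lower, upper].

    Accepts a 0-1 fraction or a 0-100 percentage under either the
    "progress" or "percentage" key, mirroring the callbacks above.
    """
    raw = data.get("progress", data.get("percentage", 0))
    try:
        raw_num = float(raw)
    except (TypeError, ValueError):
        raw_num = 0.0
    if 0.0 <= raw_num <= 1.0:
        raw_num *= 100.0  # treat fractions as percentages
    span = (upper - lower) / 100.0
    return min(upper, max(lower, lower + int(raw_num * span)))

# e.g. the summary phase: data["progress"] = scale_progress(data, 20, 90)
```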