diff --git a/.env.example b/.env.example index 32386d5586..d811c99758 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,25 @@ # Minimal startup configuration - only Supabase connection required # All other settings (API keys, model choices, RAG flags) are managed via the Settings page +# ===================================================== +# LOCAL DATABASE MODE (optional) +# Set LOCAL_DB=true to run PostgreSQL + PostgREST locally +# instead of using Supabase cloud. All data stays on your machine. +# ===================================================== +LOCAL_DB=false + +# Local database credentials (used when LOCAL_DB=true) +LOCAL_DB_USER=archon +LOCAL_DB_PASSWORD=archon_password +LOCAL_DB_NAME=archon +LOCAL_DB_PORT=5433 +POSTGREST_PORT=3001 +LOCAL_REST_PORT=3002 + +# ===================================================== +# SUPABASE CLOUD MODE (default when LOCAL_DB=false) +# ===================================================== + # Get your SUPABASE_URL from the Data API section of your Supabase project settings - # https://supabase.com/dashboard/project//settings/api SUPABASE_URL= diff --git a/Makefile b/Makefile index ad2b44e6a8..696d51a029 100644 --- a/Makefile +++ b/Makefile @@ -5,23 +5,37 @@ SHELL := /bin/bash # Docker compose command - prefer newer 'docker compose' plugin over standalone 'docker-compose' COMPOSE ?= $(shell docker compose version >/dev/null 2>&1 && echo "docker compose" || echo "docker-compose") -.PHONY: help dev dev-docker stop test test-fe test-be lint lint-fe lint-be clean install check +# Load .env if it exists +ifneq (,$(wildcard .env)) + include .env + export +endif + +.PHONY: help dev dev-docker dev-local-db stop test test-fe test-be lint lint-fe lint-be clean install check help: @echo "Archon Development Commands" @echo "===========================" - @echo " make dev - Backend in Docker, frontend local (recommended)" - @echo " make dev-docker - Everything in Docker" - @echo " make stop - Stop all services" - @echo " make 
test       - Run all tests"
-	@echo "  make test-fe    - Run frontend tests only"
-	@echo "  make test-be    - Run backend tests only"
-	@echo "  make lint       - Run all linters"
-	@echo "  make lint-fe    - Run frontend linter only"
-	@echo "  make lint-be    - Run backend linter only"
-	@echo "  make clean      - Remove containers and volumes"
-	@echo "  make install    - Install dependencies"
-	@echo "  make check      - Check environment setup"
+	@echo "  make dev           - Backend in Docker, frontend local (recommended)"
+	@echo "  make dev-docker    - Everything in Docker"
+	@echo "  make dev-local-db  - Full stack with local PostgreSQL + PostgREST"
+	@echo "  make stop          - Stop all services"
+	@echo "  make test          - Run all tests"
+	@echo "  make test-fe       - Run frontend tests only"
+	@echo "  make test-be       - Run backend tests only"
+	@echo "  make lint          - Run all linters"
+	@echo "  make lint-fe       - Run frontend linter only"
+	@echo "  make lint-be       - Run backend linter only"
+	@echo "  make clean         - Remove containers and volumes"
+	@echo "  make install       - Install dependencies"
+	@echo "  make check         - Check environment setup"
+	@echo ""
+	@echo "Local Database Commands"
+	@echo "======================="
+	@echo "  make local-db-up    - Start local database stack only"
+	@echo "  make local-db-down  - Stop local database stack"
+	@echo "  make local-db-reset - Reset local database (deletes all data)"
+	@echo "  make local-db-logs  - View local database logs"
 
 # Install dependencies
 install:
@@ -62,12 +76,50 @@ dev-docker: check
 	@echo "Frontend: http://localhost:3737"
 	@echo "API: http://localhost:8181"
 
+# Full stack with local database.
+# NOTE: $(VAR:-default) is shell syntax, not GNU Make — in Make it expands to
+# an undefined variable and prints nothing. Use $(or $(VAR),default) instead.
+dev-local-db: check
+	@echo "Starting Archon with local database..."
+	@$(COMPOSE) --profile full --profile local-db up -d --build
+	@echo "✓ All services running with local database"
+	@echo "Frontend: http://localhost:3737"
+	@echo "API: http://localhost:8181"
+	@echo "Database: localhost:$(or $(LOCAL_DB_PORT),5433)"
+	@echo "PostgREST: http://localhost:$(or $(POSTGREST_PORT),3001)"
+
 # Stop all services
 stop:
 	@echo "Stopping all services..."
-	@$(COMPOSE) --profile backend --profile frontend --profile full down
+	@$(COMPOSE) --profile backend --profile frontend --profile full --profile local-db down
 	@echo "✓ Services stopped"
 
+# Local database commands
+local-db-up:
+	@echo "Starting local database stack..."
+	@$(COMPOSE) --profile local-db up -d
+	@echo "✓ Local database running"
+	@echo "PostgreSQL: localhost:$(or $(LOCAL_DB_PORT),5433)"
+	@echo "PostgREST: http://localhost:$(or $(POSTGREST_PORT),3001)"
+
+local-db-down:
+	@echo "Stopping local database..."
+	@$(COMPOSE) --profile local-db down
+	@echo "✓ Local database stopped"
+
+# Destructive: removes the archon-db-data volume. Requires SHELL := /bin/bash
+# (set at the top of this Makefile) for read -p and [[ ... ]].
+local-db-reset:
+	@echo "⚠️  This will remove the local database volume and recreate it"
+	@read -p "Are you sure? All data will be lost! (y/N) " -n 1 -r; \
+	echo; \
+	if [[ $$REPLY =~ ^[Yy]$$ ]]; then \
+		$(COMPOSE) --profile local-db down -v; \
+		$(COMPOSE) --profile local-db up -d; \
+		echo "✓ Local database reset and recreated"; \
+	else \
+		echo "Cancelled"; \
+	fi
+
+local-db-logs:
+	@$(COMPOSE) --profile local-db logs -f
+
 # Run all tests
 test: test-fe test-be
diff --git a/README.md b/README.md
index 76e2e93d7a..ca3ed8c9a4 100644
--- a/README.md
+++ b/README.md
@@ -60,7 +60,7 @@ This new vision for Archon replaces the old one (the agenteer).
Archon used to b - [Docker Desktop](https://www.docker.com/products/docker-desktop/) - [Node.js 18+](https://nodejs.org/) (for hybrid development mode) -- [Supabase](https://supabase.com/) account (free tier or local Supabase both work) +- [Supabase](https://supabase.com/) account (free tier or local Supabase both work) **OR use local database mode** - [OpenAI API key](https://platform.openai.com/api-keys) (Gemini and Ollama are supported too!) - (OPTIONAL) [Make](https://www.gnu.org/software/make/) (see [Installing Make](#installing-make) below) @@ -77,16 +77,34 @@ This new vision for Archon replaces the old one (the agenteer). Archon used to b **Note:** The `stable` branch is recommended for using Archon. If you want to contribute or try the latest features, use the `main` branch with `git clone https://github.com/coleam00/archon.git` 2. **Environment Configuration**: - ```bash - cp .env.example .env - # Edit .env and add your Supabase credentials: - # SUPABASE_URL=https://your-project.supabase.co - # SUPABASE_SERVICE_KEY=your-service-key-here - ``` + ```bash + cp .env.example .env + ``` + + **Option A — Supabase Cloud (Default)**: + ```bash + # Edit .env and add your Supabase credentials: + SUPABASE_URL=https://your-project.supabase.co + SUPABASE_SERVICE_KEY=your-service-key-here + ``` + + **Option B — Local Database (No Cloud)**: + ```bash + # Edit .env and set: + LOCAL_DB=true + # Leave SUPABASE_URL and SUPABASE_SERVICE_KEY empty + ``` + + IMPORTANT NOTES: + - For cloud Supabase: they recently introduced a new type of service role key but use the legacy one (the longer one). + - For local Supabase: set SUPABASE_URL to http://host.docker.internal:8000 (unless you have an IP address set up). + - For local database mode: no Supabase credentials needed — just set `LOCAL_DB=true`. + +3. **Database Setup**: - IMPORTANT NOTES: - - For cloud Supabase: they recently introduced a new type of service role key but use the legacy one (the longer one). 
- - For local Supabase: set SUPABASE_URL to http://host.docker.internal:8000 (unless you have an IP address set up). + **Supabase Cloud**: In your [Supabase project](https://supabase.com/dashboard) SQL Editor, copy, paste, and execute the contents of `migration/complete_setup.sql` + + **Local Database**: The database is automatically initialized when you start the services — no manual SQL execution needed. 3. **Database Setup**: In your [Supabase project](https://supabase.com/dashboard) SQL Editor, copy, paste, and execute the contents of `migration/complete_setup.sql` @@ -105,6 +123,25 @@ This new vision for Archon replaces the old one (the agenteer). Archon used to b Ports are configurable in your .env as well! + **Local Database Mode (No Supabase Account Required)** + + If you want to keep all data on your machine with no cloud dependencies: + + ```bash + # Option 1: Using Make (recommended) + make dev-local-db + + # Option 2: Using Docker Compose directly + docker compose --profile full --profile local-db up -d + ``` + + This starts the full Archon stack **plus** a local PostgreSQL + PostgREST database: + - **Database**: PostgreSQL 16 with pgvector (Port: 5433) + - **PostgREST**: REST API compatible with supabase-py (Port: 3001) + - **All data stays on your machine** – no Supabase account needed + + No additional configuration required – just set `LOCAL_DB=true` in your `.env` and everything works automatically. + 5. **Configure API Keys**: - Open http://localhost:3737 - You'll automatically be brought through an onboarding flow to set your API key (OpenAI is default) @@ -160,16 +197,20 @@ sudo yum install make 🚀 Quick Command Reference for Make
-| Command | Description | -| ----------------- | ------------------------------------------------------- | -| `make dev` | Start hybrid dev (backend in Docker, frontend local) ⭐ | -| `make dev-docker` | Everything in Docker | -| `make stop` | Stop all services | -| `make test` | Run all tests | -| `make lint` | Run linters | -| `make install` | Install dependencies | -| `make check` | Check environment setup | -| `make clean` | Remove containers and volumes (with confirmation) | +| Command | Description | +| -------------------- | ------------------------------------------------------- | +| `make dev` | Start hybrid dev (backend in Docker, frontend local) ⭐ | +| `make dev-docker` | Everything in Docker | +| `make dev-local-db` | Full stack with local PostgreSQL + PostgREST 🆕 | +| `make stop` | Stop all services | +| `make test` | Run all tests | +| `make lint` | Run linters | +| `make install` | Install dependencies | +| `make check` | Check environment setup | +| `make clean` | Remove containers and volumes (with confirmation) | +| `make local-db-up` | Start local database stack only | +| `make local-db-down` | Stop local database stack | +| `make local-db-reset`| Reset local database (deletes all data) | @@ -204,12 +245,20 @@ The reset script safely removes all tables, functions, triggers, and policies wi ### Core Services -| Service | Container Name | Default URL | Purpose | -| ------------------ | -------------- | --------------------- | --------------------------------- | -| **Web Interface** | archon-ui | http://localhost:3737 | Main dashboard and controls | -| **API Service** | archon-server | http://localhost:8181 | Web crawling, document processing | -| **MCP Server** | archon-mcp | http://localhost:8051 | Model Context Protocol interface | -| **Agents Service** | archon-agents | http://localhost:8052 | AI/ML operations, reranking | +| Service | Container Name | Default URL | Purpose | +| ------------------ | --------------------- | --------------------- 
| --------------------------------- | +| **Web Interface** | archon-ui | http://localhost:3737 | Main dashboard and controls | +| **API Service** | archon-server | http://localhost:8181 | Web crawling, document processing | +| **MCP Server** | archon-mcp | http://localhost:8051 | Model Context Protocol interface | +| **Agents Service** | archon-agents | http://localhost:8052 | AI/ML operations, reranking | + +### Local Database Services (opt-in) + +| Service | Container Name | Default URL | Purpose | +| ------------------ | --------------------- | --------------------- | --------------------------------- | +| **PostgreSQL** | archon-db | localhost:5433 | Database with pgvector extension | +| **PostgREST** | archon-postgrest | http://localhost:3001 | REST API for PostgreSQL | +| **Nginx Proxy** | archon-postgrest-proxy| http://localhost:3002 | Supabase URL compatibility layer | ## Upgrading @@ -285,12 +334,27 @@ Archon uses true microservices architecture with clear separation of concerns: ┌─────────────────┐ │ │ Database │ │ │ │ │ - │ Supabase │◄──────────────┘ - │ PostgreSQL │ - │ PGVector │ + │ Supabase Cloud │◄──────────────┘ + │ or Local DB │ + │ PostgreSQL │ + │ + PostgREST │ └─────────────────┘ ``` +### Local Database Architecture + +When running with `LOCAL_DB=true`, Archon uses a local PostgreSQL stack instead of Supabase Cloud: + +``` +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ PostgreSQL │───►│ PostgREST │───►│ Nginx Proxy │ +│ 16 + pgvec │ │ (REST API) │ │ /rest/v1/ │ +│ Port 5433 │ │ Port 3001 │ │ Port 3002 │ +└──────────────┘ └──────────────┘ └──────────────┘ +``` + +The Nginx proxy maps `/rest/v1/` paths to PostgREST's root paths, making it fully compatible with the `supabase-py` client. **No code changes needed** – Archon's Python services work identically in both modes. 
+ ### Service Responsibilities | Service | Location | Purpose | Key Features | diff --git a/docker-compose.yml b/docker-compose.yml index 99544f117e..00d6e29ff2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,181 +1,256 @@ -# Docker Compose profiles: -# - Default (no profile): Starts archon-server, archon-mcp, and archon-frontend -# - Agents are opt-in: archon-agents starts only with the "agents" profile -# Usage: -# docker compose up # Starts server, mcp, frontend (agents disabled) -# docker compose --profile agents up -d # Also starts archon-agents - -services: - # Server Service (FastAPI + Socket.IO + Crawling) - archon-server: - build: - context: ./python - dockerfile: Dockerfile.server - args: - BUILDKIT_INLINE_CACHE: 1 - ARCHON_SERVER_PORT: ${ARCHON_SERVER_PORT:-8181} - container_name: archon-server - ports: - - "${ARCHON_SERVER_PORT:-8181}:${ARCHON_SERVER_PORT:-8181}" - environment: - - SUPABASE_URL=${SUPABASE_URL} - - SUPABASE_SERVICE_KEY=${SUPABASE_SERVICE_KEY} - - OPENAI_API_KEY=${OPENAI_API_KEY:-} - - LOGFIRE_TOKEN=${LOGFIRE_TOKEN:-} - - SERVICE_DISCOVERY_MODE=docker_compose - - LOG_LEVEL=${LOG_LEVEL:-INFO} - - ARCHON_SERVER_PORT=${ARCHON_SERVER_PORT:-8181} - - ARCHON_MCP_PORT=${ARCHON_MCP_PORT:-8051} - - ARCHON_AGENTS_PORT=${ARCHON_AGENTS_PORT:-8052} - - AGENTS_ENABLED=${AGENTS_ENABLED:-false} - - ARCHON_HOST=${HOST:-localhost} - networks: - - app-network - volumes: - - /var/run/docker.sock:/var/run/docker.sock # Docker socket for MCP container control - - ./python/src:/app/src # Mount source code for hot reload - - ./python/tests:/app/tests # Mount tests for UI test execution - - ./migration:/app/migration # Mount migration files for version tracking - extra_hosts: - - "host.docker.internal:host-gateway" - command: - [ - "python", - "-m", - "uvicorn", - "src.server.main:app", - "--host", - "0.0.0.0", - "--port", - "${ARCHON_SERVER_PORT:-8181}", - "--reload", - ] - healthcheck: - test: - [ - "CMD", - "sh", - "-c", - 'python -c "import 
urllib.request; urllib.request.urlopen(''http://localhost:${ARCHON_SERVER_PORT:-8181}/health'')"', - ] - interval: 30s - timeout: 10s - retries: 3 - start_period: 40s - - # Lightweight MCP Server Service (HTTP-based) - archon-mcp: - build: - context: ./python - dockerfile: Dockerfile.mcp - args: - BUILDKIT_INLINE_CACHE: 1 - ARCHON_MCP_PORT: ${ARCHON_MCP_PORT:-8051} - container_name: archon-mcp - ports: - - "${ARCHON_MCP_PORT:-8051}:${ARCHON_MCP_PORT:-8051}" - environment: - - SUPABASE_URL=${SUPABASE_URL} - - SUPABASE_SERVICE_KEY=${SUPABASE_SERVICE_KEY} - - LOGFIRE_TOKEN=${LOGFIRE_TOKEN:-} - - SERVICE_DISCOVERY_MODE=docker_compose - - TRANSPORT=sse - - LOG_LEVEL=${LOG_LEVEL:-INFO} - # MCP needs to know where to find other services - - API_SERVICE_URL=http://archon-server:${ARCHON_SERVER_PORT:-8181} - - AGENTS_ENABLED=${AGENTS_ENABLED:-false} - - AGENTS_SERVICE_URL=${AGENTS_SERVICE_URL:-http://archon-agents:${ARCHON_AGENTS_PORT:-8052}} - - ARCHON_MCP_PORT=${ARCHON_MCP_PORT:-8051} - - ARCHON_SERVER_PORT=${ARCHON_SERVER_PORT:-8181} - - ARCHON_AGENTS_PORT=${ARCHON_AGENTS_PORT:-8052} - networks: - - app-network - depends_on: - archon-server: - condition: service_healthy - - extra_hosts: - - "host.docker.internal:host-gateway" - healthcheck: - test: - [ - "CMD", - "sh", - "-c", - 'python -c "import socket; s=socket.socket(); s.connect((''localhost'', ${ARCHON_MCP_PORT:-8051})); s.close()"', - ] - interval: 30s - timeout: 10s - retries: 3 - start_period: 60s # Give dependencies time to start - - # AI Agents Service (ML/Reranking) - archon-agents: - profiles: - - agents # Only starts when explicitly using --profile agents - build: - context: ./python - dockerfile: Dockerfile.agents - args: - BUILDKIT_INLINE_CACHE: 1 - ARCHON_AGENTS_PORT: ${ARCHON_AGENTS_PORT:-8052} - container_name: archon-agents - ports: - - "${ARCHON_AGENTS_PORT:-8052}:${ARCHON_AGENTS_PORT:-8052}" - environment: - - SUPABASE_URL=${SUPABASE_URL} - - SUPABASE_SERVICE_KEY=${SUPABASE_SERVICE_KEY} - - 
OPENAI_API_KEY=${OPENAI_API_KEY:-} - - LOGFIRE_TOKEN=${LOGFIRE_TOKEN:-} - - SERVICE_DISCOVERY_MODE=docker_compose - - LOG_LEVEL=${LOG_LEVEL:-INFO} - - ARCHON_AGENTS_PORT=${ARCHON_AGENTS_PORT:-8052} - - ARCHON_SERVER_PORT=${ARCHON_SERVER_PORT:-8181} - networks: - - app-network - healthcheck: - test: - [ - "CMD", - "sh", - "-c", - 'python -c "import urllib.request; urllib.request.urlopen(''http://localhost:${ARCHON_AGENTS_PORT:-8052}/health'')"', - ] - interval: 30s - timeout: 10s - retries: 3 - start_period: 40s - - # Frontend - archon-frontend: - build: ./archon-ui-main - container_name: archon-ui - ports: - - "${ARCHON_UI_PORT:-3737}:3737" - environment: - # Don't set VITE_API_URL so frontend uses relative URLs through proxy - # - VITE_API_URL=http://${HOST:-localhost}:${ARCHON_SERVER_PORT:-8181} - - VITE_ARCHON_SERVER_PORT=${ARCHON_SERVER_PORT:-8181} - - ARCHON_SERVER_PORT=${ARCHON_SERVER_PORT:-8181} - - HOST=${HOST:-localhost} - - PROD=${PROD:-false} - - VITE_ALLOWED_HOSTS=${VITE_ALLOWED_HOSTS:-} - - VITE_SHOW_DEVTOOLS=${VITE_SHOW_DEVTOOLS:-false} - - DOCKER_ENV=true - networks: - - app-network - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:3737"] - interval: 30s - timeout: 10s - retries: 3 - volumes: - - ./archon-ui-main/src:/app/src - - ./archon-ui-main/public:/app/public - depends_on: - archon-server: - condition: service_healthy - -networks: - app-network: - driver: bridge +# Docker Compose profiles: +# - Default (no profile): Starts archon-server, archon-mcp, and archon-frontend +# - Agents are opt-in: archon-agents starts only with the "agents" profile +# - Local DB: Use LOCAL_DB=true to run PostgreSQL + PostgREST locally instead of Supabase cloud +# Usage: +# docker compose up # Starts server, mcp, frontend (agents disabled) +# docker compose --profile agents up -d # Also starts archon-agents +# LOCAL_DB=true docker compose up -d # Full stack with local database + +services: + # ===================================================== + # 
LOCAL DATABASE SERVICES (opt-in via LOCAL_DB=true) + # ===================================================== + + # PostgreSQL with pgvector extension + archon-db: + image: pgvector/pgvector:pg16 + container_name: archon-db + profiles: + - local-db + ports: + - "${LOCAL_DB_PORT:-5433}:5432" + environment: + - POSTGRES_USER=${LOCAL_DB_USER:-archon} + - POSTGRES_PASSWORD=${LOCAL_DB_PASSWORD:-archon_password} + - POSTGRES_DB=${LOCAL_DB_NAME:-archon} + volumes: + - archon-db-data:/var/lib/postgresql/data + - ./local-db/complete_setup_local.sql:/docker-entrypoint-initdb.d/01_complete_setup.sql + networks: + - app-network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${LOCAL_DB_USER:-archon} -d ${LOCAL_DB_NAME:-archon}"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + + # PostgREST - exposes PostgreSQL as REST API (Supabase-compatible) + archon-postgrest: + image: postgrest/postgrest:latest + container_name: archon-postgrest + profiles: + - local-db + ports: + - "${POSTGREST_PORT:-3001}:3000" + environment: + - PGRST_DB_URI=postgres://${LOCAL_DB_USER:-archon}:${LOCAL_DB_PASSWORD:-archon_password}@archon-db:5432/${LOCAL_DB_NAME:-archon} + - PGRST_DB_SCHEMA=public + - PGRST_DB_ANON_ROLE=${LOCAL_DB_USER:-archon} + - PGRST_SERVER_PORT=3000 + - PGRST_DB_POOL=10 + - PGRST_DB_EXTRA_SEARCH_PATH=public,extensions + networks: + - app-network + depends_on: + archon-db: + condition: service_healthy + restart: unless-stopped + + # Lightweight proxy to map /rest/v1/ -> / (Supabase URL format compatibility) + archon-postgrest-proxy: + image: nginx:alpine + container_name: archon-postgrest-proxy + profiles: + - local-db + ports: + - "${LOCAL_REST_PORT:-3002}:80" + volumes: + - ./local-db/nginx.conf:/etc/nginx/nginx.conf:ro + networks: + - app-network + depends_on: + - archon-postgrest + restart: unless-stopped + + # ===================================================== + # CORE SERVICES + # ===================================================== + + # Server Service 
(FastAPI + Socket.IO + Crawling)
+  archon-server:
+    build:
+      context: ./python
+      dockerfile: Dockerfile.server
+      args:
+        BUILDKIT_INLINE_CACHE: 1
+        ARCHON_SERVER_PORT: ${ARCHON_SERVER_PORT:-8181}
+    container_name: archon-server
+    ports:
+      - "${ARCHON_SERVER_PORT:-8181}:${ARCHON_SERVER_PORT:-8181}"
+    environment:
+      # Local-DB fallback: inside the compose network the nginx proxy listens
+      # on its *container* port (80). LOCAL_REST_PORT is only the host-published
+      # port and is unreachable between containers.
+      - SUPABASE_URL=${SUPABASE_URL:-http://archon-postgrest-proxy:80}
+      - SUPABASE_SERVICE_KEY=${SUPABASE_SERVICE_KEY:-local-db-key}
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+      - LOGFIRE_TOKEN=${LOGFIRE_TOKEN:-}
+      - SERVICE_DISCOVERY_MODE=docker_compose
+      - LOG_LEVEL=${LOG_LEVEL:-INFO}
+      - ARCHON_SERVER_PORT=${ARCHON_SERVER_PORT:-8181}
+      - ARCHON_MCP_PORT=${ARCHON_MCP_PORT:-8051}
+      - ARCHON_AGENTS_PORT=${ARCHON_AGENTS_PORT:-8052}
+      - AGENTS_ENABLED=${AGENTS_ENABLED:-false}
+      - ARCHON_HOST=${HOST:-localhost}
+      - LOCAL_DB=${LOCAL_DB:-false}
+    networks:
+      - app-network
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+      - ./python/src:/app/src
+      - ./python/tests:/app/tests
+      - ./migration:/app/migration
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    command:
+      [
+        "python",
+        "-m",
+        "uvicorn",
+        "src.server.main:app",
+        "--host",
+        "0.0.0.0",
+        "--port",
+        "${ARCHON_SERVER_PORT:-8181}",
+        "--reload",
+      ]
+    healthcheck:
+      test:
+        [
+          "CMD",
+          "sh",
+          "-c",
+          'python -c "import urllib.request; urllib.request.urlopen(''http://localhost:${ARCHON_SERVER_PORT:-8181}/health'')"',
+        ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+
+  # Lightweight MCP Server Service (HTTP-based)
+  archon-mcp:
+    build:
+      context: ./python
+      dockerfile: Dockerfile.mcp
+      args:
+        BUILDKIT_INLINE_CACHE: 1
+        ARCHON_MCP_PORT: ${ARCHON_MCP_PORT:-8051}
+    container_name: archon-mcp
+    ports:
+      - "${ARCHON_MCP_PORT:-8051}:${ARCHON_MCP_PORT:-8051}"
+    environment:
+      # Same local-DB fallback as archon-server: target container port 80.
+      - SUPABASE_URL=${SUPABASE_URL:-http://archon-postgrest-proxy:80}
+      - SUPABASE_SERVICE_KEY=${SUPABASE_SERVICE_KEY:-local-db-key}
+      - LOGFIRE_TOKEN=${LOGFIRE_TOKEN:-}
+      - SERVICE_DISCOVERY_MODE=docker_compose
+      - TRANSPORT=sse
+      - LOG_LEVEL=${LOG_LEVEL:-INFO}
+      - API_SERVICE_URL=http://archon-server:${ARCHON_SERVER_PORT:-8181}
+      - AGENTS_ENABLED=${AGENTS_ENABLED:-false}
+      - AGENTS_SERVICE_URL=${AGENTS_SERVICE_URL:-http://archon-agents:${ARCHON_AGENTS_PORT:-8052}}
+      - ARCHON_MCP_PORT=${ARCHON_MCP_PORT:-8051}
+      - ARCHON_SERVER_PORT=${ARCHON_SERVER_PORT:-8181}
+      - ARCHON_AGENTS_PORT=${ARCHON_AGENTS_PORT:-8052}
+      - LOCAL_DB=${LOCAL_DB:-false}
+    networks:
+      - app-network
+    depends_on:
+      archon-server:
+        condition: service_healthy
+
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    healthcheck:
+      test:
+        [
+          "CMD",
+          "sh",
+          "-c",
+          'python -c "import socket; s=socket.socket(); s.connect((''localhost'', ${ARCHON_MCP_PORT:-8051})); s.close()"',
+        ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
+
+  # AI Agents Service (ML/Reranking)
+  archon-agents:
+    profiles:
+      - agents
+    build:
+      context: ./python
+      dockerfile: Dockerfile.agents
+      args:
+        BUILDKIT_INLINE_CACHE: 1
+        ARCHON_AGENTS_PORT: ${ARCHON_AGENTS_PORT:-8052}
+    container_name: archon-agents
+    ports:
+      - "${ARCHON_AGENTS_PORT:-8052}:${ARCHON_AGENTS_PORT:-8052}"
+    environment:
+      # Same local-DB fallback as archon-server: target container port 80.
+      - SUPABASE_URL=${SUPABASE_URL:-http://archon-postgrest-proxy:80}
+      - SUPABASE_SERVICE_KEY=${SUPABASE_SERVICE_KEY:-local-db-key}
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+      - LOGFIRE_TOKEN=${LOGFIRE_TOKEN:-}
+      - SERVICE_DISCOVERY_MODE=docker_compose
+      - LOG_LEVEL=${LOG_LEVEL:-INFO}
+      - ARCHON_AGENTS_PORT=${ARCHON_AGENTS_PORT:-8052}
+      - ARCHON_SERVER_PORT=${ARCHON_SERVER_PORT:-8181}
+      - LOCAL_DB=${LOCAL_DB:-false}
+    networks:
+      - app-network
+    healthcheck:
+      test:
+        [
+          "CMD",
+          "sh",
+          "-c",
+          'python -c "import urllib.request; urllib.request.urlopen(''http://localhost:${ARCHON_AGENTS_PORT:-8052}/health'')"',
+        ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+
+  # Frontend
+  archon-frontend:
+    build: 
./archon-ui-main + container_name: archon-ui + ports: + - "${ARCHON_UI_PORT:-3737}:3737" + environment: + - VITE_ARCHON_SERVER_PORT=${ARCHON_SERVER_PORT:-8181} + - ARCHON_SERVER_PORT=${ARCHON_SERVER_PORT:-8181} + - HOST=${HOST:-localhost} + - PROD=${PROD:-false} + - VITE_ALLOWED_HOSTS=${VITE_ALLOWED_HOSTS:-} + - VITE_SHOW_DEVTOOLS=${VITE_SHOW_DEVTOOLS:-false} + - DOCKER_ENV=true + networks: + - app-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3737"] + interval: 30s + timeout: 10s + retries: 3 + volumes: + - ./archon-ui-main/src:/app/src + - ./archon-ui-main/public:/app/public + depends_on: + archon-server: + condition: service_healthy + +networks: + app-network: + driver: bridge + +volumes: + archon-db-data: diff --git a/local-db/complete_setup_local.sql b/local-db/complete_setup_local.sql new file mode 100644 index 0000000000..bd6fafdf49 --- /dev/null +++ b/local-db/complete_setup_local.sql @@ -0,0 +1,1188 @@ +-- ===================================================== +-- Archon Complete Database Setup (LOCAL DATABASE MODE) +-- ===================================================== +-- This is a modified version of complete_setup.sql for local +-- PostgreSQL + PostgREST deployments. 
+-- +-- Key differences from the Supabase version: +-- - No auth.role() references (Supabase Auth is not available) +-- - RLS policies use public role instead of auth.role() +-- - All tables are accessible via PostgREST without authentication +-- +-- This file is automatically loaded when LOCAL_DB=true +-- ===================================================== + +-- ===================================================== +-- SECTION 1: EXTENSIONS +-- ===================================================== + +CREATE EXTENSION IF NOT EXISTS vector; +CREATE EXTENSION IF NOT EXISTS pgcrypto; +CREATE EXTENSION IF NOT EXISTS pg_trgm; + +-- ===================================================== +-- SECTION 2: CREDENTIALS AND SETTINGS +-- ===================================================== + +CREATE TABLE IF NOT EXISTS archon_settings ( + id UUID DEFAULT gen_random_uuid() PRIMARY KEY, + key VARCHAR(255) UNIQUE NOT NULL, + value TEXT, + encrypted_value TEXT, + is_encrypted BOOLEAN DEFAULT FALSE, + category VARCHAR(100), + description TEXT, + created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_archon_settings_key ON archon_settings(key); +CREATE INDEX IF NOT EXISTS idx_archon_settings_category ON archon_settings(category); + +CREATE OR REPLACE FUNCTION update_updated_at_column() +RETURNS TRIGGER AS $$ +BEGIN + NEW.updated_at = NOW(); + RETURN NEW; +END; +$$ language 'plpgsql'; + +CREATE TRIGGER update_archon_settings_updated_at + BEFORE UPDATE ON archon_settings + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + +-- RLS for local mode: allow public access (no Supabase Auth) +ALTER TABLE archon_settings ENABLE ROW LEVEL SECURITY; + +DROP POLICY IF EXISTS "Allow public full access to archon_settings" ON archon_settings; +CREATE POLICY "Allow public full access to archon_settings" ON archon_settings + FOR ALL USING (true); + +-- ===================================================== 
+-- SECTION 3: INITIAL SETTINGS DATA +-- ===================================================== + +INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES +('MCP_TRANSPORT', 'dual', false, 'server_config', 'MCP server transport mode - sse (web clients), stdio (IDE clients), or dual (both)'), +('HOST', 'localhost', false, 'server_config', 'Host to bind to if using sse as the transport (leave empty if using stdio)'), +('PORT', '8051', false, 'server_config', 'Port to listen on if using sse as the transport (leave empty if using stdio)'), +('MODEL_CHOICE', 'gpt-4.1-nano', false, 'rag_strategy', 'The LLM you want to use for summaries and contextual embeddings. Generally this is a very cheap and fast LLM like gpt-4.1-nano'); + +INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES +('USE_CONTEXTUAL_EMBEDDINGS', 'false', false, 'rag_strategy', 'Enhances embeddings with contextual information for better retrieval'), +('CONTEXTUAL_EMBEDDINGS_MAX_WORKERS', '3', false, 'rag_strategy', 'Maximum parallel workers for contextual embedding generation (1-10)'), +('USE_HYBRID_SEARCH', 'true', false, 'rag_strategy', 'Combines vector similarity search with keyword search for better results'), +('USE_AGENTIC_RAG', 'true', false, 'rag_strategy', 'Enables code example extraction, storage, and specialized code search functionality'), +('USE_RERANKING', 'true', false, 'rag_strategy', 'Applies cross-encoder reranking to improve search result relevance'); + +INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES +('LOGFIRE_ENABLED', 'true', false, 'monitoring', 'Enable or disable Pydantic Logfire logging and observability platform'), +('PROJECTS_ENABLED', 'true', false, 'features', 'Enable or disable Projects and Tasks functionality'); + +INSERT INTO archon_settings (key, encrypted_value, is_encrypted, category, description) VALUES +('OPENAI_API_KEY', NULL, true, 'api_keys', 'OpenAI API Key for 
embedding model (text-embedding-3-small). Get from: https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key'); + +INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES +('LLM_PROVIDER', 'openai', false, 'rag_strategy', 'LLM provider to use: openai, ollama, or google'), +('LLM_BASE_URL', NULL, false, 'rag_strategy', 'Custom base URL for LLM provider (mainly for Ollama, e.g., http://host.docker.internal:11434/v1)'), +('EMBEDDING_MODEL', 'text-embedding-3-small', false, 'rag_strategy', 'Embedding model for vector search and similarity matching (required for all embedding operations)') +ON CONFLICT (key) DO NOTHING; + +INSERT INTO archon_settings (key, encrypted_value, is_encrypted, category, description) VALUES +('GOOGLE_API_KEY', NULL, true, 'api_keys', 'Google API key for Gemini models. Get from: https://aistudio.google.com/apikey'), +('OPENROUTER_API_KEY', NULL, true, 'api_keys', 'OpenRouter API key for hosted community models. Get from: https://openrouter.ai/keys'), +('ANTHROPIC_API_KEY', NULL, true, 'api_keys', 'Anthropic API key for Claude models. Get from: https://console.anthropic.com/account/keys'), +('GROK_API_KEY', NULL, true, 'api_keys', 'Grok API key for xAI models. 
Get from: https://console.x.ai/') +ON CONFLICT (key) DO NOTHING; + +INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES +('MIN_CODE_BLOCK_LENGTH', '250', false, 'code_extraction', 'Base minimum length for code blocks in characters'), +('MAX_CODE_BLOCK_LENGTH', '5000', false, 'code_extraction', 'Maximum length before stopping code block extension in characters'), +('CONTEXT_WINDOW_SIZE', '1000', false, 'code_extraction', 'Number of characters of context to preserve before and after code blocks'), +('ENABLE_COMPLETE_BLOCK_DETECTION', 'true', false, 'code_extraction', 'Extend code blocks to natural boundaries like closing braces'), +('ENABLE_LANGUAGE_SPECIFIC_PATTERNS', 'true', false, 'code_extraction', 'Use specialized patterns for different programming languages'), +('ENABLE_CONTEXTUAL_LENGTH', 'true', false, 'code_extraction', 'Adjust minimum length based on surrounding context (example, snippet, implementation)'), +('ENABLE_PROSE_FILTERING', 'true', false, 'code_extraction', 'Filter out documentation text mistakenly wrapped in code blocks'), +('MAX_PROSE_RATIO', '0.15', false, 'code_extraction', 'Maximum allowed ratio of prose indicators (0-1) in code blocks'), +('MIN_CODE_INDICATORS', '3', false, 'code_extraction', 'Minimum number of code patterns required (brackets, operators, keywords)'), +('ENABLE_DIAGRAM_FILTERING', 'true', false, 'code_extraction', 'Exclude diagram languages like Mermaid, PlantUML from code extraction'), +('CODE_EXTRACTION_MAX_WORKERS', '3', false, 'code_extraction', 'Number of parallel workers for generating code summaries'), +('ENABLE_CODE_SUMMARIES', 'true', false, 'code_extraction', 'Generate AI-powered summaries and names for extracted code examples') +ON CONFLICT (key) DO NOTHING; + +INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES +('CRAWL_BATCH_SIZE', '50', false, 'rag_strategy', 'Number of URLs to crawl in parallel per batch (10-100)'), +('CRAWL_MAX_CONCURRENT', 
'10', false, 'rag_strategy', 'Maximum concurrent browser sessions for crawling (1-20)'), +('CRAWL_WAIT_STRATEGY', 'domcontentloaded', false, 'rag_strategy', 'When to consider page loaded: domcontentloaded, networkidle, or load'), +('CRAWL_PAGE_TIMEOUT', '30000', false, 'rag_strategy', 'Maximum time to wait for page load in milliseconds'), +('CRAWL_DELAY_BEFORE_HTML', '0.5', false, 'rag_strategy', 'Time to wait for JavaScript rendering in seconds (0.1-5.0)') +ON CONFLICT (key) DO NOTHING; + +INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES +('DOCUMENT_STORAGE_BATCH_SIZE', '100', false, 'rag_strategy', 'Number of document chunks to process per batch (50-200) - increased for better performance'), +('EMBEDDING_BATCH_SIZE', '200', false, 'rag_strategy', 'Number of embeddings to create per API call (100-500) - increased for better throughput'), +('DELETE_BATCH_SIZE', '100', false, 'rag_strategy', 'Number of URLs to delete in one database operation (50-200) - increased for better performance'), +('ENABLE_PARALLEL_BATCHES', 'true', false, 'rag_strategy', 'Enable parallel processing of document batches') +ON CONFLICT (key) DO UPDATE SET + value = EXCLUDED.value, + description = EXCLUDED.description; + +INSERT INTO archon_settings (key, value, is_encrypted, category, description) VALUES +('MEMORY_THRESHOLD_PERCENT', '80', false, 'rag_strategy', 'Memory usage threshold for crawler dispatcher (50-90)'), +('DISPATCHER_CHECK_INTERVAL', '0.5', false, 'rag_strategy', 'How often to check memory usage in seconds (0.1-2.0)'), +('CODE_EXTRACTION_BATCH_SIZE', '40', false, 'rag_strategy', 'Number of code blocks to extract per batch (20-100) - increased for better performance'), +('CODE_SUMMARY_MAX_WORKERS', '3', false, 'rag_strategy', 'Maximum parallel workers for code summarization (1-10)'), +('CONTEXTUAL_EMBEDDING_BATCH_SIZE', '50', false, 'rag_strategy', 'Number of chunks to process in contextual embedding batch API calls (20-100)') +ON CONFLICT 
(key) DO UPDATE SET + value = EXCLUDED.value, + description = EXCLUDED.description; + +COMMENT ON TABLE archon_settings IS 'Stores application configuration including API keys, RAG settings, and code extraction parameters'; + +-- ===================================================== +-- SECTION 4: KNOWLEDGE BASE TABLES +-- ===================================================== + +CREATE TABLE IF NOT EXISTS archon_sources ( + source_id TEXT PRIMARY KEY, + source_url TEXT, + source_display_name TEXT, + summary TEXT, + total_word_count INTEGER DEFAULT 0, + title TEXT, + metadata JSONB DEFAULT '{}', + created_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()) NOT NULL, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()) NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_archon_sources_title ON archon_sources(title); +CREATE INDEX IF NOT EXISTS idx_archon_sources_url ON archon_sources(source_url); +CREATE INDEX IF NOT EXISTS idx_archon_sources_display_name ON archon_sources(source_display_name); +CREATE INDEX IF NOT EXISTS idx_archon_sources_metadata ON archon_sources USING GIN(metadata); +CREATE INDEX IF NOT EXISTS idx_archon_sources_knowledge_type ON archon_sources((metadata->>'knowledge_type')); + +COMMENT ON COLUMN archon_sources.source_id IS 'Unique hash identifier for the source (16-char SHA256 hash of URL)'; +COMMENT ON COLUMN archon_sources.source_url IS 'The original URL that was crawled to create this source'; +COMMENT ON COLUMN archon_sources.source_display_name IS 'Human-readable name for UI display (e.g., "GitHub - microsoft/typescript")'; +COMMENT ON COLUMN archon_sources.title IS 'Descriptive title for the source (e.g., "Pydantic AI API Reference")'; +COMMENT ON COLUMN archon_sources.metadata IS 'JSONB field storing knowledge_type, tags, and other metadata'; + +CREATE TABLE IF NOT EXISTS archon_crawled_pages ( + id BIGSERIAL PRIMARY KEY, + url VARCHAR NOT NULL, + chunk_number INTEGER NOT NULL, + content TEXT NOT NULL, + 
metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + source_id TEXT NOT NULL, + embedding_384 VECTOR(384), + embedding_768 VECTOR(768), + embedding_1024 VECTOR(1024), + embedding_1536 VECTOR(1536), + embedding_3072 VECTOR(3072), + llm_chat_model TEXT, + embedding_model TEXT, + embedding_dimension INTEGER, + content_search_vector tsvector GENERATED ALWAYS AS (to_tsvector('english', content)) STORED, + created_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()) NOT NULL, + UNIQUE(url, chunk_number), + FOREIGN KEY (source_id) REFERENCES archon_sources(source_id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_384 ON archon_crawled_pages USING ivfflat (embedding_384 vector_cosine_ops) WITH (lists = 100); +CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_768 ON archon_crawled_pages USING ivfflat (embedding_768 vector_cosine_ops) WITH (lists = 100); +CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1024 ON archon_crawled_pages USING ivfflat (embedding_1024 vector_cosine_ops) WITH (lists = 100); +CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1536 ON archon_crawled_pages USING ivfflat (embedding_1536 vector_cosine_ops) WITH (lists = 100); +CREATE INDEX idx_archon_crawled_pages_metadata ON archon_crawled_pages USING GIN (metadata); +CREATE INDEX idx_archon_crawled_pages_source_id ON archon_crawled_pages (source_id); +CREATE INDEX idx_archon_crawled_pages_content_search ON archon_crawled_pages USING GIN (content_search_vector); +CREATE INDEX idx_archon_crawled_pages_content_trgm ON archon_crawled_pages USING GIN (content gin_trgm_ops); +CREATE INDEX idx_archon_crawled_pages_embedding_model ON archon_crawled_pages (embedding_model); +CREATE INDEX idx_archon_crawled_pages_embedding_dimension ON archon_crawled_pages (embedding_dimension); +CREATE INDEX idx_archon_crawled_pages_llm_chat_model ON archon_crawled_pages (llm_chat_model); + +CREATE TABLE IF NOT EXISTS archon_code_examples ( + id 
BIGSERIAL PRIMARY KEY, + url VARCHAR NOT NULL, + chunk_number INTEGER NOT NULL, + content TEXT NOT NULL, + summary TEXT NOT NULL, + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + source_id TEXT NOT NULL, + embedding_384 VECTOR(384), + embedding_768 VECTOR(768), + embedding_1024 VECTOR(1024), + embedding_1536 VECTOR(1536), + embedding_3072 VECTOR(3072), + llm_chat_model TEXT, + embedding_model TEXT, + embedding_dimension INTEGER, + content_search_vector tsvector GENERATED ALWAYS AS (to_tsvector('english', content || ' ' || COALESCE(summary, ''))) STORED, + created_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()) NOT NULL, + UNIQUE(url, chunk_number), + FOREIGN KEY (source_id) REFERENCES archon_sources(source_id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS archon_page_metadata ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + source_id TEXT NOT NULL, + url TEXT NOT NULL, + full_content TEXT NOT NULL, + section_title TEXT, + section_order INT DEFAULT 0, + word_count INT NOT NULL, + char_count INT NOT NULL, + chunk_count INT NOT NULL DEFAULT 0, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + metadata JSONB DEFAULT '{}'::jsonb, + CONSTRAINT archon_page_metadata_url_unique UNIQUE(url), + CONSTRAINT archon_page_metadata_source_fk FOREIGN KEY (source_id) + REFERENCES archon_sources(source_id) ON DELETE CASCADE +); + +ALTER TABLE archon_crawled_pages +ADD COLUMN IF NOT EXISTS page_id UUID REFERENCES archon_page_metadata(id) ON DELETE SET NULL; + +CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_source_id ON archon_page_metadata(source_id); +CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_url ON archon_page_metadata(url); +CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_section ON archon_page_metadata(source_id, section_title, section_order); +CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_created_at ON archon_page_metadata(created_at); +CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_metadata 
ON archon_page_metadata USING GIN(metadata); +CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_page_id ON archon_crawled_pages(page_id); + +COMMENT ON TABLE archon_page_metadata IS 'Stores complete documentation pages for agent retrieval'; +COMMENT ON COLUMN archon_page_metadata.source_id IS 'References the source this page belongs to'; +COMMENT ON COLUMN archon_page_metadata.url IS 'Unique URL of the page (synthetic for llms-full.txt sections with #anchor)'; +COMMENT ON COLUMN archon_page_metadata.full_content IS 'Complete markdown/text content of the page'; +COMMENT ON COLUMN archon_page_metadata.section_title IS 'H1 section title for llms-full.txt pages'; +COMMENT ON COLUMN archon_page_metadata.section_order IS 'Order of section in llms-full.txt file (0-based)'; +COMMENT ON COLUMN archon_page_metadata.word_count IS 'Number of words in full_content'; +COMMENT ON COLUMN archon_page_metadata.char_count IS 'Number of characters in full_content'; +COMMENT ON COLUMN archon_page_metadata.chunk_count IS 'Number of chunks created from this page'; +COMMENT ON COLUMN archon_page_metadata.metadata IS 'Flexible JSON metadata (page_type, knowledge_type, tags, etc)'; +COMMENT ON COLUMN archon_crawled_pages.page_id IS 'Foreign key linking chunk to parent page'; + +ALTER TABLE archon_page_metadata ENABLE ROW LEVEL SECURITY; +DROP POLICY IF EXISTS "Allow public full access to archon_page_metadata" ON archon_page_metadata; +CREATE POLICY "Allow public full access to archon_page_metadata" ON archon_page_metadata + FOR ALL USING (true); + +CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_384 ON archon_code_examples USING ivfflat (embedding_384 vector_cosine_ops) WITH (lists = 100); +CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_768 ON archon_code_examples USING ivfflat (embedding_768 vector_cosine_ops) WITH (lists = 100); +CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1024 ON archon_code_examples USING ivfflat (embedding_1024 
vector_cosine_ops) WITH (lists = 100); +CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1536 ON archon_code_examples USING ivfflat (embedding_1536 vector_cosine_ops) WITH (lists = 100); +CREATE INDEX idx_archon_code_examples_metadata ON archon_code_examples USING GIN (metadata); +CREATE INDEX idx_archon_code_examples_source_id ON archon_code_examples (source_id); +CREATE INDEX idx_archon_code_examples_content_search ON archon_code_examples USING GIN (content_search_vector); +CREATE INDEX idx_archon_code_examples_content_trgm ON archon_code_examples USING GIN (content gin_trgm_ops); +CREATE INDEX idx_archon_code_examples_summary_trgm ON archon_code_examples USING GIN (summary gin_trgm_ops); +CREATE INDEX idx_archon_code_examples_embedding_model ON archon_code_examples (embedding_model); +CREATE INDEX idx_archon_code_examples_embedding_dimension ON archon_code_examples (embedding_dimension); +CREATE INDEX idx_archon_code_examples_llm_chat_model ON archon_code_examples (llm_chat_model); + +-- ===================================================== +-- SECTION 4.5: MULTI-DIMENSIONAL EMBEDDING HELPER FUNCTIONS +-- ===================================================== + +CREATE OR REPLACE FUNCTION detect_embedding_dimension(embedding_vector vector) +RETURNS INTEGER AS $$ +BEGIN + RETURN vector_dims(embedding_vector); +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION get_embedding_column_name(dimension INTEGER) +RETURNS TEXT AS $$ +BEGIN + CASE dimension + WHEN 384 THEN RETURN 'embedding_384'; + WHEN 768 THEN RETURN 'embedding_768'; + WHEN 1024 THEN RETURN 'embedding_1024'; + WHEN 1536 THEN RETURN 'embedding_1536'; + WHEN 3072 THEN RETURN 'embedding_3072'; + ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %. 
Supported dimensions are: 384, 768, 1024, 1536, 3072', dimension; + END CASE; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +-- ===================================================== +-- SECTION 5: SEARCH FUNCTIONS +-- ===================================================== + +CREATE OR REPLACE FUNCTION match_archon_crawled_pages_multi ( + query_embedding VECTOR, + embedding_dimension INTEGER, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT +) +LANGUAGE plpgsql +AS $$ +#variable_conflict use_column +DECLARE + sql_query TEXT; + embedding_column TEXT; +BEGIN + CASE embedding_dimension + WHEN 384 THEN embedding_column := 'embedding_384'; + WHEN 768 THEN embedding_column := 'embedding_768'; + WHEN 1024 THEN embedding_column := 'embedding_1024'; + WHEN 1536 THEN embedding_column := 'embedding_1536'; + WHEN 3072 THEN embedding_column := 'embedding_3072'; + ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension; + END CASE; + + sql_query := format(' + SELECT id, url, chunk_number, content, metadata, source_id, + 1 - (%I <=> $1) AS similarity + FROM archon_crawled_pages + WHERE (%I IS NOT NULL) + AND metadata @> $3 + AND ($4 IS NULL OR source_id = $4) + ORDER BY %I <=> $1 + LIMIT $2', + embedding_column, embedding_column, embedding_column); + + RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter; +END; +$$; + +CREATE OR REPLACE FUNCTION match_archon_crawled_pages ( + query_embedding VECTOR(1536), + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT +) +LANGUAGE plpgsql +AS $$ +BEGIN + RETURN QUERY SELECT * FROM 
match_archon_crawled_pages_multi(query_embedding, 1536, match_count, filter, source_filter); +END; +$$; + +CREATE OR REPLACE FUNCTION match_archon_code_examples_multi ( + query_embedding VECTOR, + embedding_dimension INTEGER, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + summary TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT +) +LANGUAGE plpgsql +AS $$ +#variable_conflict use_column +DECLARE + sql_query TEXT; + embedding_column TEXT; +BEGIN + CASE embedding_dimension + WHEN 384 THEN embedding_column := 'embedding_384'; + WHEN 768 THEN embedding_column := 'embedding_768'; + WHEN 1024 THEN embedding_column := 'embedding_1024'; + WHEN 1536 THEN embedding_column := 'embedding_1536'; + WHEN 3072 THEN embedding_column := 'embedding_3072'; + ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension; + END CASE; + + sql_query := format(' + SELECT id, url, chunk_number, content, summary, metadata, source_id, + 1 - (%I <=> $1) AS similarity + FROM archon_code_examples + WHERE (%I IS NOT NULL) + AND metadata @> $3 + AND ($4 IS NULL OR source_id = $4) + ORDER BY %I <=> $1 + LIMIT $2', + embedding_column, embedding_column, embedding_column); + + RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter; +END; +$$; + +CREATE OR REPLACE FUNCTION match_archon_code_examples ( + query_embedding VECTOR(1536), + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + summary TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT +) +LANGUAGE plpgsql +AS $$ +BEGIN + RETURN QUERY SELECT * FROM match_archon_code_examples_multi(query_embedding, 1536, match_count, filter, source_filter); +END; +$$; + +-- 
===================================================== +-- SECTION 5B: HYBRID SEARCH FUNCTIONS WITH TS_VECTOR +-- ===================================================== + +CREATE OR REPLACE FUNCTION hybrid_search_archon_crawled_pages_multi( + query_embedding VECTOR, + embedding_dimension INTEGER, + query_text TEXT, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) +RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT, + match_type TEXT +) +LANGUAGE plpgsql +AS $$ +#variable_conflict use_column +DECLARE + max_vector_results INT; + max_text_results INT; + sql_query TEXT; + embedding_column TEXT; +BEGIN + CASE embedding_dimension + WHEN 384 THEN embedding_column := 'embedding_384'; + WHEN 768 THEN embedding_column := 'embedding_768'; + WHEN 1024 THEN embedding_column := 'embedding_1024'; + WHEN 1536 THEN embedding_column := 'embedding_1536'; + WHEN 3072 THEN embedding_column := 'embedding_3072'; + ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension; + END CASE; + + max_vector_results := match_count; + max_text_results := match_count; + + sql_query := format(' + WITH vector_results AS ( + SELECT + cp.id, + cp.url, + cp.chunk_number, + cp.content, + cp.metadata, + cp.source_id, + 1 - (cp.%I <=> $1) AS vector_sim + FROM archon_crawled_pages cp + WHERE cp.metadata @> $4 + AND ($5 IS NULL OR cp.source_id = $5) + AND cp.%I IS NOT NULL + ORDER BY cp.%I <=> $1 + LIMIT $2 + ), + text_results AS ( + SELECT + cp.id, + cp.url, + cp.chunk_number, + cp.content, + cp.metadata, + cp.source_id, + ts_rank_cd(cp.content_search_vector, plainto_tsquery(''english'', $6)) AS text_sim + FROM archon_crawled_pages cp + WHERE cp.metadata @> $4 + AND ($5 IS NULL OR cp.source_id = $5) + AND cp.content_search_vector @@ plainto_tsquery(''english'', $6) + ORDER BY text_sim DESC + LIMIT $3 + ), + combined_results AS ( + SELECT + 
COALESCE(v.id, t.id) AS id, + COALESCE(v.url, t.url) AS url, + COALESCE(v.chunk_number, t.chunk_number) AS chunk_number, + COALESCE(v.content, t.content) AS content, + COALESCE(v.metadata, t.metadata) AS metadata, + COALESCE(v.source_id, t.source_id) AS source_id, + COALESCE(v.vector_sim, t.text_sim, 0)::float8 AS similarity, + CASE + WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN ''hybrid'' + WHEN v.id IS NOT NULL THEN ''vector'' + ELSE ''keyword'' + END AS match_type + FROM vector_results v + FULL OUTER JOIN text_results t ON v.id = t.id + ) + SELECT * FROM combined_results + ORDER BY similarity DESC + LIMIT $2', + embedding_column, embedding_column, embedding_column); + + RETURN QUERY EXECUTE sql_query USING query_embedding, max_vector_results, max_text_results, filter, source_filter, query_text; +END; +$$; + +CREATE OR REPLACE FUNCTION hybrid_search_archon_crawled_pages( + query_embedding vector(1536), + query_text TEXT, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) +RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT, + match_type TEXT +) +LANGUAGE plpgsql +AS $$ +BEGIN + RETURN QUERY SELECT * FROM hybrid_search_archon_crawled_pages_multi(query_embedding, 1536, query_text, match_count, filter, source_filter); +END; +$$; + +CREATE OR REPLACE FUNCTION hybrid_search_archon_code_examples_multi( + query_embedding VECTOR, + embedding_dimension INTEGER, + query_text TEXT, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) +RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + summary TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT, + match_type TEXT +) +LANGUAGE plpgsql +AS $$ +#variable_conflict use_column +DECLARE + max_vector_results INT; + max_text_results INT; + sql_query TEXT; + embedding_column TEXT; +BEGIN + CASE 
embedding_dimension + WHEN 384 THEN embedding_column := 'embedding_384'; + WHEN 768 THEN embedding_column := 'embedding_768'; + WHEN 1024 THEN embedding_column := 'embedding_1024'; + WHEN 1536 THEN embedding_column := 'embedding_1536'; + WHEN 3072 THEN embedding_column := 'embedding_3072'; + ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension; + END CASE; + + max_vector_results := match_count; + max_text_results := match_count; + + sql_query := format(' + WITH vector_results AS ( + SELECT + ce.id, + ce.url, + ce.chunk_number, + ce.content, + ce.summary, + ce.metadata, + ce.source_id, + 1 - (ce.%I <=> $1) AS vector_sim + FROM archon_code_examples ce + WHERE ce.metadata @> $4 + AND ($5 IS NULL OR ce.source_id = $5) + AND ce.%I IS NOT NULL + ORDER BY ce.%I <=> $1 + LIMIT $2 + ), + text_results AS ( + SELECT + ce.id, + ce.url, + ce.chunk_number, + ce.content, + ce.summary, + ce.metadata, + ce.source_id, + ts_rank_cd(ce.content_search_vector, plainto_tsquery(''english'', $6)) AS text_sim + FROM archon_code_examples ce + WHERE ce.metadata @> $4 + AND ($5 IS NULL OR ce.source_id = $5) + AND ce.content_search_vector @@ plainto_tsquery(''english'', $6) + ORDER BY text_sim DESC + LIMIT $3 + ), + combined_results AS ( + SELECT + COALESCE(v.id, t.id) AS id, + COALESCE(v.url, t.url) AS url, + COALESCE(v.chunk_number, t.chunk_number) AS chunk_number, + COALESCE(v.content, t.content) AS content, + COALESCE(v.summary, t.summary) AS summary, + COALESCE(v.metadata, t.metadata) AS metadata, + COALESCE(v.source_id, t.source_id) AS source_id, + COALESCE(v.vector_sim, t.text_sim, 0)::float8 AS similarity, + CASE + WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN ''hybrid'' + WHEN v.id IS NOT NULL THEN ''vector'' + ELSE ''keyword'' + END AS match_type + FROM vector_results v + FULL OUTER JOIN text_results t ON v.id = t.id + ) + SELECT * FROM combined_results + ORDER BY similarity DESC + LIMIT $2', + embedding_column, embedding_column, embedding_column); + + 
RETURN QUERY EXECUTE sql_query USING query_embedding, max_vector_results, max_text_results, filter, source_filter, query_text; +END; +$$; + +CREATE OR REPLACE FUNCTION hybrid_search_archon_code_examples( + query_embedding vector(1536), + query_text TEXT, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) +RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + summary TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT, + match_type TEXT +) +LANGUAGE plpgsql +AS $$ +BEGIN + RETURN QUERY SELECT * FROM hybrid_search_archon_code_examples_multi(query_embedding, 1536, query_text, match_count, filter, source_filter); +END; +$$; + +COMMENT ON FUNCTION hybrid_search_archon_crawled_pages_multi IS 'Multi-dimensional hybrid search combining vector similarity and full-text search with configurable embedding dimensions'; +COMMENT ON FUNCTION hybrid_search_archon_crawled_pages IS 'Legacy hybrid search function for backward compatibility (uses 1536D embeddings)'; +COMMENT ON FUNCTION hybrid_search_archon_code_examples_multi IS 'Multi-dimensional hybrid search on code examples with configurable embedding dimensions'; +COMMENT ON FUNCTION hybrid_search_archon_code_examples IS 'Legacy hybrid search function for code examples (uses 1536D embeddings)'; + +-- ===================================================== +-- SECTION 6: RLS POLICIES FOR KNOWLEDGE BASE (LOCAL MODE) +-- ===================================================== + +ALTER TABLE archon_crawled_pages ENABLE ROW LEVEL SECURITY; +ALTER TABLE archon_sources ENABLE ROW LEVEL SECURITY; +ALTER TABLE archon_code_examples ENABLE ROW LEVEL SECURITY; + +DROP POLICY IF EXISTS "Allow public full access to archon_crawled_pages" ON archon_crawled_pages; +DROP POLICY IF EXISTS "Allow public full access to archon_sources" ON archon_sources; +DROP POLICY IF EXISTS "Allow public full access to archon_code_examples" ON archon_code_examples; + +CREATE 
POLICY "Allow public full access to archon_crawled_pages" ON archon_crawled_pages + FOR ALL USING (true); + +CREATE POLICY "Allow public full access to archon_sources" ON archon_sources + FOR ALL USING (true); + +CREATE POLICY "Allow public full access to archon_code_examples" ON archon_code_examples + FOR ALL USING (true); + +-- ===================================================== +-- SECTION 7: PROJECTS AND TASKS MODULE +-- ===================================================== + +DO $$ BEGIN + CREATE TYPE task_status AS ENUM ('todo','doing','review','done'); +EXCEPTION + WHEN duplicate_object THEN null; +END $$; + +DO $$ BEGIN + CREATE TYPE task_priority AS ENUM ('low', 'medium', 'high', 'critical'); +EXCEPTION + WHEN duplicate_object THEN null; +END $$; + +CREATE TABLE IF NOT EXISTS archon_projects ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + title TEXT NOT NULL, + description TEXT DEFAULT '', + docs JSONB DEFAULT '[]'::jsonb, + features JSONB DEFAULT '[]'::jsonb, + data JSONB DEFAULT '[]'::jsonb, + github_repo TEXT, + pinned BOOLEAN DEFAULT false, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE TABLE IF NOT EXISTS archon_tasks ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + project_id UUID REFERENCES archon_projects(id) ON DELETE CASCADE, + parent_task_id UUID REFERENCES archon_tasks(id) ON DELETE CASCADE, + title TEXT NOT NULL, + description TEXT DEFAULT '', + status task_status DEFAULT 'todo', + assignee TEXT DEFAULT 'User' CHECK (assignee IS NOT NULL AND assignee != ''), + task_order INTEGER DEFAULT 0, + priority task_priority DEFAULT 'medium' NOT NULL, + feature TEXT, + sources JSONB DEFAULT '[]'::jsonb, + code_examples JSONB DEFAULT '[]'::jsonb, + archived BOOLEAN DEFAULT false, + archived_at TIMESTAMPTZ NULL, + archived_by TEXT NULL, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE TABLE IF NOT EXISTS archon_project_sources ( + id UUID PRIMARY KEY 
DEFAULT gen_random_uuid(), + project_id UUID REFERENCES archon_projects(id) ON DELETE CASCADE, + source_id TEXT NOT NULL, + linked_at TIMESTAMPTZ DEFAULT NOW(), + created_by TEXT DEFAULT 'system', + notes TEXT, + UNIQUE(project_id, source_id) +); + +CREATE TABLE IF NOT EXISTS archon_document_versions ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + project_id UUID REFERENCES archon_projects(id) ON DELETE CASCADE, + task_id UUID REFERENCES archon_tasks(id) ON DELETE CASCADE, + field_name TEXT NOT NULL, + version_number INTEGER NOT NULL, + content JSONB NOT NULL, + change_summary TEXT, + change_type TEXT DEFAULT 'update', + document_id TEXT, + created_by TEXT DEFAULT 'system', + created_at TIMESTAMPTZ DEFAULT NOW(), + CONSTRAINT chk_project_or_task CHECK ( + (project_id IS NOT NULL AND task_id IS NULL) OR + (project_id IS NULL AND task_id IS NOT NULL) + ), + UNIQUE(project_id, task_id, field_name, version_number) +); + +CREATE INDEX IF NOT EXISTS idx_archon_tasks_project_id ON archon_tasks(project_id); +CREATE INDEX IF NOT EXISTS idx_archon_tasks_status ON archon_tasks(status); +CREATE INDEX IF NOT EXISTS idx_archon_tasks_assignee ON archon_tasks(assignee); +CREATE INDEX IF NOT EXISTS idx_archon_tasks_order ON archon_tasks(task_order); +CREATE INDEX IF NOT EXISTS idx_archon_tasks_priority ON archon_tasks(priority); +CREATE INDEX IF NOT EXISTS idx_archon_tasks_archived ON archon_tasks(archived); +CREATE INDEX IF NOT EXISTS idx_archon_tasks_archived_at ON archon_tasks(archived_at); +CREATE INDEX IF NOT EXISTS idx_archon_project_sources_project_id ON archon_project_sources(project_id); +CREATE INDEX IF NOT EXISTS idx_archon_project_sources_source_id ON archon_project_sources(source_id); +CREATE INDEX IF NOT EXISTS idx_archon_document_versions_project_id ON archon_document_versions(project_id); +CREATE INDEX IF NOT EXISTS idx_archon_document_versions_task_id ON archon_document_versions(task_id); +CREATE INDEX IF NOT EXISTS idx_archon_document_versions_field_name ON 
archon_document_versions(field_name); +CREATE INDEX IF NOT EXISTS idx_archon_document_versions_version_number ON archon_document_versions(version_number); +CREATE INDEX IF NOT EXISTS idx_archon_document_versions_created_at ON archon_document_versions(created_at); + +CREATE OR REPLACE TRIGGER update_archon_projects_updated_at + BEFORE UPDATE ON archon_projects + FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); + +CREATE OR REPLACE TRIGGER update_archon_tasks_updated_at + BEFORE UPDATE ON archon_tasks + FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); + +CREATE OR REPLACE FUNCTION archive_task( + task_id_param UUID, + archived_by_param TEXT DEFAULT 'system' +) +RETURNS BOOLEAN AS $$ +DECLARE + task_exists BOOLEAN; +BEGIN + SELECT EXISTS( + SELECT 1 FROM archon_tasks + WHERE id = task_id_param AND archived = FALSE + ) INTO task_exists; + + IF NOT task_exists THEN + RETURN FALSE; + END IF; + + UPDATE archon_tasks + SET + archived = TRUE, + archived_at = NOW(), + archived_by = archived_by_param, + updated_at = NOW() + WHERE id = task_id_param; + + UPDATE archon_tasks + SET + archived = TRUE, + archived_at = NOW(), + archived_by = archived_by_param, + updated_at = NOW() + WHERE parent_task_id = task_id_param AND archived = FALSE; + + RETURN TRUE; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON COLUMN archon_tasks.assignee IS 'The agent or user assigned to this task. 
-- SECTION 7.5: MIGRATION TRACKING
maintain schema version consistency'; +COMMENT ON COLUMN archon_migrations.version IS 'Archon version that introduced this migration'; +COMMENT ON COLUMN archon_migrations.migration_name IS 'Filename of the migration SQL file'; +COMMENT ON COLUMN archon_migrations.applied_at IS 'Timestamp when migration was applied'; +COMMENT ON COLUMN archon_migrations.checksum IS 'Optional MD5 checksum of migration file content'; + +INSERT INTO archon_migrations (version, migration_name) +VALUES + ('0.1.0', '001_add_source_url_display_name'), + ('0.1.0', '002_add_hybrid_search_tsvector'), + ('0.1.0', '003_ollama_add_columns'), + ('0.1.0', '004_ollama_migrate_data'), + ('0.1.0', '005_ollama_create_functions'), + ('0.1.0', '006_ollama_create_indexes_optional'), + ('0.1.0', '007_add_priority_column_to_tasks'), + ('0.1.0', '008_add_migration_tracking'), + ('0.1.0', '009_add_cascade_delete_constraints'), + ('0.1.0', '010_add_provider_placeholders'), + ('0.1.0', '011_add_page_metadata_table') +ON CONFLICT (version, migration_name) DO NOTHING; + +ALTER TABLE archon_migrations ENABLE ROW LEVEL SECURITY; + +DROP POLICY IF EXISTS "Allow public full access to archon_migrations" ON archon_migrations; +CREATE POLICY "Allow public full access to archon_migrations" ON archon_migrations + FOR ALL USING (true); + +-- ===================================================== +-- SECTION 8: PROMPTS TABLE +-- ===================================================== + +CREATE TABLE IF NOT EXISTS archon_prompts ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + prompt_name TEXT UNIQUE NOT NULL, + prompt TEXT NOT NULL, + description TEXT, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_archon_prompts_name ON archon_prompts(prompt_name); + +CREATE OR REPLACE TRIGGER update_archon_prompts_updated_at + BEFORE UPDATE ON archon_prompts + FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); + +-- 
===================================================== +-- SECTION 9: RLS POLICIES FOR PROJECTS MODULE (LOCAL MODE) +-- ===================================================== + +ALTER TABLE archon_projects ENABLE ROW LEVEL SECURITY; +ALTER TABLE archon_tasks ENABLE ROW LEVEL SECURITY; +ALTER TABLE archon_project_sources ENABLE ROW LEVEL SECURITY; +ALTER TABLE archon_document_versions ENABLE ROW LEVEL SECURITY; +ALTER TABLE archon_prompts ENABLE ROW LEVEL SECURITY; + +DROP POLICY IF EXISTS "Allow public full access to archon_projects" ON archon_projects; +DROP POLICY IF EXISTS "Allow public full access to archon_tasks" ON archon_tasks; +DROP POLICY IF EXISTS "Allow public full access to archon_project_sources" ON archon_project_sources; +DROP POLICY IF EXISTS "Allow public full access to archon_document_versions" ON archon_document_versions; +DROP POLICY IF EXISTS "Allow public full access to archon_prompts" ON archon_prompts; + +CREATE POLICY "Allow public full access to archon_projects" ON archon_projects + FOR ALL USING (true); + +CREATE POLICY "Allow public full access to archon_tasks" ON archon_tasks + FOR ALL USING (true); + +CREATE POLICY "Allow public full access to archon_project_sources" ON archon_project_sources + FOR ALL USING (true); + +CREATE POLICY "Allow public full access to archon_document_versions" ON archon_document_versions + FOR ALL USING (true); + +CREATE POLICY "Allow public full access to archon_prompts" ON archon_prompts + FOR ALL USING (true); + +-- ===================================================== +-- SECTION 10: DEFAULT PROMPTS DATA +-- ===================================================== + +INSERT INTO archon_prompts (prompt_name, prompt, description) VALUES +('document_builder', 'SYSTEM PROMPT – Document-Builder Agent + +⸻ + +1. Mission + +You are the Document-Builder Agent. 
Your sole purpose is to transform a user''s natural-language description of work (a project, feature, or refactor) into a structured JSON record stored in the docs table. Produce documentation that is concise yet thorough—clear enough for an engineer to act after a single read-through. + +⸻ + +2. Workflow + 1. Classify requests → Decide which document type fits best: + • PRD – net-new product or major initiative. + • FEATURE_SPEC – incremental feature expressed in user-story form. + • REFACTOR_PLAN – internal code quality improvement. + 2. Clarify (if needed) → If the description is ambiguous, ask exactly one clarifying question, then continue. + 3. Generate JSON → Build an object that follows the schema below and insert (or return) it for the docs table. + +⸻ + +3. docs JSON Schema + +{ + "id": "uuid|string", // generate using uuid + "doc_type": "PRD | FEATURE_SPEC | REFACTOR_PLAN", + "title": "string", // short, descriptive + "author": "string", // requestor name + "body": { /* see templates below */ }, + "created_at": "ISO-8601", + "updated_at": "ISO-8601" +} + +⸻ + +4. 
Section Templates + +PRD → body must include + • Background_and_Context + • Problem_Statement + • Goals_and_Success_Metrics + • Non_Goals + • Assumptions + • Stakeholders + • User_Personas + • Functional_Requirements // bullet list or user stories + • Technical_Requirements // tech stack, APIs, data + • UX/UI_and_Style_Guidelines + • Architecture_Overview // diagram link or text + • Milestones_and_Timeline + • Risks_and_Mitigations + • Open_Questions + +FEATURE_SPEC → body must include + • Epic + • User_Stories // list of { id, as_a, i_want, so_that } + • Acceptance_Criteria // Given / When / Then + • Edge_Cases + • Dependencies + • Technical_Notes + • Design_References + • Metrics + • Risks + +REFACTOR_PLAN → body must include + • Current_State_Summary + • Refactor_Goals + • Design_Principles_and_Best_Practices + • Proposed_Approach // step-by-step plan + • Impacted_Areas + • Test_Strategy + • Roll_Back_and_Recovery + • Timeline + • Risks + +⸻ + +5. Writing Guidelines + • Brevity with substance: no fluff, no filler, no passive voice. + • Markdown inside strings: use headings, lists, and code fences for clarity. + • Consistent conventions: ISO dates, 24-hour times, SI units. + • Insert "TBD" where information is genuinely unknown. + • Produce valid JSON only—no comments or trailing commas. + +⸻ + +6. Example Output (truncated) + +{ + "id": "01HQ2VPZ62KSF185Y54MQ93VD2", + "doc_type": "PRD", + "title": "Real-time Collaboration for Docs", + "author": "Sean", + "body": { + "Background_and_Context": "Customers need to co-edit documents ...", + "Problem_Statement": "Current single-editor flow slows teams ...", + "Goals_and_Success_Metrics": "Reduce hand-off time by 50% ..." + /* remaining sections */ + }, + "created_at": "2025-06-17T00:10:00-04:00", + "updated_at": "2025-06-17T00:10:00-04:00" +} + +⸻ + +Remember: Your output is the JSON itself—no explanatory prose before or after. 
Stay sharp, write once, write right.', 'System prompt for DocumentAgent to create structured documentation following the Document-Builder pattern'), + +('feature_builder', 'SYSTEM PROMPT – Feature-Builder Agent + +⸻ + +1. Mission + +You are the Feature-Builder Agent. Your purpose is to transform user descriptions of features into structured feature plans stored in the features array. Create feature documentation that developers can implement directly. + +⸻ + +2. Feature JSON Schema + +{ + "id": "uuid|string", // generate using uuid + "feature_type": "feature_plan", // always "feature_plan" + "name": "string", // short feature name + "title": "string", // descriptive title + "content": { + "feature_overview": { + "name": "string", + "description": "string", + "priority": "high|medium|low", + "estimated_effort": "string" + }, + "user_stories": ["string"], // list of user stories + "react_flow_diagram": { // optional visual flow + "nodes": [...], + "edges": [...], + "viewport": {...} + }, + "acceptance_criteria": ["string"], // testable criteria + "technical_notes": { + "frontend_components": ["string"], + "backend_endpoints": ["string"], + "database_changes": "string" + } + }, + "created_by": "string" // author +} + +⸻ + +3. Writing Guidelines + • Focus on implementation clarity + • Include specific technical details + • Define clear acceptance criteria + • Consider edge cases + • Keep descriptions actionable + +⸻ + +Remember: Create structured, implementable feature plans.', 'System prompt for creating feature plans in the features array'), + +('data_builder', 'SYSTEM PROMPT – Data-Builder Agent + +⸻ + +1. Mission + +You are the Data-Builder Agent. Your purpose is to transform descriptions of data models into structured ERDs and schemas stored in the data array. Create clear data models that can guide database implementation. + +⸻ + +2. 
Data JSON Schema + +{ + "id": "uuid|string", // generate using uuid + "data_type": "erd", // always "erd" for now + "name": "string", // system name + "title": "string", // descriptive title + "content": { + "entities": [...], // entity definitions + "relationships": [...], // entity relationships + "sql_schema": "string", // Generated SQL + "mermaid_diagram": "string", // Optional diagram + "notes": { + "indexes": ["string"], + "constraints": ["string"], + "diagram_tool": "string", + "normalization_level": "string", + "scalability_notes": "string" + } + }, + "created_by": "string" // author +} + +⸻ + +3. Writing Guidelines + • Follow database normalization principles + • Include proper indexes and constraints + • Consider scalability from the start + • Provide clear relationship definitions + • Generate valid, executable SQL + +⸻ + +Remember: Create production-ready data models.', 'System prompt for creating data models in the data array'); + +-- ===================================================== +-- SETUP COMPLETE (LOCAL DATABASE MODE) +-- ===================================================== +-- Your local Archon database is now fully configured! +-- +-- All data stays on your machine. No cloud dependencies. 
+-- ===================================================== diff --git a/local-db/nginx.conf b/local-db/nginx.conf new file mode 100644 index 0000000000..53af3fcbd3 --- /dev/null +++ b/local-db/nginx.conf @@ -0,0 +1,58 @@ +events { + worker_connections 1024; +} + +http { + upstream postgrest { + server archon-postgrest:3000; + keepalive 64; + } + + server { + listen 80; + server_name _; + + # Supabase-compatible REST API path + # supabase-py sends requests to /rest/v1/table_name + # PostgREST serves them at /table_name + location /rest/v1/ { + # Strip /rest/v1/ prefix + rewrite ^/rest/v1/(.*) /$1 break; + + proxy_pass http://postgrest; + proxy_http_version 1.1; + proxy_set_header Connection ""; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # PostgREST expects these headers + proxy_set_header Accept "application/json"; + proxy_set_header Content-Type "application/json"; + proxy_set_header Prefer "return=representation"; + } + + # Direct RPC function calls + # supabase-py sends RPC calls to /rest/v1/rpc/function_name + location /rest/v1/rpc/ { + rewrite ^/rest/v1/rpc/(.*) /rpc/$1 break; + + proxy_pass http://postgrest; + proxy_http_version 1.1; + proxy_set_header Connection ""; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Accept "application/json"; + proxy_set_header Content-Type "application/json"; + } + + # Health check endpoint + location /health { + return 200 '{"status": "ok"}'; + add_header Content-Type application/json; + } + } +} diff --git a/python/src/server/config/config.py b/python/src/server/config/config.py index aeec4bec9d..d6249477a1 100644 --- a/python/src/server/config/config.py +++ b/python/src/server/config/config.py @@ -68,14 +68,14 @@ def 
validate_supabase_key(supabase_key: str) -> tuple[bool, str]: # Also skip all other validations (aud, exp, etc) since we only care about the role decoded = jwt.decode( supabase_key, - '', + "", options={ "verify_signature": False, "verify_aud": False, "verify_exp": False, "verify_nbf": False, - "verify_iat": False - } + "verify_iat": False, + }, ) role = decoded.get("role") @@ -140,47 +140,60 @@ def load_environment_config() -> EnvironmentConfig: # OpenAI API key is optional at startup - can be set via API openai_api_key = os.getenv("OPENAI_API_KEY") + # Check if running in local database mode + local_db = os.getenv("LOCAL_DB", "false").lower() == "true" + # Required environment variables for database access supabase_url = os.getenv("SUPABASE_URL") if not supabase_url: - raise ConfigurationError("SUPABASE_URL environment variable is required") + if local_db: + # Auto-configure for local PostgREST + local_rest_port = os.getenv("LOCAL_REST_PORT", "3002") + supabase_url = f"http://archon-postgrest-proxy:{local_rest_port}" + else: + raise ConfigurationError("SUPABASE_URL environment variable is required (or set LOCAL_DB=true)") supabase_service_key = os.getenv("SUPABASE_SERVICE_KEY") if not supabase_service_key: - raise ConfigurationError("SUPABASE_SERVICE_KEY environment variable is required") + if local_db: + # Use a placeholder key for local mode - no JWT validation needed + supabase_service_key = "local-db-key" + else: + raise ConfigurationError("SUPABASE_SERVICE_KEY environment variable is required (or set LOCAL_DB=true)") # Validate required fields if openai_api_key: validate_openai_api_key(openai_api_key) validate_supabase_url(supabase_url) - # Validate Supabase key type - is_valid_key, key_message = validate_supabase_key(supabase_service_key) - if not is_valid_key: - if key_message == "ANON_KEY_DETECTED": - raise ConfigurationError( - "CRITICAL: You are using a Supabase ANON key instead of a SERVICE key.\n\n" - "The ANON key is a public key with read-only 
permissions that cannot write to the database.\n" - "This will cause all database operations to fail with 'permission denied' errors.\n\n" - "To fix this:\n" - "1. Go to your Supabase project dashboard\n" - "2. Navigate to Settings > API keys\n" - "3. Find the 'service_role' key (NOT the 'anon' key)\n" - "4. Update your SUPABASE_SERVICE_KEY environment variable\n\n" - "Key characteristics:\n" - "- ANON key: Starts with 'eyJ...' and has role='anon' (public, read-only)\n" - "- SERVICE key: Starts with 'eyJ...' and has role='service_role' (private, full access)\n\n" - "Current key role detected: anon" - ) - elif key_message.startswith("UNKNOWN_KEY_TYPE:"): - role = key_message.split(":", 1)[1] - raise ConfigurationError( - f"CRITICAL: Unknown Supabase key role '{role}'.\n\n" - f"Expected 'service_role' but found '{role}'.\n" - f"This key type is not supported and will likely cause failures.\n\n" - f"Please use a valid service_role key from your Supabase dashboard." - ) - # For UNABLE_TO_VALIDATE, we continue silently + # Skip Supabase key validation in local database mode + if not local_db: + is_valid_key, key_message = validate_supabase_key(supabase_service_key) + if not is_valid_key: + if key_message == "ANON_KEY_DETECTED": + raise ConfigurationError( + "CRITICAL: You are using a Supabase ANON key instead of a SERVICE key.\n\n" + "The ANON key is a public key with read-only permissions that cannot write to the database.\n" + "This will cause all database operations to fail with 'permission denied' errors.\n\n" + "To fix this:\n" + "1. Go to your Supabase project dashboard\n" + "2. Navigate to Settings > API keys\n" + "3. Find the 'service_role' key (NOT the 'anon' key)\n" + "4. Update your SUPABASE_SERVICE_KEY environment variable\n\n" + "Key characteristics:\n" + "- ANON key: Starts with 'eyJ...' and has role='anon' (public, read-only)\n" + "- SERVICE key: Starts with 'eyJ...' 
and has role='service_role' (private, full access)\n\n" + "Current key role detected: anon" + ) + elif key_message.startswith("UNKNOWN_KEY_TYPE:"): + role = key_message.split(":", 1)[1] + raise ConfigurationError( + f"CRITICAL: Unknown Supabase key role '{role}'.\n\n" + f"Expected 'service_role' but found '{role}'.\n" + f"This key type is not supported and will likely cause failures.\n\n" + f"Please use a valid service_role key from your Supabase dashboard." + ) + # For UNABLE_TO_VALIDATE, we continue silently # Optional environment variables with defaults host = os.getenv("HOST", "0.0.0.0") diff --git a/python/src/server/services/client_manager.py b/python/src/server/services/client_manager.py index e9c60823e9..7098af29e5 100644 --- a/python/src/server/services/client_manager.py +++ b/python/src/server/services/client_manager.py @@ -16,16 +16,26 @@ def get_supabase_client() -> Client: """ Get a Supabase client instance. + In local database mode (LOCAL_DB=true), the URL points to the local + PostgREST proxy and the key is a placeholder. 
+ Returns: Supabase client instance """ url = os.getenv("SUPABASE_URL") key = os.getenv("SUPABASE_SERVICE_KEY") + local_db = os.getenv("LOCAL_DB", "false").lower() == "true" if not url or not key: - raise ValueError( - "SUPABASE_URL and SUPABASE_SERVICE_KEY must be set in environment variables" - ) + if local_db: + local_rest_port = os.getenv("LOCAL_REST_PORT", "3002") + url = f"http://archon-postgrest-proxy:{local_rest_port}" + key = "local-db-key" + else: + raise ValueError( + "SUPABASE_URL and SUPABASE_SERVICE_KEY must be set in environment variables " + "(or set LOCAL_DB=true for local database mode)" + ) try: # Let Supabase handle connection pooling internally @@ -36,6 +46,8 @@ def get_supabase_client() -> Client: if match: project_id = match.group(1) search_logger.debug(f"Supabase client initialized - project_id={project_id}") + elif local_db: + search_logger.debug("Supabase client initialized - local database mode (PostgREST)") return client except Exception as e: diff --git a/python/src/server/services/credential_service.py b/python/src/server/services/credential_service.py index 8fe680f309..9ed411942b 100644 --- a/python/src/server/services/credential_service.py +++ b/python/src/server/services/credential_service.py @@ -36,8 +36,6 @@ class CredentialItem: description: str | None = None - - class CredentialService: """Service for managing application credentials and configuration.""" @@ -57,11 +55,15 @@ def _get_supabase_client(self) -> Client: if self._supabase is None: url = os.getenv("SUPABASE_URL") key = os.getenv("SUPABASE_SERVICE_KEY") + local_db = os.getenv("LOCAL_DB", "false").lower() == "true" if not url or not key: - raise ValueError( - "SUPABASE_URL and SUPABASE_SERVICE_KEY must be set in environment variables" - ) + if local_db: + local_rest_port = os.getenv("LOCAL_REST_PORT", "3002") + url = f"http://archon-postgrest-proxy:{local_rest_port}" + key = "local-db-key" + else: + raise ValueError("SUPABASE_URL and SUPABASE_SERVICE_KEY must be set in 
environment variables") try: # Initialize with standard Supabase client - no need for custom headers @@ -72,6 +74,8 @@ def _get_supabase_client(self) -> Client: if match: project_id = match.group(1) logger.debug(f"Supabase client initialized for project: {project_id}") + elif local_db: + logger.debug("Supabase client initialized - local database mode (PostgREST)") else: logger.debug("Supabase client initialized successfully") @@ -244,6 +248,7 @@ async def set_credential( # Also invalidate provider service cache to ensure immediate effect try: from .llm_provider_service import clear_provider_cache + clear_provider_cache() logger.debug("Also cleared LLM provider service cache") except Exception as e: @@ -252,6 +257,7 @@ async def set_credential( # Also invalidate LLM provider service cache for provider config try: from . import llm_provider_service + # Clear the provider config caches that depend on RAG settings cache_keys_to_clear = ["provider_config_llm", "provider_config_embedding", "rag_strategy_settings"] for cache_key in cache_keys_to_clear: @@ -263,9 +269,7 @@ async def set_credential( except Exception as e: logger.error(f"Error invalidating LLM provider service cache: {e}") - logger.info( - f"Successfully {'encrypted and ' if is_encrypted else ''}stored credential: {key}" - ) + logger.info(f"Successfully {'encrypted and ' if is_encrypted else ''}stored credential: {key}") return True except Exception as e: @@ -294,6 +298,7 @@ async def delete_credential(self, key: str) -> bool: # Also invalidate provider service cache to ensure immediate effect try: from .llm_provider_service import clear_provider_cache + clear_provider_cache() logger.debug("Also cleared LLM provider service cache") except Exception as e: @@ -302,6 +307,7 @@ async def delete_credential(self, key: str) -> bool: # Also invalidate LLM provider service cache for provider config try: from . 
import llm_provider_service + # Clear the provider config caches that depend on RAG settings cache_keys_to_clear = ["provider_config_llm", "provider_config_embedding", "rag_strategy_settings"] for cache_key in cache_keys_to_clear: @@ -340,9 +346,7 @@ async def get_credentials_by_category(self, category: str) -> dict[str, Any]: try: supabase = self._get_supabase_client() - result = ( - supabase.table("archon_settings").select("*").eq("category", category).execute() - ) + result = supabase.table("archon_settings").select("*").eq("category", category).execute() credentials = {} for item in result.data: @@ -445,16 +449,20 @@ async def get_active_provider(self, service_type: str = "llm") -> dict[str, Any] # Validate that embedding provider actually supports embeddings embedding_capable_providers = {"openai", "google", "ollama"} - if (explicit_embedding_provider and - explicit_embedding_provider != "" and - explicit_embedding_provider in embedding_capable_providers): + if ( + explicit_embedding_provider + and explicit_embedding_provider != "" + and explicit_embedding_provider in embedding_capable_providers + ): # Use the explicitly set embedding provider provider = explicit_embedding_provider logger.debug(f"Using explicit embedding provider: '{provider}'") else: # Fall back to OpenAI as default embedding provider for backward compatibility if explicit_embedding_provider and explicit_embedding_provider not in embedding_capable_providers: - logger.warning(f"Invalid embedding provider '{explicit_embedding_provider}' doesn't support embeddings, defaulting to OpenAI") + logger.warning( + f"Invalid embedding provider '{explicit_embedding_provider}' doesn't support embeddings, defaulting to OpenAI" + ) provider = "openai" logger.debug(f"No explicit embedding provider set, defaulting to OpenAI for backward compatibility") else: