diff --git a/CLAUDE.md b/CLAUDE.md index f02314cde..75c50699e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -84,6 +84,27 @@ PostgreSQL with pgvector. Schema managed via Alembic migrations in `hindsight-ap Key tables: `banks`, `memory_units`, `documents`, `entities`, `entity_links` +### Database Backups (IMPORTANT) +**Before any operation that may affect the database, run a backup:** +```bash +docker exec hindsight /backups/backup.sh +``` + +Operations requiring backup: +- Running database migrations +- Modifying Alembic migration files +- Rebuilding Docker images +- Resetting or recreating containers +- Any schema changes +- Bulk data operations + +Backups are stored in `~/hindsight-backups/` on the host. + +To restore: +```bash +docker exec -it hindsight /backups/restore.sh +``` + ## Key Conventions ### Memory Banks diff --git a/docker/standalone/Dockerfile b/docker/standalone/Dockerfile index a68471117..9b761805f 100644 --- a/docker/standalone/Dockerfile +++ b/docker/standalone/Dockerfile @@ -157,9 +157,7 @@ USER hindsight # Set PATH for hindsight user ENV PATH="/app/api/.venv/bin:${PATH}" -# Pre-cache PostgreSQL binaries by starting/stopping pg0-embedded -ENV PG0_HOME=/home/hindsight/.pg0-cache - +# pg0 will download PostgreSQL binaries on first run ENV PG0_HOME=/home/hindsight/.pg0 # Pre-download ML models to avoid runtime download (conditional) @@ -272,16 +270,17 @@ USER hindsight ENV PATH="/app/api/.venv/bin:${PATH}" # Pre-cache PostgreSQL binaries by starting/stopping pg0-embedded -ENV PG0_HOME=/home/hindsight/.pg0-cache +# Note: We use a temp instance just to download binaries, then delete instance data +# to avoid stale port config. Only installation binaries are kept. +ENV PG0_HOME=/home/hindsight/.pg0 RUN /app/api/.venv/bin/python -c "\ from pg0 import Pg0; \ print('Pre-caching PostgreSQL binaries...'); \ -pg = Pg0(name='hindsight', port=5555, username='hindsight', password='hindsight', database='hindsight'); \ +pg = Pg0(name='temp-cache', username='hindsight', password='hindsight', database='hindsight'); \ pg.start(); \ pg.stop(); \ -print('PostgreSQL pre-cached to PG0_HOME')" || echo "Pre-download skipped" - -ENV PG0_HOME=/home/hindsight/.pg0 +print('PostgreSQL binaries cached')" && \ + rm -rf /home/hindsight/.pg0/instances || echo "Pre-download skipped" # Pre-download ML models to avoid runtime download (conditional) ARG PRELOAD_ML_MODELS diff --git a/docker/standalone/start-all.sh b/docker/standalone/start-all.sh index 0206534fc..e4e5d5275 100755 --- a/docker/standalone/start-all.sh +++ b/docker/standalone/start-all.sh @@ -5,16 +5,70 @@ set -e ENABLE_API="${HINDSIGHT_ENABLE_API:-true}" ENABLE_CP="${HINDSIGHT_ENABLE_CP:-true}" -# Copy pre-cached PostgreSQL data if runtime directory is empty (first run with volume) -if [ "$ENABLE_API" = "true" ]; then - PG0_CACHE="/home/hindsight/.pg0-cache" - PG0_HOME="/home/hindsight/.pg0" - if [ -d "$PG0_CACHE" ] && [ "$(ls -A $PG0_CACHE 2>/dev/null)" ]; then - if [ ! "$(ls -A $PG0_HOME 2>/dev/null)" ]; then - echo "📦 Copying pre-cached PostgreSQL data..." - cp -r "$PG0_CACHE"/* "$PG0_HOME"/ 2>/dev/null || true - fi +# ============================================================================= +# Dependency waiting (opt-in via HINDSIGHT_WAIT_FOR_DEPS=true) +# +# Problem: When running with LM Studio, the LLM may take time to load models. +# If Hindsight starts before LM Studio is ready, it fails on LLM verification. +# This wait loop ensures dependencies are ready before starting. +# ============================================================================= +if [ "${HINDSIGHT_WAIT_FOR_DEPS:-false}" = "true" ]; then + LLM_BASE_URL="${HINDSIGHT_API_LLM_BASE_URL:-http://host.docker.internal:1234/v1}" + MAX_RETRIES="${HINDSIGHT_RETRY_MAX:-0}" # 0 = infinite + RETRY_INTERVAL="${HINDSIGHT_RETRY_INTERVAL:-10}" + + # Check if external database is configured (skip check for embedded pg0) + SKIP_DB_CHECK=false + if [ -z "${HINDSIGHT_API_DATABASE_URL}" ]; then + SKIP_DB_CHECK=true + else + DB_CHECK_HOST=$(echo "$HINDSIGHT_API_DATABASE_URL" | sed -E 's|.*@([^:/]+):([0-9]+)/.*|\1 \2|') fi + + check_db() { + if $SKIP_DB_CHECK; then + return 0 + fi + if command -v pg_isready &> /dev/null; then + pg_isready -h $(echo $DB_CHECK_HOST | cut -d' ' -f1) -p $(echo $DB_CHECK_HOST | cut -d' ' -f2) &>/dev/null + else + python3 -c "import socket; s=socket.socket(); s.settimeout(5); exit(0 if s.connect_ex(('$(echo $DB_CHECK_HOST | cut -d' ' -f1)', $(echo $DB_CHECK_HOST | cut -d' ' -f2))) == 0 else 1)" 2>/dev/null + fi + } + + check_llm() { + curl -sf "${LLM_BASE_URL}/models" --connect-timeout 5 &>/dev/null + } + + echo "⏳ Waiting for dependencies to be ready..." + attempt=1 + + while true; do + db_ok=false + llm_ok=false + + if check_db; then + db_ok=true + fi + + if check_llm; then + llm_ok=true + fi + + if $db_ok && $llm_ok; then + echo "✅ Dependencies ready!" + break + fi + + if [ "$MAX_RETRIES" -ne 0 ] && [ "$attempt" -ge "$MAX_RETRIES" ]; then + echo "❌ Max retries ($MAX_RETRIES) reached. Dependencies not available." + exit 1 + fi + + echo " Attempt $attempt: DB=$( $db_ok && echo 'ok' || echo 'waiting' ), LLM=$( $llm_ok && echo 'ok' || echo 'waiting' )" + sleep "$RETRY_INTERVAL" + ((attempt++)) + done fi # Track PIDs for wait diff --git a/hindsight-api/hindsight_api/config.py b/hindsight-api/hindsight_api/config.py index 34a68f0e2..8762fd3fb 100644 --- a/hindsight-api/hindsight_api/config.py +++ b/hindsight-api/hindsight_api/config.py @@ -18,6 +18,7 @@ ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL" ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT" ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT" +ENV_LLM_STRIP_THINKING = "HINDSIGHT_API_LLM_STRIP_THINKING" ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER" ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL" diff --git a/hindsight-api/hindsight_api/engine/llm_wrapper.py b/hindsight-api/hindsight_api/engine/llm_wrapper.py index 8b9a32c79..e2daa8e52 100644 --- a/hindsight-api/hindsight_api/engine/llm_wrapper.py +++ b/hindsight-api/hindsight_api/engine/llm_wrapper.py @@ -6,6 +6,7 @@ import json import logging import os +import re import time from typing import Any @@ -19,6 +20,7 @@ DEFAULT_LLM_MAX_CONCURRENT, DEFAULT_LLM_TIMEOUT, ENV_LLM_MAX_CONCURRENT, + ENV_LLM_STRIP_THINKING, ENV_LLM_TIMEOUT, ) @@ -308,6 +310,20 @@ async def call( content = response.choices[0].message.content + # Strip reasoning model thinking tags when enabled (opt-in for local LLMs) + # Supports: , , , |startthink|/|endthink| + # Enable with HINDSIGHT_API_LLM_STRIP_THINKING=true for reasoning models + # that embed thinking in their output (e.g., Qwen3, DeepSeek on LM Studio) + if content and os.getenv(ENV_LLM_STRIP_THINKING, "false").lower() == "true": + original_len = len(content) + content = re.sub(r".*?", "", content, flags=re.DOTALL) + content = re.sub(r".*?", "", content, flags=re.DOTALL) + content = re.sub(r".*?", "", content, flags=re.DOTALL) + content = re.sub(r"\|startthink\|.*?\|endthink\|", "", content, flags=re.DOTALL) + content = content.strip() + if len(content) < original_len: + logger.debug(f"Stripped {original_len - len(content)} chars of reasoning tokens") + # For local models, they may wrap JSON in markdown code blocks if self.provider in ("lmstudio", "ollama"): clean_content = content