diff --git a/app/services/embedding.py b/app/services/embedding.py index 140ed26..b148316 100644 --- a/app/services/embedding.py +++ b/app/services/embedding.py @@ -1,27 +1,24 @@ -from langchain_google_genai import GoogleGenerativeAIEmbeddings -from app.core.config import settings +from langchain_community.embeddings.fastembed import FastEmbedEmbeddings -# Google's text-embedding-004 is a powerful model. -# By default it produces 768 dimensions. -# Since the database was initialized with 1536 dimensions, -# we should ideally match it or update the database schema. -# For now, we'll use Gemini embeddings and recommend a schema update if necessary. - -MODEL_NAME = "models/gemini-embedding-001" +# FastEmbed provides the exact same model (all-MiniLM-L6-v2) but via ONNX, +# bypassing the need for PyTorch and Microsoft C++ Redistributables on Windows. +MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2" class EmbeddingService: def __init__(self): - # Using Gemini API for embeddings avoids the heavy local torch dependency - self.embeddings = GoogleGenerativeAIEmbeddings( - model=MODEL_NAME, - google_api_key=settings.GEMINI_API_KEY + print(f"Loading local embedding model: {MODEL_NAME} (FastEmbed)...") + self.embeddings = FastEmbedEmbeddings( + model_name=MODEL_NAME, + max_length=512 ) + print("Local embedding model loaded successfully.") def generate_embedding(self, text: str) -> list[float]: """ - Generates a vector embedding for the given text using Gemini. + Generates a vector embedding for the given text using the FastEmbed model. + Returns a list of 384 floats. """ - # The invoke method returns a list of floats + # The embed_query method returns a list of floats return self.embeddings.embed_query(text) # Singleton instance diff --git a/push-error.log b/push-error.log new file mode 100644 index 0000000..245ee1d --- /dev/null +++ b/push-error.log @@ -0,0 +1,22 @@ +node.exe : Initialising login role... 
+위치 C:\Program Files\nodejs\npx.ps1:28 문자: +12 ++ $input | & $NODE_EXE $NPX_CLI_JS $arg +s ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~ + + CategoryInfo : NotSpecifi + ed: (Initialising login role...:Stri + ng) [], RemoteException + + FullyQualifiedErrorId : NativeComm + andError + +Connecting to remote database... +Do you want to push these migrations to t +he remote database? + ??20260225141500_add_hnsw_index.sql + + [Y/n] y +Applying migration 20260225141500_add_hns +w_index.sql... +Finished supabase db push. diff --git a/requirements.txt b/requirements.txt index 17cb9b0..616e26a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ sse-starlette pydantic>=2.7.0 pydantic-settings python-dotenv +fastembed diff --git a/scripts/ingest_all_data.py b/scripts/ingest_all_data.py index 95b9ffd..6301f34 100644 --- a/scripts/ingest_all_data.py +++ b/scripts/ingest_all_data.py @@ -1,6 +1,9 @@ import os import sys +# Force UTF-8 encoding for standard output to prevent cp949 encoding errors on Windows +if sys.stdout.encoding != 'utf-8': + sys.stdout.reconfigure(encoding='utf-8') # Ensure we can import app modules sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) diff --git a/supabase/migrations/20260226140500_update_vector_to_mini_lm.sql b/supabase/migrations/20260226140500_update_vector_to_mini_lm.sql new file mode 100644 index 0000000..8ba0a43 --- /dev/null +++ b/supabase/migrations/20260226140500_update_vector_to_mini_lm.sql @@ -0,0 +1,43 @@ +-- This migration changes the `embedding` column dimension from 3072 to 384 +-- to support the local `all-MiniLM-L6-v2` model. + +-- 1. Drop the existing HNSW index and match_documents function +DROP INDEX IF EXISTS documents_embedding_idx; +DROP FUNCTION IF EXISTS match_documents; + +-- 2. Clear existing incompatible 3072-dimension vectors to avoid casting errors +TRUNCATE TABLE documents; + +-- 3. 
Alter the column type now that the table is empty +ALTER TABLE documents +ALTER COLUMN embedding TYPE vector(384); + +-- 4. Recreate the match_documents function with the new dimension +create or replace function match_documents ( + query_embedding vector(384), + match_count int DEFAULT null, + filter jsonb DEFAULT '{}' +) returns table ( + id uuid, + content text, + metadata jsonb, + similarity float +) +language plpgsql +as $$ +begin + return query + select + documents.id, + documents.content, + documents.metadata, + 1 - (documents.embedding <=> query_embedding) as similarity + from documents + where documents.metadata @> filter + order by documents.embedding <=> query_embedding + limit match_count; +end; +$$; + +-- 5. Recreate the HNSW index for the 384 dimension +CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops); diff --git a/verify_and_clear.py b/verify_and_clear.py index b9086ef..e108bf6 100644 --- a/verify_and_clear.py +++ b/verify_and_clear.py @@ -18,9 +18,9 @@ def verify_and_clear(): print(f"Error clearing table: {e}") return - print("Verifying vector dimension (3072)...") - # Generate a dummy 3072-dimensional vector - dummy_vector = [0.0] * 3072 + print("Verifying vector dimension (384)...") + # Generate a dummy 384-dimensional vector + dummy_vector = [0.0] * 384 dummy_id = str(uuid.uuid4()) try: @@ -30,13 +30,13 @@ def verify_and_clear(): "embedding": dummy_vector, "metadata": {"type": "verification"} }).execute() - print("Success: Vector dimension 3072 is accepted by the database.") + print("Success: Vector dimension 384 is accepted by the database.") # Clean up the dummy row supabase_client.table("documents").delete().eq("id", dummy_id).execute() print("Verification complete and database is clean.") except Exception as e: - print("Failed to insert 3072-dimensional vector. The schema might not be updated to 3072.") + print("Failed to insert 384-dimensional vector. 
The schema might not be updated to 384.") print(f"Error details: {e}") if __name__ == "__main__":