Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 12 additions & 15 deletions app/services/embedding.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,24 @@
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from app.core.config import settings
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

# FastEmbed provides the all-MiniLM-L6-v2 model via ONNX,
# bypassing the need for PyTorch and Microsoft C++ Redistributables on Windows.
# NOTE: the model produces 384-dimensional vectors; the database schema must
# use vector(384) to match (see the accompanying migration).
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

class EmbeddingService:
    """Generates text embeddings with a local FastEmbed (ONNX) model.

    Uses sentence-transformers/all-MiniLM-L6-v2, which avoids the heavy
    PyTorch dependency and produces 384-dimensional vectors.
    """

    def __init__(self):
        # Loading the ONNX model may download weights on first use,
        # hence the progress messages.
        print(f"Loading local embedding model: {MODEL_NAME} (FastEmbed)...")
        self.embeddings = FastEmbedEmbeddings(
            model_name=MODEL_NAME,
            # Truncate inputs to the model's supported token window.
            max_length=512,
        )
        print("Local embedding model loaded successfully.")

    def generate_embedding(self, text: str) -> list[float]:
        """
        Generates a vector embedding for the given text using the FastEmbed model.
        Returns a list of 384 floats.
        """
        # embed_query returns a single list of floats for one input string.
        return self.embeddings.embed_query(text)

# Singleton instance
Expand Down
22 changes: 22 additions & 0 deletions push-error.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
node.exe : Initialising login role...
위치 C:\Program Files\nodejs\npx.ps1:28 문자:
12
+ $input | & $NODE_EXE $NPX_CLI_JS $arg
s
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~
+ CategoryInfo : NotSpecifi
ed: (Initialising login role...:Stri
ng) [], RemoteException
+ FullyQualifiedErrorId : NativeComm
andError

Connecting to remote database...
Do you want to push these migrations to t
he remote database?
 • 20260225141500_add_hnsw_index.sql

[Y/n] y
Applying migration 20260225141500_add_hns
w_index.sql...
Finished supabase db push.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ sse-starlette
pydantic>=2.7.0
pydantic-settings
python-dotenv
fastembed
3 changes: 3 additions & 0 deletions scripts/ingest_all_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import os
import sys

# Force UTF-8 encoding for standard output to prevent cp949 encoding errors on
# Windows. The reported encoding may be None or upper-cased (e.g. "UTF-8"),
# so normalize before comparing.
if (sys.stdout.encoding or "").lower() != "utf-8":
    sys.stdout.reconfigure(encoding="utf-8")
# Ensure we can import app modules by adding the project root to sys.path.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

Expand Down
43 changes: 43 additions & 0 deletions supabase/migrations/20260226140500_update_vector_to_mini_lm.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-- This migration changes the `embedding` column dimension from 3072 to 384
-- to support the local `all-MiniLM-L6-v2` model.

-- 1. Drop the existing HNSW index and match_documents function
DROP INDEX IF EXISTS documents_embedding_idx;
DROP FUNCTION IF EXISTS match_documents;

-- 2. Clear existing incompatible 3072-dimension vectors to avoid casting errors
-- WARNING (breaking change): this irreversibly deletes ALL rows in `documents`.
-- All content must be re-ingested after this migration. Do not run against a
-- production database without explicit approval.
TRUNCATE TABLE documents;

Comment on lines +8 to +10
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

cat -n supabase/migrations/20260226140500_update_vector_to_mini_lm.sql

Repository: SanghunYun95/philo-rag

Length of output: 1600


🏁 Script executed:

# Look for migration configuration or deployment files
find . -type f \( -name "*.json" -o -name "*.yaml" -o -name "*.yml" -o -name "*.toml" -o -name "*.md" \) | grep -iE "(migration|deploy|config|readme)" | head -20

Repository: SanghunYun95/philo-rag

Length of output: 88


🏁 Script executed:

# Check if there's a migrations directory with other files
ls -la supabase/migrations/ 2>/dev/null | head -30

Repository: SanghunYun95/philo-rag

Length of output: 533


🏁 Script executed:

# Check supabase config
cat supabase/config.toml

Repository: SanghunYun95/philo-rag

Length of output: 14313


🏁 Script executed:

# Look for README or documentation in supabase directory
find supabase -maxdepth 2 -type f \( -name "README*" -o -name "*.md" \) 2>/dev/null

Repository: SanghunYun95/philo-rag

Length of output: 48


🏁 Script executed:

# Check root project files for context
ls -la | grep -iE "^-.*\.(md|txt|yaml|yml|json)$"

Repository: SanghunYun95/philo-rag

Length of output: 131


Add environment guard or explicit production migration plan for TRUNCATE TABLE documents.

This migration irreversibly removes all documents during deployment. While TRUNCATE is technically necessary to alter the embedding column type from vector(3072) to vector(384), the migration file contains no environment checks. If deployed to production without explicit intent, all indexed documents will be deleted.

Either add a conditional guard (e.g., check environment variable) or document this as a breaking change requiring explicit approval for production deployments.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@supabase/migrations/20260226140500_update_vector_to_mini_lm.sql` around lines
8 - 10, The migration currently runs TRUNCATE TABLE documents unconditionally
which will irreversibly delete production data; update the migration (the
statement containing "TRUNCATE TABLE documents") to require an explicit
environment guard or approval before executing in production — for example, add
a conditional check that reads an env var (e.g., ALLOW_TRUNCATE or
MIGRATION_APPROVED) or fail-fast unless a production-approval flag is set, or
alternatively convert the file into a documented, manual production-only
migration with clear instructions and a safety check so automated deployments
cannot run TRUNCATE on production without explicit consent.

-- 3. Alter the column type now that the table is empty
ALTER TABLE documents
ALTER COLUMN embedding TYPE vector(384);

-- 4. Recreate the match_documents function with the new dimension
create or replace function match_documents (
  query_embedding vector(384),
  match_count int DEFAULT null,
  filter jsonb DEFAULT '{}'
) returns table (
  id uuid,
  content text,
  metadata jsonb,
  similarity float
)
language plpgsql
as $$
begin
  return query
  select
    documents.id,
    documents.content,
    documents.metadata,
    -- Cosine distance (<=>) converted to a similarity score in [0, 1]
    1 - (documents.embedding <=> query_embedding) as similarity
  from documents
  where documents.metadata @> filter
  order by documents.embedding <=> query_embedding
  limit match_count;
end;
$$;

-- 5. Recreate the HNSW index for the 384 dimension.
-- Name it explicitly so it matches the DROP INDEX IF EXISTS
-- documents_embedding_idx statement used by this (and future) migrations;
-- an unnamed index would get an auto-generated name and escape that drop.
CREATE INDEX documents_embedding_idx ON documents USING hnsw (embedding vector_cosine_ops);
10 changes: 5 additions & 5 deletions verify_and_clear.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ def verify_and_clear():
print(f"Error clearing table: {e}")
return

print("Verifying vector dimension (3072)...")
# Generate a dummy 3072-dimensional vector
dummy_vector = [0.0] * 3072
print("Verifying vector dimension (384)...")
# Generate a dummy 384-dimensional vector
dummy_vector = [0.0] * 384
dummy_id = str(uuid.uuid4())

try:
Expand All @@ -30,13 +30,13 @@ def verify_and_clear():
"embedding": dummy_vector,
"metadata": {"type": "verification"}
}).execute()
print("Success: Vector dimension 3072 is accepted by the database.")
print("Success: Vector dimension 384 is accepted by the database.")

# Clean up the dummy row
supabase_client.table("documents").delete().eq("id", dummy_id).execute()
print("Verification complete and database is clean.")
except Exception as e:
print("Failed to insert 3072-dimensional vector. The schema might not be updated to 3072.")
print("Failed to insert 384-dimensional vector. The schema might not be updated to 384.")
print(f"Error details: {e}")

if __name__ == "__main__":
Expand Down