Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ pip install -r requirements.txt

Set up environment variables:
Create a `.env` file in the `backend` directory based on the `.env.example` structure.
You will need your `GEMINI_API_KEY`.
You will need your `GEMINI_API_KEY`, as well as `SUPABASE_URL` and `SUPABASE_SERVICE_KEY` which are required by the backend configuration (`app/core/config.py`).
```bash
# example .env contents
GEMINI_API_KEY="your-api-key-here"
SUPABASE_URL="your-supabase-url"
SUPABASE_SERVICE_KEY="your-supabase-service-key"
```
Comment thread
coderabbitai[bot] marked this conversation as resolved.

Start the backend server on `http://localhost:8000`:
Expand Down Expand Up @@ -73,4 +75,4 @@ Open [http://localhost:3000](http://localhost:3000) with your browser to see the

## Architecture Highlights
- Frontend: Next.js 14+ (App Router), TailwindCSS, TypeScript, custom SSE streaming integration.
- Backend: FastAPI, LangChain, SentenceTransformers embedding, and Supabase integration.
- Backend: FastAPI, LangChain, HuggingFace embedding, and Supabase integration.
37 changes: 22 additions & 15 deletions backend/app/api/routes/chat.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,55 @@
import json
import asyncio
import logging
from fastapi import APIRouter, Request
from pydantic import BaseModel
from sse_starlette.sse import EventSourceResponse

from app.services.llm import get_english_translation, get_response_stream
from app.services.llm import get_english_translation, get_response_stream_async
from app.services.embedding import embedding_service
from app.services.database import supabase_client
from app.services.database import get_client

router = APIRouter()
logger = logging.getLogger(__name__)

class ChatRequest(BaseModel):
query: str

def _search_documents(query_vector):
    """Run the `match_documents` RPC against Supabase and return the response.

    This is a blocking call; the async route wraps it in ``asyncio.to_thread``.

    Args:
        query_vector: Embedding vector used for similarity search.

    Returns:
        The executed Supabase RPC response (``.data`` holds matched rows).
    """
    rpc_params = {'query_embedding': query_vector, 'match_count': 3}
    client = get_client()
    return client.rpc('match_documents', rpc_params).execute()

async def generate_chat_events(request: Request, query: str):
"""
Generator function that streams SSE events.
It yields 'metadata' first, then chunks of 'content'.
"""
# 1. Translate Korean query to English
try:
english_query = get_english_translation(query)
except Exception as e:
english_query = await asyncio.to_thread(get_english_translation, query)
except Exception:
logger.exception("Failed to translate query")
yield {"event": "error", "data": "오늘은 철학자도 사색의 시간이 필요하답니다. 내일 다시 지혜를 나누러 올게요."}
return

# 2. Generate vector representation
try:
query_vector = embedding_service.generate_embedding(english_query)
query_vector = await asyncio.to_thread(embedding_service.generate_embedding, english_query)
except Exception:
logger.exception("Failed to generate query embedding")
yield {"event": "error", "data": "오늘은 철학자도 사색의 시간이 필요하답니다. 내일 다시 지혜를 나누러 올게요."}
return

# 3. Perform hybrid search in Supabase
# We use the RPC match_documents function defined in schema.sql
try:
response = supabase_client.rpc(
'match_documents',
{'query_embedding': query_vector, 'match_count': 3}
).execute()
response = await asyncio.to_thread(_search_documents, query_vector)
documents = response.data
except Exception as e:
yield {"event": "error", "data": f"Database search failed: {str(e)}"}
except Exception:
logger.exception("Database search failed")
yield {"event": "error", "data": "검색 중 오류가 발생했습니다. 잠시 후 다시 시도해 주세요."}
return

if not documents:
Expand Down Expand Up @@ -72,17 +80,16 @@ async def generate_chat_events(request: Request, query: str):
combined_context = "\n\n".join(contexts)

try:
llm_stream = get_response_stream(context=combined_context, query=english_query)

for chunk in llm_stream:
async for chunk in get_response_stream_async(context=combined_context, query=english_query):
# If client disconnects, stop generating
if await request.is_disconnected():
break

# Clean up chunk to avoid SSE formatting issues with newlines
chunk_clean = chunk.replace("\n", "\\n")
yield {"event": "content", "data": chunk_clean}
except Exception as e:
except Exception:
logger.exception("Failed while streaming LLM response")
yield {"event": "error", "data": "오늘은 철학자도 사색의 시간이 필요하답니다. 내일 다시 지혜를 나누러 올게요."}
return

Expand Down
2 changes: 1 addition & 1 deletion backend/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class Settings(BaseSettings):
SUPABASE_SERVICE_KEY: str = "" # Use Service Role Key for backend operations

model_config = SettingsConfigDict(
env_file=str(Path(__file__).resolve().parent.parent.parent / ".env"),
env_file=str(Path(__file__).resolve().parents[2] / ".env"),
env_file_encoding="utf-8"
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.

Expand Down
25 changes: 20 additions & 5 deletions backend/app/services/database.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,30 @@
import threading
from supabase import create_client, Client
from app.core.config import settings

def get_supabase_client() -> Client:
SUPABASE_CONFIG_ERROR = "SUPABASE_URL and SUPABASE_SERVICE_KEY must be configured"

def _get_supabase_client() -> Client:
"""
Returns a configured Supabase client using the URL and Service Key.
The Service Key is used to bypass RLS for administrative backend tasks
like upserting documents or fetching metadata securely.
"""
supabase_url = settings.SUPABASE_URL or "http://localhost:8000"
supabase_key = settings.SUPABASE_SERVICE_KEY or "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSJ9.1234567890"
supabase_url = settings.SUPABASE_URL
supabase_key = settings.SUPABASE_SERVICE_KEY
if not supabase_url or not supabase_key:
raise RuntimeError(SUPABASE_CONFIG_ERROR)
return create_client(supabase_url, supabase_key)

# Initialize a global client to be reused
supabase_client = get_supabase_client()

_client_lock = threading.Lock()
# Lazy initialization for Supabase client
_supabase_client: Client | None = None

def get_client() -> Client:
global _supabase_client
if _supabase_client is None:
with _client_lock:
if _supabase_client is None:
_supabase_client = _get_supabase_client()
return _supabase_client
12 changes: 8 additions & 4 deletions backend/app/services/embedding.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import threading
import logging
from langchain_community.embeddings import HuggingFaceEmbeddings

logger = logging.getLogger(__name__)


MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

class EmbeddingService:
Expand All @@ -13,19 +17,19 @@ def embeddings(self):
if self._embeddings is None:
with self._lock:
if self._embeddings is None:
print(f"Loading local embedding model: {MODEL_NAME} (HuggingFace)...")
logger.info("Loading local embedding model: %s (HuggingFace)...", MODEL_NAME)
self._embeddings = HuggingFaceEmbeddings(
model_name=MODEL_NAME,
model_kwargs={'device': 'cpu'},
encode_kwargs={'normalize_embeddings': True}
)
print("Local embedding model loaded successfully.")
logger.info("Local embedding model loaded successfully.")
return self._embeddings

def generate_embedding(self, text: str) -> list[float]:
"""
Generates a vector embedding for the given text using the FastEmbed model.
Returns a list of 384 floats.
Generates a vector embedding for the given text using the HuggingFace model.
Returns a list of 384 floats matching the model's actual vector length.
"""
# The embed_query method returns a list of floats
embedding = self.embeddings.embed_query(text)
Expand Down
16 changes: 14 additions & 2 deletions backend/app/services/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,19 @@
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser

if not settings.GEMINI_API_KEY:
raise RuntimeError("GEMINI_API_KEY must be configured")

# Configure Gemini API natively (optional, if native SDK features are needed)
genai.configure(api_key=settings.GEMINI_API_KEY)

# Configure LangChain model
# We use gemini-2.5-flash for faster and highly capable inference
llm = ChatGoogleGenerativeAI(
model="gemini-2.5-flash",
google_api_key=settings.GEMINI_API_KEY or "dummy_key_for_testing",
google_api_key=settings.GEMINI_API_KEY,
temperature=0.7,
max_retries=0
max_retries=2
)

translation_prompt = PromptTemplate.from_template(
Expand Down Expand Up @@ -58,3 +61,12 @@ def get_response_stream(context: str, query: str):
prompt = get_rag_prompt()
chain = prompt | llm | StrOutputParser()
return chain.stream({"context": context, "query": query})

async def get_response_stream_async(context: str, query: str):
    """Asynchronously yield text chunks from the RAG LLM chain.

    Args:
        context: Retrieved document context injected into the RAG prompt.
        query: User query (already translated to English upstream).

    Yields:
        str: Incremental pieces of the model's response.
    """
    rag_chain = get_rag_prompt() | llm | StrOutputParser()
    async for piece in rag_chain.astream({"context": context, "query": query}):
        yield piece
Binary file modified backend/pytest_log.txt
Binary file not shown.
26 changes: 26 additions & 0 deletions backend/pytest_log_utf8.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
============================= test session starts =============================
platform win32 -- Python 3.12.12, pytest-9.0.2, pluggy-1.6.0
rootdir: C:\Users\ysn65\Desktop\antigravity\philo-rag\backend
plugins: anyio-4.12.1, asyncio-1.3.0, cov-7.0.0
asyncio: mode=Mode.STRICT, debug=False, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function
collected 0 items / 1 error

=================================== ERRORS ====================================
___________________ ERROR collecting tests/unit/test_llm.py ___________________
tests\unit\test_llm.py:12: in <module>
from app.services.llm import get_english_translation, get_response_stream, get_response_stream_async
app\services\llm.py:8: in <module>
raise RuntimeError("GEMINI_API_KEY must be configured")
E RuntimeError: GEMINI_API_KEY must be configured
============================== warnings summary ===============================
<frozen importlib._bootstrap>:488
<frozen importlib._bootstrap>:488: DeprecationWarning: Type google._upb._message.MessageMapContainer uses PyType_Spec with a metaclass that has custom tp_new. This is deprecated and will no longer be allowed in Python 3.14.

<frozen importlib._bootstrap>:488
<frozen importlib._bootstrap>:488: DeprecationWarning: Type google._upb._message.ScalarMapContainer uses PyType_Spec with a metaclass that has custom tp_new. This is deprecated and will no longer be allowed in Python 3.14.

-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ===========================
ERROR tests/unit/test_llm.py - RuntimeError: GEMINI_API_KEY must be configured
!!!!!!!!!!!!!!!!!!! Interrupted: 1 error during collection !!!!!!!!!!!!!!!!!!!!
======================== 2 warnings, 1 error in 4.52s =========================
3 changes: 2 additions & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ pydantic>=2.7.0
pydantic-settings
python-dotenv
langchain-community==0.4.1
sentence-transformers
sentence-transformers>=2.2.0,<3.0.0
pytest-asyncio>=0.23.0
96 changes: 69 additions & 27 deletions backend/tests/unit/test_llm.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,87 @@
import asyncio
import os

import sys
import pytest
from pathlib import Path

# dynamically add backend dir to path
backend_dir = Path(__file__).resolve().parent
# dynamically add backend root dir to path
backend_dir = Path(__file__).resolve().parents[2]
if str(backend_dir) not in sys.path:
sys.path.insert(0, str(backend_dir))

from app.services.llm import get_english_translation, get_response_stream
from app.core.config import settings
import os

@pytest.mark.skipif(not settings.GEMINI_API_KEY, reason="GEMINI_API_KEY is not configured")
def test_translation():
print("Testing translation...")
@pytest.fixture(autouse=True)
def setup_test_env(monkeypatch):
    """Inject dummy credentials so service modules can be imported in tests.

    Sets the env vars and, if the settings object was already created,
    patches its attributes as well so both read paths see the test values.
    """
    test_values = (
        ("GEMINI_API_KEY", "dummy_test_key"),
        ("SUPABASE_URL", "http://localhost:8000"),
        ("SUPABASE_SERVICE_KEY", "dummy_test_key"),
    )
    for name, value in test_values:
        monkeypatch.setenv(name, value)

    # Ensure settings reflect the mocked env vars globally in case they
    # were already initialized before this fixture ran.
    try:
        from app.core.config import settings
    except ImportError:
        return
    for name, value in test_values:
        monkeypatch.setattr(settings, name, value)

from unittest.mock import patch, MagicMock

def test_translation(setup_test_env):
print("Testing translation...")
from app.services.llm import get_english_translation
with patch("app.services.llm.translation_prompt") as mock_prompt, \
patch("app.services.llm.llm") as _mock_llm, \
patch("app.services.llm.StrOutputParser") as _mock_parser:

mock_chain = MagicMock()
mock_chain.invoke.return_value = "Translated Text"
mock_chain.__or__.return_value = mock_chain
mock_prompt.__or__.return_value = mock_chain

translated = get_english_translation("미덕이란 무엇인가?")
print("Translation:", translated)
assert translated.strip() != "", "Translation must not be empty"
except Exception as e:
raise AssertionError(f"Translation error: {str(e)}")
assert translated == "Translated Text", "Translation output mocked mismatch"

@pytest.mark.skipif(not settings.GEMINI_API_KEY, reason="GEMINI_API_KEY is not configured")
def test_streaming():
def test_streaming(setup_test_env):
print("Testing streaming...")
try:
from app.services.llm import get_response_stream
with patch("app.services.llm.get_rag_prompt") as mock_prompt, \
patch("app.services.llm.llm") as _mock_llm, \
patch("app.services.llm.StrOutputParser") as _mock_parser:

mock_chain = MagicMock()
mock_chain.stream.return_value = ["안녕하세요", " ", "철학자", "입니다."]
mock_chain.__or__.return_value = mock_chain
mock_prompt.return_value.__or__.return_value = mock_chain

stream = get_response_stream(context="Virtue is excellence.", query="What is virtue?")
chunks_received = 0
for chunk in stream:
print(chunk, end="", flush=True)
chunks_received += 1
print("\nStream finished")
assert chunks_received > 0, "No chunks received from streaming API"
except Exception as e:
raise AssertionError(f"Stream error: {str(e)}")
results = list(stream)
assert results == ["안녕하세요", " ", "철학자", "입니다."], "Stream chunks mocked mismatch"

@pytest.mark.asyncio
async def test_streaming_async(setup_test_env):
    """Verify get_response_stream_async yields the mocked chunks in order."""
    print("Testing streaming async...")
    from app.services.llm import get_response_stream_async

    with patch("app.services.llm.get_rag_prompt") as mock_prompt, \
         patch("app.services.llm.llm") as _mock_llm, \
         patch("app.services.llm.StrOutputParser") as _mock_parser:

        expected = ["안녕하세요", " ", "철학자", "입니다."]

        # Async generator standing in for chain.astream(...)
        async def fake_astream(*_args, **_kwargs):
            for part in expected:
                yield part

        pipeline = MagicMock()
        pipeline.astream = fake_astream
        pipeline.__or__.return_value = pipeline
        mock_prompt.return_value.__or__.return_value = pipeline

        stream = get_response_stream_async(context="Virtue is excellence.", query="What is virtue?")
        results = [chunk async for chunk in stream]
        assert results == ["안녕하세요", " ", "철학자", "입니다."], "Async stream chunks mocked mismatch"
Comment on lines +62 to +80
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check for pytest-asyncio in test dependencies

echo "=== Checking for pytest-asyncio in dependencies ==="

# Search in common dependency files
for file in pyproject.toml setup.py setup.cfg requirements*.txt; do
    if [ -f "$file" ]; then
        echo "--- Checking $file ---"
        cat -n "$file" | grep -i "pytest-asyncio\|pytest_asyncio" || echo "Not found in $file"
    fi
done

echo ""
echo "=== Listing all test/dev dependency files ==="
find . -maxdepth 2 -type f \( -name "pyproject.toml" -o -name "setup.py" -o -name "setup.cfg" -o -name "*requirements*.txt" \) 2>/dev/null

Repository: SanghunYun95/philo-rag

Length of output: 191


🏁 Script executed:

#!/bin/bash
# Read the requirements.txt file to see all dependencies

echo "=== Contents of backend/requirements.txt ==="
cat backend/requirements.txt

echo ""
echo "=== Searching for pytest or asyncio related packages ==="
cat backend/requirements.txt | grep -i "pytest\|asyncio"

Repository: SanghunYun95/philo-rag

Length of output: 436


🏁 Script executed:

#!/bin/bash
# Check if there are other requirements files or test configuration

echo "=== Finding all requirements files ==="
find . -name "*requirements*.txt" -o -name "*requirements*.in" 2>/dev/null | head -20

echo ""
echo "=== Checking for pytest configuration ==="
find . -name "pytest.ini" -o -name "pyproject.toml" -o -name "setup.cfg" -o -name "conftest.py" 2>/dev/null | head -20

echo ""
echo "=== Checking backend directory structure ==="
ls -la backend/ | head -20

Repository: SanghunYun95/philo-rag

Length of output: 1025


🏁 Script executed:

#!/bin/bash
# Check the pytest log for information about pytest-asyncio

echo "=== Checking pytest log ==="
cat backend/pytest_log.txt | head -100

echo ""
echo "=== Searching for asyncio or pytest-asyncio mentions in logs ==="
grep -i "asyncio\|pytest.mark.asyncio" backend/pytest_log.txt || echo "No mentions found"

Repository: SanghunYun95/philo-rag

Length of output: 3921


🏁 Script executed:

#!/bin/bash
# Check the UTF-8 version of the log for clearer reading

echo "=== Checking pytest_log_utf8.txt ==="
cat backend/pytest_log_utf8.txt | grep -A 5 "plugins:" || cat backend/pytest_log_utf8.txt | head -50

Repository: SanghunYun95/philo-rag

Length of output: 461


🏁 Script executed:

#!/bin/bash
# The log shows asyncio-1.3.0 is installed. Let's verify where it comes from

echo "=== Checking if pytest-asyncio comes from dependencies ==="
grep -r "pytest-asyncio\|pytest_asyncio" . --include="*.txt" --include="*.toml" --include="*.py" 2>/dev/null | grep -v ".coverage" | grep -v "pytest_log" || echo "Not found in source files"

echo ""
echo "=== Checking for dev/test dependencies in the repo root ==="
ls -la | grep -i "require\|setup\|pyproject"

Repository: SanghunYun95/philo-rag

Length of output: 211


pytest-asyncio 패키지를 의존성에 추가해야 합니다.

`@pytest.mark.asyncio` 데코레이터를 사용하는 비동기 스트리밍 테스트가 잘 구현되었으나, `backend/requirements.txt`에 `pytest-asyncio` 패키지가 명시적으로 나열되어 있지 않습니다. 테스트가 정상적으로 실행되려면 `pytest-asyncio`를 의존성에 추가하세요.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/tests/unit/test_llm.py` around lines 49 - 66, The test uses the
`@pytest.mark.asyncio` decorator (test_streaming_async) and async generator
get_response_stream_async, so add the pytest-asyncio package to the project's
test dependencies (e.g., the requirements used for running tests) so the
decorator is available at test runtime; update the dependency list to include
"pytest-asyncio" and ensure the test environment installs it before running the
tests.


# For manual execution
async def run_manual_test():
if __name__ == "__main__":
import asyncio
test_translation()
test_streaming()

if __name__ == "__main__":
asyncio.run(run_manual_test())
asyncio.run(test_streaming_async())
11 changes: 10 additions & 1 deletion frontend/.stylelintrc.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
{
"extends": [
"stylelint-config-standard-scss"
],
"rules": {
"scss/at-rule-no-unknown": [
true,
{
"ignoreAtRules": [
"theme",
"import"
"import",
"apply",
"layer",
"screen",
"variants",
"responsive",
"tailwind"
]
}
]
Expand Down
Loading