SanghunYun95 · SanghunYun95 · Mar 6, 2026 · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026
diff --git a/backend/app/api/routes/chat.py b/backend/app/api/routes/chat.py
@@ -1,6 +1,7 @@
 import json
 import asyncio
 import logging
+import time
 from typing import List, Dict, Optional
 from fastapi import APIRouter, Request, Depends
 from pydantic import BaseModel, Field
@@ -13,6 +14,10 @@
 logger = logging.getLogger(__name__)
 
 DEFAULT_CHAT_TITLE = "새로운 대화"
+CHAT_TIMEOUT = 30.0
+
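+# Caps concurrent database RPC calls at 16 to limit thread pool pressure. NOTE: a call that hits the wait_for timeout releases its slot while its worker thread keeps running, so this bounds awaiting callers, not busy threads.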
+_db_rpc_semaphore = asyncio.Semaphore(16)
 
 class HistoryMessage(BaseModel):
     role: str
@@ -40,13 +45,16 @@ async def generate_chat_events(request: Request, query: str, history: List[Histo
     from app.services.embedding import embedding_service
 
     # 1. Translate Korean query to English // Note: We don't translate history here to save costs and reduce latency
+    t0 = time.perf_counter()
     try:
         english_query = await asyncio.wait_for(
             get_english_translation(query),
-            timeout=10.0,
+            timeout=CHAT_TIMEOUT,
         )
+        t1 = time.perf_counter()
+        logger.info(f"Translation successful in {t1 - t0:.2f}s")
     except asyncio.TimeoutError:
-        logger.warning("Translation timed out")
+        logger.warning(f"Translation timed out after {time.perf_counter() - t0:.2f}s")
         yield {"event": "error", "data": "응답이 지연되고 있어요. 잠시 후 다시 시도해 주세요."}
         return
     except Exception:
@@ -55,13 +63,16 @@ async def generate_chat_events(request: Request, query: str, history: List[Histo
         return
 
     # 2. Generate vector representation
+    t2 = time.perf_counter()
     try:
         query_vector = await asyncio.wait_for(
             embedding_service.agenerate_embedding(english_query),
-            timeout=10.0,
+            timeout=CHAT_TIMEOUT,
         )
+        t3 = time.perf_counter()
+        logger.info(f"Embedding successful in {t3 - t2:.2f}s")
     except asyncio.TimeoutError:
-        logger.warning("Embedding generation timed out")
+        logger.warning(f"Embedding generation timed out after {time.perf_counter() - t2:.2f}s")
         yield {"event": "error", "data": "응답이 지연되고 있어요. 잠시 후 다시 시도해 주세요."}
         return
     except Exception:
@@ -71,15 +82,27 @@ async def generate_chat_events(request: Request, query: str, history: List[Histo
 
     # 3. Perform hybrid search in Supabase
     # We use the RPC match_documents function defined in schema.sql
+    t4 = time.perf_counter()
     try:
-        response = await asyncio.to_thread(_search_documents, query_vector)
-        documents = response.data
+        async with _db_rpc_semaphore:
+            response = await asyncio.wait_for(
+                asyncio.to_thread(_search_documents, query_vector),
+                timeout=CHAT_TIMEOUT,
+            )
+        documents = response.data or []
+        t5 = time.perf_counter()
+        logger.info(f"Database search successful in {t5 - t4:.2f}s. Found {len(documents)} docs.")
+    except asyncio.TimeoutError:
+        logger.error(f"Database search timed out after {time.perf_counter() - t4:.2f}s")
+        yield {"event": "error", "data": "검색이 지연되고 있어요. 잠시 후 다시 시도해 주세요."}
+        return
     except Exception:
         logger.exception("Database search failed")
         yield {"event": "error", "data": "검색 중 오류가 발생했습니다. 잠시 후 다시 시도해 주세요."}
         return
 
     if not documents:
+        logger.warning(f"No documents found for query in {time.perf_counter() - t4:.2f}s")
         yield {"event": "content", "data": "관련 철학적 내용을 찾을 수 없습니다."}
         return
 
@@ -126,23 +149,31 @@ async def generate_chat_events(request: Request, query: str, history: List[Histo
 
     formatted_history = "\n\n".join(formatted_parts)
 
+    t6 = time.perf_counter()
     try:
         chunk_count = 0
         disconnected = False
         async for chunk in get_response_stream_async(context=combined_context, query=english_query, history=formatted_history):
+            if chunk_count == 0:
+                logger.info(f"First LLM chunk received in {time.perf_counter() - t6:.2f}s")
+
             # If client disconnects, stop generating
             if await request.is_disconnected():
                 disconnected = True
+                logger.info(f"Client disconnected during streaming after {chunk_count} chunks.")
                 break
 
             chunk_count += 1
             # Clean up chunk to avoid SSE formatting issues with newlines
             chunk_clean = chunk.replace("\n", "\\n")
             yield {"event": "content", "data": chunk_clean}
 
-        if not disconnected and chunk_count == 0:
-            logger.warning("LLM returned 0 chunks. Sending a fallback message.")
-            yield {"event": "content", "data": "철학자는 난색을 표하며 서적을 뒤적거립니다. 대신 철학자가 답변을 해줄 만한 다른 질문은 없을까요?"}
+        if not disconnected:
+            if chunk_count == 0:
+                logger.warning(f"LLM returned 0 chunks after {time.perf_counter() - t6:.2f}s. Sending a fallback message.")
+                yield {"event": "content", "data": "철학자는 난색을 표하며 서적을 뒤적거립니다. 대신 철학자가 답변을 해줄 만한 다른 질문은 없을까요?"}
+            else:
+                logger.info(f"Stream finished successfully. Total chunks: {chunk_count}, Total time: {time.perf_counter() - t0:.2f}s")
 
     except Exception:
         logger.exception("Failed while streaming LLM response")
@@ -170,7 +201,7 @@ async def chat_title_endpoint(request: Request, title_request: TitleRequest):
         return {"title": DEFAULT_CHAT_TITLE}
 
     try:
-        title = await asyncio.wait_for(generate_chat_title_async(query), timeout=10.0)
+        title = await asyncio.wait_for(generate_chat_title_async(query), timeout=CHAT_TIMEOUT)
         # Handle case where LLM returns something too long or with quotes
         title = title.replace('"', '').replace("'", "").strip()
         if not title:
@@ -181,7 +212,7 @@ async def chat_title_endpoint(request: Request, title_request: TitleRequest):
             title = title[: MAX_TITLE_LEN - len(ELLIPSIS)] + ELLIPSIS
         return {"title": title}
     except asyncio.TimeoutError:
-        logger.warning("Timeout generating chat title")
+        logger.warning(f"Timeout generating chat title after {CHAT_TIMEOUT}s")
         return {"title": DEFAULT_CHAT_TITLE}
     except Exception:
         logger.exception("Failed to generate chat title")

diff --git a/backend/app/core/config.py b/backend/app/core/config.py
@@ -3,7 +3,7 @@
 
 class Settings(BaseSettings):
     # API Keys
-    GEMINI_API_KEY: str = ""
+    OPENAI_API_KEY: str = ""
     ALADIN_API_KEY: str = ""
     HUGGINGFACEHUB_API_TOKEN: str = ""
 
@@ -13,7 +13,8 @@ class Settings(BaseSettings):
 
     model_config = SettingsConfigDict(
         env_file=str(Path(__file__).resolve().parents[3] / ".env"), 
-        env_file_encoding="utf-8"
+        env_file_encoding="utf-8",
+        extra="ignore"
     )
 
 settings = Settings()
diff --git a/backend/app/core/env_utils.py b/backend/app/core/env_utils.py
@@ -2,11 +2,11 @@
 import re
 from pathlib import Path
 
-def parse_gemini_api_keys(env_path: Path) -> list[str]:
+def parse_openai_api_keys(env_path: Path) -> list[str]:
     """
-    Reads active GEMINI_API_KEY assignments from the given .env file.
+    Reads active OPENAI_API_KEY assignments from the given .env file.
     Extracts active assignments and strips inline comments and quotes.
-    Also merges GEMINI_API_KEYS (comma-separated) and GEMINI_API_KEY
+    Also merges OPENAI_API_KEYS (comma-separated) and OPENAI_API_KEY
     from os.environ with de-duplication, preserving first-seen order.
     """
     def _normalize_key(value: str) -> str:
@@ -16,9 +16,9 @@ def _normalize_key(value: str) -> str:
     if env_path.is_file():
         with open(env_path, 'r', encoding='utf-8') as f:
             content = f.read()
-            # Find all variations of GEMINI_API_KEY assignments
+            # Find all variations of OPENAI_API_KEY assignments
             matches = re.findall(
-                r'^\s*GEMINI_API_KEY\s*=\s*(.+?)\s*(?:#.*)?$',
+                r'^\s*OPENAI_API_KEY\s*=\s*(.+?)\s*(?:#.*)?$',
                 content,
                 flags=re.MULTILINE,
             )
@@ -29,17 +29,17 @@ def _normalize_key(value: str) -> str:
                 if key and key not in api_keys:
                     api_keys.append(key)
 
-    # Also check GEMINI_API_KEYS (comma-separated list) from environment variables
+    # Also check OPENAI_API_KEYS (comma-separated list) from environment variables
     # This is highly useful for deployment environments like Render
-    env_keys_str = os.getenv("GEMINI_API_KEYS")
+    env_keys_str = os.getenv("OPENAI_API_KEYS")
     if env_keys_str:
         for k in env_keys_str.split(','):
             key = _normalize_key(k)
             if key and key not in api_keys:
                 api_keys.append(key)
 
-    # Also merge single GEMINI_API_KEY from environment (if present)
-    k = os.getenv("GEMINI_API_KEY")
+    # Also merge single OPENAI_API_KEY from environment (if present)
+    k = os.getenv("OPENAI_API_KEY")
     if k:
         key = _normalize_key(k)
         if key and key not in api_keys:

diff --git a/backend/app/services/database.py b/backend/app/services/database.py
@@ -1,5 +1,5 @@
 import threading
-from supabase import create_client, Client
+from supabase import create_client, Client, ClientOptions
 from app.core.config import settings
 
 SUPABASE_CONFIG_ERROR = "SUPABASE_URL and SUPABASE_SERVICE_KEY must be configured"
@@ -14,7 +14,9 @@ def _get_supabase_client() -> Client:
     supabase_key = settings.SUPABASE_SERVICE_KEY
     if not supabase_url or not supabase_key:
         raise RuntimeError(SUPABASE_CONFIG_ERROR)
-    return create_client(supabase_url, supabase_key)
+
+    options = ClientOptions(postgrest_client_timeout=30)
+    return create_client(supabase_url, supabase_key, options=options)
 
 
 _client_lock = threading.Lock()

diff --git a/backend/app/services/llm.py b/backend/app/services/llm.py
@@ -3,25 +3,24 @@
 import threading
 from pathlib import Path
 import asyncio
-import google.generativeai as genai
 from app.core.config import settings
-from app.core.env_utils import parse_gemini_api_keys
+from app.core.env_utils import parse_openai_api_keys
 from langchain_core.prompts import PromptTemplate
-from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_openai import ChatOpenAI
 from langchain_core.output_parsers import StrOutputParser
 
 # Models will be instantiated lazily or during function call
 _llm = None
 _llm_lock = threading.Lock()
 
-def get_all_gemini_keys() -> list[str]:
-    """Reads active GEMINI_API_KEY assignments from the root .env file."""
+def get_all_openai_keys() -> list[str]:
+    """Reads active OPENAI_API_KEY assignments from the root .env file."""
     env_path = Path(__file__).resolve().parents[3] / ".env"
-    keys = parse_gemini_api_keys(env_path)
+    keys = parse_openai_api_keys(env_path)
 
     # Ensure the one from environment variables/settings is also included
-    if getattr(settings, "GEMINI_API_KEY", None) and settings.GEMINI_API_KEY not in keys:
-        keys.insert(0, settings.GEMINI_API_KEY)
+    if getattr(settings, "OPENAI_API_KEY", None) and settings.OPENAI_API_KEY not in keys:
+        keys.insert(0, settings.OPENAI_API_KEY)
 
     return keys
 
@@ -30,30 +29,27 @@ def get_llm():
     if _llm is None:
         with _llm_lock:
             if _llm is None:  # Double-checked locking
-                keys = get_all_gemini_keys()
+                keys = get_all_openai_keys()
 
                 if not keys:
-                    raise RuntimeError("No GEMINI_API_KEY found in .env or environment")
+                    raise RuntimeError("No OPENAI_API_KEY found in .env or environment")
 
-                # Configure Gemini API natively with the first key
-                genai.configure(api_key=keys[0])
-
-                print(f"Loaded {len(keys)} Gemini API keys for rotation/fallbacks.")
+                print(f"Loaded {len(keys)} OpenAI API keys for rotation/fallbacks.")
 
                 # Create the primary model
-                primary_llm = ChatGoogleGenerativeAI(
-                    model="gemini-2.5-flash-lite", 
-                    google_api_key=keys[0],
+                primary_llm = ChatOpenAI(
+                    model="gpt-4o-mini", 
+                    api_key=keys[0],
                     temperature=0.7,
                     max_retries=1
                 )
 
                 if len(keys) > 1:
                     # Create fallback models with the other keys
                     fallback_llms = [
-                        ChatGoogleGenerativeAI(
-                            model="gemini-2.5-flash-lite", 
-                            google_api_key=k,
+                        ChatOpenAI(
+                            model="gpt-4o-mini", 
+                            api_key=k,
                             temperature=0.7,
                             max_retries=1
                         )
@@ -77,7 +73,7 @@ def get_llm():
 
 async def get_english_translation(korean_query: str) -> str:
     """
-    Translates a Korean query to English using Gemini via LangChain.
+    Translates a Korean query to English using OpenAI via LangChain.
     """
     chain = translation_prompt | get_llm() | StrOutputParser()
     return await chain.ainvoke({"query": korean_query})
@@ -134,10 +130,18 @@ async def get_response_stream_async(context: str, query: str, history: str = "")
     except asyncio.TimeoutError:
         import logging
         logger = logging.getLogger(__name__)
-        logger.warning(f"LLM stream chunk timed out after 30 seconds. Query: {query}")
+        logger.warning(
+            f"LLM stream chunk timed out after 30 seconds (query_length={len(query)})"
+        )
         raise
     finally:
-        await generator.aclose()
+        try:
+            await generator.aclose()
+        except Exception:
+            import logging
+            logging.getLogger(__name__).debug(
+                "LLM stream generator close failed", exc_info=True
+            )
 
 title_prompt = PromptTemplate.from_template(
     """주어진 질문을 기반으로 철학적인 대화방 제목을 15자 이내로 지어줘.
@@ -149,7 +153,7 @@ async def get_response_stream_async(context: str, query: str, history: str = "")
 
 async def generate_chat_title_async(query: str) -> str:
     """
-    Generates a short chat title based on the user's first query using Gemini.
+    Generates a short chat title based on the user's first query using OpenAI.
     """
     chain = title_prompt | get_llm() | StrOutputParser()
     title = await chain.ainvoke({"query": query})

diff --git a/backend/requirements.txt b/backend/requirements.txt
@@ -3,8 +3,7 @@ uvicorn
 supabase
 langchain-core>=1.2.5,<2.0.0
 langchain-classic>=1.0.0,<2.0.0
-langchain-google-genai>=4.2.1,<5.0.0
-google-generativeai
+langchain-openai>=1.0.0,<2.0.0  # 0.x releases require langchain-core<1.0, which conflicts with the langchain-core>=1.2.5 pin
 sse-starlette
 pydantic>=2.7.0
 pydantic-settings

diff --git a/backend/scripts/check_models.py b/backend/scripts/check_models.py
@@ -1,25 +1,25 @@
 import os
 from pathlib import Path
 from dotenv import load_dotenv
-import google.generativeai as genai
+from openai import OpenAI
 
 env_path = Path(__file__).resolve().parents[2] / ".env"
 load_dotenv(dotenv_path=env_path)
 
 import sys
 
 def main() -> int:
-    api_key = os.getenv("GEMINI_API_KEY")
+    api_key = os.getenv("OPENAI_API_KEY")
     if not api_key:
         print("No API key found!")
         return 1
 
-    genai.configure(api_key=api_key)
+    client = OpenAI(api_key=api_key)
     print("Available Models:")
     try:
-        for m in genai.list_models():
-            if 'generateContent' in m.supported_generation_methods:
-                print(m.name)
+        models = client.models.list()
+        for m in models:
+            print(m.id)
     except Exception as e:
         print(f"Error listing models: {e}")
         return 1