Binary file modified .gitignore
Binary file not shown.
2 changes: 1 addition & 1 deletion backend/app/core/config.py
@@ -11,7 +11,7 @@ class Settings(BaseSettings):
    SUPABASE_SERVICE_KEY: str = ""  # Use Service Role Key for backend operations

    model_config = SettingsConfigDict(
        env_file=str(Path(__file__).resolve().parents[2] / ".env"),
        env_file=str(Path(__file__).resolve().parents[3] / ".env"),
        env_file_encoding="utf-8"
    )
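
The only functional change is the env_file path: one more .parents level moves the lookup from the backend/ directory up to the repository root. A minimal sketch of how the two indices resolve, assuming (as the paths in this PR suggest, not something stated explicitly) that config.py lives at backend/app/core/config.py and the shared .env sits at the repo root:

from pathlib import Path

# Hypothetical absolute location, for illustration only.
p = Path("/repo/backend/app/core/config.py")
print(p.parents[2])  # /repo/backend -> where the old setting looked for .env
print(p.parents[3])  # /repo         -> repository root, where the new setting looks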

35 changes: 35 additions & 0 deletions backend/app/core/env_utils.py
@@ -0,0 +1,35 @@
import os
import re
from pathlib import Path

def parse_gemini_api_keys(env_path: Path) -> list[str]:
    """
    Reads active GEMINI_API_KEY assignments from the given .env file.
    Only extracts active assignments and strips inline comments and quotes.
    Also falls back to os.environ if no keys are found in the file.
    """
    api_keys = []

    if env_path.exists():
        with open(env_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Find all variations of GEMINI_API_KEY assignments
        matches = re.findall(
            r'^\s*GEMINI_API_KEY\s*=\s*(.+?)\s*(?:#.*)?$',
            content,
            flags=re.MULTILINE,
        )
        for m in matches:
            # Remove inline comments and strip quotes
            m = re.split(r'\s+#', m, 1)[0]
            key = m.strip().strip('"').strip("'")
            if key and key not in api_keys:
                api_keys.append(key)

    # Fallback to os.environ when parsing produced no key or file doesn't exist
    if not api_keys:
        k = os.getenv("GEMINI_API_KEY")
        if k and k not in api_keys:
            api_keys.append(k)

    return api_keys
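
For reference, a small sketch of the behaviour this parser should have; the .env contents below are hypothetical and only illustrate the regex and the os.environ fallback:

from pathlib import Path
from app.core.env_utils import parse_gemini_api_keys

# Hypothetical .env at the repository root:
#   GEMINI_API_KEY="key-one"        # primary
#   # GEMINI_API_KEY=disabled-key   (commented out, so ignored)
#   GEMINI_API_KEY = key-two
keys = parse_gemini_api_keys(Path(".env"))
# -> ['key-one', 'key-two']: quotes and inline comments are stripped and duplicates
#    skipped; os.environ["GEMINI_API_KEY"] is used only if the file yields no keys.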
57 changes: 46 additions & 11 deletions backend/app/services/llm.py
@@ -1,6 +1,10 @@
import os
import re
import threading
from pathlib import Path
import google.generativeai as genai
from app.core.config import settings
from app.core.env_utils import parse_gemini_api_keys
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser
@@ -9,25 +13,56 @@
_llm = None
_llm_lock = threading.Lock()

def get_all_gemini_keys() -> list[str]:
    """Reads active GEMINI_API_KEY assignments from the root .env file."""
    env_path = Path(__file__).resolve().parents[3] / ".env"
    keys = parse_gemini_api_keys(env_path)

    # Ensure the one from environment variables/settings is also included
    if getattr(settings, "GEMINI_API_KEY", None) and settings.GEMINI_API_KEY not in keys:
        keys.insert(0, settings.GEMINI_API_KEY)

    return keys

def get_llm():
    global _llm
    if not settings.GEMINI_API_KEY:
        raise RuntimeError("GEMINI_API_KEY must be configured")

    if _llm is None:
        with _llm_lock:
            if _llm is None:  # Double-checked locking
                # Configure Gemini API natively (optional, if native SDK features are needed)
                genai.configure(api_key=settings.GEMINI_API_KEY)
                keys = get_all_gemini_keys()

                if not keys:
                    raise RuntimeError("No GEMINI_API_KEY found in .env or environment")

                # Configure LangChain model
                # TODO: model gemini-2.5-flash will be deprecated by June 17, 2026. Plan migration to gemini-3-flash.
                _llm = ChatGoogleGenerativeAI(
                    model="gemini-2.5-flash",
                    google_api_key=settings.GEMINI_API_KEY,
                # Configure Gemini API natively with the first key
                genai.configure(api_key=keys[0])

                print(f"Loaded {len(keys)} Gemini API keys for rotation/fallbacks.")

                # Create the primary model
                primary_llm = ChatGoogleGenerativeAI(
                    model="gemini-2.5-flash-lite",
                    google_api_key=keys[0],
                    temperature=0.7,
                    max_retries=2
                    max_retries=1
                )

                if len(keys) > 1:
                    # Create fallback models with the other keys
                    fallback_llms = [
                        ChatGoogleGenerativeAI(
                            model="gemini-2.5-flash-lite",
                            google_api_key=k,
                            temperature=0.7,
                            max_retries=1
                        )
                        for k in keys[1:]
                    ]
                    # LangChain will automatically retry with the next model if one throws an error (e.g. rate limit / quota)
                    _llm = primary_llm.with_fallbacks(fallback_llms)
                else:
                    _llm = primary_llm

    return _llm
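
Taken together, get_llm() now returns either a plain ChatGoogleGenerativeAI (single key) or a RunnableWithFallbacks that retries the same input against models built from the remaining keys when the primary one errors out (e.g. on a quota or rate-limit response). A rough usage sketch; the prompt and input below are made up for illustration and are not part of this PR:

from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from app.services.llm import get_llm

prompt = PromptTemplate.from_template("Summarize in one sentence: {text}")
chain = prompt | get_llm() | StrOutputParser()

# If the first key fails, the fallback models are tried in order with the same input;
# the call only raises if every configured key fails.
print(chain.invoke({"text": "Rotate several Gemini API keys via LangChain fallbacks."}))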

