Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ DJANGO_SENTRY_DSN=None
DJANGO_SLACK_BOT_TOKEN=None
DJANGO_SLACK_SIGNING_SECRET=None
GITHUB_TOKEN=None
DJANGO_LLM_PROVIDER=None
DJANGO_GOOGLE_API_KEY=None
34 changes: 24 additions & 10 deletions backend/apps/ai/common/llm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,46 @@

from __future__ import annotations

import os
import logging

from crewai import LLM
from django.conf import settings

logger = logging.getLogger(__name__)


def get_llm() -> LLM:
    """Get configured LLM instance.

    Reads ``settings.LLM_PROVIDER`` to pick the backend. Supported values
    are ``"openai"`` (default) and ``"google"``; anything else logs a
    warning and falls back to OpenAI.

    Returns:
        LLM: Configured LLM instance based on settings.

    """
    provider = settings.LLM_PROVIDER

    if provider == "google":
        return LLM(
            model=settings.GOOGLE_MODEL_NAME,
            # Gemini's OpenAI-compatible endpoint lets the crewai LLM wrapper
            # talk to Google models without a dedicated provider integration.
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
            api_key=settings.GOOGLE_API_KEY,
            temperature=0.1,
        )

    # Warn on an unrecognized (but non-empty) provider before falling back;
    # "google" has already returned above, so only "openai" is silent here.
    if provider and provider != "openai":
        logger.warning(
            "Unrecognized LLM_PROVIDER '%s'. Falling back to OpenAI. "
            "Supported providers: 'openai', 'google'",
            provider,
        )

    # Default / fallback: OpenAI. Built in exactly one place so the
    # configured path and the fallback path can never drift apart.
    return LLM(
        model=settings.OPENAI_MODEL_NAME,
        api_key=settings.OPEN_AI_SECRET_KEY,
        temperature=0.1,
    )
10 changes: 8 additions & 2 deletions backend/apps/ai/embeddings/factory.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
"""Factory function to get the configured embedder."""

from django.conf import settings

from apps.ai.embeddings.base import Embedder
from apps.ai.embeddings.google import GoogleEmbedder
from apps.ai.embeddings.openai import OpenAIEmbedder


def get_embedder() -> Embedder:
    """Get the configured embedder.

    Selects the embedding backend from ``settings.LLM_PROVIDER``: Google
    when the provider is ``"google"``, OpenAI otherwise. Can be extended
    to support other providers (e.g. Anthropic, Cohere).

    Returns:
        Embedder instance configured for the current provider.

    """
    if settings.LLM_PROVIDER == "google":
        return GoogleEmbedder()

    # Default: OpenAI, matching get_llm()'s fallback behavior.
    return OpenAIEmbedder()
142 changes: 142 additions & 0 deletions backend/apps/ai/embeddings/google.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
"""Google implementation of embedder."""

from __future__ import annotations

try:
from google import genai
except ImportError:
# Fallback to deprecated package if new one not available
try:
import warnings

import google.generativeai as genai

warnings.warn(
(
"google.generativeai is deprecated. "
"Please install google-genai package: pip install google-genai"
),
DeprecationWarning,
stacklevel=2,
)
except ImportError:
genai = None

import requests
from django.conf import settings

from apps.ai.embeddings.base import Embedder


class GoogleEmbedder(Embedder):
"""Google implementation of embedder using Google Generative AI SDK."""

def __init__(self, model: str = "gemini-embedding-001") -> None:
"""Initialize Google embedder.

Args:
model: The Google embedding model to use.
Default: gemini-embedding-001 (recommended, 768 dimensions)
Note: text-embedding-004 is deprecated

"""
self.api_key = settings.GOOGLE_API_KEY
self.model = model
# gemini-embedding-001 has 768 dimensions
self._dimensions = 768

# Use Google Generative AI SDK (preferred method)
# The SDK handles endpoint URLs and authentication automatically
if genai:
genai.configure(api_key=self.api_key)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P0: Bug: The new google.genai SDK doesn't have genai.configure() or genai.embed_content() — these are from the deprecated google.generativeai package. The new SDK uses a client-based API: client = genai.Client(api_key=...) and client.models.embed_content(...). This code will raise AttributeError at runtime.

Initialize a client in __init__ and use self.client.models.embed_content(...) in the embed methods, consistent with how OpenAIEmbedder uses self.client.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At backend/apps/ai/embeddings/google.py, line 51:

<comment>Bug: The new `google.genai` SDK doesn't have `genai.configure()` or `genai.embed_content()` — these are from the deprecated `google.generativeai` package. The new SDK uses a client-based API: `client = genai.Client(api_key=...)` and `client.models.embed_content(...)`. This code will raise `AttributeError` at runtime.

Initialize a client in `__init__` and use `self.client.models.embed_content(...)` in the embed methods, consistent with how `OpenAIEmbedder` uses `self.client`.</comment>

<file context>
@@ -0,0 +1,142 @@
+        # Use Google Generative AI SDK (preferred method)
+        # The SDK handles endpoint URLs and authentication automatically
+        if genai:
+            genai.configure(api_key=self.api_key)
+            self.use_sdk = True
+        else:
</file context>

self.use_sdk = True
else:
# Fallback to REST API (not recommended - use SDK instead)
self.base_url = "https://generativelanguage.googleapis.com/v1beta"
self.use_sdk = False
import warnings

warnings.warn(
"Google GenAI SDK not available. Install it with: pip install google-genai",
UserWarning,
stacklevel=2,
)

def embed_query(self, text: str) -> list[float]:
"""Generate embedding for a query string.

Args:
text: The query text to embed.

Returns:
List of floats representing the embedding vector.

"""
if self.use_sdk and genai:
# Use Google Generative AI SDK (preferred method)
# SDK automatically handles the correct endpoint and model format
result = genai.embed_content(
model=self.model,
content=text,
)
# SDK returns embedding in 'embedding' key
return result["embedding"]

# Fallback to REST API
endpoint = f"{self.base_url}/models/{self.model}:embedContent?key={self.api_key}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Security: API key is passed as a URL query parameter, which can leak into server/proxy logs and error reports. Use the x-goog-api-key header instead, consistent with Google's own API examples.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At backend/apps/ai/embeddings/google.py, line 86:

<comment>Security: API key is passed as a URL query parameter, which can leak into server/proxy logs and error reports. Use the `x-goog-api-key` header instead, consistent with Google's own API examples.</comment>

<file context>
@@ -0,0 +1,142 @@
+            return result["embedding"]
+
+        # Fallback to REST API
+        endpoint = f"{self.base_url}/models/{self.model}:embedContent?key={self.api_key}"
+        response = requests.post(
+            endpoint,
</file context>

response = requests.post(
endpoint,
headers={"Content-Type": "application/json"},
json={
"content": {"parts": [{"text": text}]},
},
timeout=30,
)
response.raise_for_status()
data = response.json()
return data["embedding"]["values"]

def embed_documents(self, texts: list[str]) -> list[list[float]]:
"""Generate embeddings for multiple documents.

Args:
texts: List of document texts to embed.

Returns:
List of embedding vectors, one per document.

"""
if self.use_sdk and genai:
# Use Google Generative AI SDK (preferred method)
# SDK handles batching automatically
results = []
for text in texts:
result = genai.embed_content(
model=self.model,
content=text,
)
results.append(result["embedding"])
return results

# Fallback to REST API
endpoint = f"{self.base_url}/models/{self.model}:batchEmbedContents?key={self.api_key}"
response = requests.post(
endpoint,
headers={"Content-Type": "application/json"},
json={
"requests": [{"content": {"parts": [{"text": text}]}} for text in texts],
},
timeout=60,
)
response.raise_for_status()
data = response.json()
return [item["embedding"]["values"] for item in data["embeddings"]]

def get_dimensions(self) -> int:
"""Get the dimension of embeddings produced by this embedder.

Returns:
Integer representing the embedding dimension.

"""
return self._dimensions
2 changes: 1 addition & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ packages = [ { include = "apps" } ]
[tool.poetry.dependencies]
algoliasearch = "^4.13.2"
algoliasearch-django = "^4.0.0"
crewai = { version = "^1.7.2", python = ">=3.10,<3.14" }
crewai = { version = "^1.7.2", python = ">=3.10,<3.14", extras = [ "google-genai" ] }
django = "^6.0"
django-configurations = "^2.5.1"
django-cors-headers = "^4.7.0"
Expand Down
13 changes: 12 additions & 1 deletion backend/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,18 @@ class Base(Configuration):

STATIC_ROOT = BASE_DIR / "staticfiles"

OPEN_AI_SECRET_KEY = values.SecretValue(environ_name="OPEN_AI_SECRET_KEY")
# django-configurations automatically prefixes with "DJANGO_" and uppercases,
# so OPEN_AI_SECRET_KEY becomes DJANGO_OPEN_AI_SECRET_KEY (which is what all
# tests and code references use). No need to specify environ_name explicitly.
OPEN_AI_SECRET_KEY = values.SecretValue()
OPENAI_MODEL_NAME = values.Value(default="gpt-4o-mini")
# Note: GOOGLE_API_KEY uses Value() instead of SecretValue() because it's optional
# (only required when LLM_PROVIDER == "google"). SecretValue() requires the env var
# to always be set, which breaks setups using only OpenAI. This should still be
# treated as a secret and not exposed in logs or configuration output.
GOOGLE_API_KEY = values.Value(default=None)
GOOGLE_MODEL_NAME = values.Value(default="gemini-2.0-flash")
LLM_PROVIDER = values.Value(default="openai")

SLACK_BOT_TOKEN = values.SecretValue()
SLACK_COMMANDS_ENABLED = True
Expand Down
72 changes: 48 additions & 24 deletions backend/tests/apps/ai/common/llm_config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@
import os
from unittest.mock import Mock, patch

import pytest

from apps.ai.common.llm_config import get_llm


class TestLLMConfig:
"""Test cases for LLM configuration."""

@patch.dict(os.environ, {"LLM_PROVIDER": "openai", "DJANGO_OPEN_AI_SECRET_KEY": "test-key"})
@patch.dict(
os.environ,
{"DJANGO_LLM_PROVIDER": "openai", "DJANGO_OPEN_AI_SECRET_KEY": "test-key"},
)
@patch("apps.ai.common.llm_config.LLM")
def test_get_llm_openai_default(self, mock_llm):
"""Test getting OpenAI LLM with default model."""
Expand All @@ -21,7 +22,7 @@ def test_get_llm_openai_default(self, mock_llm):
result = get_llm()

mock_llm.assert_called_once_with(
model="gpt-4.1-mini",
model="gpt-4o-mini",
api_key="test-key",
temperature=0.1,
)
Expand All @@ -30,9 +31,9 @@ def test_get_llm_openai_default(self, mock_llm):
@patch.dict(
os.environ,
{
"LLM_PROVIDER": "openai",
"DJANGO_LLM_PROVIDER": "openai",
"DJANGO_OPEN_AI_SECRET_KEY": "test-key",
"OPENAI_MODEL_NAME": "gpt-4",
"DJANGO_OPENAI_MODEL_NAME": "gpt-4",
},
)
@patch("apps.ai.common.llm_config.LLM")
Expand All @@ -53,50 +54,73 @@ def test_get_llm_openai_custom_model(self, mock_llm):
@patch.dict(
os.environ,
{
"LLM_PROVIDER": "anthropic",
"ANTHROPIC_API_KEY": "test-anthropic-key",
"DJANGO_LLM_PROVIDER": "unsupported",
"DJANGO_OPEN_AI_SECRET_KEY": "test-key",
},
)
@patch("apps.ai.common.llm_config.logger")
@patch("apps.ai.common.llm_config.LLM")
def test_get_llm_anthropic_default(self, mock_llm):
"""Test getting Anthropic LLM with default model."""
def test_get_llm_unsupported_provider(self, mock_llm, mock_logger):
"""Test getting LLM with unsupported provider logs warning and falls back to OpenAI."""
mock_llm_instance = Mock()
mock_llm.return_value = mock_llm_instance

result = get_llm()

# Should log warning about unrecognized provider
mock_logger.warning.assert_called_once()
# Should fallback to OpenAI
mock_llm.assert_called_once_with(
model="claude-3-5-sonnet-20241022",
api_key="test-anthropic-key",
model="gpt-4o-mini",
api_key="test-key",
temperature=0.1,
)
assert result == mock_llm_instance

@patch.dict(
os.environ,
{
"LLM_PROVIDER": "anthropic",
"ANTHROPIC_API_KEY": "test-anthropic-key",
"ANTHROPIC_MODEL_NAME": "claude-3-opus",
"DJANGO_LLM_PROVIDER": "google",
"DJANGO_GOOGLE_API_KEY": "test-google-key",
"DJANGO_GOOGLE_MODEL_NAME": "gemini-2.0-flash",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: This test claims to verify the default Google model, but it explicitly provides DJANGO_GOOGLE_MODEL_NAME in the environment. Compare with test_get_llm_openai_default, which correctly omits DJANGO_OPENAI_MODEL_NAME to test the true default. To actually test the default model path for Google, remove DJANGO_GOOGLE_MODEL_NAME from the patched env vars and assert that the default model (e.g., gemini-2.0-flash) is used.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At backend/tests/apps/ai/common/llm_config_test.py, line 85:

<comment>This test claims to verify the default Google model, but it explicitly provides `DJANGO_GOOGLE_MODEL_NAME` in the environment. Compare with `test_get_llm_openai_default`, which correctly omits `DJANGO_OPENAI_MODEL_NAME` to test the true default. To actually test the default model path for Google, remove `DJANGO_GOOGLE_MODEL_NAME` from the patched env vars and assert that the default model (e.g., `gemini-2.0-flash`) is used.</comment>

<file context>
@@ -53,50 +54,73 @@ def test_get_llm_openai_custom_model(self, mock_llm):
-            "ANTHROPIC_MODEL_NAME": "claude-3-opus",
+            "DJANGO_LLM_PROVIDER": "google",
+            "DJANGO_GOOGLE_API_KEY": "test-google-key",
+            "DJANGO_GOOGLE_MODEL_NAME": "gemini-2.0-flash",
         },
     )
</file context>

},
)
@patch("apps.ai.common.llm_config.LLM")
def test_get_llm_anthropic_custom_model(self, mock_llm):
"""Test getting Anthropic LLM with custom model."""
def test_get_llm_google(self, mock_llm):
"""Test getting Google LLM with default model."""
mock_llm_instance = Mock()
mock_llm.return_value = mock_llm_instance

result = get_llm()

mock_llm.assert_called_once_with(
model="claude-3-opus",
api_key="test-anthropic-key",
model="gemini-2.0-flash",
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
api_key="test-google-key",
temperature=0.1,
)
assert result == mock_llm_instance

@patch.dict(os.environ, {"LLM_PROVIDER": "unsupported"})
def test_get_llm_unsupported_provider(self):
"""Test getting LLM with unsupported provider raises error."""
with pytest.raises(ValueError, match="Unsupported LLM provider: unsupported"):
get_llm()
@patch.dict(
os.environ,
{
"DJANGO_LLM_PROVIDER": "google",
"DJANGO_GOOGLE_API_KEY": "test-google-key",
"DJANGO_GOOGLE_MODEL_NAME": "gemini-pro",
},
)
@patch("apps.ai.common.llm_config.LLM")
def test_get_llm_google_custom_model(self, mock_llm):
"""Test getting Google LLM with custom model."""
mock_llm_instance = Mock()
mock_llm.return_value = mock_llm_instance

result = get_llm()

mock_llm.assert_called_once_with(
model="gemini-pro",
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
api_key="test-google-key",
temperature=0.1,
)
assert result == mock_llm_instance
Loading