Skip to content
Merged
Show file tree
Hide file tree
Changes from 51 commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
bff36bd
rag tool for agent
Dishant1804 Jul 22, 2025
f254af8
code rabbit suggestions implemented
Dishant1804 Jul 22, 2025
a1bba29
Merge branch 'main' into RAG
Dishant1804 Jul 22, 2025
ad3f3b4
Merge branch 'main' into RAG
arkid15r Jul 22, 2025
c1334a6
Merge branch 'main' into RAG
Dishant1804 Jul 23, 2025
c9d4a27
Merge branch 'main' into RAG
Dishant1804 Jul 24, 2025
ff45de1
suggestions implemented
Dishant1804 Jul 25, 2025
4b38f5a
Merge remote-tracking branch 'upstream/main' into RAG
Dishant1804 Jul 25, 2025
b2c5b59
code rabbit suggestion
Dishant1804 Jul 25, 2025
9b94aed
Merge branch 'main' into RAG
Dishant1804 Jul 25, 2025
3038f32
Merge remote-tracking branch 'upstream/main' into RAG
Dishant1804 Jul 28, 2025
e120962
added context model
Dishant1804 Jul 28, 2025
f24453a
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Jul 29, 2025
e876a0c
retrieving data from context model
Dishant1804 Jul 29, 2025
981277a
removed try except
Dishant1804 Jul 29, 2025
8b46f08
Suggestions implemented
Dishant1804 Jul 30, 2025
16fabcf
code rabbit suggestion
Dishant1804 Jul 30, 2025
532be09
Merge branch 'main' into context-model
Dishant1804 Jul 30, 2025
77203b8
removed deafult
Dishant1804 Jul 30, 2025
9e03b53
updated tests
Dishant1804 Jul 30, 2025
ed44239
Merge branch 'main' into context-model
Dishant1804 Aug 4, 2025
41f8126
de coupled context and chunks
Dishant1804 Aug 5, 2025
c5aba9c
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 5, 2025
697a406
update method for context
Dishant1804 Aug 7, 2025
46cd884
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 7, 2025
a3255ff
major revamp and test cases
Dishant1804 Aug 9, 2025
64c079a
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 9, 2025
7affa22
code rabbit suggestions
Dishant1804 Aug 10, 2025
55132d7
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 10, 2025
3d7bd48
major revamp
Dishant1804 Aug 10, 2025
7d0731b
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 10, 2025
ff3e61a
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 12, 2025
c709b9e
suggestions implemented
Dishant1804 Aug 13, 2025
1c7fe1c
refactoring
Dishant1804 Aug 13, 2025
948c529
more tests
Dishant1804 Aug 13, 2025
1455083
Merge branch 'main' into context-model
Dishant1804 Aug 13, 2025
1e8d65e
more refactoring
Dishant1804 Aug 13, 2025
3f15d7a
Merge branch 'main' into context-model
Dishant1804 Aug 13, 2025
742a15e
Merge branch 'main' into context-model
Dishant1804 Aug 14, 2025
bd8f280
suggestions implemented
Dishant1804 Aug 14, 2025
8610dde
Merge branch 'main' into context-model
Dishant1804 Aug 14, 2025
a9da28b
chunk model update
Dishant1804 Aug 14, 2025
a0ed311
update logic and suggestions
Dishant1804 Aug 16, 2025
9646366
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 16, 2025
2d86dcb
code rabbit suggestions
Dishant1804 Aug 16, 2025
011e843
before tests and question
Dishant1804 Aug 17, 2025
466bca3
sugesstions and decoupling with tests
Dishant1804 Aug 18, 2025
9c2556c
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 18, 2025
c9f260d
Merge branch 'main' into context-model
Dishant1804 Aug 18, 2025
197c0ff
sugesstions implemented
Dishant1804 Aug 18, 2025
4dc3800
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 18, 2025
346d324
Update code
arkid15r Aug 20, 2025
baae5eb
updated code
Dishant1804 Aug 21, 2025
f6bb1bd
spelling fixes
Dishant1804 Aug 21, 2025
6c353d1
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 21, 2025
506ad46
test changes
Dishant1804 Aug 21, 2025
871d266
Update tests
arkid15r Aug 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 38 additions & 18 deletions backend/apps/ai/Makefile
Original file line number Diff line number Diff line change
@@ -1,23 +1,43 @@
ai-create-chapter-chunks:
@echo "Creating chapter chunks"
@CMD="python manage.py ai_create_chapter_chunks" $(MAKE) exec-backend-command
ai-run-rag-tool:
@echo "Running RAG tool"
@CMD="python manage.py ai_run_rag_tool" $(MAKE) exec-backend-command

ai-create-committee-chunks:
@echo "Creating committee chunks"
@CMD="python manage.py ai_create_committee_chunks" $(MAKE) exec-backend-command
ai-update-chapter-chunks:
@echo "Updating chapter chunks"
@CMD="python manage.py ai_update_chapter_chunks" $(MAKE) exec-backend-command

ai-create-event-chunks:
@echo "Creating event chunks"
@CMD="python manage.py ai_create_event_chunks" $(MAKE) exec-backend-command
ai-update-chapter-context:
@echo "Updating chapter context"
@CMD="python manage.py ai_update_chapter_context" $(MAKE) exec-backend-command

ai-create-project-chunks:
@echo "Creating project chunks"
@CMD="python manage.py ai_create_project_chunks" $(MAKE) exec-backend-command
ai-update-committee-chunks:
@echo "Updating committee chunks"
@CMD="python manage.py ai_update_committee_chunks" $(MAKE) exec-backend-command

ai-create-slack-message-chunks:
@echo "Creating Slack message chunks"
@CMD="python manage.py ai_create_slack_message_chunks" $(MAKE) exec-backend-command
ai-update-committee-context:
@echo "Updating committee context"
@CMD="python manage.py ai_update_committee_context" $(MAKE) exec-backend-command

ai-run-rag-tool:
@echo "Running RAG tool"
@CMD="python manage.py ai_run_rag_tool" $(MAKE) exec-backend-command
ai-update-event-chunks:
@echo "Updating event chunks"
@CMD="python manage.py ai_update_event_chunks" $(MAKE) exec-backend-command

ai-update-event-context:
@echo "Updating event context"
@CMD="python manage.py ai_update_event_context" $(MAKE) exec-backend-command

ai-update-project-chunks:
@echo "Updating project chunks"
@CMD="python manage.py ai_update_project_chunks" $(MAKE) exec-backend-command

ai-update-project-context:
@echo "Updating project context"
@CMD="python manage.py ai_update_project_context" $(MAKE) exec-backend-command

ai-update-slack-message-chunks:
@echo "Updating Slack message chunks"
@CMD="python manage.py ai_update_slack_message_chunks" $(MAKE) exec-backend-command

ai-update-slack-message-context:
@echo "Updating Slack message context"
@CMD="python manage.py ai_update_slack_message_context" $(MAKE) exec-backend-command
21 changes: 19 additions & 2 deletions backend/apps/ai/admin.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you order the changes you add according to existing ordering convention?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you elaborate on this one? I am unable to understand it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your ContextAdmin class goes before the ChunkAdmin and the same for register(). Compare them to the imports order for example.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is still not addressed for some reason 🤷‍♂️

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have made the changes now

Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.contrib import admin

from apps.ai.models.chunk import Chunk
from apps.ai.models.context import Context


class ChunkAdmin(admin.ModelAdmin):
Expand All @@ -11,9 +12,25 @@ class ChunkAdmin(admin.ModelAdmin):
list_display = (
"id",
"text",
"content_type",
"context",
)
search_fields = ("text", "object_id")
list_filter = ("context__entity_type",)
search_fields = ("text",)


class ContextAdmin(admin.ModelAdmin):
"""Admin for Context model."""

list_display = (
"id",
"content",
"entity_type",
"entity_id",
"source",
)
list_filter = ("entity_type", "source")
search_fields = ("content", "source")


admin.site.register(Chunk, ChunkAdmin)
admin.site.register(Context, ContextAdmin)
8 changes: 2 additions & 6 deletions backend/apps/ai/agent/tools/rag/rag_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,8 @@ def __init__(
ValueError: If the OpenAI API key is not set.

"""
try:
self.retriever = Retriever(embedding_model=embedding_model)
self.generator = Generator(chat_model=chat_model)
except Exception:
logger.exception("Failed to initialize RAG tool")
raise
self.retriever = Retriever(embedding_model=embedding_model)
self.generator = Generator(chat_model=chat_model)

def query(
self,
Expand Down
168 changes: 78 additions & 90 deletions backend/apps/ai/agent/tools/rag/retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
class Retriever:
"""A class for retrieving relevant text chunks for a RAG."""

SUPPORTED_CONTENT_TYPES = ["event", "project", "chapter", "committee", "message"]
SUPPORTED_ENTITY_TYPES = ("event", "project", "chapter", "committee", "message")

def __init__(self, embedding_model: str = "text-embedding-3-small"):
"""Initialize the Retriever.
Expand All @@ -36,7 +36,6 @@ def __init__(self, embedding_model: str = "text-embedding-3-small"):
if not (openai_api_key := os.getenv("DJANGO_OPEN_AI_SECRET_KEY")):
error_msg = "DJANGO_OPEN_AI_SECRET_KEY environment variable not set"
raise ValueError(error_msg)

self.openai_client = openai.OpenAI(api_key=openai_api_key)
self.embedding_model = embedding_model
logger.info("Retriever initialized with embedding model: %s", self.embedding_model)
Expand Down Expand Up @@ -64,121 +63,117 @@ def get_query_embedding(self, query: str) -> list[float]:
logger.exception("Unexpected error while generating embedding")
raise

def get_source_name(self, content_object) -> str:
def get_source_name(self, entity) -> str:
"""Get the name/identifier for the content object."""
for attr in ("name", "title", "login", "key", "summary"):
if getattr(content_object, attr, None):
return str(getattr(content_object, attr))

return str(content_object)
if getattr(entity, attr, None):
return str(getattr(entity, attr))
return str(entity)

def get_additional_context(self, content_object, content_type: str) -> dict[str, Any]:
def get_additional_context(self, entity, entity_type: str) -> dict[str, Any]:
"""Get additional context information based on content type.

Args:
content_object: The source object.
content_type: The model name of the content object.
entity: The source object.
entity_type: The model name of the content object.

Returns:
A dictionary with additional context information.

"""
context = {}
clean_content_type = content_type.split(".")[-1] if "." in content_type else content_type

clean_content_type = entity_type.split(".")[-1] if "." in entity_type else entity_type
if clean_content_type == "chapter":
context.update(
{
"location": getattr(content_object, "suggested_location", None),
"region": getattr(content_object, "region", None),
"country": getattr(content_object, "country", None),
"postal_code": getattr(content_object, "postal_code", None),
"currency": getattr(content_object, "currency", None),
"meetup_group": getattr(content_object, "meetup_group", None),
"tags": getattr(content_object, "tags", []),
"topics": getattr(content_object, "topics", []),
"leaders": getattr(content_object, "leaders_raw", []),
"related_urls": getattr(content_object, "related_urls", []),
"is_active": getattr(content_object, "is_active", None),
"url": getattr(content_object, "url", None),
"location": getattr(entity, "suggested_location", None),
"region": getattr(entity, "region", None),
"country": getattr(entity, "country", None),
"postal_code": getattr(entity, "postal_code", None),
"currency": getattr(entity, "currency", None),
"meetup_group": getattr(entity, "meetup_group", None),
"tags": getattr(entity, "tags", []),
"topics": getattr(entity, "topics", []),
"leaders": getattr(entity, "leaders_raw", []),
"related_urls": getattr(entity, "related_urls", []),
"is_active": getattr(entity, "is_active", None),
"url": getattr(entity, "url", None),
}
)
elif clean_content_type == "project":
context.update(
{
"level": getattr(content_object, "level", None),
"project_type": getattr(content_object, "type", None),
"languages": getattr(content_object, "languages", []),
"topics": getattr(content_object, "topics", []),
"licenses": getattr(content_object, "licenses", []),
"tags": getattr(content_object, "tags", []),
"custom_tags": getattr(content_object, "custom_tags", []),
"stars_count": getattr(content_object, "stars_count", None),
"forks_count": getattr(content_object, "forks_count", None),
"contributors_count": getattr(content_object, "contributors_count", None),
"releases_count": getattr(content_object, "releases_count", None),
"open_issues_count": getattr(content_object, "open_issues_count", None),
"leaders": getattr(content_object, "leaders_raw", []),
"related_urls": getattr(content_object, "related_urls", []),
"created_at": getattr(content_object, "created_at", None),
"updated_at": getattr(content_object, "updated_at", None),
"released_at": getattr(content_object, "released_at", None),
"health_score": getattr(content_object, "health_score", None),
"is_active": getattr(content_object, "is_active", None),
"track_issues": getattr(content_object, "track_issues", None),
"url": getattr(content_object, "url", None),
"level": getattr(entity, "level", None),
"project_type": getattr(entity, "type", None),
"languages": getattr(entity, "languages", []),
"topics": getattr(entity, "topics", []),
"licenses": getattr(entity, "licenses", []),
"tags": getattr(entity, "tags", []),
"custom_tags": getattr(entity, "custom_tags", []),
"stars_count": getattr(entity, "stars_count", None),
"forks_count": getattr(entity, "forks_count", None),
"contributors_count": getattr(entity, "contributors_count", None),
"releases_count": getattr(entity, "releases_count", None),
"open_issues_count": getattr(entity, "open_issues_count", None),
"leaders": getattr(entity, "leaders_raw", []),
"related_urls": getattr(entity, "related_urls", []),
"created_at": getattr(entity, "created_at", None),
"updated_at": getattr(entity, "updated_at", None),
"released_at": getattr(entity, "released_at", None),
"health_score": getattr(entity, "health_score", None),
"is_active": getattr(entity, "is_active", None),
"track_issues": getattr(entity, "track_issues", None),
"url": getattr(entity, "url", None),
}
)
elif clean_content_type == "event":
context.update(
{
"start_date": getattr(content_object, "start_date", None),
"end_date": getattr(content_object, "end_date", None),
"location": getattr(content_object, "suggested_location", None),
"category": getattr(content_object, "category", None),
"latitude": getattr(content_object, "latitude", None),
"longitude": getattr(content_object, "longitude", None),
"url": getattr(content_object, "url", None),
"description": getattr(content_object, "description", None),
"summary": getattr(content_object, "summary", None),
"start_date": getattr(entity, "start_date", None),
"end_date": getattr(entity, "end_date", None),
"location": getattr(entity, "suggested_location", None),
"category": getattr(entity, "category", None),
"latitude": getattr(entity, "latitude", None),
"longitude": getattr(entity, "longitude", None),
"url": getattr(entity, "url", None),
"description": getattr(entity, "description", None),
"summary": getattr(entity, "summary", None),
}
)
elif clean_content_type == "committee":
context.update(
{
"is_active": getattr(content_object, "is_active", None),
"leaders": getattr(content_object, "leaders", []),
"url": getattr(content_object, "url", None),
"description": getattr(content_object, "description", None),
"summary": getattr(content_object, "summary", None),
"tags": getattr(content_object, "tags", []),
"topics": getattr(content_object, "topics", []),
"related_urls": getattr(content_object, "related_urls", []),
"is_active": getattr(entity, "is_active", None),
"leaders": getattr(entity, "leaders", []),
"url": getattr(entity, "url", None),
"description": getattr(entity, "description", None),
"summary": getattr(entity, "summary", None),
"tags": getattr(entity, "tags", []),
"topics": getattr(entity, "topics", []),
"related_urls": getattr(entity, "related_urls", []),
}
)
elif clean_content_type == "message":
context.update(
{
"channel": (
getattr(content_object.conversation, "slack_channel_id", None)
if hasattr(content_object, "conversation") and content_object.conversation
getattr(entity.conversation, "slack_channel_id", None)
if hasattr(entity, "conversation") and entity.conversation
else None
),
"thread_ts": (
getattr(content_object.parent_message, "ts", None)
if hasattr(content_object, "parent_message")
and content_object.parent_message
getattr(entity.parent_message, "ts", None)
if hasattr(entity, "parent_message") and entity.parent_message
else None
),
"ts": getattr(content_object, "ts", None),
"ts": getattr(entity, "ts", None),
"user": (
getattr(content_object.author, "name", None)
if hasattr(content_object, "author") and content_object.author
getattr(entity.author, "name", None)
if hasattr(entity, "author") and entity.author
else None
),
}
)

return {k: v for k, v in context.items() if v is not None}

def retrieve(
Expand All @@ -201,51 +196,45 @@ def retrieve(

"""
query_embedding = self.get_query_embedding(query)

if not content_types:
content_types = self.extract_content_types_from_query(query)

queryset = Chunk.objects.annotate(
similarity=1 - CosineDistance("embedding", query_embedding)
).filter(similarity__gte=similarity_threshold)

if content_types:
content_type_query = Q()
for name in content_types:
lower_name = name.lower()
if "." in lower_name:
app_label, model = lower_name.split(".", 1)
content_type_query |= Q(
content_type__app_label=app_label, content_type__model=model
context__entity_type__app_label=app_label,
context__entity_type__model=model,
)
else:
content_type_query |= Q(content_type__model=lower_name)
content_type_query |= Q(context__entity_type__model=lower_name)
queryset = queryset.filter(content_type_query)

chunks = (
queryset.select_related("content_type")
.prefetch_related("content_object")
.order_by("-similarity")[:limit]
)
chunks = queryset.select_related("context__entity_type").order_by("-similarity")[:limit]

results = []
for chunk in chunks:
if not chunk.content_object:
if not chunk.context or not chunk.context.entity:
logger.warning("Content object is None for chunk %s. Skipping.", chunk.id)
continue

source_name = self.get_source_name(chunk.content_object)
source_name = self.get_source_name(chunk.context.entity)
additional_context = self.get_additional_context(
chunk.content_object, chunk.content_type.model
chunk.context.entity, chunk.context.entity_type.model
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems you don't need the second param, as you can figure that out from the model instance inside of get_additional_context.

)

results.append(
{
"text": chunk.text,
"similarity": float(chunk.similarity),
"source_type": chunk.content_type.model,
"source_type": chunk.context.entity_type.model,
"source_name": source_name,
"source_id": chunk.object_id,
"source_id": chunk.context.entity_id,
"additional_context": additional_context,
}
)
Expand All @@ -262,13 +251,12 @@ def extract_content_types_from_query(self, query: str) -> list[str]:
A list of detected content type names.

"""
detected_types = []
query_words = set(re.findall(r"\b\w+\b", query.lower()))

detected_types = [
content_type
for content_type in self.SUPPORTED_CONTENT_TYPES
if content_type in query_words or f"{content_type}s" in query_words
entity_type
for entity_type in self.SUPPORTED_ENTITY_TYPES
if entity_type in query_words or f"{entity_type}s" in query_words
]

if detected_types:
Expand Down
Empty file.
Loading