Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
bff36bd
rag tool for agent
Dishant1804 Jul 22, 2025
f254af8
code rabbit suggestions implemented
Dishant1804 Jul 22, 2025
a1bba29
Merge branch 'main' into RAG
Dishant1804 Jul 22, 2025
ad3f3b4
Merge branch 'main' into RAG
arkid15r Jul 22, 2025
c1334a6
Merge branch 'main' into RAG
Dishant1804 Jul 23, 2025
c9d4a27
Merge branch 'main' into RAG
Dishant1804 Jul 24, 2025
ff45de1
suggestions implemented
Dishant1804 Jul 25, 2025
4b38f5a
Merge remote-tracking branch 'upstream/main' into RAG
Dishant1804 Jul 25, 2025
b2c5b59
code rabbit suggestion
Dishant1804 Jul 25, 2025
9b94aed
Merge branch 'main' into RAG
Dishant1804 Jul 25, 2025
3038f32
Merge remote-tracking branch 'upstream/main' into RAG
Dishant1804 Jul 28, 2025
e120962
added context model
Dishant1804 Jul 28, 2025
f24453a
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Jul 29, 2025
e876a0c
retrieving data from context model
Dishant1804 Jul 29, 2025
981277a
removed try except
Dishant1804 Jul 29, 2025
8b46f08
Suggestions implemented
Dishant1804 Jul 30, 2025
16fabcf
code rabbit suggestion
Dishant1804 Jul 30, 2025
532be09
Merge branch 'main' into context-model
Dishant1804 Jul 30, 2025
77203b8
removed deafult
Dishant1804 Jul 30, 2025
9e03b53
updated tests
Dishant1804 Jul 30, 2025
ed44239
Merge branch 'main' into context-model
Dishant1804 Aug 4, 2025
41f8126
de coupled context and chunks
Dishant1804 Aug 5, 2025
c5aba9c
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 5, 2025
697a406
update method for context
Dishant1804 Aug 7, 2025
46cd884
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 7, 2025
a3255ff
major revamp and test cases
Dishant1804 Aug 9, 2025
64c079a
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 9, 2025
7affa22
code rabbit suggestions
Dishant1804 Aug 10, 2025
55132d7
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 10, 2025
3d7bd48
major revamp
Dishant1804 Aug 10, 2025
7d0731b
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 10, 2025
ff3e61a
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 12, 2025
c709b9e
suggestions implemented
Dishant1804 Aug 13, 2025
1c7fe1c
refactoring
Dishant1804 Aug 13, 2025
948c529
more tests
Dishant1804 Aug 13, 2025
1455083
Merge branch 'main' into context-model
Dishant1804 Aug 13, 2025
1e8d65e
more refactoring
Dishant1804 Aug 13, 2025
3f15d7a
Merge branch 'main' into context-model
Dishant1804 Aug 13, 2025
742a15e
Merge branch 'main' into context-model
Dishant1804 Aug 14, 2025
bd8f280
suggestions implemented
Dishant1804 Aug 14, 2025
8610dde
Merge branch 'main' into context-model
Dishant1804 Aug 14, 2025
a9da28b
chunk model update
Dishant1804 Aug 14, 2025
a0ed311
update logic and suggestions
Dishant1804 Aug 16, 2025
9646366
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 16, 2025
2d86dcb
code rabbit suggestions
Dishant1804 Aug 16, 2025
011e843
before tests and question
Dishant1804 Aug 17, 2025
466bca3
sugesstions and decoupling with tests
Dishant1804 Aug 18, 2025
9c2556c
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 18, 2025
c9f260d
Merge branch 'main' into context-model
Dishant1804 Aug 18, 2025
197c0ff
sugesstions implemented
Dishant1804 Aug 18, 2025
4dc3800
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 18, 2025
346d324
Update code
arkid15r Aug 20, 2025
baae5eb
updated code
Dishant1804 Aug 21, 2025
f6bb1bd
spelling fixes
Dishant1804 Aug 21, 2025
6c353d1
Merge remote-tracking branch 'upstream/main' into context-model
Dishant1804 Aug 21, 2025
506ad46
test changes
Dishant1804 Aug 21, 2025
871d266
Update tests
arkid15r Aug 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions backend/apps/ai/admin.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you order the changes you add according to existing ordering convention?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you elaborate on this one? I am unable to understand it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your ContextAdmin class goes before the ChunkAdmin and the same for register(). Compare them to the imports order for example.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is still not addressed for some reason 🤷‍♂️

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have made the changes now

Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,21 @@
from django.contrib import admin

from apps.ai.models.chunk import Chunk
from apps.ai.models.context import Context


class ContextAdmin(admin.ModelAdmin):
    """Django admin configuration for the Context model."""

    # Columns rendered in the changelist table.
    list_display = ("id", "content", "content_type", "object_id", "source")
    # Sidebar filters offered on the changelist.
    list_filter = ("content_type", "source")
    # Fields matched by the admin search box.
    search_fields = ("content", "source")


class ChunkAdmin(admin.ModelAdmin):
Expand All @@ -11,9 +26,11 @@ class ChunkAdmin(admin.ModelAdmin):
list_display = (
"id",
"text",
"content_type",
"context",
)
search_fields = ("text", "object_id")
search_fields = ("text",)
list_filter = ("context__content_type",)


admin.site.register(Context, ContextAdmin)
admin.site.register(Chunk, ChunkAdmin)
8 changes: 2 additions & 6 deletions backend/apps/ai/agent/tools/rag/rag_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,8 @@ def __init__(
ValueError: If the OpenAI API key is not set.

"""
try:
self.retriever = Retriever(embedding_model=embedding_model)
self.generator = Generator(chat_model=chat_model)
except Exception:
logger.exception("Failed to initialize RAG tool")
raise
self.retriever = Retriever(embedding_model=embedding_model)
self.generator = Generator(chat_model=chat_model)

def query(
self,
Expand Down
29 changes: 11 additions & 18 deletions backend/apps/ai/agent/tools/rag/retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
class Retriever:
"""A class for retrieving relevant text chunks for a RAG."""

SUPPORTED_CONTENT_TYPES = ["event", "project", "chapter", "committee", "message"]
SUPPORTED_CONTENT_TYPES = ("event", "project", "chapter", "committee", "message")

def __init__(self, embedding_model: str = "text-embedding-3-small"):
"""Initialize the Retriever.
Expand All @@ -36,7 +36,6 @@ def __init__(self, embedding_model: str = "text-embedding-3-small"):
if not (openai_api_key := os.getenv("DJANGO_OPEN_AI_SECRET_KEY")):
error_msg = "DJANGO_OPEN_AI_SECRET_KEY environment variable not set"
raise ValueError(error_msg)

self.openai_client = openai.OpenAI(api_key=openai_api_key)
self.embedding_model = embedding_model
logger.info("Retriever initialized with embedding model: %s", self.embedding_model)
Expand Down Expand Up @@ -69,7 +68,6 @@ def get_source_name(self, content_object) -> str:
for attr in ("name", "title", "login", "key", "summary"):
if getattr(content_object, attr, None):
return str(getattr(content_object, attr))

return str(content_object)

def get_additional_context(self, content_object, content_type: str) -> dict[str, Any]:
Expand All @@ -85,7 +83,6 @@ def get_additional_context(self, content_object, content_type: str) -> dict[str,
"""
context = {}
clean_content_type = content_type.split(".")[-1] if "." in content_type else content_type

if clean_content_type == "chapter":
context.update(
{
Expand Down Expand Up @@ -178,7 +175,6 @@ def get_additional_context(self, content_object, content_type: str) -> dict[str,
),
}
)

return {k: v for k, v in context.items() if v is not None}

def retrieve(
Expand All @@ -201,51 +197,49 @@ def retrieve(

"""
query_embedding = self.get_query_embedding(query)

if not content_types:
content_types = self.extract_content_types_from_query(query)

queryset = Chunk.objects.annotate(
similarity=1 - CosineDistance("embedding", query_embedding)
).filter(similarity__gte=similarity_threshold)

if content_types:
content_type_query = Q()
for name in content_types:
lower_name = name.lower()
if "." in lower_name:
app_label, model = lower_name.split(".", 1)
content_type_query |= Q(
content_type__app_label=app_label, content_type__model=model
context__content_type__app_label=app_label,
context__content_type__model=model,
)
else:
content_type_query |= Q(content_type__model=lower_name)
content_type_query |= Q(context__content_type__model=lower_name)
queryset = queryset.filter(content_type_query)

chunks = (
queryset.select_related("content_type")
.prefetch_related("content_object")
queryset.select_related("context__content_type")
.prefetch_related("context__content_object")
.order_by("-similarity")[:limit]
)

results = []
for chunk in chunks:
if not chunk.content_object:
if not chunk.context or not chunk.context.content_object:
logger.warning("Content object is None for chunk %s. Skipping.", chunk.id)
continue

source_name = self.get_source_name(chunk.content_object)
source_name = self.get_source_name(chunk.context.content_object)
additional_context = self.get_additional_context(
chunk.content_object, chunk.content_type.model
chunk.context.content_object, chunk.context.content_type.model
)

results.append(
{
"text": chunk.text,
"similarity": float(chunk.similarity),
"source_type": chunk.content_type.model,
"source_type": chunk.context.content_type.model,
"source_name": source_name,
"source_id": chunk.object_id,
"source_id": chunk.context.object_id,
"additional_context": additional_context,
}
)
Expand All @@ -262,7 +256,6 @@ def extract_content_types_from_query(self, query: str) -> list[str]:
A list of detected content type names.

"""
detected_types = []
query_words = set(re.findall(r"\b\w+\b", query.lower()))

detected_types = [
Expand Down
9 changes: 8 additions & 1 deletion backend/apps/ai/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
MIN_REQUEST_INTERVAL_SECONDS,
)
from apps.ai.models.chunk import Chunk
from apps.ai.models.context import Context

logger: logging.Logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -43,6 +44,12 @@ def create_chunks_and_embeddings(
model="text-embedding-3-small",
)

context = Context(
content="\n".join(all_chunk_texts),
content_object=content_object,
)
context.save()

return [
chunk
for text, embedding in zip(
Expand All @@ -53,7 +60,7 @@ def create_chunks_and_embeddings(
if (
chunk := Chunk.update_data(
text=text,
content_object=content_object,
context=context,
embedding=embedding,
save=False,
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Generated by Django 5.2.4 on 2025-07-28 09:00

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the Context model and link Chunk to it.

    Adds the ``ai_contexts`` table, gives ``Chunk`` a ``context`` foreign key,
    switches the chunk uniqueness constraint to ``(context, text)`` and drops
    the ``content_type`` / ``object_id`` columns from ``Chunk``.
    """

    dependencies = [
        ("ai", "0004_alter_chunk_unique_together_chunk_content_type_and_more"),
        ("contenttypes", "0002_remove_content_type_name"),
    ]

    operations = [
        # New model holding the generated text plus a (content_type, object_id)
        # pair and a free-form source label.
        migrations.CreateModel(
            name="Context",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
                    ),
                ),
                ("nest_created_at", models.DateTimeField(auto_now_add=True)),
                ("nest_updated_at", models.DateTimeField(auto_now=True)),
                ("generated_text", models.TextField(verbose_name="Generated Text")),
                ("object_id", models.PositiveIntegerField(default=0)),
                ("source", models.CharField(blank=True, default="", max_length=100)),
                (
                    "content_type",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        to="contenttypes.contenttype",
                    ),
                ),
            ],
            options={
                "verbose_name": "Context",
                "db_table": "ai_contexts",
            },
        ),
        # Clear the existing chunk uniqueness constraint before the fields it
        # presumably references are removed below.
        migrations.AlterUniqueTogether(
            name="chunk",
            unique_together=set(),
        ),
        # Nullable at this point, so the column can be added to existing rows.
        migrations.AddField(
            model_name="chunk",
            name="context",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="chunks",
                to="ai.context",
            ),
        ),
        # Chunk text is now unique per context.
        migrations.AlterUniqueTogether(
            name="chunk",
            unique_together={("context", "text")},
        ),
        # NOTE(review): no data migration is visible in this file, so any
        # values stored in chunk.content_type / chunk.object_id appear to be
        # discarded rather than copied onto Context rows; confirm intended.
        migrations.RemoveField(
            model_name="chunk",
            name="content_type",
        ),
        migrations.RemoveField(
            model_name="chunk",
            name="object_id",
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Generated by Django 5.2.4 on 2025-07-30 12:49

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    """Rename Context.generated_text to content and alter Chunk.context."""

    dependencies = [
        ("ai", "0005_context_alter_chunk_unique_together_chunk_context_and_more"),
    ]

    operations = [
        # Column rename only; the field definition itself is not changed here.
        migrations.RenameField(
            model_name="context",
            old_name="generated_text",
            new_name="content",
        ),
        # The field is redeclared without null=True / blank=True.
        # NOTE(review): default="" is an empty string on a ForeignKey, which
        # does not look like a valid primary-key value for ai.context; verify
        # how this behaves if chunk rows without a context exist when applied.
        migrations.AlterField(
            model_name="chunk",
            name="context",
            field=models.ForeignKey(
                default="",
                on_delete=django.db.models.deletion.CASCADE,
                related_name="chunks",
                to="ai.context",
            ),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Generated by Django 5.2.4 on 2025-07-30 18:15

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter Chunk.context's default and make Context unique per object."""

    dependencies = [
        ("ai", "0006_rename_generated_text_context_content_and_more"),
        ("contenttypes", "0002_remove_content_type_name"),
    ]

    operations = [
        # Redeclares the foreign key with default=None and no null=True.
        # NOTE(review): presumably this means callers must always supply a
        # context when creating a Chunk; confirm against the model definition.
        migrations.AlterField(
            model_name="chunk",
            name="context",
            field=models.ForeignKey(
                default=None,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="chunks",
                to="ai.context",
            ),
        ),
        # At most one Context row per (content_type, object_id) pair.
        # NOTE(review): applying this will presumably fail if duplicate pairs
        # already exist in the table; verify existing data before deploying.
        migrations.AlterUniqueTogether(
            name="context",
            unique_together={("content_type", "object_id")},
        ),
    ]
Loading