From 9627d1ea8194f09c6b3d58d5e9042f76469974e1 Mon Sep 17 00:00:00 2001 From: Dishant1804 Date: Sun, 22 Jun 2025 11:12:51 +0530 Subject: [PATCH 1/8] chunk model and embeddings --- backend/Makefile | 4 + backend/apps/slack/admin.py | 14 + .../commands/slack_create_chunks.py | 130 +++ backend/apps/slack/migrations/0018_chunk.py | 49 ++ backend/apps/slack/models/__init__.py | 1 + backend/apps/slack/models/chunk.py | 70 ++ backend/poetry.lock | 755 +++++++++++++++++- backend/pyproject.toml | 3 + backend/tests/slack/models/chunk_test.py | 193 +++++ docker-compose/local.yaml | 2 +- docker-compose/production.yaml | 2 +- docker-compose/staging.yaml | 2 +- 12 files changed, 1207 insertions(+), 18 deletions(-) create mode 100644 backend/apps/slack/management/commands/slack_create_chunks.py create mode 100644 backend/apps/slack/migrations/0018_chunk.py create mode 100644 backend/apps/slack/models/chunk.py create mode 100644 backend/tests/slack/models/chunk_test.py diff --git a/backend/Makefile b/backend/Makefile index 7f5983f184..cbd4fc46db 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -175,6 +175,10 @@ slack-sync-messages: @echo "Syncing Slack messages" @CMD="python manage.py slack_sync_messages" $(MAKE) exec-backend-command +slack-create-message-chunks: + @echo "creating message chunks" + @CMD="python manage.py slack_create_chunks" $(MAKE) exec-backend-command + sync-data: \ update-data \ enrich-data \ diff --git a/backend/apps/slack/admin.py b/backend/apps/slack/admin.py index 44687f3dbc..13a76b95af 100644 --- a/backend/apps/slack/admin.py +++ b/backend/apps/slack/admin.py @@ -2,6 +2,7 @@ from django.contrib import admin, messages +from apps.slack.models.chunk import Chunk from apps.slack.models.conversation import Conversation from apps.slack.models.event import Event from apps.slack.models.member import Member @@ -9,6 +10,18 @@ from apps.slack.models.workspace import Workspace +class ChunkAdmin(admin.ModelAdmin): + list_display = ( + "id", + "message", + "chunk_text", + ) + search_fields = ( + "message__slack_message_id", + "chunk_text", + ) + + class ConversationAdmin(admin.ModelAdmin): list_display = ( "name", @@ -161,6 +174,7 @@ class WorkspaceAdmin(admin.ModelAdmin): ) +admin.site.register(Chunk, ChunkAdmin) admin.site.register(Conversation, ConversationAdmin) admin.site.register(Event, EventAdmin) admin.site.register(Member, MemberAdmin) diff --git a/backend/apps/slack/management/commands/slack_create_chunks.py b/backend/apps/slack/management/commands/slack_create_chunks.py new file mode 100644 index 0000000000..5e9bb544ca --- /dev/null +++ b/backend/apps/slack/management/commands/slack_create_chunks.py @@ -0,0 +1,130 @@ +"""A command to create chunks of Slack messages.""" + +import os +import re + +import openai +from django.core.management.base import BaseCommand +from langchain.text_splitter import RecursiveCharacterTextSplitter + +from apps.slack.models.chunk import Chunk +from apps.slack.models.message import Message + + +class Command(BaseCommand): + help = "Create chunks for Slack messages" + + def handle(self, *args, **options): + openai_api_key = os.getenv("DJANGO_OPEN_AI_SECRET_KEY") + + if not openai_api_key: + self.stdout.write( + self.style.ERROR("DJANGO_OPEN_AI_SECRET_KEY environment variable not set") + ) + return + + self.openai_client = openai.OpenAI(api_key=openai_api_key) + + total_messages = Message.objects.count() + print(f"Found {total_messages} messages to process") + + batch_size = 1000 + processed_count = 0 + + for offset in range(0, total_messages, 
batch_size):
+            batch_messages = Message.objects.all()[offset : offset + batch_size]
+            batch_chunks = []
+
+            for message in batch_messages:
+                cleaned_text = self.clean_message_text(message.raw_data.get("text", ""))
+                chunks = self.create_chunks_from_message(message, cleaned_text)
+                batch_chunks.extend(chunks)
+
+            if batch_chunks:
+                Chunk.bulk_save(batch_chunks)
+
+            processed_count += len(batch_messages)
+
+        self.stdout.write(f"Completed processing all {total_messages} messages")
+
+    def create_chunks_from_message(
+        self, message: Message, cleaned_text: str
+    ) -> list[Chunk | None]:
+        """Create chunks from a message."""
+        if message.raw_data.get("subtype") in ["channel_join", "channel_leave"]:
+            return []
+
+        chunk_texts = self.split_message_text(cleaned_text)
+
+        if not chunk_texts:
+            self.stdout.write(
+                f"No chunks created for message {message.slack_message_id} - text too short"
+            )
+            return []
+
+        try:
+            response = self.openai_client.embeddings.create(
+                model="text-embedding-3-small", input=chunk_texts
+            )
+            embeddings = [d.embedding for d in response.data]
+            return [
+                Chunk.update_data(
+                    chunk_text=text,
+                    message=message,
+                    embedding=embedding,
+                    save=False,
+                )
+                for text, embedding in zip(chunk_texts, embeddings, strict=True)
+            ]
+        except openai.OpenAIError as e:
+            self.stdout.write(
+                self.style.ERROR(f"OpenAI API error for message {message.slack_message_id}: {e}")
+            )
+            return []
+
+    def split_message_text(self, message_text: str) -> list[str]:
+        """Split message text into chunks."""
+        splitter = RecursiveCharacterTextSplitter(
+            chunk_size=300,
+            chunk_overlap=40,
+            length_function=len,
+            separators=["\n\n", "\n", " ", ""],
+        )
+        return splitter.split_text(message_text)
+
+    def clean_message_text(self, message_text: str) -> str:
+        """Clean message text by removing emojis and other noise while preserving context."""
+        if not message_text:
+            return ""
+
+        emoji_pattern = re.compile(
+            "["
+            "\U0001f600-\U0001f64f"
+            "\U0001f300-\U0001f5ff"
+            "\U0001f680-\U0001f6ff"
+            "\U0001f1e0-\U0001f1ff"
+            "\U00002500-\U00002bef"
+            "\U00002702-\U000027b0"
+            "\U00002702-\U000027b0"
+            "\U000024c2-\U0001f251"
+            "\U0001f926-\U0001f937"
+            "\U00010000-\U0010ffff"
+            "\u2640-\u2642"
+            "\u2600-\u2b55"
+            "\u200d"
+            "\u23cf"
+            "\u23e9"
+            "\u231a"
+            "\ufe0f"
+            "\u3030"
+            "]+",
+            flags=re.UNICODE,
+        )
+
+        cleaned_text = re.sub(r"<@U[A-Z0-9]+>", "", message_text)
+        cleaned_text = re.sub(r"<[^>]+>", "", cleaned_text)
+        cleaned_text = re.sub(r":\w+:", "", cleaned_text)
+        cleaned_text = emoji_pattern.sub("", cleaned_text)
+        cleaned_text = re.sub(r"\s+", " ", cleaned_text)
+
+        return cleaned_text.strip()
diff --git a/backend/apps/slack/migrations/0018_chunk.py b/backend/apps/slack/migrations/0018_chunk.py
new file mode 100644
index 0000000000..9a9f4d5759
--- /dev/null
+++ b/backend/apps/slack/migrations/0018_chunk.py
@@ -0,0 +1,49 @@
+# Generated by Django 5.2.3 on 2025-06-22 05:31
+
+import django.db.models.deletion
+import pgvector.django.vector
+from django.db import migrations, models
+from pgvector.django import VectorExtension
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("slack", "0017_remove_message_text"),
+    ]
+
+    operations = [
+        VectorExtension(),
+        migrations.CreateModel(
+            name="Chunk",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+                    ),
+                ),
+                ("nest_created_at", models.DateTimeField(auto_now_add=True)),
+                ("nest_updated_at", models.DateTimeField(auto_now=True)),
+                ("chunk_text", 
models.TextField(verbose_name="Chunk Text")), + ( + "embedding", + pgvector.django.vector.VectorField( + dimensions=1536, verbose_name="Chunk Embedding" + ), + ), + ( + "message", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="chunks", + to="slack.message", + ), + ), + ], + options={ + "verbose_name": "Chunks", + "db_table": "slack_chunks", + "unique_together": {("message", "chunk_text")}, + }, + ), + ] diff --git a/backend/apps/slack/models/__init__.py b/backend/apps/slack/models/__init__.py index 3bbe0878de..b66e6f5c40 100644 --- a/backend/apps/slack/models/__init__.py +++ b/backend/apps/slack/models/__init__.py @@ -1,3 +1,4 @@ +from .chunk import Chunk from .conversation import Conversation from .event import Event from .member import Member diff --git a/backend/apps/slack/models/chunk.py b/backend/apps/slack/models/chunk.py new file mode 100644 index 0000000000..4a966866f0 --- /dev/null +++ b/backend/apps/slack/models/chunk.py @@ -0,0 +1,70 @@ +"""Slack app chunk model.""" + +from django.db import models +from pgvector.django import VectorField + +from apps.common.models import BulkSaveModel, TimestampedModel +from apps.common.utils import truncate +from apps.slack.models.message import Message + + +class Chunk(TimestampedModel): + """Slack Chunk model.""" + + class Meta: + db_table = "slack_chunks" + verbose_name = "Chunks" + unique_together = ("message", "chunk_text") + + message = models.ForeignKey(Message, on_delete=models.CASCADE, related_name="chunks") + chunk_text = models.TextField(verbose_name="Chunk Text") + embedding = VectorField(verbose_name="Chunk Embedding", dimensions=1536) + + def __str__(self): + """Human readable representation.""" + text_preview = truncate(self.chunk_text, 50) + return f"Chunk {self.id} for Message {self.message.slack_message_id}: {text_preview}" + + def from_chunk(self, chunk_text: str, message: Message, embedding=None) -> None: + """Update instance based on chunk data.""" + self.chunk_text = chunk_text + self.message = message + self.embedding = embedding + + @staticmethod + def bulk_save(chunks, fields=None): + """Bulk save chunks.""" + chunks = [chunk for chunk in chunks if chunk is not None] + if chunks: + BulkSaveModel.bulk_save(Chunk, chunks, fields=fields) + + @staticmethod + def update_data( + chunk_text: str, + message: Message, + embedding, + *, + save: bool = True, + ) -> "Chunk | None": + """Update chunk data. + + Args: + chunk_text (str): The text content of the chunk. + message (Message): The message this chunk belongs to. + embedding (list): The embedding vector for the chunk. + save (bool): Whether to save the chunk to the database. + + Returns: + Chunk: The updated chunk instance. 
+ + """ + if Chunk.objects.filter(message=message, chunk_text=chunk_text).exists(): + return None + + chunk = Chunk(message=message) + chunk.from_chunk(chunk_text, message, embedding) + + if save: + chunk.save() + + return chunk diff --git a/backend/poetry.lock b/backend/poetry.lock index c4d230de4b..87140efd56 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -252,18 +252,18 @@ tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" a [[package]] name = "boto3" -version = "1.38.40" +version = "1.38.41" description = "The AWS SDK for Python" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "boto3-1.38.40-py3-none-any.whl", hash = "sha256:a43cad12c18607ae9addfc0a98366aae5762b1a4880529f82295b21473686433"}, - {file = "boto3-1.38.40.tar.gz", hash = "sha256:fcef3e08513d276c97d72d5e7ab8f3ce9950170784b9b5cf4fab327cdb577503"}, + {file = "boto3-1.38.41-py3-none-any.whl", hash = "sha256:6119e9f272b9f004f052ca78ce94d3fe10198bc159ae808f75c0e1b9c07518bd"}, + {file = "boto3-1.38.41.tar.gz", hash = "sha256:c6710fc533c8e1f5d1f025c74ffe1222c3659094cd51c076ec50c201a54c8f22"}, ] [package.dependencies] -botocore = ">=1.38.40,<1.39.0" +botocore = ">=1.38.41,<1.39.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.13.0,<0.14.0" @@ -272,14 +272,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.38.40" +version = "1.38.41" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "botocore-1.38.40-py3-none-any.whl", hash = "sha256:7528f47945502bf4226e629337c2ac2e454e661ac8fd1dc0fbf7f38082930f3f"}, - {file = "botocore-1.38.40.tar.gz", hash = "sha256:aefbfe835a7ebe9bbdd88df3999b0f8f484dd025af4ebb3f3387541316ce4349"}, + {file = "botocore-1.38.41-py3-none-any.whl", hash = "sha256:06069a06f1352accb1f6c9505d6e323753627112be80a9d2e057c6d9c9779ffd"}, + {file = "botocore-1.38.41.tar.gz", hash = "sha256:98e3fed636ebb519320c4b2d078db6fa6099b052b4bb9b5c66632a5a7fe72507"}, ] [package.dependencies] @@ -681,6 +681,22 @@ editorconfig = ">=0.12.2" jsbeautifier = "*" six = ">=1.13.0" +[[package]] +name = "dataclasses-json" +version = "0.6.7" +description = "Easily serialize dataclasses to and from JSON." 
+optional = false +python-versions = "<4.0,>=3.7" +groups = ["main"] +files = [ + {file = "dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a"}, + {file = "dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + [[package]] name = "deprecated" version = "1.2.18" @@ -1108,6 +1124,75 @@ files = [ {file = "graphql_core-3.2.6.tar.gz", hash = "sha256:c08eec22f9e40f0bd61d805907e3b3b1b9a320bc606e23dc145eebca07c8fbab"}, ] +[[package]] +name = "greenlet" +version = "3.2.3" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=3.9" +groups = ["main"] +markers = "python_version == \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")" +files = [ + {file = "greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a433dbc54e4a37e4fff90ef34f25a8c00aed99b06856f0119dcf09fbafa16392"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:72e77ed69312bab0434d7292316d5afd6896192ac4327d44f3d613ecb85b037c"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:68671180e3849b963649254a882cd544a3c75bfcd2c527346ad8bb53494444db"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49c8cfb18fb419b3d08e011228ef8a25882397f3a859b9fe1436946140b6756b"}, + {file = "greenlet-3.2.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:efc6dc8a792243c31f2f5674b670b3a95d46fa1c6a912b8e310d6f542e7b0712"}, + {file = "greenlet-3.2.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:731e154aba8e757aedd0781d4b240f1225b075b4409f1bb83b05ff410582cf00"}, + {file = "greenlet-3.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:96c20252c2f792defe9a115d3287e14811036d51e78b3aaddbee23b69b216302"}, + {file = "greenlet-3.2.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:784ae58bba89fa1fa5733d170d42486580cab9decda3484779f4759345b29822"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0921ac4ea42a5315d3446120ad48f90c3a6b9bb93dd9b3cf4e4d84a66e42de83"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d2971d93bb99e05f8c2c0c2f4aa9484a18d98c4c3bd3c62b65b7e6ae33dfcfaf"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c667c0bf9d406b77a15c924ef3285e1e05250948001220368e039b6aa5b5034b"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:592c12fb1165be74592f5de0d70f82bc5ba552ac44800d632214b76089945147"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29e184536ba333003540790ba29829ac14bb645514fbd7e32af331e8202a62a5"}, + 
{file = "greenlet-3.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:93c0bb79844a367782ec4f429d07589417052e621aa39a5ac1fb99c5aa308edc"}, + {file = "greenlet-3.2.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:751261fc5ad7b6705f5f76726567375bb2104a059454e0226e1eef6c756748ba"}, + {file = "greenlet-3.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:83a8761c75312361aa2b5b903b79da97f13f556164a7dd2d5448655425bd4c34"}, + {file = "greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb"}, + {file = "greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c"}, + {file = "greenlet-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:93d48533fade144203816783373f27a97e4193177ebaaf0fc396db19e5d61163"}, + {file = "greenlet-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:7454d37c740bb27bdeddfc3f358f26956a07d5220818ceb467a483197d84f849"}, + {file = "greenlet-3.2.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:500b8689aa9dd1ab26872a34084503aeddefcb438e2e7317b89b11eaea1901ad"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a07d3472c2a93117af3b0136f246b2833fdc0b542d4a9799ae5f41c28323faef"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8704b3768d2f51150626962f4b9a9e4a17d2e37c8a8d9867bbd9fa4eb938d3b3"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5035d77a27b7c62db6cf41cf786cfe2242644a7a337a0e155c80960598baab95"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2d8aa5423cd4a396792f6d4580f88bdc6efcb9205891c9d40d20f6e670992efb"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2c724620a101f8170065d7dded3f962a2aea7a7dae133a009cada42847e04a7b"}, + {file = "greenlet-3.2.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:873abe55f134c48e1f2a6f53f7d1419192a3d1a4e873bace00499a4e45ea6af0"}, + {file = "greenlet-3.2.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:024571bbce5f2c1cfff08bf3fbaa43bbc7444f580ae13b0099e95d0e6e67ed36"}, + {file = "greenlet-3.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:5195fb1e75e592dd04ce79881c8a22becdfa3e6f500e7feb059b1e6fdd54d3e3"}, + {file = "greenlet-3.2.3-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:3d04332dddb10b4a211b68111dabaee2e1a073663d117dc10247b5b1642bac86"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:8186162dffde068a465deab08fc72c767196895c39db26ab1c17c0b77a6d8b97"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f4bfbaa6096b1b7a200024784217defedf46a07c2eee1a498e94a1b5f8ec5728"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:ed6cfa9200484d234d8394c70f5492f144b20d4533f69262d530a1a082f6ee9a"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:02b0df6f63cd15012bed5401b47829cfd2e97052dc89da3cfaf2c779124eb892"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86c2d68e87107c1792e2e8d5399acec2487a4e993ab76c792408e59394d52141"}, + {file = "greenlet-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:8c47aae8fbbfcf82cc13327ae802ba13c9c36753b67e760023fd116bc124a62a"}, + {file = "greenlet-3.2.3-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:42efc522c0bd75ffa11a71e09cd8a399d83fafe36db250a87cf1dacfaa15dc64"}, + {file = "greenlet-3.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d760f9bdfe79bff803bad32b4d8ffb2c1d2ce906313fc10a83976ffb73d64ca7"}, + {file = "greenlet-3.2.3-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8324319cbd7b35b97990090808fdc99c27fe5338f87db50514959f8059999805"}, + {file = "greenlet-3.2.3-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:8c37ef5b3787567d322331d5250e44e42b58c8c713859b8a04c6065f27efbf72"}, + {file = "greenlet-3.2.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ce539fb52fb774d0802175d37fcff5c723e2c7d249c65916257f0a940cee8904"}, + {file = "greenlet-3.2.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:003c930e0e074db83559edc8705f3a2d066d4aa8c2f198aff1e454946efd0f26"}, + {file = "greenlet-3.2.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7e70ea4384b81ef9e84192e8a77fb87573138aa5d4feee541d8014e452b434da"}, + {file = "greenlet-3.2.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:22eb5ba839c4b2156f18f76768233fe44b23a31decd9cc0d4cc8141c211fd1b4"}, + {file = "greenlet-3.2.3-cp39-cp39-win32.whl", hash = "sha256:4532f0d25df67f896d137431b13f4cdce89f7e3d4a96387a41290910df4d3a57"}, + {file = "greenlet-3.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:aaa7aae1e7f75eaa3ae400ad98f8644bb81e1dc6ba47ce8a93d3f17274e08322"}, + {file = "greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] +test = ["objgraph", "psutil"] + [[package]] name = "gunicorn" version = "23.0.0" @@ -1189,6 +1274,18 @@ http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "httpx-sse" +version = "0.4.0" +description = "Consume Server-Sent Event (SSE) messages with HTTPX." 
+optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "httpx-sse-0.4.0.tar.gz", hash = "sha256:1e81a3a3070ce322add1d3529ed42eb5f70817f45ed6ec915ab753f961139721"}, + {file = "httpx_sse-0.4.0-py3-none-any.whl", hash = "sha256:f329af6eae57eaa2bdfd962b42524764af68075ea87370a2de920af5341e318f"}, +] + [[package]] name = "humanize" version = "4.12.3" @@ -1394,6 +1491,162 @@ files = [ [package.extras] dev = ["build (==1.2.2.post1)", "coverage (==7.5.4) ; python_version < \"3.9\"", "coverage (==7.8.0) ; python_version >= \"3.9\"", "mypy (==1.14.1) ; python_version < \"3.9\"", "mypy (==1.15.0) ; python_version >= \"3.9\"", "pip (==25.0.1)", "pylint (==3.2.7) ; python_version < \"3.9\"", "pylint (==3.3.6) ; python_version >= \"3.9\"", "ruff (==0.11.2)", "twine (==6.1.0)", "uv (==0.6.11)"] +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +groups = ["main"] +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] + +[package.dependencies] +jsonpointer = ">=1.9" + +[[package]] +name = "jsonpointer" +version = "3.0.0" +description = "Identify specific nodes in a JSON document (RFC 6901)" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"}, + {file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"}, +] + +[[package]] +name = "langchain" +version = "0.3.26" +description = "Building applications with LLMs through composability" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "langchain-0.3.26-py3-none-any.whl", hash = "sha256:361bb2e61371024a8c473da9f9c55f4ee50f269c5ab43afdb2b1309cb7ac36cf"}, + {file = "langchain-0.3.26.tar.gz", hash = "sha256:8ff034ee0556d3e45eff1f1e96d0d745ced57858414dba7171c8ebdbeb5580c9"}, +] + +[package.dependencies] +langchain-core = ">=0.3.66,<1.0.0" +langchain-text-splitters = ">=0.3.8,<1.0.0" +langsmith = ">=0.1.17" +pydantic = ">=2.7.4,<3.0.0" +PyYAML = ">=5.3" +requests = ">=2,<3" +SQLAlchemy = ">=1.4,<3" + +[package.extras] +anthropic = ["langchain-anthropic"] +aws = ["langchain-aws"] +azure-ai = ["langchain-azure-ai"] +cohere = ["langchain-cohere"] +community = ["langchain-community"] +deepseek = ["langchain-deepseek"] +fireworks = ["langchain-fireworks"] +google-genai = ["langchain-google-genai"] +google-vertexai = ["langchain-google-vertexai"] +groq = ["langchain-groq"] +huggingface = ["langchain-huggingface"] +mistralai = ["langchain-mistralai"] +ollama = ["langchain-ollama"] +openai = ["langchain-openai"] +perplexity = ["langchain-perplexity"] +together = ["langchain-together"] +xai = ["langchain-xai"] + +[[package]] +name = "langchain-community" +version = "0.3.26" +description = "Community contributed LangChain integrations." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "langchain_community-0.3.26-py3-none-any.whl", hash = "sha256:b25a553ee9d44a6c02092a440da6c561a9312c7013ffc25365ac3f8694edb53a"}, + {file = "langchain_community-0.3.26.tar.gz", hash = "sha256:49f9d71dc20bc42ccecd6875d02fafef1be0e211a0b22cecbd678f5fd3719487"}, +] + +[package.dependencies] +aiohttp = ">=3.8.3,<4.0.0" +dataclasses-json = ">=0.5.7,<0.7" +httpx-sse = ">=0.4.0,<1.0.0" +langchain = ">=0.3.26,<1.0.0" +langchain-core = ">=0.3.66,<1.0.0" +langsmith = ">=0.1.125" +numpy = {version = ">=2.1.0", markers = "python_version >= \"3.13\""} +pydantic-settings = ">=2.4.0,<3.0.0" +PyYAML = ">=5.3" +requests = ">=2,<3" +SQLAlchemy = ">=1.4,<3" +tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10" + +[[package]] +name = "langchain-core" +version = "0.3.66" +description = "Building applications with LLMs through composability" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "langchain_core-0.3.66-py3-none-any.whl", hash = "sha256:65cd6c3659afa4f91de7aa681397a0c53ff9282425c281e53646dd7faf16099e"}, + {file = "langchain_core-0.3.66.tar.gz", hash = "sha256:350c92e792ec1401f4b740d759b95f297710a50de29e1be9fbfff8676ef62117"}, +] + +[package.dependencies] +jsonpatch = ">=1.33,<2.0" +langsmith = ">=0.3.45" +packaging = ">=23.2,<25" +pydantic = ">=2.7.4" +PyYAML = ">=5.3" +tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10.0.0" +typing-extensions = ">=4.7" + +[[package]] +name = "langchain-text-splitters" +version = "0.3.8" +description = "LangChain text splitting utilities" +optional = false +python-versions = "<4.0,>=3.9" +groups = ["main"] +files = [ + {file = "langchain_text_splitters-0.3.8-py3-none-any.whl", hash = "sha256:e75cc0f4ae58dcf07d9f18776400cf8ade27fadd4ff6d264df6278bb302f6f02"}, + {file = "langchain_text_splitters-0.3.8.tar.gz", hash = "sha256:116d4b9f2a22dda357d0b79e30acf005c5518177971c66a9f1ab0edfdb0f912e"}, +] + +[package.dependencies] +langchain-core = ">=0.3.51,<1.0.0" + +[[package]] +name = "langsmith" +version = "0.4.1" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "langsmith-0.4.1-py3-none-any.whl", hash = "sha256:19c4c40bbb6735cb1136c453b2edcde265ca5ba1b108b7e0e3583ec4bda28625"}, + {file = "langsmith-0.4.1.tar.gz", hash = "sha256:ae8ec403fb2b9cabcfc3b0c54556d65555598c85879dac83b009576927f7eb1d"}, +] + +[package.dependencies] +httpx = ">=0.23.0,<1" +orjson = {version = ">=3.9.14,<4.0.0", markers = "platform_python_implementation != \"PyPy\""} +packaging = ">=23.2" +pydantic = {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""} +requests = ">=2,<3" +requests-toolbelt = ">=1.0.0,<2.0.0" +zstandard = ">=0.23.0,<0.24.0" + +[package.extras] +langsmith-pyo3 = ["langsmith-pyo3 (>=0.1.0rc2,<0.2.0)"] +openai-agents = ["openai-agents (>=0.0.3,<0.1)"] +otel = ["opentelemetry-api (>=1.30.0,<2.0.0)", "opentelemetry-exporter-otlp-proto-http (>=1.30.0,<2.0.0)", "opentelemetry-sdk (>=1.30.0,<2.0.0)"] +pytest = ["pytest (>=7.0.0)", "rich (>=13.9.4,<14.0.0)"] + [[package]] name = "lxml" version = "5.4.0" @@ -1630,6 +1883,26 @@ files = [ {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, ] +[[package]] +name = "marshmallow" +version = "3.26.1" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c"}, + {file = "marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["marshmallow[tests]", "pre-commit (>=3.5,<5.0)", "tox"] +docs = ["autodocsumm (==0.2.14)", "furo (==2024.8.6)", "sphinx (==8.1.3)", "sphinx-copybutton (==0.5.2)", "sphinx-issues (==5.0.0)", "sphinxext-opengraph (==0.9.1)"] +tests = ["pytest", "simplejson"] + [[package]] name = "multidict" version = "6.5.0" @@ -1750,6 +2023,18 @@ files = [ {file = "multidict-6.5.0.tar.gz", hash = "sha256:942bd8002492ba819426a8d7aefde3189c1b87099cdf18aaaefefcf7f3f7b6d2"}, ] +[[package]] +name = "mypy-extensions" +version = "1.1.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"}, + {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, +] + [[package]] name = "nodeenv" version = "1.9.1" @@ -1762,16 +2047,77 @@ files = [ {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, ] +[[package]] +name = "numpy" +version = "2.3.0" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.11" +groups = ["main"] +files = [ + {file = "numpy-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c3c9fdde0fa18afa1099d6257eb82890ea4f3102847e692193b54e00312a9ae9"}, + {file = "numpy-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46d16f72c2192da7b83984aa5455baee640e33a9f1e61e656f29adf55e406c2b"}, + {file = "numpy-2.3.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a0be278be9307c4ab06b788f2a077f05e180aea817b3e41cebbd5aaf7bd85ed3"}, + {file = "numpy-2.3.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:99224862d1412d2562248d4710126355d3a8db7672170a39d6909ac47687a8a4"}, + {file = "numpy-2.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2393a914db64b0ead0ab80c962e42d09d5f385802006a6c87835acb1f58adb96"}, + {file = "numpy-2.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:7729c8008d55e80784bd113787ce876ca117185c579c0d626f59b87d433ea779"}, + {file = "numpy-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:06d4fb37a8d383b769281714897420c5cc3545c79dc427df57fc9b852ee0bf58"}, + {file = "numpy-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c39ec392b5db5088259c68250e342612db82dc80ce044cf16496cf14cf6bc6f8"}, + {file = "numpy-2.3.0-cp311-cp311-win32.whl", hash = "sha256:ee9d3ee70d62827bc91f3ea5eee33153212c41f639918550ac0475e3588da59f"}, + {file = "numpy-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:43c55b6a860b0eb44d42341438b03513cf3879cb3617afb749ad49307e164edd"}, + {file = "numpy-2.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:2e6a1409eee0cb0316cb64640a49a49ca44deb1a537e6b1121dc7c458a1299a8"}, + {file = "numpy-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:389b85335838155a9076e9ad7f8fdba0827496ec2d2dc32ce69ce7898bde03ba"}, + {file = "numpy-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:9498f60cd6bb8238d8eaf468a3d5bb031d34cd12556af53510f05fcf581c1b7e"}, + {file = "numpy-2.3.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:622a65d40d8eb427d8e722fd410ac3ad4958002f109230bc714fa551044ebae2"}, + {file = "numpy-2.3.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b9446d9d8505aadadb686d51d838f2b6688c9e85636a0c3abaeb55ed54756459"}, + {file = "numpy-2.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:50080245365d75137a2bf46151e975de63146ae6d79f7e6bd5c0e85c9931d06a"}, + {file = "numpy-2.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c24bb4113c66936eeaa0dc1e47c74770453d34f46ee07ae4efd853a2ed1ad10a"}, + {file = "numpy-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4d8d294287fdf685281e671886c6dcdf0291a7c19db3e5cb4178d07ccf6ecc67"}, + {file = "numpy-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6295f81f093b7f5769d1728a6bd8bf7466de2adfa771ede944ce6711382b89dc"}, + {file = "numpy-2.3.0-cp312-cp312-win32.whl", hash = "sha256:e6648078bdd974ef5d15cecc31b0c410e2e24178a6e10bf511e0557eed0f2570"}, + {file = "numpy-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:0898c67a58cdaaf29994bc0e2c65230fd4de0ac40afaf1584ed0b02cd74c6fdd"}, + {file = "numpy-2.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:bd8df082b6c4695753ad6193018c05aac465d634834dca47a3ae06d4bb22d9ea"}, + {file = "numpy-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5754ab5595bfa2c2387d241296e0381c21f44a4b90a776c3c1d39eede13a746a"}, + {file = "numpy-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d11fa02f77752d8099573d64e5fe33de3229b6632036ec08f7080f46b6649959"}, + {file = "numpy-2.3.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:aba48d17e87688a765ab1cd557882052f238e2f36545dfa8e29e6a91aef77afe"}, + {file = "numpy-2.3.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4dc58865623023b63b10d52f18abaac3729346a7a46a778381e0e3af4b7f3beb"}, + {file = "numpy-2.3.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:df470d376f54e052c76517393fa443758fefcdd634645bc9c1f84eafc67087f0"}, + {file = "numpy-2.3.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:87717eb24d4a8a64683b7a4e91ace04e2f5c7c77872f823f02a94feee186168f"}, + {file = "numpy-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d8fa264d56882b59dcb5ea4d6ab6f31d0c58a57b41aec605848b6eb2ef4a43e8"}, + {file = "numpy-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e651756066a0eaf900916497e20e02fe1ae544187cb0fe88de981671ee7f6270"}, + {file = "numpy-2.3.0-cp313-cp313-win32.whl", hash = "sha256:e43c3cce3b6ae5f94696669ff2a6eafd9a6b9332008bafa4117af70f4b88be6f"}, + {file = "numpy-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:81ae0bf2564cf475f94be4a27ef7bcf8af0c3e28da46770fc904da9abd5279b5"}, + {file = "numpy-2.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:c8738baa52505fa6e82778580b23f945e3578412554d937093eac9205e845e6e"}, + {file = "numpy-2.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:39b27d8b38942a647f048b675f134dd5a567f95bfff481f9109ec308515c51d8"}, + {file = "numpy-2.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0eba4a1ea88f9a6f30f56fdafdeb8da3774349eacddab9581a21234b8535d3d3"}, + {file = "numpy-2.3.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:b0f1f11d0a1da54927436505a5a7670b154eac27f5672afc389661013dfe3d4f"}, + {file = "numpy-2.3.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:690d0a5b60a47e1f9dcec7b77750a4854c0d690e9058b7bef3106e3ae9117808"}, + {file = "numpy-2.3.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = 
"sha256:8b51ead2b258284458e570942137155978583e407babc22e3d0ed7af33ce06f8"}, + {file = "numpy-2.3.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:aaf81c7b82c73bd9b45e79cfb9476cb9c29e937494bfe9092c26aece812818ad"}, + {file = "numpy-2.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f420033a20b4f6a2a11f585f93c843ac40686a7c3fa514060a97d9de93e5e72b"}, + {file = "numpy-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d344ca32ab482bcf8735d8f95091ad081f97120546f3d250240868430ce52555"}, + {file = "numpy-2.3.0-cp313-cp313t-win32.whl", hash = "sha256:48a2e8eaf76364c32a1feaa60d6925eaf32ed7a040183b807e02674305beef61"}, + {file = "numpy-2.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ba17f93a94e503551f154de210e4d50c5e3ee20f7e7a1b5f6ce3f22d419b93bb"}, + {file = "numpy-2.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:f14e016d9409680959691c109be98c436c6249eaf7f118b424679793607b5944"}, + {file = "numpy-2.3.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:80b46117c7359de8167cc00a2c7d823bdd505e8c7727ae0871025a86d668283b"}, + {file = "numpy-2.3.0-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:5814a0f43e70c061f47abd5857d120179609ddc32a613138cbb6c4e9e2dbdda5"}, + {file = "numpy-2.3.0-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:ef6c1e88fd6b81ac6d215ed71dc8cd027e54d4bf1d2682d362449097156267a2"}, + {file = "numpy-2.3.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:33a5a12a45bb82d9997e2c0b12adae97507ad7c347546190a18ff14c28bbca12"}, + {file = "numpy-2.3.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:54dfc8681c1906d239e95ab1508d0a533c4a9505e52ee2d71a5472b04437ef97"}, + {file = "numpy-2.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:e017a8a251ff4d18d71f139e28bdc7c31edba7a507f72b1414ed902cbe48c74d"}, + {file = "numpy-2.3.0.tar.gz", hash = "sha256:581f87f9e9e9db2cba2141400e160e9dd644ee248788d6f90636eeb8fd9260a6"}, +] + [[package]] name = "openai" -version = "1.88.0" +version = "1.90.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "openai-1.88.0-py3-none-any.whl", hash = "sha256:7edd7826b3b83f5846562a6f310f040c79576278bf8e3687b30ba05bb5dff978"}, - {file = "openai-1.88.0.tar.gz", hash = "sha256:122d35e42998255cf1fc84560f6ee49a844e65c054cd05d3e42fda506b832bb1"}, + {file = "openai-1.90.0-py3-none-any.whl", hash = "sha256:e5dcb5498ea6b42fec47546d10f1bcc05fb854219a7d953a5ba766718b212a02"}, + {file = "openai-1.90.0.tar.gz", hash = "sha256:9771982cdd5b6631af68c6a603da72ed44cd2caf73b49f717a72b71374bc565b"}, ] [package.dependencies] @@ -1785,20 +2131,104 @@ tqdm = ">4" typing-extensions = ">=4.11,<5" [package.extras] +aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.6)"] datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] realtime = ["websockets (>=13,<16)"] voice-helpers = ["numpy (>=2.0.2)", "sounddevice (>=0.5.1)"] +[[package]] +name = "orjson" +version = "3.10.18" +description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +optional = false +python-versions = ">=3.9" +groups = ["main"] +markers = "platform_python_implementation != \"PyPy\"" +files = [ + {file = "orjson-3.10.18-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:a45e5d68066b408e4bc383b6e4ef05e717c65219a9e1390abc6155a520cac402"}, + {file = "orjson-3.10.18-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:be3b9b143e8b9db05368b13b04c84d37544ec85bb97237b3a923f076265ec89c"}, + {file = "orjson-3.10.18-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9b0aa09745e2c9b3bf779b096fa71d1cc2d801a604ef6dd79c8b1bfef52b2f92"}, + {file = "orjson-3.10.18-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53a245c104d2792e65c8d225158f2b8262749ffe64bc7755b00024757d957a13"}, + {file = "orjson-3.10.18-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f9495ab2611b7f8a0a8a505bcb0f0cbdb5469caafe17b0e404c3c746f9900469"}, + {file = "orjson-3.10.18-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:73be1cbcebadeabdbc468f82b087df435843c809cd079a565fb16f0f3b23238f"}, + {file = "orjson-3.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe8936ee2679e38903df158037a2f1c108129dee218975122e37847fb1d4ac68"}, + {file = "orjson-3.10.18-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7115fcbc8525c74e4c2b608129bef740198e9a120ae46184dac7683191042056"}, + {file = "orjson-3.10.18-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:771474ad34c66bc4d1c01f645f150048030694ea5b2709b87d3bda273ffe505d"}, + {file = "orjson-3.10.18-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:7c14047dbbea52886dd87169f21939af5d55143dad22d10db6a7514f058156a8"}, + {file = "orjson-3.10.18-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:641481b73baec8db14fdf58f8967e52dc8bda1f2aba3aa5f5c1b07ed6df50b7f"}, + {file = "orjson-3.10.18-cp310-cp310-win32.whl", hash = "sha256:607eb3ae0909d47280c1fc657c4284c34b785bae371d007595633f4b1a2bbe06"}, + {file = "orjson-3.10.18-cp310-cp310-win_amd64.whl", hash = "sha256:8770432524ce0eca50b7efc2a9a5f486ee0113a5fbb4231526d414e6254eba92"}, + {file = "orjson-3.10.18-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:e0a183ac3b8e40471e8d843105da6fbe7c070faab023be3b08188ee3f85719b8"}, + {file = "orjson-3.10.18-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:5ef7c164d9174362f85238d0cd4afdeeb89d9e523e4651add6a5d458d6f7d42d"}, + {file = "orjson-3.10.18-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afd14c5d99cdc7bf93f22b12ec3b294931518aa019e2a147e8aa2f31fd3240f7"}, + {file = "orjson-3.10.18-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7b672502323b6cd133c4af6b79e3bea36bad2d16bca6c1f645903fce83909a7a"}, + {file = "orjson-3.10.18-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:51f8c63be6e070ec894c629186b1c0fe798662b8687f3d9fdfa5e401c6bd7679"}, + {file = "orjson-3.10.18-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f9478ade5313d724e0495d167083c6f3be0dd2f1c9c8a38db9a9e912cdaf947"}, + {file = "orjson-3.10.18-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:187aefa562300a9d382b4b4eb9694806e5848b0cedf52037bb5c228c61bb66d4"}, + {file = "orjson-3.10.18-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9da552683bc9da222379c7a01779bddd0ad39dd699dd6300abaf43eadee38334"}, + {file = "orjson-3.10.18-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e450885f7b47a0231979d9c49b567ed1c4e9f69240804621be87c40bc9d3cf17"}, + {file = "orjson-3.10.18-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:5e3c9cc2ba324187cd06287ca24f65528f16dfc80add48dc99fa6c836bb3137e"}, + {file = "orjson-3.10.18-cp311-cp311-musllinux_1_2_i686.whl", hash = 
"sha256:50ce016233ac4bfd843ac5471e232b865271d7d9d44cf9d33773bcd883ce442b"}, + {file = "orjson-3.10.18-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b3ceff74a8f7ffde0b2785ca749fc4e80e4315c0fd887561144059fb1c138aa7"}, + {file = "orjson-3.10.18-cp311-cp311-win32.whl", hash = "sha256:fdba703c722bd868c04702cac4cb8c6b8ff137af2623bc0ddb3b3e6a2c8996c1"}, + {file = "orjson-3.10.18-cp311-cp311-win_amd64.whl", hash = "sha256:c28082933c71ff4bc6ccc82a454a2bffcef6e1d7379756ca567c772e4fb3278a"}, + {file = "orjson-3.10.18-cp311-cp311-win_arm64.whl", hash = "sha256:a6c7c391beaedd3fa63206e5c2b7b554196f14debf1ec9deb54b5d279b1b46f5"}, + {file = "orjson-3.10.18-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:50c15557afb7f6d63bc6d6348e0337a880a04eaa9cd7c9d569bcb4e760a24753"}, + {file = "orjson-3.10.18-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:356b076f1662c9813d5fa56db7d63ccceef4c271b1fb3dd522aca291375fcf17"}, + {file = "orjson-3.10.18-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:559eb40a70a7494cd5beab2d73657262a74a2c59aff2068fdba8f0424ec5b39d"}, + {file = "orjson-3.10.18-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f3c29eb9a81e2fbc6fd7ddcfba3e101ba92eaff455b8d602bf7511088bbc0eae"}, + {file = "orjson-3.10.18-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6612787e5b0756a171c7d81ba245ef63a3533a637c335aa7fcb8e665f4a0966f"}, + {file = "orjson-3.10.18-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ac6bd7be0dcab5b702c9d43d25e70eb456dfd2e119d512447468f6405b4a69c"}, + {file = "orjson-3.10.18-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9f72f100cee8dde70100406d5c1abba515a7df926d4ed81e20a9730c062fe9ad"}, + {file = "orjson-3.10.18-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dca85398d6d093dd41dc0983cbf54ab8e6afd1c547b6b8a311643917fbf4e0c"}, + {file = "orjson-3.10.18-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:22748de2a07fcc8781a70edb887abf801bb6142e6236123ff93d12d92db3d406"}, + {file = "orjson-3.10.18-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:3a83c9954a4107b9acd10291b7f12a6b29e35e8d43a414799906ea10e75438e6"}, + {file = "orjson-3.10.18-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:303565c67a6c7b1f194c94632a4a39918e067bd6176a48bec697393865ce4f06"}, + {file = "orjson-3.10.18-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:86314fdb5053a2f5a5d881f03fca0219bfdf832912aa88d18676a5175c6916b5"}, + {file = "orjson-3.10.18-cp312-cp312-win32.whl", hash = "sha256:187ec33bbec58c76dbd4066340067d9ece6e10067bb0cc074a21ae3300caa84e"}, + {file = "orjson-3.10.18-cp312-cp312-win_amd64.whl", hash = "sha256:f9f94cf6d3f9cd720d641f8399e390e7411487e493962213390d1ae45c7814fc"}, + {file = "orjson-3.10.18-cp312-cp312-win_arm64.whl", hash = "sha256:3d600be83fe4514944500fa8c2a0a77099025ec6482e8087d7659e891f23058a"}, + {file = "orjson-3.10.18-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:69c34b9441b863175cc6a01f2935de994025e773f814412030f269da4f7be147"}, + {file = "orjson-3.10.18-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:1ebeda919725f9dbdb269f59bc94f861afbe2a27dce5608cdba2d92772364d1c"}, + {file = "orjson-3.10.18-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5adf5f4eed520a4959d29ea80192fa626ab9a20b2ea13f8f6dc58644f6927103"}, + {file = 
"orjson-3.10.18-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7592bb48a214e18cd670974f289520f12b7aed1fa0b2e2616b8ed9e069e08595"}, + {file = "orjson-3.10.18-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f872bef9f042734110642b7a11937440797ace8c87527de25e0c53558b579ccc"}, + {file = "orjson-3.10.18-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0315317601149c244cb3ecef246ef5861a64824ccbcb8018d32c66a60a84ffbc"}, + {file = "orjson-3.10.18-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0da26957e77e9e55a6c2ce2e7182a36a6f6b180ab7189315cb0995ec362e049"}, + {file = "orjson-3.10.18-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb70d489bc79b7519e5803e2cc4c72343c9dc1154258adf2f8925d0b60da7c58"}, + {file = "orjson-3.10.18-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e9e86a6af31b92299b00736c89caf63816f70a4001e750bda179e15564d7a034"}, + {file = "orjson-3.10.18-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:c382a5c0b5931a5fc5405053d36c1ce3fd561694738626c77ae0b1dfc0242ca1"}, + {file = "orjson-3.10.18-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8e4b2ae732431127171b875cb2668f883e1234711d3c147ffd69fe5be51a8012"}, + {file = "orjson-3.10.18-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2d808e34ddb24fc29a4d4041dcfafbae13e129c93509b847b14432717d94b44f"}, + {file = "orjson-3.10.18-cp313-cp313-win32.whl", hash = "sha256:ad8eacbb5d904d5591f27dee4031e2c1db43d559edb8f91778efd642d70e6bea"}, + {file = "orjson-3.10.18-cp313-cp313-win_amd64.whl", hash = "sha256:aed411bcb68bf62e85588f2a7e03a6082cc42e5a2796e06e72a962d7c6310b52"}, + {file = "orjson-3.10.18-cp313-cp313-win_arm64.whl", hash = "sha256:f54c1385a0e6aba2f15a40d703b858bedad36ded0491e55d35d905b2c34a4cc3"}, + {file = "orjson-3.10.18-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:c95fae14225edfd699454e84f61c3dd938df6629a00c6ce15e704f57b58433bb"}, + {file = "orjson-3.10.18-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5232d85f177f98e0cefabb48b5e7f60cff6f3f0365f9c60631fecd73849b2a82"}, + {file = "orjson-3.10.18-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2783e121cafedf0d85c148c248a20470018b4ffd34494a68e125e7d5857655d1"}, + {file = "orjson-3.10.18-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e54ee3722caf3db09c91f442441e78f916046aa58d16b93af8a91500b7bbf273"}, + {file = "orjson-3.10.18-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2daf7e5379b61380808c24f6fc182b7719301739e4271c3ec88f2984a2d61f89"}, + {file = "orjson-3.10.18-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7f39b371af3add20b25338f4b29a8d6e79a8c7ed0e9dd49e008228a065d07781"}, + {file = "orjson-3.10.18-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b819ed34c01d88c6bec290e6842966f8e9ff84b7694632e88341363440d4cc0"}, + {file = "orjson-3.10.18-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2f6c57debaef0b1aa13092822cbd3698a1fb0209a9ea013a969f4efa36bdea57"}, + {file = "orjson-3.10.18-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:755b6d61ffdb1ffa1e768330190132e21343757c9aa2308c67257cc81a1a6f5a"}, + {file = "orjson-3.10.18-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ce8d0a875a85b4c8579eab5ac535fb4b2a50937267482be402627ca7e7570ee3"}, + {file = "orjson-3.10.18-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:57b5d0673cbd26781bebc2bf86f99dd19bd5a9cb55f71cc4f66419f6b50f3d77"}, + {file = "orjson-3.10.18-cp39-cp39-win32.whl", hash = "sha256:951775d8b49d1d16ca8818b1f20c4965cae9157e7b562a2ae34d3967b8f21c8e"}, + {file = "orjson-3.10.18-cp39-cp39-win_amd64.whl", hash = "sha256:fdd9d68f83f0bc4406610b1ac68bdcded8c5ee58605cc69e643a06f4d075f429"}, + {file = "orjson-3.10.18.tar.gz", hash = "sha256:e8da3947d92123eda795b68228cafe2724815621fe35e8e320a9e9593a4bcd53"}, +] + [[package]] name = "packaging" -version = "25.0" +version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" groups = ["main", "test"] files = [ - {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, - {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -1813,6 +2243,21 @@ files = [ {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, ] +[[package]] +name = "pgvector" +version = "0.4.1" +description = "pgvector support for Python" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pgvector-0.4.1-py3-none-any.whl", hash = "sha256:34bb4e99e1b13d08a2fe82dda9f860f15ddcd0166fbb25bffe15821cbfeb7362"}, + {file = "pgvector-0.4.1.tar.gz", hash = "sha256:83d3a1c044ff0c2f1e95d13dfb625beb0b65506cfec0941bfe81fd0ad44f4003"}, +] + +[package.dependencies] +numpy = "*" + [[package]] name = "platformdirs" version = "4.3.8" @@ -2197,6 +2642,30 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pydantic-settings" +version = "2.9.1" +description = "Settings management using Pydantic" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pydantic_settings-2.9.1-py3-none-any.whl", hash = "sha256:59b4f431b1defb26fe620c71a7d3968a710d719f5f4cdbbdb7926edeb770f6ef"}, + {file = "pydantic_settings-2.9.1.tar.gz", hash = "sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268"}, +] + +[package.dependencies] +pydantic = ">=2.7.0" +python-dotenv = ">=0.21.0" +typing-inspection = ">=0.4.0" + +[package.extras] +aws-secrets-manager = ["boto3 (>=1.35.0)", "boto3-stubs[secretsmanager]"] +azure-key-vault = ["azure-identity (>=1.16.0)", "azure-keyvault-secrets (>=4.8.0)"] +gcp-secret-manager = ["google-cloud-secret-manager (>=2.23.1)"] +toml = ["tomli (>=2.0.1)"] +yaml = ["pyyaml (>=6.0.1)"] + [[package]] name = "pygithub" version = "2.6.1" @@ -2401,7 +2870,7 @@ version = "1.1.0" description = "Read key-value pairs from a .env file and set them as environment variables" optional = false python-versions = ">=3.9" -groups = ["test"] +groups = ["main", "test"] files = [ {file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"}, {file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"}, @@ -2723,6 +3192,21 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +description = "A utility belt 
for advanced users of python-requests" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["main"] +files = [ + {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, + {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, +] + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + [[package]] name = "ruff" version = "0.11.13" @@ -2881,6 +3365,102 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "sqlalchemy" +version = "2.0.41" +description = "Database Abstraction Library" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "SQLAlchemy-2.0.41-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6854175807af57bdb6425e47adbce7d20a4d79bbfd6f6d6519cd10bb7109a7f8"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05132c906066142103b83d9c250b60508af556982a385d96c4eaa9fb9720ac2b"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b4af17bda11e907c51d10686eda89049f9ce5669b08fbe71a29747f1e876036"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:c0b0e5e1b5d9f3586601048dd68f392dc0cc99a59bb5faf18aab057ce00d00b2"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:0b3dbf1e7e9bc95f4bac5e2fb6d3fb2f083254c3fdd20a1789af965caf2d2348"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-win32.whl", hash = "sha256:1e3f196a0c59b0cae9a0cd332eb1a4bda4696e863f4f1cf84ab0347992c548c2"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-win_amd64.whl", hash = "sha256:6ab60a5089a8f02009f127806f777fca82581c49e127f08413a66056bd9166dd"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b1f09b6821406ea1f94053f346f28f8215e293344209129a9c0fcc3578598d7b"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1936af879e3db023601196a1684d28e12f19ccf93af01bf3280a3262c4b6b4e5"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2ac41acfc8d965fb0c464eb8f44995770239668956dc4cdf502d1b1ffe0d747"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81c24e0c0fde47a9723c81d5806569cddef103aebbf79dbc9fcbb617153dea30"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:23a8825495d8b195c4aa9ff1c430c28f2c821e8c5e2d98089228af887e5d7e29"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:60c578c45c949f909a4026b7807044e7e564adf793537fc762b2489d522f3d11"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-win32.whl", hash = "sha256:118c16cd3f1b00c76d69343e38602006c9cfb9998fa4f798606d28d63f23beda"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-win_amd64.whl", hash = "sha256:7492967c3386df69f80cf67efd665c0f667cee67032090fe01d7d74b0e19bb08"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6375cd674fe82d7aa9816d1cb96ec592bac1726c11e0cafbf40eeee9a4516b5f"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f8c9fdd15a55d9465e590a402f42082705d66b05afc3ffd2d2eb3c6ba919560"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:32f9dc8c44acdee06c8fc6440db9eae8b4af8b01e4b1aee7bdd7241c22edff4f"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90c11ceb9a1f482c752a71f203a81858625d8df5746d787a4786bca4ffdf71c6"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:911cc493ebd60de5f285bcae0491a60b4f2a9f0f5c270edd1c4dbaef7a38fc04"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03968a349db483936c249f4d9cd14ff2c296adfa1290b660ba6516f973139582"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-win32.whl", hash = "sha256:293cd444d82b18da48c9f71cd7005844dbbd06ca19be1ccf6779154439eec0b8"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-win_amd64.whl", hash = "sha256:3d3549fc3e40667ec7199033a4e40a2f669898a00a7b18a931d3efb4c7900504"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:81f413674d85cfd0dfcd6512e10e0f33c19c21860342a4890c3a2b59479929f9"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:598d9ebc1e796431bbd068e41e4de4dc34312b7aa3292571bb3674a0cb415dd1"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a104c5694dfd2d864a6f91b0956eb5d5883234119cb40010115fd45a16da5e70"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6145afea51ff0af7f2564a05fa95eb46f542919e6523729663a5d285ecb3cf5e"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b46fa6eae1cd1c20e6e6f44e19984d438b6b2d8616d21d783d150df714f44078"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41836fe661cc98abfae476e14ba1906220f92c4e528771a8a3ae6a151242d2ae"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-win32.whl", hash = "sha256:a8808d5cf866c781150d36a3c8eb3adccfa41a8105d031bf27e92c251e3969d6"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-win_amd64.whl", hash = "sha256:5b14e97886199c1f52c14629c11d90c11fbb09e9334fa7bb5f6d068d9ced0ce0"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4eeb195cdedaf17aab6b247894ff2734dcead6c08f748e617bfe05bd5a218443"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d4ae769b9c1c7757e4ccce94b0641bc203bbdf43ba7a2413ab2523d8d047d8dc"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a62448526dd9ed3e3beedc93df9bb6b55a436ed1474db31a2af13b313a70a7e1"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc56c9788617b8964ad02e8fcfeed4001c1f8ba91a9e1f31483c0dffb207002a"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c153265408d18de4cc5ded1941dcd8315894572cddd3c58df5d5b5705b3fa28d"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f67766965996e63bb46cfbf2ce5355fc32d9dd3b8ad7e536a920ff9ee422e23"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-win32.whl", hash = "sha256:bfc9064f6658a3d1cadeaa0ba07570b83ce6801a1314985bf98ec9b95d74e15f"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-win_amd64.whl", hash = "sha256:82ca366a844eb551daff9d2e6e7a9e5e76d2612c8564f58db6c19a726869c1df"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:90144d3b0c8b139408da50196c5cad2a6909b51b23df1f0538411cd23ffa45d3"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:023b3ee6169969beea3bb72312e44d8b7c27c75b347942d943cf49397b7edeb5"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:725875a63abf7c399d4548e686debb65cdc2549e1825437096a0af1f7e374814"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81965cc20848ab06583506ef54e37cf15c83c7e619df2ad16807c03100745dea"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:dd5ec3aa6ae6e4d5b5de9357d2133c07be1aff6405b136dad753a16afb6717dd"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:ff8e80c4c4932c10493ff97028decfdb622de69cae87e0f127a7ebe32b4069c6"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-win32.whl", hash = "sha256:4d44522480e0bf34c3d63167b8cfa7289c1c54264c2950cc5fc26e7850967e45"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-win_amd64.whl", hash = "sha256:81eedafa609917040d39aa9332e25881a8e7a0862495fcdf2023a9667209deda"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9a420a91913092d1e20c86a2f5f1fc85c1a8924dbcaf5e0586df8aceb09c9cc2"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:906e6b0d7d452e9a98e5ab8507c0da791856b2380fdee61b765632bb8698026f"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a373a400f3e9bac95ba2a06372c4fd1412a7cee53c37fc6c05f829bf672b8769"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:087b6b52de812741c27231b5a3586384d60c353fbd0e2f81405a814b5591dc8b"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:34ea30ab3ec98355235972dadc497bb659cc75f8292b760394824fab9cf39826"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8280856dd7c6a68ab3a164b4a4b1c51f7691f6d04af4d4ca23d6ecf2261b7923"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-win32.whl", hash = "sha256:b50eab9994d64f4a823ff99a0ed28a6903224ddbe7fef56a6dd865eec9243440"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-win_amd64.whl", hash = "sha256:5e22575d169529ac3e0a120cf050ec9daa94b6a9597993d1702884f6954a7d71"}, + {file = "sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576"}, + {file = "sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9"}, +] + +[package.dependencies] +greenlet = {version = ">=1", markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +typing-extensions = ">=4.6.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (>=1)"] +aioodbc = ["aioodbc", "greenlet (>=1)"] +aiosqlite = ["aiosqlite", "greenlet (>=1)", "typing_extensions (!=3.10.0.1)"] +asyncio = ["greenlet (>=1)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (>=1)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5,!=1.1.10)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx_oracle (>=8)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (>=1)"] +postgresql-pg8000 = ["pg8000 
(>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3_binary"] + [[package]] name = "sqlparse" version = "0.5.3" @@ -2956,6 +3536,22 @@ strawberry-graphql = ">=0.264.0" debug-toolbar = ["django-debug-toolbar (>=3.4)"] enum = ["django-choices-field (>=2.2.2)"] +[[package]] +name = "tenacity" +version = "9.1.2" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138"}, + {file = "tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb"}, +] + +[package.extras] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] + [[package]] name = "thefuzz" version = "0.22.1" @@ -3005,6 +3601,22 @@ files = [ {file = "typing_extensions-4.14.0.tar.gz", hash = "sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4"}, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + [[package]] name = "typing-inspection" version = "0.4.1" @@ -3280,7 +3892,120 @@ idna = ">=2.0" multidict = ">=4.0" propcache = ">=0.2.1" +[[package]] +name = "zstandard" +version = "0.23.0" +description = "Zstandard bindings for Python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "zstandard-0.23.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bf0a05b6059c0528477fba9054d09179beb63744355cab9f38059548fedd46a9"}, + {file = "zstandard-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fc9ca1c9718cb3b06634c7c8dec57d24e9438b2aa9a0f02b8bb36bf478538880"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77da4c6bfa20dd5ea25cbf12c76f181a8e8cd7ea231c673828d0386b1740b8dc"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2170c7e0367dde86a2647ed5b6f57394ea7f53545746104c6b09fc1f4223573"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c16842b846a8d2a145223f520b7e18b57c8f476924bda92aeee3a88d11cfc391"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:157e89ceb4054029a289fb504c98c6a9fe8010f1680de0201b3eb5dc20aa6d9e"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:203d236f4c94cd8379d1ea61db2fce20730b4c38d7f1c34506a31b34edc87bdd"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:dc5d1a49d3f8262be192589a4b72f0d03b72dcf46c51ad5852a4fdc67be7b9e4"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:752bf8a74412b9892f4e5b58f2f890a039f57037f52c89a740757ebd807f33ea"}, + {file = 
"zstandard-0.23.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:80080816b4f52a9d886e67f1f96912891074903238fe54f2de8b786f86baded2"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:84433dddea68571a6d6bd4fbf8ff398236031149116a7fff6f777ff95cad3df9"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ab19a2d91963ed9e42b4e8d77cd847ae8381576585bad79dbd0a8837a9f6620a"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:59556bf80a7094d0cfb9f5e50bb2db27fefb75d5138bb16fb052b61b0e0eeeb0"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:27d3ef2252d2e62476389ca8f9b0cf2bbafb082a3b6bfe9d90cbcbb5529ecf7c"}, + {file = "zstandard-0.23.0-cp310-cp310-win32.whl", hash = "sha256:5d41d5e025f1e0bccae4928981e71b2334c60f580bdc8345f824e7c0a4c2a813"}, + {file = "zstandard-0.23.0-cp310-cp310-win_amd64.whl", hash = "sha256:519fbf169dfac1222a76ba8861ef4ac7f0530c35dd79ba5727014613f91613d4"}, + {file = "zstandard-0.23.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:34895a41273ad33347b2fc70e1bff4240556de3c46c6ea430a7ed91f9042aa4e"}, + {file = "zstandard-0.23.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:77ea385f7dd5b5676d7fd943292ffa18fbf5c72ba98f7d09fc1fb9e819b34c23"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:983b6efd649723474f29ed42e1467f90a35a74793437d0bc64a5bf482bedfa0a"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80a539906390591dd39ebb8d773771dc4db82ace6372c4d41e2d293f8e32b8db"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:445e4cb5048b04e90ce96a79b4b63140e3f4ab5f662321975679b5f6360b90e2"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd30d9c67d13d891f2360b2a120186729c111238ac63b43dbd37a5a40670b8ca"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d20fd853fbb5807c8e84c136c278827b6167ded66c72ec6f9a14b863d809211c"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ed1708dbf4d2e3a1c5c69110ba2b4eb6678262028afd6c6fbcc5a8dac9cda68e"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:be9b5b8659dff1f913039c2feee1aca499cfbc19e98fa12bc85e037c17ec6ca5"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:65308f4b4890aa12d9b6ad9f2844b7ee42c7f7a4fd3390425b242ffc57498f48"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:98da17ce9cbf3bfe4617e836d561e433f871129e3a7ac16d6ef4c680f13a839c"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:8ed7d27cb56b3e058d3cf684d7200703bcae623e1dcc06ed1e18ecda39fee003"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:b69bb4f51daf461b15e7b3db033160937d3ff88303a7bc808c67bbc1eaf98c78"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:034b88913ecc1b097f528e42b539453fa82c3557e414b3de9d5632c80439a473"}, + {file = "zstandard-0.23.0-cp311-cp311-win32.whl", hash = "sha256:f2d4380bf5f62daabd7b751ea2339c1a21d1c9463f1feb7fc2bdcea2c29c3160"}, + {file = "zstandard-0.23.0-cp311-cp311-win_amd64.whl", hash = "sha256:62136da96a973bd2557f06ddd4e8e807f9e13cbb0bfb9cc06cfe6d98ea90dfe0"}, + {file = 
"zstandard-0.23.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b4567955a6bc1b20e9c31612e615af6b53733491aeaa19a6b3b37f3b65477094"}, + {file = "zstandard-0.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e172f57cd78c20f13a3415cc8dfe24bf388614324d25539146594c16d78fcc8"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0e166f698c5a3e914947388c162be2583e0c638a4703fc6a543e23a88dea3c1"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12a289832e520c6bd4dcaad68e944b86da3bad0d339ef7989fb7e88f92e96072"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d50d31bfedd53a928fed6707b15a8dbeef011bb6366297cc435accc888b27c20"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72c68dda124a1a138340fb62fa21b9bf4848437d9ca60bd35db36f2d3345f373"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53dd9d5e3d29f95acd5de6802e909ada8d8d8cfa37a3ac64836f3bc4bc5512db"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6a41c120c3dbc0d81a8e8adc73312d668cd34acd7725f036992b1b72d22c1772"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:40b33d93c6eddf02d2c19f5773196068d875c41ca25730e8288e9b672897c105"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9206649ec587e6b02bd124fb7799b86cddec350f6f6c14bc82a2b70183e708ba"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76e79bc28a65f467e0409098fa2c4376931fd3207fbeb6b956c7c476d53746dd"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:66b689c107857eceabf2cf3d3fc699c3c0fe8ccd18df2219d978c0283e4c508a"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9c236e635582742fee16603042553d276cca506e824fa2e6489db04039521e90"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8fffdbd9d1408006baaf02f1068d7dd1f016c6bcb7538682622c556e7b68e35"}, + {file = "zstandard-0.23.0-cp312-cp312-win32.whl", hash = "sha256:dc1d33abb8a0d754ea4763bad944fd965d3d95b5baef6b121c0c9013eaf1907d"}, + {file = "zstandard-0.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:64585e1dba664dc67c7cdabd56c1e5685233fbb1fc1966cfba2a340ec0dfff7b"}, + {file = "zstandard-0.23.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:576856e8594e6649aee06ddbfc738fec6a834f7c85bf7cadd1c53d4a58186ef9"}, + {file = "zstandard-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38302b78a850ff82656beaddeb0bb989a0322a8bbb1bf1ab10c17506681d772a"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2240ddc86b74966c34554c49d00eaafa8200a18d3a5b6ffbf7da63b11d74ee2"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ef230a8fd217a2015bc91b74f6b3b7d6522ba48be29ad4ea0ca3a3775bf7dd5"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:774d45b1fac1461f48698a9d4b5fa19a69d47ece02fa469825b442263f04021f"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f77fa49079891a4aab203d0b1744acc85577ed16d767b52fc089d83faf8d8ed"}, + {file = 
"zstandard-0.23.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac184f87ff521f4840e6ea0b10c0ec90c6b1dcd0bad2f1e4a9a1b4fa177982ea"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c363b53e257246a954ebc7c488304b5592b9c53fbe74d03bc1c64dda153fb847"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e7792606d606c8df5277c32ccb58f29b9b8603bf83b48639b7aedf6df4fe8171"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a0817825b900fcd43ac5d05b8b3079937073d2b1ff9cf89427590718b70dd840"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:9da6bc32faac9a293ddfdcb9108d4b20416219461e4ec64dfea8383cac186690"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fd7699e8fd9969f455ef2926221e0233f81a2542921471382e77a9e2f2b57f4b"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d477ed829077cd945b01fc3115edd132c47e6540ddcd96ca169facff28173057"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa6ce8b52c5987b3e34d5674b0ab529a4602b632ebab0a93b07bfb4dfc8f8a33"}, + {file = "zstandard-0.23.0-cp313-cp313-win32.whl", hash = "sha256:a9b07268d0c3ca5c170a385a0ab9fb7fdd9f5fd866be004c4ea39e44edce47dd"}, + {file = "zstandard-0.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:f3513916e8c645d0610815c257cbfd3242adfd5c4cfa78be514e5a3ebb42a41b"}, + {file = "zstandard-0.23.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2ef3775758346d9ac6214123887d25c7061c92afe1f2b354f9388e9e4d48acfc"}, + {file = "zstandard-0.23.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4051e406288b8cdbb993798b9a45c59a4896b6ecee2f875424ec10276a895740"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2d1a054f8f0a191004675755448d12be47fa9bebbcffa3cdf01db19f2d30a54"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f83fa6cae3fff8e98691248c9320356971b59678a17f20656a9e59cd32cee6d8"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32ba3b5ccde2d581b1e6aa952c836a6291e8435d788f656fe5976445865ae045"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f146f50723defec2975fb7e388ae3a024eb7151542d1599527ec2aa9cacb152"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1bfe8de1da6d104f15a60d4a8a768288f66aa953bbe00d027398b93fb9680b26"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:29a2bc7c1b09b0af938b7a8343174b987ae021705acabcbae560166567f5a8db"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:61f89436cbfede4bc4e91b4397eaa3e2108ebe96d05e93d6ccc95ab5714be512"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:53ea7cdc96c6eb56e76bb06894bcfb5dfa93b7adcf59d61c6b92674e24e2dd5e"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:a4ae99c57668ca1e78597d8b06d5af837f377f340f4cce993b551b2d7731778d"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:379b378ae694ba78cef921581ebd420c938936a153ded602c4fea612b7eaa90d"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = 
"sha256:50a80baba0285386f97ea36239855f6020ce452456605f262b2d33ac35c7770b"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:61062387ad820c654b6a6b5f0b94484fa19515e0c5116faf29f41a6bc91ded6e"}, + {file = "zstandard-0.23.0-cp38-cp38-win32.whl", hash = "sha256:b8c0bd73aeac689beacd4e7667d48c299f61b959475cdbb91e7d3d88d27c56b9"}, + {file = "zstandard-0.23.0-cp38-cp38-win_amd64.whl", hash = "sha256:a05e6d6218461eb1b4771d973728f0133b2a4613a6779995df557f70794fd60f"}, + {file = "zstandard-0.23.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3aa014d55c3af933c1315eb4bb06dd0459661cc0b15cd61077afa6489bec63bb"}, + {file = "zstandard-0.23.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7f0804bb3799414af278e9ad51be25edf67f78f916e08afdb983e74161b916"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb2b1ecfef1e67897d336de3a0e3f52478182d6a47eda86cbd42504c5cbd009a"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:837bb6764be6919963ef41235fd56a6486b132ea64afe5fafb4cb279ac44f259"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1516c8c37d3a053b01c1c15b182f3b5f5eef19ced9b930b684a73bad121addf4"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48ef6a43b1846f6025dde6ed9fee0c24e1149c1c25f7fb0a0585572b2f3adc58"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11e3bf3c924853a2d5835b24f03eeba7fc9b07d8ca499e247e06ff5676461a15"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2fb4535137de7e244c230e24f9d1ec194f61721c86ebea04e1581d9d06ea1269"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8c24f21fa2af4bb9f2c492a86fe0c34e6d2c63812a839590edaf177b7398f700"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a8c86881813a78a6f4508ef9daf9d4995b8ac2d147dcb1a450448941398091c9"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fe3b385d996ee0822fd46528d9f0443b880d4d05528fd26a9119a54ec3f91c69"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:82d17e94d735c99621bf8ebf9995f870a6b3e6d14543b99e201ae046dfe7de70"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c7c517d74bea1a6afd39aa612fa025e6b8011982a0897768a2f7c8ab4ebb78a2"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1fd7e0f1cfb70eb2f95a19b472ee7ad6d9a0a992ec0ae53286870c104ca939e5"}, + {file = "zstandard-0.23.0-cp39-cp39-win32.whl", hash = "sha256:43da0f0092281bf501f9c5f6f3b4c975a8a0ea82de49ba3f7100e64d422a1274"}, + {file = "zstandard-0.23.0-cp39-cp39-win_amd64.whl", hash = "sha256:f8346bfa098532bc1fb6c7ef06783e969d87a99dd1d2a5a18a892c1d7a643c58"}, + {file = "zstandard-0.23.0.tar.gz", hash = "sha256:b2d8c62d08e7255f68f7a740bae85b3c9b8e5466baa9cbf7f57f1cde0ac6bc09"}, +] + +[package.dependencies] +cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\""} + +[package.extras] +cffi = ["cffi (>=1.11)"] + [metadata] lock-version = "2.1" python-versions = "^3.13" -content-hash = "74b08acb4ffd0b2eb81f79aa7f03a92a4ecd113a770de88d4f64e2f14e81948c" +content-hash = "72d70977fcf98483419b9f7bf7ec52d1f132f60bad1581249257b958ba290f0a" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 
fb391bda9a..65973e2809 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -34,9 +34,12 @@ geopy = "^2.4.1" gunicorn = "^23.0.0" humanize = "^4.11.0" jinja2 = "^3.1.6" +langchain = "^0.3.26" +langchain-community = "^0.3.26" lxml = "^5.3.0" markdown = "^3.7" openai = "^1.60.1" +pgvector= "^0.4.1" psycopg2-binary = "^2.9.9" pydantic = "^2.11.1" pydantic-core = "^2.33.0" diff --git a/backend/tests/slack/models/chunk_test.py b/backend/tests/slack/models/chunk_test.py new file mode 100644 index 0000000000..8304417124 --- /dev/null +++ b/backend/tests/slack/models/chunk_test.py @@ -0,0 +1,193 @@ +from unittest.mock import Mock, patch + +from django.db import models + +from apps.slack.models.chunk import Chunk +from apps.slack.models.message import Message + + +def create_model_mock(model_class): + mock = Mock(spec=model_class) + mock._state = Mock() + mock.pk = 1 + return mock + + +class TestChunkModel: + def test_str_method(self): + """Test the string representation of a chunk.""" + mock_message = create_model_mock(Message) + mock_message.slack_message_id = "123456.789" + + chunk = Chunk( + id=1, + chunk_text="This is a test chunk with some content that should be displayed", + message=mock_message, + ) + + result = str(chunk) + assert "Chunk 1 for Message 123456.789:" in result + assert "This is a test chunk with some content that" in result + + def test_from_chunk_method(self): + """Test the from_chunk method updates chunk properties.""" + chunk = Chunk() + mock_message = create_model_mock(Message) + test_text = "Test chunk content" + test_embedding = [0.1, 0.2, 0.3] + + chunk.from_chunk(test_text, mock_message, test_embedding) + + assert chunk.chunk_text == test_text + assert chunk.message == mock_message + assert chunk.embedding == test_embedding + + def test_from_chunk_method_without_embedding(self): + """Test from_chunk method with None embedding.""" + chunk = Chunk() + mock_message = create_model_mock(Message) + test_text = "Test chunk content" + + chunk.from_chunk(test_text, mock_message) + + assert chunk.chunk_text == test_text + assert chunk.message == mock_message + assert chunk.embedding is None + + def test_bulk_save_with_chunks(self): + """Test bulk_save method with valid chunks.""" + mock_chunks = [Mock(), Mock(), Mock()] + + with patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: + Chunk.bulk_save(mock_chunks) + mock_bulk_save.assert_called_once_with(Chunk, mock_chunks, fields=None) + + def test_bulk_save_with_none_chunks(self): + """Test bulk_save method filters out None chunks.""" + mock_chunks = [Mock(), None, Mock(), None] + expected_chunks = [mock_chunks[0], mock_chunks[2]] + + with patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: + Chunk.bulk_save(mock_chunks) + mock_bulk_save.assert_called_once_with(Chunk, expected_chunks, fields=None) + + def test_bulk_save_with_empty_list(self): + """Test bulk_save method with empty chunk list.""" + with patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: + Chunk.bulk_save([]) + mock_bulk_save.assert_not_called() + + def test_bulk_save_with_all_none_chunks(self): + """Test bulk_save method with all None chunks.""" + with patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: + Chunk.bulk_save([None, None, None]) + mock_bulk_save.assert_not_called() + + def test_bulk_save_with_fields_parameter(self): + """Test bulk_save method with custom fields parameter.""" + mock_chunks = [Mock(), Mock()] + fields = ["chunk_text", "embedding"] + + with 
patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: + Chunk.bulk_save(mock_chunks, fields=fields) + mock_bulk_save.assert_called_once_with(Chunk, mock_chunks, fields=fields) + + def test_update_data_new_chunk(self, mocker): + """Test update_data method creates new chunk when it doesn't exist.""" + mock_message = create_model_mock(Message) + chunk_text = "Test chunk content" + embedding = [0.1, 0.2, 0.3] + + mocker.patch( + "apps.slack.models.chunk.Chunk.objects.filter", + return_value=Mock(exists=Mock(return_value=False)), + ) + + patched_save = mocker.patch("apps.slack.models.chunk.Chunk.save") + + with patch.object(Chunk, "message", create=True): + result = Chunk.update_data( + chunk_text=chunk_text, message=mock_message, embedding=embedding, save=True + ) + + assert result is not None + assert isinstance(result, Chunk) + assert result.chunk_text == chunk_text + assert result.message == mock_message + assert result.embedding == embedding + patched_save.assert_called_once() + + def test_update_data_existing_chunk(self, mocker): + """Test update_data method returns None when chunk already exists.""" + mock_message = create_model_mock(Message) + chunk_text = "Existing chunk content" + embedding = [0.1, 0.2, 0.3] + + mocker.patch( + "apps.slack.models.chunk.Chunk.objects.filter", + return_value=Mock(exists=Mock(return_value=True)), + ) + + result = Chunk.update_data( + chunk_text=chunk_text, message=mock_message, embedding=embedding, save=True + ) + + assert result is None + + def test_update_data_no_save(self, mocker): + """Test update_data method with save=False.""" + mock_message = create_model_mock(Message) + chunk_text = "Test chunk content" + embedding = [0.1, 0.2, 0.3] + + mocker.patch( + "apps.slack.models.chunk.Chunk.objects.filter", + return_value=Mock(exists=Mock(return_value=False)), + ) + + patched_save = mocker.patch("apps.slack.models.chunk.Chunk.save") + + with patch.object(Chunk, "message", create=True): + result = Chunk.update_data( + chunk_text=chunk_text, message=mock_message, embedding=embedding, save=False + ) + + assert result is not None + assert isinstance(result, Chunk) + assert result.chunk_text == chunk_text + assert result.message == mock_message + assert result.embedding == embedding + patched_save.assert_not_called() + + def test_update_data_with_keyword_save_parameter(self, mocker): + """Test update_data method with keyword-only save parameter.""" + mock_message = create_model_mock(Message) + chunk_text = "Test chunk content" + embedding = [0.1, 0.2, 0.3] + + mocker.patch( + "apps.slack.models.chunk.Chunk.objects.filter", + return_value=Mock(exists=Mock(return_value=False)), + ) + + patched_save = mocker.patch("apps.slack.models.chunk.Chunk.save") + + with patch.object(Chunk, "message", create=True): + result = Chunk.update_data(chunk_text, mock_message, embedding, save=True) + + assert result is not None + patched_save.assert_called_once() + + def test_meta_class_attributes(self): + """Test the Meta class attributes of the Chunk model.""" + assert Chunk._meta.db_table == "slack_chunks" + assert Chunk._meta.verbose_name == "Chunks" + assert ("message", "chunk_text") in Chunk._meta.unique_together + + def test_message_foreign_key_relationship(self): + """Test the foreign key relationship with Message model.""" + message_field = Chunk._meta.get_field("message") + + assert isinstance(message_field, models.ForeignKey) + assert message_field.remote_field.model == Message + assert message_field.remote_field.related_name == "chunks" diff --git 
a/docker-compose/local.yaml b/docker-compose/local.yaml index 0718cad865..17c8a525f4 100644 --- a/docker-compose/local.yaml +++ b/docker-compose/local.yaml @@ -52,7 +52,7 @@ services: db: container_name: nest-db - image: postgres:16.4 + image: pgvector/pgvector:pg16 environment: POSTGRES_DB: ${DJANGO_DB_NAME:-nest_db_dev} POSTGRES_PASSWORD: ${DJANGO_DB_PASSWORD:-nest_user_dev_password} diff --git a/docker-compose/production.yaml b/docker-compose/production.yaml index 979baab285..37c2f2114d 100644 --- a/docker-compose/production.yaml +++ b/docker-compose/production.yaml @@ -37,7 +37,7 @@ services: production-nest-db: container_name: production-nest-db - image: postgres:16.4 + image: pgvector/pgvector:pg16 env_file: .env.db healthcheck: interval: 5s diff --git a/docker-compose/staging.yaml b/docker-compose/staging.yaml index 5d22a54a94..6f51428a00 100644 --- a/docker-compose/staging.yaml +++ b/docker-compose/staging.yaml @@ -37,7 +37,7 @@ services: staging-nest-db: container_name: staging-nest-db - image: postgres:16.4 + image: pgvector/pgvector:pg16 env_file: .env.db healthcheck: interval: 5s From 79627a71e28a636bfd74a5e60a8666056100cd7a Mon Sep 17 00:00:00 2001 From: Dishant1804 Date: Sun, 22 Jun 2025 13:32:15 +0530 Subject: [PATCH 2/8] draft --- backend/apps/ai/__init__.py | 0 backend/apps/ai/admin.py | 20 + .../apps/ai/management/commands/__init__.py | 0 .../commands/slack_create_chunks.py | 5 +- .../migrations/0001_initial.py} | 6 +- backend/apps/ai/migrations/__init__.py | 0 backend/apps/ai/models/__init__.py | 1 + backend/apps/{slack => ai}/models/chunk.py | 0 backend/apps/slack/admin.py | 14 - backend/apps/slack/models/__init__.py | 1 - backend/settings/base.py | 1 + backend/tests/apps/ai/__init__.py | 0 .../apps/ai/management/commands/__init__.py | 0 .../commands/slack_create_chunks_test.py | 357 ++++++++++++++++++ backend/tests/apps/ai/models/__init__.py | 0 .../{slack => apps/ai}/models/chunk_test.py | 12 +- 16 files changed, 391 insertions(+), 26 deletions(-) create mode 100644 backend/apps/ai/__init__.py create mode 100644 backend/apps/ai/admin.py create mode 100644 backend/apps/ai/management/commands/__init__.py rename backend/apps/{slack => ai}/management/commands/slack_create_chunks.py (96%) rename backend/apps/{slack/migrations/0018_chunk.py => ai/migrations/0001_initial.py} (92%) create mode 100644 backend/apps/ai/migrations/__init__.py create mode 100644 backend/apps/ai/models/__init__.py rename backend/apps/{slack => ai}/models/chunk.py (100%) create mode 100644 backend/tests/apps/ai/__init__.py create mode 100644 backend/tests/apps/ai/management/commands/__init__.py create mode 100644 backend/tests/apps/ai/management/commands/slack_create_chunks_test.py create mode 100644 backend/tests/apps/ai/models/__init__.py rename backend/tests/{slack => apps/ai}/models/chunk_test.py (94%) diff --git a/backend/apps/ai/__init__.py b/backend/apps/ai/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/apps/ai/admin.py b/backend/apps/ai/admin.py new file mode 100644 index 0000000000..565a228004 --- /dev/null +++ b/backend/apps/ai/admin.py @@ -0,0 +1,20 @@ +"""AI app admin.""" + +from django.contrib import admin + +from apps.ai.models.chunk import Chunk + + +class ChunkAdmin(admin.ModelAdmin): + list_display = ( + "id", + "message", + "chunk_text", + ) + search_fields = ( + "message__slack_message_id", + "chunk_text", + ) + + +admin.site.register(Chunk, ChunkAdmin) diff --git a/backend/apps/ai/management/commands/__init__.py 
b/backend/apps/ai/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/apps/slack/management/commands/slack_create_chunks.py b/backend/apps/ai/management/commands/slack_create_chunks.py similarity index 96% rename from backend/apps/slack/management/commands/slack_create_chunks.py rename to backend/apps/ai/management/commands/slack_create_chunks.py index 5e9bb544ca..86f30f4a11 100644 --- a/backend/apps/slack/management/commands/slack_create_chunks.py +++ b/backend/apps/ai/management/commands/slack_create_chunks.py @@ -7,7 +7,7 @@ from django.core.management.base import BaseCommand from langchain.text_splitter import RecursiveCharacterTextSplitter -from apps.slack.models.chunk import Chunk +from apps.ai.models.chunk import Chunk from apps.slack.models.message import Message @@ -26,7 +26,7 @@ def handle(self, *args, **options): self.openai_client = openai.OpenAI(api_key=openai_api_key) total_messages = Message.objects.count() - print(f"Found {total_messages} messages to process") + self.stdout.write(f"Found {total_messages} messages to process") batch_size = 1000 processed_count = 0 @@ -105,7 +105,6 @@ def clean_message_text(self, message_text: str) -> str: "\U0001f1e0-\U0001f1ff" "\U00002500-\U00002bef" "\U00002702-\U000027b0" - "\U00002702-\U000027b0" "\U000024c2-\U0001f251" "\U0001f926-\U0001f937" "\U00010000-\U0010ffff" diff --git a/backend/apps/slack/migrations/0018_chunk.py b/backend/apps/ai/migrations/0001_initial.py similarity index 92% rename from backend/apps/slack/migrations/0018_chunk.py rename to backend/apps/ai/migrations/0001_initial.py index 9a9f4d5759..238ec0048e 100644 --- a/backend/apps/slack/migrations/0018_chunk.py +++ b/backend/apps/ai/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 5.2.3 on 2025-06-22 05:31 +# Generated by Django 5.2.3 on 2025-06-22 06:17 import django.db.models.deletion import pgvector.django.vector @@ -7,8 +7,10 @@ class Migration(migrations.Migration): + initial = True + dependencies = [ - ("slack", "0017_remove_message_text"), + ("slack", "0018_conversation_sync_messages"), ] operations = [ diff --git a/backend/apps/ai/migrations/__init__.py b/backend/apps/ai/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/apps/ai/models/__init__.py b/backend/apps/ai/models/__init__.py new file mode 100644 index 0000000000..05907f2861 --- /dev/null +++ b/backend/apps/ai/models/__init__.py @@ -0,0 +1 @@ +from .chunk import Chunk diff --git a/backend/apps/slack/models/chunk.py b/backend/apps/ai/models/chunk.py similarity index 100% rename from backend/apps/slack/models/chunk.py rename to backend/apps/ai/models/chunk.py diff --git a/backend/apps/slack/admin.py b/backend/apps/slack/admin.py index a131050db3..d0749a9f80 100644 --- a/backend/apps/slack/admin.py +++ b/backend/apps/slack/admin.py @@ -2,7 +2,6 @@ from django.contrib import admin, messages -from apps.slack.models.chunk import Chunk from apps.slack.models.conversation import Conversation from apps.slack.models.event import Event from apps.slack.models.member import Member @@ -10,18 +9,6 @@ from apps.slack.models.workspace import Workspace -class ChunkAdmin(admin.ModelAdmin): - list_display = ( - "id", - "message", - "chunk_text", - ) - search_fields = ( - "message__slack_message_id", - "chunk_text", - ) - - class ConversationAdmin(admin.ModelAdmin): list_display = ( "name", @@ -178,7 +165,6 @@ class WorkspaceAdmin(admin.ModelAdmin): ) -admin.site.register(Chunk, ChunkAdmin) 
admin.site.register(Conversation, ConversationAdmin) admin.site.register(Event, EventAdmin) admin.site.register(Member, MemberAdmin) diff --git a/backend/apps/slack/models/__init__.py b/backend/apps/slack/models/__init__.py index b66e6f5c40..3bbe0878de 100644 --- a/backend/apps/slack/models/__init__.py +++ b/backend/apps/slack/models/__init__.py @@ -1,4 +1,3 @@ -from .chunk import Chunk from .conversation import Conversation from .event import Event from .member import Member diff --git a/backend/settings/base.py b/backend/settings/base.py index b7dbac17ac..3851cbf660 100644 --- a/backend/settings/base.py +++ b/backend/settings/base.py @@ -43,6 +43,7 @@ class Base(Configuration): ) LOCAL_APPS = ( + "apps.ai", "apps.common", "apps.core", "apps.github", diff --git a/backend/tests/apps/ai/__init__.py b/backend/tests/apps/ai/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/tests/apps/ai/management/commands/__init__.py b/backend/tests/apps/ai/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/tests/apps/ai/management/commands/slack_create_chunks_test.py b/backend/tests/apps/ai/management/commands/slack_create_chunks_test.py new file mode 100644 index 0000000000..83963a08e6 --- /dev/null +++ b/backend/tests/apps/ai/management/commands/slack_create_chunks_test.py @@ -0,0 +1,357 @@ +"""Tests for the slack_create_chunks management command.""" + +import os +from io import StringIO +from unittest.mock import Mock, patch + +import pytest +from django.core.management import call_command + +from apps.ai.management.commands.slack_create_chunks import Command +from apps.ai.models.chunk import Chunk +from apps.slack.models.message import Message + + +class TestSlackCreateChunksCommand: + """Test cases for the slack_create_chunks management command.""" + + @pytest.fixture + def command(self): + """Create a Command instance for testing.""" + return Command() + + @pytest.fixture + def mock_message(self): + """Create a mock message.""" + message = Mock(spec=Message) + message.slack_message_id = "1234567890.123456" + message.raw_data = {"text": "This is a test message content for chunking"} + return message + + @pytest.fixture + def mock_message_with_subtype(self): + """Create a mock message with join/leave subtype.""" + message = Mock(spec=Message) + message.slack_message_id = "1234567890.123457" + message.raw_data = {"text": "User joined channel", "subtype": "channel_join"} + return message + + @pytest.fixture + def mock_message_empty_text(self): + """Create a mock message with empty text.""" + message = Mock(spec=Message) + message.slack_message_id = "1234567890.123458" + message.raw_data = {"text": ""} + return message + + @pytest.fixture + def mock_openai_embedding_response(self): + """Create a mock OpenAI embedding response.""" + mock_data = [ + Mock(embedding=[0.1, 0.2, 0.3, 0.4, 0.5]), + Mock(embedding=[0.6, 0.7, 0.8, 0.9, 1.0]), + ] + mock_response = Mock() + mock_response.data = mock_data + return mock_response + + @pytest.fixture(autouse=True) + def mock_env_var(self): + """Mock the environment variable for OpenAI API key.""" + with patch.dict(os.environ, {"DJANGO_OPEN_AI_SECRET_KEY": "test-api-key"}): + yield + + def test_handle_no_openai_api_key(self, command): + """Test handle method when OpenAI API key is not set.""" + with patch.dict(os.environ, {}, clear=True): + stdout = StringIO() + command.stdout = stdout + + command.handle() + + output = stdout.getvalue() + assert "DJANGO_OPEN_AI_SECRET_KEY environment 
variable not set" in output + + @patch("builtins.print") + @patch("apps.slack.models.message.Message.objects") + @patch("openai.OpenAI") + def test_handle_success( + self, mock_openai_client, mock_message_objects, mock_print, command, mock_message + ): + """Test successful execution of handle method.""" + mock_message_objects.count.return_value = 1 + mock_message_objects.all.return_value.__getitem__.return_value = [mock_message] + + mock_client_instance = Mock() + mock_openai_client.return_value = mock_client_instance + + mock_embedding_response = Mock() + mock_embedding_response.data = [Mock(embedding=[0.1, 0.2, 0.3])] + mock_client_instance.embeddings.create.return_value = mock_embedding_response + + stdout = StringIO() + command.stdout = stdout + + with ( + patch.object(Chunk, "bulk_save") as mock_bulk_save, + patch.object(Chunk, "update_data", return_value=Mock()) as mock_update_data, + ): + command.handle() + + mock_print.assert_called_with("Found 1 messages to process") + output = stdout.getvalue() + assert "Completed processing all 1 messages" in output + mock_bulk_save.assert_called() + mock_update_data.assert_called() + + def test_create_chunks_from_message_with_join_subtype( + self, command, mock_message_with_subtype + ): + """Test create_chunks_from_message with channel_join subtype.""" + result = command.create_chunks_from_message(mock_message_with_subtype, "User joined") + + assert result == [] + + def test_create_chunks_from_message_with_leave_subtype(self, command): + """Test create_chunks_from_message with channel_leave subtype.""" + message = Mock(spec=Message) + message.slack_message_id = "1234567890.123459" + message.raw_data = {"text": "User left channel", "subtype": "channel_leave"} + + result = command.create_chunks_from_message(message, "User left") + + assert result == [] + + def test_create_chunks_from_message_empty_chunks(self, command, mock_message_empty_text): + """Test create_chunks_from_message when no chunks are created.""" + stdout = StringIO() + command.stdout = stdout + + with patch.object(command, "split_message_text", return_value=[]): + result = command.create_chunks_from_message(mock_message_empty_text, "") + + assert result == [] + output = stdout.getvalue() + assert "No chunks created for message 1234567890.123458 - text too short" in output + + def test_create_chunks_from_message_success( + self, command, mock_message, mock_openai_embedding_response + ): + """Test successful chunk creation from message.""" + mock_client = Mock() + mock_client.embeddings.create.return_value = mock_openai_embedding_response + command.openai_client = mock_client + + mock_chunk = Mock(spec=Chunk) + + with ( + patch.object(command, "split_message_text", return_value=["chunk1", "chunk2"]), + patch.object(Chunk, "update_data", return_value=mock_chunk) as mock_update_data, + ): + result = command.create_chunks_from_message(mock_message, "test message") + + assert len(result) == 2 + assert all(chunk == mock_chunk for chunk in result) + assert mock_update_data.call_count == 2 + mock_client.embeddings.create.assert_called_once_with( + model="text-embedding-3-small", input=["chunk1", "chunk2"] + ) + + for call in mock_update_data.call_args_list: + assert call.kwargs.get("save") is False + + @patch("langchain.text_splitter.RecursiveCharacterTextSplitter") + def test_split_message_text(self, mock_splitter_class, command): + """Test message text splitting functionality.""" + mock_splitter = Mock() + mock_splitter.split_text.return_value = ["chunk1", "chunk2", "chunk3"] + 
mock_splitter_class.return_value = mock_splitter + + long_text = "This is a long message that should be split into multiple chunks. " * 10 + + result = command.split_message_text(long_text) + + mock_splitter_class.assert_called_once_with( + chunk_size=300, + chunk_overlap=40, + length_function=len, + separators=["\n\n", "\n", " ", ""], + ) + mock_splitter.split_text.assert_called_once_with(long_text) + assert result == ["chunk1", "chunk2", "chunk3"] + + @patch("langchain.text_splitter.RecursiveCharacterTextSplitter") + def test_split_message_text_short(self, mock_splitter_class, command): + """Test message text splitting with short text.""" + mock_splitter = Mock() + mock_splitter.split_text.return_value = ["Short message"] + mock_splitter_class.return_value = mock_splitter + + short_text = "Short message" + + result = command.split_message_text(short_text) + + assert result == ["Short message"] + mock_splitter.split_text.assert_called_once_with(short_text) + + @patch("langchain.text_splitter.RecursiveCharacterTextSplitter") + def test_split_message_text_empty(self, mock_splitter_class, command): + """Test message text splitting with empty text.""" + mock_splitter = Mock() + mock_splitter.split_text.return_value = [] + mock_splitter_class.return_value = mock_splitter + + result = command.split_message_text("") + + assert result == [] + + @pytest.mark.parametrize( + ("input_text", "expected_patterns"), + [ + ("Hello world", "Hello world"), + ("Hello <@U123456> world", "Hello world"), + ("Check this link ", "Check this link "), + ("This is :smile: awesome :thumbsup:", "This is awesome "), + ("Multiple spaces", "Multiple spaces"), + ("", ""), + (" spaces around ", "spaces around"), + ], + ) + def test_clean_message_text(self, command, input_text, expected_patterns): + """Test message text cleaning functionality.""" + result = command.clean_message_text(input_text) + + assert result == expected_patterns + + def test_clean_message_text_with_emojis(self, command): + """Test message text cleaning with emoji characters.""" + text_with_emojis = "Hello 😀😃😄 world 🌍🌎🌏" + + result = command.clean_message_text(text_with_emojis) + + assert "😀" not in result + assert "😃" not in result + assert "🌍" not in result + assert "Hello" in result + assert "world" in result + + def test_clean_message_text_complex_patterns(self, command): + """Test message text cleaning with complex patterns.""" + complex_text = ( + "Hey <@U123456>! Check out :rocket: Amazing!" 
+ ) + + result = command.clean_message_text(complex_text) + + assert "<@U123456>" not in result + assert "" not in result + assert ":rocket:" not in result + assert "Hey" in result + assert "Amazing" in result + + @patch("builtins.print") + @patch("apps.slack.models.message.Message.objects") + @patch("openai.OpenAI") + def test_handle_batch_processing( + self, mock_openai_client, mock_message_objects, mock_print, command + ): + """Test that messages are processed in batches.""" + mock_message_objects.count.return_value = 2500 + + mock_messages_batch1 = [Mock(spec=Message) for _ in range(1000)] + mock_messages_batch2 = [Mock(spec=Message) for _ in range(1000)] + mock_messages_batch3 = [Mock(spec=Message) for _ in range(500)] + + def mock_getitem(slice_obj): + start = slice_obj.start or 0 + stop = slice_obj.stop or 2500 + if start == 0 and stop == 1000: + return mock_messages_batch1 + if start == 1000 and stop == 2000: + return mock_messages_batch2 + if start == 2000 and stop == 2500: + return mock_messages_batch3 + return [] + + mock_message_objects.all.return_value.__getitem__ = mock_getitem + + for i, message in enumerate( + mock_messages_batch1 + mock_messages_batch2 + mock_messages_batch3 + ): + message.slack_message_id = f"msg_{i}" + message.raw_data = {"text": f"Message {i}"} + + mock_client_instance = Mock() + mock_openai_client.return_value = mock_client_instance + + mock_embedding_response = Mock() + mock_embedding_response.data = [] + mock_client_instance.embeddings.create.return_value = mock_embedding_response + + stdout = StringIO() + command.stdout = stdout + + with ( + patch.object(Chunk, "bulk_save") as mock_bulk_save, + patch.object(command, "create_chunks_from_message", return_value=[]), + ): + command.handle() + + mock_print.assert_called_with("Found 2500 messages to process") + output = stdout.getvalue() + assert "Completed processing all 2500 messages" in output + + assert mock_bulk_save.call_count == 0 + + @patch("django.core.management.call_command") + def test_call_command_integration(self, mock_call_command): + """Test that the command can be called via call_command.""" + call_command("slack_create_chunks") + + mock_call_command.assert_called_once_with("slack_create_chunks") + + def test_command_help_text(self, command): + """Test that the command has appropriate help text.""" + assert command.help == "Create chunks for Slack messages" + + @patch("builtins.print") + @patch("apps.slack.models.message.Message.objects") + @patch("openai.OpenAI") + def test_handle_with_no_messages( + self, mock_openai_client, mock_message_objects, mock_print, command + ): + """Test handle method when there are no messages.""" + mock_message_objects.count.return_value = 0 + + stdout = StringIO() + command.stdout = stdout + + command.handle() + + mock_print.assert_called_with("Found 0 messages to process") + output = stdout.getvalue() + assert "Completed processing all 0 messages" in output + + def test_create_chunks_from_message_with_none_subtype(self, command, mock_message): + """Test create_chunks_from_message with None subtype.""" + mock_message.raw_data = {"text": "Regular message"} + + mock_client = Mock() + mock_embedding_response = Mock() + mock_embedding_response.data = [Mock(embedding=[0.1, 0.2, 0.3])] + mock_client.embeddings.create.return_value = mock_embedding_response + command.openai_client = mock_client + + mock_chunk = Mock(spec=Chunk) + + with ( + patch.object(command, "split_message_text", return_value=["chunk1"]), + patch.object(Chunk, "update_data", 
return_value=mock_chunk) as mock_update_data, + ): + result = command.create_chunks_from_message(mock_message, "Regular message") + + assert len(result) == 1 + assert result[0] == mock_chunk + + mock_update_data.assert_called_once() + assert mock_update_data.call_args.kwargs.get("save") is False diff --git a/backend/tests/apps/ai/models/__init__.py b/backend/tests/apps/ai/models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/tests/slack/models/chunk_test.py b/backend/tests/apps/ai/models/chunk_test.py similarity index 94% rename from backend/tests/slack/models/chunk_test.py rename to backend/tests/apps/ai/models/chunk_test.py index 8304417124..60e0e36d16 100644 --- a/backend/tests/slack/models/chunk_test.py +++ b/backend/tests/apps/ai/models/chunk_test.py @@ -2,7 +2,7 @@ from django.db import models -from apps.slack.models.chunk import Chunk +from apps.ai.models.chunk import Chunk from apps.slack.models.message import Message @@ -99,11 +99,11 @@ def test_update_data_new_chunk(self, mocker): embedding = [0.1, 0.2, 0.3] mocker.patch( - "apps.slack.models.chunk.Chunk.objects.filter", + "apps.ai.models.chunk.Chunk.objects.filter", return_value=Mock(exists=Mock(return_value=False)), ) - patched_save = mocker.patch("apps.slack.models.chunk.Chunk.save") + patched_save = mocker.patch("apps.ai.models.chunk.Chunk.save") with patch.object(Chunk, "message", create=True): result = Chunk.update_data( @@ -124,7 +124,7 @@ def test_update_data_existing_chunk(self, mocker): embedding = [0.1, 0.2, 0.3] mocker.patch( - "apps.slack.models.chunk.Chunk.objects.filter", + "apps.ai.models.chunk.Chunk.objects.filter", return_value=Mock(exists=Mock(return_value=True)), ) @@ -141,11 +141,11 @@ def test_update_data_no_save(self, mocker): embedding = [0.1, 0.2, 0.3] mocker.patch( - "apps.slack.models.chunk.Chunk.objects.filter", + "apps.ai.models.chunk.Chunk.objects.filter", return_value=Mock(exists=Mock(return_value=False)), ) - patched_save = mocker.patch("apps.slack.models.chunk.Chunk.save") + patched_save = mocker.patch("apps.ai.models.chunk.Chunk.save") with patch.object(Chunk, "message", create=True): result = Chunk.update_data( From 1e55c56cafac05d66ce0f78873e3b8dc3cb72d19 Mon Sep 17 00:00:00 2001 From: Dishant1804 Date: Sun, 22 Jun 2025 22:56:26 +0530 Subject: [PATCH 3/8] spelling and fixes --- .../commands/slack_create_chunks_test.py | 357 ------------------ backend/tests/apps/ai/models/chunk_test.py | 19 - 2 files changed, 376 deletions(-) delete mode 100644 backend/tests/apps/ai/management/commands/slack_create_chunks_test.py diff --git a/backend/tests/apps/ai/management/commands/slack_create_chunks_test.py b/backend/tests/apps/ai/management/commands/slack_create_chunks_test.py deleted file mode 100644 index 83963a08e6..0000000000 --- a/backend/tests/apps/ai/management/commands/slack_create_chunks_test.py +++ /dev/null @@ -1,357 +0,0 @@ -"""Tests for the slack_create_chunks management command.""" - -import os -from io import StringIO -from unittest.mock import Mock, patch - -import pytest -from django.core.management import call_command - -from apps.ai.management.commands.slack_create_chunks import Command -from apps.ai.models.chunk import Chunk -from apps.slack.models.message import Message - - -class TestSlackCreateChunksCommand: - """Test cases for the slack_create_chunks management command.""" - - @pytest.fixture - def command(self): - """Create a Command instance for testing.""" - return Command() - - @pytest.fixture - def mock_message(self): - """Create a mock 
message.""" - message = Mock(spec=Message) - message.slack_message_id = "1234567890.123456" - message.raw_data = {"text": "This is a test message content for chunking"} - return message - - @pytest.fixture - def mock_message_with_subtype(self): - """Create a mock message with join/leave subtype.""" - message = Mock(spec=Message) - message.slack_message_id = "1234567890.123457" - message.raw_data = {"text": "User joined channel", "subtype": "channel_join"} - return message - - @pytest.fixture - def mock_message_empty_text(self): - """Create a mock message with empty text.""" - message = Mock(spec=Message) - message.slack_message_id = "1234567890.123458" - message.raw_data = {"text": ""} - return message - - @pytest.fixture - def mock_openai_embedding_response(self): - """Create a mock OpenAI embedding response.""" - mock_data = [ - Mock(embedding=[0.1, 0.2, 0.3, 0.4, 0.5]), - Mock(embedding=[0.6, 0.7, 0.8, 0.9, 1.0]), - ] - mock_response = Mock() - mock_response.data = mock_data - return mock_response - - @pytest.fixture(autouse=True) - def mock_env_var(self): - """Mock the environment variable for OpenAI API key.""" - with patch.dict(os.environ, {"DJANGO_OPEN_AI_SECRET_KEY": "test-api-key"}): - yield - - def test_handle_no_openai_api_key(self, command): - """Test handle method when OpenAI API key is not set.""" - with patch.dict(os.environ, {}, clear=True): - stdout = StringIO() - command.stdout = stdout - - command.handle() - - output = stdout.getvalue() - assert "DJANGO_OPEN_AI_SECRET_KEY environment variable not set" in output - - @patch("builtins.print") - @patch("apps.slack.models.message.Message.objects") - @patch("openai.OpenAI") - def test_handle_success( - self, mock_openai_client, mock_message_objects, mock_print, command, mock_message - ): - """Test successful execution of handle method.""" - mock_message_objects.count.return_value = 1 - mock_message_objects.all.return_value.__getitem__.return_value = [mock_message] - - mock_client_instance = Mock() - mock_openai_client.return_value = mock_client_instance - - mock_embedding_response = Mock() - mock_embedding_response.data = [Mock(embedding=[0.1, 0.2, 0.3])] - mock_client_instance.embeddings.create.return_value = mock_embedding_response - - stdout = StringIO() - command.stdout = stdout - - with ( - patch.object(Chunk, "bulk_save") as mock_bulk_save, - patch.object(Chunk, "update_data", return_value=Mock()) as mock_update_data, - ): - command.handle() - - mock_print.assert_called_with("Found 1 messages to process") - output = stdout.getvalue() - assert "Completed processing all 1 messages" in output - mock_bulk_save.assert_called() - mock_update_data.assert_called() - - def test_create_chunks_from_message_with_join_subtype( - self, command, mock_message_with_subtype - ): - """Test create_chunks_from_message with channel_join subtype.""" - result = command.create_chunks_from_message(mock_message_with_subtype, "User joined") - - assert result == [] - - def test_create_chunks_from_message_with_leave_subtype(self, command): - """Test create_chunks_from_message with channel_leave subtype.""" - message = Mock(spec=Message) - message.slack_message_id = "1234567890.123459" - message.raw_data = {"text": "User left channel", "subtype": "channel_leave"} - - result = command.create_chunks_from_message(message, "User left") - - assert result == [] - - def test_create_chunks_from_message_empty_chunks(self, command, mock_message_empty_text): - """Test create_chunks_from_message when no chunks are created.""" - stdout = StringIO() - 
command.stdout = stdout - - with patch.object(command, "split_message_text", return_value=[]): - result = command.create_chunks_from_message(mock_message_empty_text, "") - - assert result == [] - output = stdout.getvalue() - assert "No chunks created for message 1234567890.123458 - text too short" in output - - def test_create_chunks_from_message_success( - self, command, mock_message, mock_openai_embedding_response - ): - """Test successful chunk creation from message.""" - mock_client = Mock() - mock_client.embeddings.create.return_value = mock_openai_embedding_response - command.openai_client = mock_client - - mock_chunk = Mock(spec=Chunk) - - with ( - patch.object(command, "split_message_text", return_value=["chunk1", "chunk2"]), - patch.object(Chunk, "update_data", return_value=mock_chunk) as mock_update_data, - ): - result = command.create_chunks_from_message(mock_message, "test message") - - assert len(result) == 2 - assert all(chunk == mock_chunk for chunk in result) - assert mock_update_data.call_count == 2 - mock_client.embeddings.create.assert_called_once_with( - model="text-embedding-3-small", input=["chunk1", "chunk2"] - ) - - for call in mock_update_data.call_args_list: - assert call.kwargs.get("save") is False - - @patch("langchain.text_splitter.RecursiveCharacterTextSplitter") - def test_split_message_text(self, mock_splitter_class, command): - """Test message text splitting functionality.""" - mock_splitter = Mock() - mock_splitter.split_text.return_value = ["chunk1", "chunk2", "chunk3"] - mock_splitter_class.return_value = mock_splitter - - long_text = "This is a long message that should be split into multiple chunks. " * 10 - - result = command.split_message_text(long_text) - - mock_splitter_class.assert_called_once_with( - chunk_size=300, - chunk_overlap=40, - length_function=len, - separators=["\n\n", "\n", " ", ""], - ) - mock_splitter.split_text.assert_called_once_with(long_text) - assert result == ["chunk1", "chunk2", "chunk3"] - - @patch("langchain.text_splitter.RecursiveCharacterTextSplitter") - def test_split_message_text_short(self, mock_splitter_class, command): - """Test message text splitting with short text.""" - mock_splitter = Mock() - mock_splitter.split_text.return_value = ["Short message"] - mock_splitter_class.return_value = mock_splitter - - short_text = "Short message" - - result = command.split_message_text(short_text) - - assert result == ["Short message"] - mock_splitter.split_text.assert_called_once_with(short_text) - - @patch("langchain.text_splitter.RecursiveCharacterTextSplitter") - def test_split_message_text_empty(self, mock_splitter_class, command): - """Test message text splitting with empty text.""" - mock_splitter = Mock() - mock_splitter.split_text.return_value = [] - mock_splitter_class.return_value = mock_splitter - - result = command.split_message_text("") - - assert result == [] - - @pytest.mark.parametrize( - ("input_text", "expected_patterns"), - [ - ("Hello world", "Hello world"), - ("Hello <@U123456> world", "Hello world"), - ("Check this link ", "Check this link "), - ("This is :smile: awesome :thumbsup:", "This is awesome "), - ("Multiple spaces", "Multiple spaces"), - ("", ""), - (" spaces around ", "spaces around"), - ], - ) - def test_clean_message_text(self, command, input_text, expected_patterns): - """Test message text cleaning functionality.""" - result = command.clean_message_text(input_text) - - assert result == expected_patterns - - def test_clean_message_text_with_emojis(self, command): - """Test message text 
cleaning with emoji characters.""" - text_with_emojis = "Hello 😀😃😄 world 🌍🌎🌏" - - result = command.clean_message_text(text_with_emojis) - - assert "😀" not in result - assert "😃" not in result - assert "🌍" not in result - assert "Hello" in result - assert "world" in result - - def test_clean_message_text_complex_patterns(self, command): - """Test message text cleaning with complex patterns.""" - complex_text = ( - "Hey <@U123456>! Check out :rocket: Amazing!" - ) - - result = command.clean_message_text(complex_text) - - assert "<@U123456>" not in result - assert "" not in result - assert ":rocket:" not in result - assert "Hey" in result - assert "Amazing" in result - - @patch("builtins.print") - @patch("apps.slack.models.message.Message.objects") - @patch("openai.OpenAI") - def test_handle_batch_processing( - self, mock_openai_client, mock_message_objects, mock_print, command - ): - """Test that messages are processed in batches.""" - mock_message_objects.count.return_value = 2500 - - mock_messages_batch1 = [Mock(spec=Message) for _ in range(1000)] - mock_messages_batch2 = [Mock(spec=Message) for _ in range(1000)] - mock_messages_batch3 = [Mock(spec=Message) for _ in range(500)] - - def mock_getitem(slice_obj): - start = slice_obj.start or 0 - stop = slice_obj.stop or 2500 - if start == 0 and stop == 1000: - return mock_messages_batch1 - if start == 1000 and stop == 2000: - return mock_messages_batch2 - if start == 2000 and stop == 2500: - return mock_messages_batch3 - return [] - - mock_message_objects.all.return_value.__getitem__ = mock_getitem - - for i, message in enumerate( - mock_messages_batch1 + mock_messages_batch2 + mock_messages_batch3 - ): - message.slack_message_id = f"msg_{i}" - message.raw_data = {"text": f"Message {i}"} - - mock_client_instance = Mock() - mock_openai_client.return_value = mock_client_instance - - mock_embedding_response = Mock() - mock_embedding_response.data = [] - mock_client_instance.embeddings.create.return_value = mock_embedding_response - - stdout = StringIO() - command.stdout = stdout - - with ( - patch.object(Chunk, "bulk_save") as mock_bulk_save, - patch.object(command, "create_chunks_from_message", return_value=[]), - ): - command.handle() - - mock_print.assert_called_with("Found 2500 messages to process") - output = stdout.getvalue() - assert "Completed processing all 2500 messages" in output - - assert mock_bulk_save.call_count == 0 - - @patch("django.core.management.call_command") - def test_call_command_integration(self, mock_call_command): - """Test that the command can be called via call_command.""" - call_command("slack_create_chunks") - - mock_call_command.assert_called_once_with("slack_create_chunks") - - def test_command_help_text(self, command): - """Test that the command has appropriate help text.""" - assert command.help == "Create chunks for Slack messages" - - @patch("builtins.print") - @patch("apps.slack.models.message.Message.objects") - @patch("openai.OpenAI") - def test_handle_with_no_messages( - self, mock_openai_client, mock_message_objects, mock_print, command - ): - """Test handle method when there are no messages.""" - mock_message_objects.count.return_value = 0 - - stdout = StringIO() - command.stdout = stdout - - command.handle() - - mock_print.assert_called_with("Found 0 messages to process") - output = stdout.getvalue() - assert "Completed processing all 0 messages" in output - - def test_create_chunks_from_message_with_none_subtype(self, command, mock_message): - """Test create_chunks_from_message with None subtype.""" 
- mock_message.raw_data = {"text": "Regular message"} - - mock_client = Mock() - mock_embedding_response = Mock() - mock_embedding_response.data = [Mock(embedding=[0.1, 0.2, 0.3])] - mock_client.embeddings.create.return_value = mock_embedding_response - command.openai_client = mock_client - - mock_chunk = Mock(spec=Chunk) - - with ( - patch.object(command, "split_message_text", return_value=["chunk1"]), - patch.object(Chunk, "update_data", return_value=mock_chunk) as mock_update_data, - ): - result = command.create_chunks_from_message(mock_message, "Regular message") - - assert len(result) == 1 - assert result[0] == mock_chunk - - mock_update_data.assert_called_once() - assert mock_update_data.call_args.kwargs.get("save") is False diff --git a/backend/tests/apps/ai/models/chunk_test.py b/backend/tests/apps/ai/models/chunk_test.py index 60e0e36d16..2b9c3c052e 100644 --- a/backend/tests/apps/ai/models/chunk_test.py +++ b/backend/tests/apps/ai/models/chunk_test.py @@ -159,25 +159,6 @@ def test_update_data_no_save(self, mocker): assert result.embedding == embedding patched_save.assert_not_called() - def test_update_data_with_keyword_save_parameter(self, mocker): - """Test update_data method with keyword-only save parameter.""" - mock_message = create_model_mock(Message) - chunk_text = "Test chunk content" - embedding = [0.1, 0.2, 0.3] - - mocker.patch( - "apps.slack.models.chunk.Chunk.objects.filter", - return_value=Mock(exists=Mock(return_value=False)), - ) - - patched_save = mocker.patch("apps.slack.models.chunk.Chunk.save") - - with patch.object(Chunk, "message", create=True): - result = Chunk.update_data(chunk_text, mock_message, embedding, save=True) - - assert result is not None - patched_save.assert_called_once() - def test_meta_class_attributes(self): """Test the Meta class attributes of the Chunk model.""" assert Chunk._meta.db_table == "slack_chunks" From cbb255fee0f924e9ec2ce868e9c68ee2e7ae2b23 Mon Sep 17 00:00:00 2001 From: Dishant1804 Date: Sun, 22 Jun 2025 23:37:52 +0530 Subject: [PATCH 4/8] code rabbit suggestions --- .../commands/slack_create_chunks.py | 35 ++--- .../commands/slack_sync_messages.py | 2 +- backend/poetry.lock | 127 ++++++++++-------- backend/pyproject.toml | 1 + cspell/custom-dict.txt | 1 + 5 files changed, 85 insertions(+), 81 deletions(-) diff --git a/backend/apps/ai/management/commands/slack_create_chunks.py b/backend/apps/ai/management/commands/slack_create_chunks.py index 86f30f4a11..718af6f33b 100644 --- a/backend/apps/ai/management/commands/slack_create_chunks.py +++ b/backend/apps/ai/management/commands/slack_create_chunks.py @@ -2,7 +2,10 @@ import os import re +import time +from datetime import UTC, datetime, timedelta +import emoji import openai from django.core.management.base import BaseCommand from langchain.text_splitter import RecursiveCharacterTextSplitter @@ -63,9 +66,16 @@ def create_chunks_from_message( return [] try: + last_request_time = datetime.now(UTC) + time_since_last_request = datetime.now(UTC) - last_request_time + + if time_since_last_request < timedelta(seconds=1.2): + time.sleep(1.2 - time_since_last_request.total_seconds()) + response = self.openai_client.embeddings.create( model="text-embedding-3-small", input=chunk_texts ) + last_request_time = datetime.now(UTC) embeddings = [d.embedding for d in response.data] return [ Chunk.update_data( @@ -97,33 +107,10 @@ def clean_message_text(self, message_text: str) -> str: if not message_text: return "" - emoji_pattern = re.compile( - "[" - "\U0001f600-\U0001f64f" - 
"\U0001f300-\U0001f5ff" - "\U0001f680-\U0001f6ff" - "\U0001f1e0-\U0001f1ff" - "\U00002500-\U00002bef" - "\U00002702-\U000027b0" - "\U000024c2-\U0001f251" - "\U0001f926-\U0001f937" - "\U00010000-\U0010ffff" - "\u2640-\u2642" - "\u2600-\u2b55" - "\u200d" - "\u23cf" - "\u23e9" - "\u231a" - "\ufe0f" - "\u3030" - "]+", - flags=re.UNICODE, - ) - + cleaned_text = emoji.demojize(message_text, delimiters=("", "")) cleaned_text = re.sub(r"<@U[A-Z0-9]+>", "", message_text) cleaned_text = re.sub(r"]+>", "", cleaned_text) cleaned_text = re.sub(r":\w+:", "", cleaned_text) - cleaned_text = emoji_pattern.sub("", cleaned_text) cleaned_text = re.sub(r"\s+", " ", cleaned_text) return cleaned_text.strip() diff --git a/backend/apps/slack/management/commands/slack_sync_messages.py b/backend/apps/slack/management/commands/slack_sync_messages.py index be0820fe4f..473391e280 100644 --- a/backend/apps/slack/management/commands/slack_sync_messages.py +++ b/backend/apps/slack/management/commands/slack_sync_messages.py @@ -64,7 +64,7 @@ def handle(self, *args, **options): conversations = ( Conversation.objects.filter(slack_channel_id=channel_id) if channel_id - else Conversation.objects.filter(sync_messages=True, workspace=workspace) + else Conversation.objects.filter(workspace=workspace) ) for conversation in conversations: diff --git a/backend/poetry.lock b/backend/poetry.lock index 6d8bf246d4..639f42f240 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -930,6 +930,21 @@ files = [ [package.extras] dev = ["mypy (>=1.15)"] +[[package]] +name = "emoji" +version = "2.14.1" +description = "Emoji for Python" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "emoji-2.14.1-py3-none-any.whl", hash = "sha256:35a8a486c1460addb1499e3bf7929d3889b2e2841a57401903699fef595e942b"}, + {file = "emoji-2.14.1.tar.gz", hash = "sha256:f8c50043d79a2c1410ebfae833ae1868d5941a67a6cd4d18377e2eb0bd79346b"}, +] + +[package.extras] +dev = ["coverage", "pytest (>=7.4.4)"] + [[package]] name = "execnet" version = "2.1.1" @@ -2049,63 +2064,63 @@ files = [ [[package]] name = "numpy" -version = "2.3.0" +version = "2.3.1" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.11" groups = ["main"] files = [ - {file = "numpy-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c3c9fdde0fa18afa1099d6257eb82890ea4f3102847e692193b54e00312a9ae9"}, - {file = "numpy-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46d16f72c2192da7b83984aa5455baee640e33a9f1e61e656f29adf55e406c2b"}, - {file = "numpy-2.3.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a0be278be9307c4ab06b788f2a077f05e180aea817b3e41cebbd5aaf7bd85ed3"}, - {file = "numpy-2.3.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:99224862d1412d2562248d4710126355d3a8db7672170a39d6909ac47687a8a4"}, - {file = "numpy-2.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2393a914db64b0ead0ab80c962e42d09d5f385802006a6c87835acb1f58adb96"}, - {file = "numpy-2.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:7729c8008d55e80784bd113787ce876ca117185c579c0d626f59b87d433ea779"}, - {file = "numpy-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:06d4fb37a8d383b769281714897420c5cc3545c79dc427df57fc9b852ee0bf58"}, - {file = "numpy-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c39ec392b5db5088259c68250e342612db82dc80ce044cf16496cf14cf6bc6f8"}, - {file = "numpy-2.3.0-cp311-cp311-win32.whl", hash = 
"sha256:ee9d3ee70d62827bc91f3ea5eee33153212c41f639918550ac0475e3588da59f"}, - {file = "numpy-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:43c55b6a860b0eb44d42341438b03513cf3879cb3617afb749ad49307e164edd"}, - {file = "numpy-2.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:2e6a1409eee0cb0316cb64640a49a49ca44deb1a537e6b1121dc7c458a1299a8"}, - {file = "numpy-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:389b85335838155a9076e9ad7f8fdba0827496ec2d2dc32ce69ce7898bde03ba"}, - {file = "numpy-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9498f60cd6bb8238d8eaf468a3d5bb031d34cd12556af53510f05fcf581c1b7e"}, - {file = "numpy-2.3.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:622a65d40d8eb427d8e722fd410ac3ad4958002f109230bc714fa551044ebae2"}, - {file = "numpy-2.3.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b9446d9d8505aadadb686d51d838f2b6688c9e85636a0c3abaeb55ed54756459"}, - {file = "numpy-2.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:50080245365d75137a2bf46151e975de63146ae6d79f7e6bd5c0e85c9931d06a"}, - {file = "numpy-2.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c24bb4113c66936eeaa0dc1e47c74770453d34f46ee07ae4efd853a2ed1ad10a"}, - {file = "numpy-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4d8d294287fdf685281e671886c6dcdf0291a7c19db3e5cb4178d07ccf6ecc67"}, - {file = "numpy-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6295f81f093b7f5769d1728a6bd8bf7466de2adfa771ede944ce6711382b89dc"}, - {file = "numpy-2.3.0-cp312-cp312-win32.whl", hash = "sha256:e6648078bdd974ef5d15cecc31b0c410e2e24178a6e10bf511e0557eed0f2570"}, - {file = "numpy-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:0898c67a58cdaaf29994bc0e2c65230fd4de0ac40afaf1584ed0b02cd74c6fdd"}, - {file = "numpy-2.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:bd8df082b6c4695753ad6193018c05aac465d634834dca47a3ae06d4bb22d9ea"}, - {file = "numpy-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5754ab5595bfa2c2387d241296e0381c21f44a4b90a776c3c1d39eede13a746a"}, - {file = "numpy-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d11fa02f77752d8099573d64e5fe33de3229b6632036ec08f7080f46b6649959"}, - {file = "numpy-2.3.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:aba48d17e87688a765ab1cd557882052f238e2f36545dfa8e29e6a91aef77afe"}, - {file = "numpy-2.3.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4dc58865623023b63b10d52f18abaac3729346a7a46a778381e0e3af4b7f3beb"}, - {file = "numpy-2.3.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:df470d376f54e052c76517393fa443758fefcdd634645bc9c1f84eafc67087f0"}, - {file = "numpy-2.3.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:87717eb24d4a8a64683b7a4e91ace04e2f5c7c77872f823f02a94feee186168f"}, - {file = "numpy-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d8fa264d56882b59dcb5ea4d6ab6f31d0c58a57b41aec605848b6eb2ef4a43e8"}, - {file = "numpy-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e651756066a0eaf900916497e20e02fe1ae544187cb0fe88de981671ee7f6270"}, - {file = "numpy-2.3.0-cp313-cp313-win32.whl", hash = "sha256:e43c3cce3b6ae5f94696669ff2a6eafd9a6b9332008bafa4117af70f4b88be6f"}, - {file = "numpy-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:81ae0bf2564cf475f94be4a27ef7bcf8af0c3e28da46770fc904da9abd5279b5"}, - {file = "numpy-2.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:c8738baa52505fa6e82778580b23f945e3578412554d937093eac9205e845e6e"}, - {file = "numpy-2.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = 
"sha256:39b27d8b38942a647f048b675f134dd5a567f95bfff481f9109ec308515c51d8"}, - {file = "numpy-2.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0eba4a1ea88f9a6f30f56fdafdeb8da3774349eacddab9581a21234b8535d3d3"}, - {file = "numpy-2.3.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:b0f1f11d0a1da54927436505a5a7670b154eac27f5672afc389661013dfe3d4f"}, - {file = "numpy-2.3.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:690d0a5b60a47e1f9dcec7b77750a4854c0d690e9058b7bef3106e3ae9117808"}, - {file = "numpy-2.3.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:8b51ead2b258284458e570942137155978583e407babc22e3d0ed7af33ce06f8"}, - {file = "numpy-2.3.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:aaf81c7b82c73bd9b45e79cfb9476cb9c29e937494bfe9092c26aece812818ad"}, - {file = "numpy-2.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f420033a20b4f6a2a11f585f93c843ac40686a7c3fa514060a97d9de93e5e72b"}, - {file = "numpy-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d344ca32ab482bcf8735d8f95091ad081f97120546f3d250240868430ce52555"}, - {file = "numpy-2.3.0-cp313-cp313t-win32.whl", hash = "sha256:48a2e8eaf76364c32a1feaa60d6925eaf32ed7a040183b807e02674305beef61"}, - {file = "numpy-2.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ba17f93a94e503551f154de210e4d50c5e3ee20f7e7a1b5f6ce3f22d419b93bb"}, - {file = "numpy-2.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:f14e016d9409680959691c109be98c436c6249eaf7f118b424679793607b5944"}, - {file = "numpy-2.3.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:80b46117c7359de8167cc00a2c7d823bdd505e8c7727ae0871025a86d668283b"}, - {file = "numpy-2.3.0-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:5814a0f43e70c061f47abd5857d120179609ddc32a613138cbb6c4e9e2dbdda5"}, - {file = "numpy-2.3.0-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:ef6c1e88fd6b81ac6d215ed71dc8cd027e54d4bf1d2682d362449097156267a2"}, - {file = "numpy-2.3.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:33a5a12a45bb82d9997e2c0b12adae97507ad7c347546190a18ff14c28bbca12"}, - {file = "numpy-2.3.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:54dfc8681c1906d239e95ab1508d0a533c4a9505e52ee2d71a5472b04437ef97"}, - {file = "numpy-2.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:e017a8a251ff4d18d71f139e28bdc7c31edba7a507f72b1414ed902cbe48c74d"}, - {file = "numpy-2.3.0.tar.gz", hash = "sha256:581f87f9e9e9db2cba2141400e160e9dd644ee248788d6f90636eeb8fd9260a6"}, + {file = "numpy-2.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6ea9e48336a402551f52cd8f593343699003d2353daa4b72ce8d34f66b722070"}, + {file = "numpy-2.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ccb7336eaf0e77c1635b232c141846493a588ec9ea777a7c24d7166bb8533ae"}, + {file = "numpy-2.3.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:0bb3a4a61e1d327e035275d2a993c96fa786e4913aa089843e6a2d9dd205c66a"}, + {file = "numpy-2.3.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:e344eb79dab01f1e838ebb67aab09965fb271d6da6b00adda26328ac27d4a66e"}, + {file = "numpy-2.3.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:467db865b392168ceb1ef1ffa6f5a86e62468c43e0cfb4ab6da667ede10e58db"}, + {file = "numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:afed2ce4a84f6b0fc6c1ce734ff368cbf5a5e24e8954a338f3bdffa0718adffb"}, + {file = "numpy-2.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0025048b3c1557a20bc80d06fdeb8cc7fc193721484cca82b2cfa072fec71a93"}, + {file = 
"numpy-2.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a5ee121b60aa509679b682819c602579e1df14a5b07fe95671c8849aad8f2115"}, + {file = "numpy-2.3.1-cp311-cp311-win32.whl", hash = "sha256:a8b740f5579ae4585831b3cf0e3b0425c667274f82a484866d2adf9570539369"}, + {file = "numpy-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:d4580adadc53311b163444f877e0789f1c8861e2698f6b2a4ca852fda154f3ff"}, + {file = "numpy-2.3.1-cp311-cp311-win_arm64.whl", hash = "sha256:ec0bdafa906f95adc9a0c6f26a4871fa753f25caaa0e032578a30457bff0af6a"}, + {file = "numpy-2.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2959d8f268f3d8ee402b04a9ec4bb7604555aeacf78b360dc4ec27f1d508177d"}, + {file = "numpy-2.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:762e0c0c6b56bdedfef9a8e1d4538556438288c4276901ea008ae44091954e29"}, + {file = "numpy-2.3.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:867ef172a0976aaa1f1d1b63cf2090de8b636a7674607d514505fb7276ab08fc"}, + {file = "numpy-2.3.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:4e602e1b8682c2b833af89ba641ad4176053aaa50f5cacda1a27004352dde943"}, + {file = "numpy-2.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8e333040d069eba1652fb08962ec5b76af7f2c7bce1df7e1418c8055cf776f25"}, + {file = "numpy-2.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e7cbf5a5eafd8d230a3ce356d892512185230e4781a361229bd902ff403bc660"}, + {file = "numpy-2.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5f1b8f26d1086835f442286c1d9b64bb3974b0b1e41bb105358fd07d20872952"}, + {file = "numpy-2.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ee8340cb48c9b7a5899d1149eece41ca535513a9698098edbade2a8e7a84da77"}, + {file = "numpy-2.3.1-cp312-cp312-win32.whl", hash = "sha256:e772dda20a6002ef7061713dc1e2585bc1b534e7909b2030b5a46dae8ff077ab"}, + {file = "numpy-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:cfecc7822543abdea6de08758091da655ea2210b8ffa1faf116b940693d3df76"}, + {file = "numpy-2.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:7be91b2239af2658653c5bb6f1b8bccafaf08226a258caf78ce44710a0160d30"}, + {file = "numpy-2.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:25a1992b0a3fdcdaec9f552ef10d8103186f5397ab45e2d25f8ac51b1a6b97e8"}, + {file = "numpy-2.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7dea630156d39b02a63c18f508f85010230409db5b2927ba59c8ba4ab3e8272e"}, + {file = "numpy-2.3.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:bada6058dd886061f10ea15f230ccf7dfff40572e99fef440a4a857c8728c9c0"}, + {file = "numpy-2.3.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:a894f3816eb17b29e4783e5873f92faf55b710c2519e5c351767c51f79d8526d"}, + {file = "numpy-2.3.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:18703df6c4a4fee55fd3d6e5a253d01c5d33a295409b03fda0c86b3ca2ff41a1"}, + {file = "numpy-2.3.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5902660491bd7a48b2ec16c23ccb9124b8abfd9583c5fdfa123fe6b421e03de1"}, + {file = "numpy-2.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:36890eb9e9d2081137bd78d29050ba63b8dab95dff7912eadf1185e80074b2a0"}, + {file = "numpy-2.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a780033466159c2270531e2b8ac063704592a0bc62ec4a1b991c7c40705eb0e8"}, + {file = "numpy-2.3.1-cp313-cp313-win32.whl", hash = "sha256:39bff12c076812595c3a306f22bfe49919c5513aa1e0e70fac756a0be7c2a2b8"}, + {file = "numpy-2.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:8d5ee6eec45f08ce507a6570e06f2f879b374a552087a4179ea7838edbcbfa42"}, + {file = 
"numpy-2.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:0c4d9e0a8368db90f93bd192bfa771ace63137c3488d198ee21dfb8e7771916e"}, + {file = "numpy-2.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:b0b5397374f32ec0649dd98c652a1798192042e715df918c20672c62fb52d4b8"}, + {file = "numpy-2.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c5bdf2015ccfcee8253fb8be695516ac4457c743473a43290fd36eba6a1777eb"}, + {file = "numpy-2.3.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d70f20df7f08b90a2062c1f07737dd340adccf2068d0f1b9b3d56e2038979fee"}, + {file = "numpy-2.3.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:2fb86b7e58f9ac50e1e9dd1290154107e47d1eef23a0ae9145ded06ea606f992"}, + {file = "numpy-2.3.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:23ab05b2d241f76cb883ce8b9a93a680752fbfcbd51c50eff0b88b979e471d8c"}, + {file = "numpy-2.3.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ce2ce9e5de4703a673e705183f64fd5da5bf36e7beddcb63a25ee2286e71ca48"}, + {file = "numpy-2.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c4913079974eeb5c16ccfd2b1f09354b8fed7e0d6f2cab933104a09a6419b1ee"}, + {file = "numpy-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:010ce9b4f00d5c036053ca684c77441f2f2c934fd23bee058b4d6f196efd8280"}, + {file = "numpy-2.3.1-cp313-cp313t-win32.whl", hash = "sha256:6269b9edfe32912584ec496d91b00b6d34282ca1d07eb10e82dfc780907d6c2e"}, + {file = "numpy-2.3.1-cp313-cp313t-win_amd64.whl", hash = "sha256:2a809637460e88a113e186e87f228d74ae2852a2e0c44de275263376f17b5bdc"}, + {file = "numpy-2.3.1-cp313-cp313t-win_arm64.whl", hash = "sha256:eccb9a159db9aed60800187bc47a6d3451553f0e1b08b068d8b277ddfbb9b244"}, + {file = "numpy-2.3.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ad506d4b09e684394c42c966ec1527f6ebc25da7f4da4b1b056606ffe446b8a3"}, + {file = "numpy-2.3.1-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:ebb8603d45bc86bbd5edb0d63e52c5fd9e7945d3a503b77e486bd88dde67a19b"}, + {file = "numpy-2.3.1-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:15aa4c392ac396e2ad3d0a2680c0f0dee420f9fed14eef09bdb9450ee6dcb7b7"}, + {file = "numpy-2.3.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c6e0bf9d1a2f50d2b65a7cf56db37c095af17b59f6c132396f7c6d5dd76484df"}, + {file = "numpy-2.3.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:eabd7e8740d494ce2b4ea0ff05afa1b7b291e978c0ae075487c51e8bd93c0c68"}, + {file = "numpy-2.3.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:e610832418a2bc09d974cc9fecebfa51e9532d6190223bc5ef6a7402ebf3b5cb"}, + {file = "numpy-2.3.1.tar.gz", hash = "sha256:1ec9ae20a4226da374362cca3c62cd753faf2f951440b0e3b98e93c235441d2b"}, ] [[package]] @@ -2644,14 +2659,14 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" [[package]] name = "pydantic-settings" -version = "2.9.1" +version = "2.10.0" description = "Settings management using Pydantic" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "pydantic_settings-2.9.1-py3-none-any.whl", hash = "sha256:59b4f431b1defb26fe620c71a7d3968a710d719f5f4cdbbdb7926edeb770f6ef"}, - {file = "pydantic_settings-2.9.1.tar.gz", hash = "sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268"}, + {file = "pydantic_settings-2.10.0-py3-none-any.whl", hash = "sha256:33781dfa1c7405d5ed2b6f150830a93bb58462a847357bd8f162f8bacb77c027"}, + {file = "pydantic_settings-2.10.0.tar.gz", hash = "sha256:7a12e0767ba283954f3fd3fefdd0df3af21b28aa849c40c35811d52d682fa876"}, ] [package.dependencies] @@ -4008,4 
+4023,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "^3.13" -content-hash = "72d70977fcf98483419b9f7bf7ec52d1f132f60bad1581249257b958ba290f0a" +content-hash = "50c9bfb1cba43fb146a8818efcb20b79636efd34c8e86b93cca0b9a7603598d1" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 65973e2809..8c2a68c138 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -30,6 +30,7 @@ django-filter = "^25.1" django-redis = "^5.4.0" django-storages = { extras = ["s3"], version = "^1.14.4" } djangorestframework = "^3.15.2" +emoji= "^2.14.1" geopy = "^2.4.1" gunicorn = "^23.0.0" humanize = "^4.11.0" diff --git a/cspell/custom-dict.txt b/cspell/custom-dict.txt index 428a6bcd9d..ba4695d2df 100644 --- a/cspell/custom-dict.txt +++ b/cspell/custom-dict.txt @@ -45,6 +45,7 @@ csrfguard csrfprotector csrftoken cva +demojize dismissable dsn env From 9733fee7c161aedb1bb56283e4999e763d2b530e Mon Sep 17 00:00:00 2001 From: Dishant1804 Date: Sun, 22 Jun 2025 23:59:56 +0530 Subject: [PATCH 5/8] fixes --- .../apps/ai/management/commands/slack_create_chunks.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/backend/apps/ai/management/commands/slack_create_chunks.py b/backend/apps/ai/management/commands/slack_create_chunks.py index 718af6f33b..d92e7788b3 100644 --- a/backend/apps/ai/management/commands/slack_create_chunks.py +++ b/backend/apps/ai/management/commands/slack_create_chunks.py @@ -66,8 +66,9 @@ def create_chunks_from_message( return [] try: - last_request_time = datetime.now(UTC) - time_since_last_request = datetime.now(UTC) - last_request_time + time_since_last_request = datetime.now(UTC) - getattr( + self, "last_request_time", datetime.now(UTC) - timedelta(seconds=2) + ) if time_since_last_request < timedelta(seconds=1.2): time.sleep(1.2 - time_since_last_request.total_seconds()) @@ -75,7 +76,7 @@ def create_chunks_from_message( response = self.openai_client.embeddings.create( model="text-embedding-3-small", input=chunk_texts ) - last_request_time = datetime.now(UTC) + self.last_request_time = datetime.now(UTC) embeddings = [d.embedding for d in response.data] return [ Chunk.update_data( @@ -108,7 +109,7 @@ def clean_message_text(self, message_text: str) -> str: return "" cleaned_text = emoji.demojize(message_text, delimiters=("", "")) - cleaned_text = re.sub(r"<@U[A-Z0-9]+>", "", message_text) + cleaned_text = re.sub(r"<@U[A-Z0-9]+>", "", cleaned_text) cleaned_text = re.sub(r"]+>", "", cleaned_text) cleaned_text = re.sub(r":\w+:", "", cleaned_text) cleaned_text = re.sub(r"\s+", " ", cleaned_text) From a9b1be95f252107d55c467bd1e471611179bc5e7 Mon Sep 17 00:00:00 2001 From: Dishant1804 Date: Mon, 23 Jun 2025 00:22:21 +0530 Subject: [PATCH 6/8] open ai error handling changes --- backend/apps/ai/management/commands/slack_create_chunks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/apps/ai/management/commands/slack_create_chunks.py b/backend/apps/ai/management/commands/slack_create_chunks.py index d92e7788b3..a1c797d735 100644 --- a/backend/apps/ai/management/commands/slack_create_chunks.py +++ b/backend/apps/ai/management/commands/slack_create_chunks.py @@ -87,7 +87,7 @@ def create_chunks_from_message( ) for text, embedding in zip(chunk_texts, embeddings, strict=True) ] - except openai.error.OpenAIError as e: + except openai.OpenAIError as e: self.stdout.write( self.style.ERROR(f"OpenAI API error for message {message.slack_message_id}: {e}") ) From e2abafa69c798d6e52dbf7ef5493189408714e10 Mon Sep 
17 00:00:00 2001 From: Dishant1804 Date: Wed, 25 Jun 2025 23:45:29 +0530 Subject: [PATCH 7/8] suggestions implemented --- .../commands/slack_create_chunks.py | 20 ++++++++------ backend/apps/ai/migrations/0001_initial.py | 2 +- backend/apps/ai/models/chunk.py | 11 ++------ backend/apps/slack/models/message.py | 5 ++++ backend/tests/apps/ai/models/chunk_test.py | 27 +------------------ 5 files changed, 21 insertions(+), 44 deletions(-) diff --git a/backend/apps/ai/management/commands/slack_create_chunks.py b/backend/apps/ai/management/commands/slack_create_chunks.py index a1c797d735..0d39eecf48 100644 --- a/backend/apps/ai/management/commands/slack_create_chunks.py +++ b/backend/apps/ai/management/commands/slack_create_chunks.py @@ -13,6 +13,9 @@ from apps.ai.models.chunk import Chunk from apps.slack.models.message import Message +MIN_REQUEST_INTERVAL_SECONDS = 1.2 +DEFAULT_LAST_REQUEST_OFFSET_SECONDS = 2 + class Command(BaseCommand): help = "Create chunks for Slack messages" @@ -50,11 +53,9 @@ def handle(self, *args, **options): self.stdout.write(f"Completed processing all {total_messages} messages") - def create_chunks_from_message( - self, message: Message, cleaned_text: str - ) -> list[Chunk | None]: + def create_chunks_from_message(self, message: Message, cleaned_text: str) -> list[Chunk]: """Create chunks from a message.""" - if message.raw_data.get("subtype") in ["channel_join", "channel_leave"]: + if message.subtype in ["channel_join", "channel_leave"]: return [] chunk_texts = self.split_message_text(cleaned_text) @@ -67,18 +68,20 @@ def create_chunks_from_message( try: time_since_last_request = datetime.now(UTC) - getattr( - self, "last_request_time", datetime.now(UTC) - timedelta(seconds=2) + self, + "last_request_time", + datetime.now(UTC) - timedelta(seconds=DEFAULT_LAST_REQUEST_OFFSET_SECONDS), ) - if time_since_last_request < timedelta(seconds=1.2): - time.sleep(1.2 - time_since_last_request.total_seconds()) + if time_since_last_request < timedelta(seconds=MIN_REQUEST_INTERVAL_SECONDS): + time.sleep(MIN_REQUEST_INTERVAL_SECONDS - time_since_last_request.total_seconds()) response = self.openai_client.embeddings.create( model="text-embedding-3-small", input=chunk_texts ) self.last_request_time = datetime.now(UTC) embeddings = [d.embedding for d in response.data] - return [ + chunks = [ Chunk.update_data( chunk_text=text, message=message, @@ -87,6 +90,7 @@ def create_chunks_from_message( ) for text, embedding in zip(chunk_texts, embeddings, strict=True) ] + return [chunk for chunk in chunks if chunk is not None] except openai.OpenAIError as e: self.stdout.write( self.style.ERROR(f"OpenAI API error for message {message.slack_message_id}: {e}") diff --git a/backend/apps/ai/migrations/0001_initial.py b/backend/apps/ai/migrations/0001_initial.py index 238ec0048e..a13b087414 100644 --- a/backend/apps/ai/migrations/0001_initial.py +++ b/backend/apps/ai/migrations/0001_initial.py @@ -44,7 +44,7 @@ class Migration(migrations.Migration): ], options={ "verbose_name": "Chunks", - "db_table": "slack_chunks", + "db_table": "ai_chunks", "unique_together": {("message", "chunk_text")}, }, ), diff --git a/backend/apps/ai/models/chunk.py b/backend/apps/ai/models/chunk.py index 4a966866f0..19631bf11e 100644 --- a/backend/apps/ai/models/chunk.py +++ b/backend/apps/ai/models/chunk.py @@ -12,7 +12,7 @@ class Chunk(TimestampedModel): """Slack Chunk model.""" class Meta: - db_table = "slack_chunks" + db_table = "ai_chunks" verbose_name = "Chunks" unique_together = ("message", "chunk_text") @@ -25,12 
+25,6 @@ def __str__(self): text_preview = truncate(self.chunk_text, 50) return f"Chunk {self.id} for Message {self.message.slack_message_id}: {text_preview}" - def from_chunk(self, chunk_text: str, message: Message, embedding=None) -> None: - """Update instance based on chunk data.""" - self.chunk_text = chunk_text - self.message = message - self.embedding = embedding - @staticmethod def bulk_save(chunks, fields=None): """Bulk save chunks.""" @@ -61,8 +55,7 @@ def update_data( if Chunk.objects.filter(message=message, chunk_text=chunk_text).exists(): return None - chunk = Chunk(message=message) - chunk.from_chunk(chunk_text, message, embedding) + chunk = Chunk(message=message, chunk_text=chunk_text, embedding=embedding) if save: chunk.save() diff --git a/backend/apps/slack/models/message.py b/backend/apps/slack/models/message.py index 03cdc70201..c8a02f462b 100644 --- a/backend/apps/slack/models/message.py +++ b/backend/apps/slack/models/message.py @@ -58,6 +58,11 @@ def latest_reply(self) -> "Message | None": .first() ) + @property + def subtype(self) -> str | None: + """Get the subtype of the message if it exists.""" + return self.raw_data.get("subtype") + def from_slack( self, message_data: dict, diff --git a/backend/tests/apps/ai/models/chunk_test.py b/backend/tests/apps/ai/models/chunk_test.py index 2b9c3c052e..3bbee46e6c 100644 --- a/backend/tests/apps/ai/models/chunk_test.py +++ b/backend/tests/apps/ai/models/chunk_test.py @@ -29,31 +29,6 @@ def test_str_method(self): assert "Chunk 1 for Message 123456.789:" in result assert "This is a test chunk with some content that" in result - def test_from_chunk_method(self): - """Test the from_chunk method updates chunk properties.""" - chunk = Chunk() - mock_message = create_model_mock(Message) - test_text = "Test chunk content" - test_embedding = [0.1, 0.2, 0.3] - - chunk.from_chunk(test_text, mock_message, test_embedding) - - assert chunk.chunk_text == test_text - assert chunk.message == mock_message - assert chunk.embedding == test_embedding - - def test_from_chunk_method_without_embedding(self): - """Test from_chunk method with None embedding.""" - chunk = Chunk() - mock_message = create_model_mock(Message) - test_text = "Test chunk content" - - chunk.from_chunk(test_text, mock_message) - - assert chunk.chunk_text == test_text - assert chunk.message == mock_message - assert chunk.embedding is None - def test_bulk_save_with_chunks(self): """Test bulk_save method with valid chunks.""" mock_chunks = [Mock(), Mock(), Mock()] @@ -161,7 +136,7 @@ def test_update_data_no_save(self, mocker): def test_meta_class_attributes(self): """Test the Meta class attributes of the Chunk model.""" - assert Chunk._meta.db_table == "slack_chunks" + assert Chunk._meta.db_table == "ai_chunks" assert Chunk._meta.verbose_name == "Chunks" assert ("message", "chunk_text") in Chunk._meta.unique_together From d93208c13b39ff90ff1dd5dcf87d3fce5c075cea Mon Sep 17 00:00:00 2001 From: Arkadii Yakovets Date: Thu, 26 Jun 2025 15:18:29 -0700 Subject: [PATCH 8/8] Update code --- backend/Makefile | 8 +- backend/apps/ai/admin.py | 4 +- .../ai_create_slack_message_chunks.py | 92 +++++++++++++ .../commands/slack_create_chunks.py | 121 ------------------ ...2_rename_chunk_text_chunk_text_and_more.py | 22 ++++ ..._options_alter_chunk_embedding_and_more.py | 27 ++++ backend/apps/ai/models/chunk.py | 37 ++++-- .../commands/slack_sync_messages.py | 2 +- backend/apps/slack/models/message.py | 21 +++ backend/tests/apps/ai/models/chunk_test.py | 47 ++----- 10 files changed, 205 
insertions(+), 176 deletions(-) create mode 100644 backend/apps/ai/management/commands/ai_create_slack_message_chunks.py delete mode 100644 backend/apps/ai/management/commands/slack_create_chunks.py create mode 100644 backend/apps/ai/migrations/0002_rename_chunk_text_chunk_text_and_more.py create mode 100644 backend/apps/ai/migrations/0003_alter_chunk_options_alter_chunk_embedding_and_more.py diff --git a/backend/Makefile b/backend/Makefile index 854918181b..f5a4329435 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -1,3 +1,7 @@ +ai-create-slack-message-chunks: + @echo "Creating Slack message chunks" + @CMD="python manage.py ai_create_slack_message_chunks" $(MAKE) exec-backend-command + clean-backend-dependencies: @rm -rf backend/.cache @rm -rf backend/.local @@ -175,10 +179,6 @@ slack-sync-messages: @echo "Syncing Slack messages" @CMD="python manage.py slack_sync_messages" $(MAKE) exec-backend-command -slack-create-message-chunks: - @echo "creating message chunks" - @CMD="python manage.py slack_create_chunks" $(MAKE) exec-backend-command - sync-data: \ update-data \ enrich-data \ diff --git a/backend/apps/ai/admin.py b/backend/apps/ai/admin.py index 565a228004..e168d67d27 100644 --- a/backend/apps/ai/admin.py +++ b/backend/apps/ai/admin.py @@ -9,11 +9,11 @@ class ChunkAdmin(admin.ModelAdmin): list_display = ( "id", "message", - "chunk_text", + "text", ) search_fields = ( "message__slack_message_id", - "chunk_text", + "text", ) diff --git a/backend/apps/ai/management/commands/ai_create_slack_message_chunks.py b/backend/apps/ai/management/commands/ai_create_slack_message_chunks.py new file mode 100644 index 0000000000..266e7171e0 --- /dev/null +++ b/backend/apps/ai/management/commands/ai_create_slack_message_chunks.py @@ -0,0 +1,92 @@ +"""A command to create chunks of Slack messages.""" + +import os +import time +from datetime import UTC, datetime, timedelta + +import openai +from django.core.management.base import BaseCommand + +from apps.ai.models.chunk import Chunk +from apps.slack.models.message import Message + +MIN_REQUEST_INTERVAL_SECONDS = 1.2 +DEFAULT_LAST_REQUEST_OFFSET_SECONDS = 2 + + +class Command(BaseCommand): + help = "Create chunks for Slack messages" + + def handle(self, *args, **options): + if not (openai_api_key := os.getenv("DJANGO_OPEN_AI_SECRET_KEY")): + self.stdout.write( + self.style.ERROR("DJANGO_OPEN_AI_SECRET_KEY environment variable not set") + ) + return + + self.openai_client = openai.OpenAI(api_key=openai_api_key) + + total_messages = Message.objects.count() + self.stdout.write(f"Found {total_messages} messages to process") + + batch_size = 100 + for offset in range(0, total_messages, batch_size): + Chunk.bulk_save( + [ + chunk + for message in Message.objects.all()[offset : offset + batch_size] + for chunk in self.create_chunks(message) + ] + ) + + self.stdout.write(f"Completed processing all {total_messages} messages") + + def create_chunks(self, message: Message) -> list[Chunk]: + """Create chunks from a message.""" + if message.subtype in {"channel_join", "channel_leave"}: + return [] + + if not (chunk_text := Chunk.split_text(message.cleaned_text)): + self.stdout.write( + f"No chunks created for message {message.slack_message_id}: " + f"`{message.cleaned_text}`" + ) + return [] + + try: + time_since_last_request = datetime.now(UTC) - getattr( + self, + "last_request_time", + datetime.now(UTC) - timedelta(seconds=DEFAULT_LAST_REQUEST_OFFSET_SECONDS), + ) + + if time_since_last_request < timedelta(seconds=MIN_REQUEST_INTERVAL_SECONDS): + 
time.sleep(MIN_REQUEST_INTERVAL_SECONDS - time_since_last_request.total_seconds()) + + response = self.openai_client.embeddings.create( + input=chunk_text, + model="text-embedding-3-small", + ) + self.last_request_time = datetime.now(UTC) + + return [ + chunk + for text, embedding in zip( + chunk_text, + [d.embedding for d in response.data], # Embedding data from OpenAI response. + strict=True, + ) + if ( + chunk := Chunk.update_data( + embedding=embedding, + message=message, + save=False, + text=text, + ) + ) + ] + except openai.OpenAIError as e: + self.stdout.write( + self.style.ERROR(f"OpenAI API error for message {message.slack_message_id}: {e}") + ) + return [] diff --git a/backend/apps/ai/management/commands/slack_create_chunks.py b/backend/apps/ai/management/commands/slack_create_chunks.py deleted file mode 100644 index 0d39eecf48..0000000000 --- a/backend/apps/ai/management/commands/slack_create_chunks.py +++ /dev/null @@ -1,121 +0,0 @@ -"""A command to create chunks of Slack messages.""" - -import os -import re -import time -from datetime import UTC, datetime, timedelta - -import emoji -import openai -from django.core.management.base import BaseCommand -from langchain.text_splitter import RecursiveCharacterTextSplitter - -from apps.ai.models.chunk import Chunk -from apps.slack.models.message import Message - -MIN_REQUEST_INTERVAL_SECONDS = 1.2 -DEFAULT_LAST_REQUEST_OFFSET_SECONDS = 2 - - -class Command(BaseCommand): - help = "Create chunks for Slack messages" - - def handle(self, *args, **options): - openai_api_key = os.getenv("DJANGO_OPEN_AI_SECRET_KEY") - - if not openai_api_key: - self.stdout.write( - self.style.ERROR("DJANGO_OPEN_AI_SECRET_KEY environment variable not set") - ) - return - - self.openai_client = openai.OpenAI(api_key=openai_api_key) - - total_messages = Message.objects.count() - self.stdout.write(f"Found {total_messages} messages to process") - - batch_size = 1000 - processed_count = 0 - - for offset in range(0, total_messages, batch_size): - batch_messages = Message.objects.all()[offset : offset + batch_size] - batch_chunks = [] - - for message in batch_messages: - cleaned_text = self.clean_message_text(message.raw_data.get("text", "")) - chunks = self.create_chunks_from_message(message, cleaned_text) - batch_chunks.extend(chunks) - - if batch_chunks: - Chunk.bulk_save(batch_chunks) - - processed_count += len(batch_messages) - - self.stdout.write(f"Completed processing all {total_messages} messages") - - def create_chunks_from_message(self, message: Message, cleaned_text: str) -> list[Chunk]: - """Create chunks from a message.""" - if message.subtype in ["channel_join", "channel_leave"]: - return [] - - chunk_texts = self.split_message_text(cleaned_text) - - if not chunk_texts: - self.stdout.write( - f"No chunks created for message {message.slack_message_id} - text too short" - ) - return [] - - try: - time_since_last_request = datetime.now(UTC) - getattr( - self, - "last_request_time", - datetime.now(UTC) - timedelta(seconds=DEFAULT_LAST_REQUEST_OFFSET_SECONDS), - ) - - if time_since_last_request < timedelta(seconds=MIN_REQUEST_INTERVAL_SECONDS): - time.sleep(MIN_REQUEST_INTERVAL_SECONDS - time_since_last_request.total_seconds()) - - response = self.openai_client.embeddings.create( - model="text-embedding-3-small", input=chunk_texts - ) - self.last_request_time = datetime.now(UTC) - embeddings = [d.embedding for d in response.data] - chunks = [ - Chunk.update_data( - chunk_text=text, - message=message, - embedding=embedding, - save=False, - ) - for text, 
embedding in zip(chunk_texts, embeddings, strict=True) - ] - return [chunk for chunk in chunks if chunk is not None] - except openai.OpenAIError as e: - self.stdout.write( - self.style.ERROR(f"OpenAI API error for message {message.slack_message_id}: {e}") - ) - return [] - - def split_message_text(self, message_text: str) -> list[str]: - """Split message text into chunks.""" - splitter = RecursiveCharacterTextSplitter( - chunk_size=300, - chunk_overlap=40, - length_function=len, - separators=["\n\n", "\n", " ", ""], - ) - return splitter.split_text(message_text) - - def clean_message_text(self, message_text: str) -> str: - """Clean message text by removing emojis and other noise while preserving context.""" - if not message_text: - return "" - - cleaned_text = emoji.demojize(message_text, delimiters=("", "")) - cleaned_text = re.sub(r"<@U[A-Z0-9]+>", "", cleaned_text) - cleaned_text = re.sub(r"]+>", "", cleaned_text) - cleaned_text = re.sub(r":\w+:", "", cleaned_text) - cleaned_text = re.sub(r"\s+", " ", cleaned_text) - - return cleaned_text.strip() diff --git a/backend/apps/ai/migrations/0002_rename_chunk_text_chunk_text_and_more.py b/backend/apps/ai/migrations/0002_rename_chunk_text_chunk_text_and_more.py new file mode 100644 index 0000000000..452416ec1c --- /dev/null +++ b/backend/apps/ai/migrations/0002_rename_chunk_text_chunk_text_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 5.2.3 on 2025-06-26 21:04 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("ai", "0001_initial"), + ("slack", "0018_conversation_sync_messages"), + ] + + operations = [ + migrations.RenameField( + model_name="chunk", + old_name="chunk_text", + new_name="text", + ), + migrations.AlterUniqueTogether( + name="chunk", + unique_together={("message", "text")}, + ), + ] diff --git a/backend/apps/ai/migrations/0003_alter_chunk_options_alter_chunk_embedding_and_more.py b/backend/apps/ai/migrations/0003_alter_chunk_options_alter_chunk_embedding_and_more.py new file mode 100644 index 0000000000..ceb05f5167 --- /dev/null +++ b/backend/apps/ai/migrations/0003_alter_chunk_options_alter_chunk_embedding_and_more.py @@ -0,0 +1,27 @@ +# Generated by Django 5.2.3 on 2025-06-26 21:45 + +import pgvector.django.vector +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("ai", "0002_rename_chunk_text_chunk_text_and_more"), + ] + + operations = [ + migrations.AlterModelOptions( + name="chunk", + options={"verbose_name": "Chunk"}, + ), + migrations.AlterField( + model_name="chunk", + name="embedding", + field=pgvector.django.vector.VectorField(dimensions=1536, verbose_name="Embedding"), + ), + migrations.AlterField( + model_name="chunk", + name="text", + field=models.TextField(verbose_name="Text"), + ), + ] diff --git a/backend/apps/ai/models/chunk.py b/backend/apps/ai/models/chunk.py index 19631bf11e..1d59caac67 100644 --- a/backend/apps/ai/models/chunk.py +++ b/backend/apps/ai/models/chunk.py @@ -1,6 +1,7 @@ """Slack app chunk model.""" from django.db import models +from langchain.text_splitter import RecursiveCharacterTextSplitter from pgvector.django import VectorField from apps.common.models import BulkSaveModel, TimestampedModel @@ -13,28 +14,38 @@ class Chunk(TimestampedModel): class Meta: db_table = "ai_chunks" - verbose_name = "Chunks" - unique_together = ("message", "chunk_text") + verbose_name = "Chunk" + unique_together = ("message", "text") + embedding = VectorField(verbose_name="Embedding", dimensions=1536) 
message = models.ForeignKey(Message, on_delete=models.CASCADE, related_name="chunks") - chunk_text = models.TextField(verbose_name="Chunk Text") - embedding = VectorField(verbose_name="Chunk Embedding", dimensions=1536) + text = models.TextField(verbose_name="Text") def __str__(self): """Human readable representation.""" - text_preview = truncate(self.chunk_text, 50) - return f"Chunk {self.id} for Message {self.message.slack_message_id}: {text_preview}" + return ( + f"Chunk {self.id} for Message {self.message.slack_message_id}: " + f"{truncate(self.text, 50)}" + ) @staticmethod def bulk_save(chunks, fields=None): """Bulk save chunks.""" - chunks = [chunk for chunk in chunks if chunk is not None] - if chunks: - BulkSaveModel.bulk_save(Chunk, chunks, fields=fields) + BulkSaveModel.bulk_save(Chunk, chunks, fields=fields) + + @staticmethod + def split_text(text: str) -> list[str]: + """Split text into chunks.""" + return RecursiveCharacterTextSplitter( + chunk_size=300, + chunk_overlap=40, + length_function=len, + separators=["\n\n", "\n", " ", ""], + ).split_text(text) @staticmethod def update_data( - chunk_text: str, + text: str, message: Message, embedding, *, @@ -43,7 +54,7 @@ def update_data( """Update chunk data. Args: - chunk_text (str): The text content of the chunk. + text (str): The text content of the chunk. message (Message): The message this chunk belongs to. embedding (list): The embedding vector for the chunk. save (bool): Whether to save the chunk to the database. @@ -52,10 +63,10 @@ def update_data( Chunk: The updated chunk instance. """ - if Chunk.objects.filter(message=message, chunk_text=chunk_text).exists(): + if Chunk.objects.filter(message=message, text=text).exists(): return None - chunk = Chunk(message=message, chunk_text=chunk_text, embedding=embedding) + chunk = Chunk(message=message, text=text, embedding=embedding) if save: chunk.save() diff --git a/backend/apps/slack/management/commands/slack_sync_messages.py b/backend/apps/slack/management/commands/slack_sync_messages.py index 473391e280..be0820fe4f 100644 --- a/backend/apps/slack/management/commands/slack_sync_messages.py +++ b/backend/apps/slack/management/commands/slack_sync_messages.py @@ -64,7 +64,7 @@ def handle(self, *args, **options): conversations = ( Conversation.objects.filter(slack_channel_id=channel_id) if channel_id - else Conversation.objects.filter(workspace=workspace) + else Conversation.objects.filter(sync_messages=True, workspace=workspace) ) for conversation in conversations: diff --git a/backend/apps/slack/models/message.py b/backend/apps/slack/models/message.py index c8a02f462b..831a416d19 100644 --- a/backend/apps/slack/models/message.py +++ b/backend/apps/slack/models/message.py @@ -1,7 +1,9 @@ """Slack app message model.""" +import re from datetime import UTC, datetime +import emoji from django.db import models from apps.common.models import BulkSaveModel, TimestampedModel @@ -46,6 +48,20 @@ def __str__(self): else truncate(self.raw_data["text"], 50) ) + @property + def cleaned_text(self) -> str: + """Get cleaned text from the message.""" + if not self.text: + return "" + + text = emoji.demojize(self.text) # Remove emojis. + text = re.sub(r"<@U[A-Z0-9]+>", "", text) # Remove user mentions. + text = re.sub(r"]+>", "", text) # Remove links. + text = re.sub(r":\w+:", "", text) # Remove emoji aliases. + text = re.sub(r"\s+", " ", text) # Normalize whitespace. 
+ + return text.strip() + @property def latest_reply(self) -> "Message | None": """Get the latest reply to this message.""" @@ -63,6 +79,11 @@ def subtype(self) -> str | None: """Get the subtype of the message if it exists.""" return self.raw_data.get("subtype") + @property + def text(self) -> str: + """Get the text of the message.""" + return self.raw_data.get("text", "") + def from_slack( self, message_data: dict, diff --git a/backend/tests/apps/ai/models/chunk_test.py b/backend/tests/apps/ai/models/chunk_test.py index 3bbee46e6c..3520d8191c 100644 --- a/backend/tests/apps/ai/models/chunk_test.py +++ b/backend/tests/apps/ai/models/chunk_test.py @@ -21,7 +21,7 @@ def test_str_method(self): chunk = Chunk( id=1, - chunk_text="This is a test chunk with some content that should be displayed", + text="This is a test chunk with some content that should be displayed", message=mock_message, ) @@ -37,31 +37,10 @@ def test_bulk_save_with_chunks(self): Chunk.bulk_save(mock_chunks) mock_bulk_save.assert_called_once_with(Chunk, mock_chunks, fields=None) - def test_bulk_save_with_none_chunks(self): - """Test bulk_save method filters out None chunks.""" - mock_chunks = [Mock(), None, Mock(), None] - expected_chunks = [mock_chunks[0], mock_chunks[2]] - - with patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: - Chunk.bulk_save(mock_chunks) - mock_bulk_save.assert_called_once_with(Chunk, expected_chunks, fields=None) - - def test_bulk_save_with_empty_list(self): - """Test bulk_save method with empty chunk list.""" - with patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: - Chunk.bulk_save([]) - mock_bulk_save.assert_not_called() - - def test_bulk_save_with_all_none_chunks(self): - """Test bulk_save method with all None chunks.""" - with patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: - Chunk.bulk_save([None, None, None]) - mock_bulk_save.assert_not_called() - def test_bulk_save_with_fields_parameter(self): """Test bulk_save method with custom fields parameter.""" mock_chunks = [Mock(), Mock()] - fields = ["chunk_text", "embedding"] + fields = ["text", "embedding"] with patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: Chunk.bulk_save(mock_chunks, fields=fields) @@ -70,7 +49,7 @@ def test_bulk_save_with_fields_parameter(self): def test_update_data_new_chunk(self, mocker): """Test update_data method creates new chunk when it doesn't exist.""" mock_message = create_model_mock(Message) - chunk_text = "Test chunk content" + text = "Test chunk content" embedding = [0.1, 0.2, 0.3] mocker.patch( @@ -82,12 +61,12 @@ def test_update_data_new_chunk(self, mocker): with patch.object(Chunk, "message", create=True): result = Chunk.update_data( - chunk_text=chunk_text, message=mock_message, embedding=embedding, save=True + text=text, message=mock_message, embedding=embedding, save=True ) assert result is not None assert isinstance(result, Chunk) - assert result.chunk_text == chunk_text + assert result.text == text assert result.message == mock_message assert result.embedding == embedding patched_save.assert_called_once() @@ -95,7 +74,7 @@ def test_update_data_new_chunk(self, mocker): def test_update_data_existing_chunk(self, mocker): """Test update_data method returns None when chunk already exists.""" mock_message = create_model_mock(Message) - chunk_text = "Existing chunk content" + text = "Existing chunk content" embedding = [0.1, 0.2, 0.3] mocker.patch( @@ -103,16 +82,14 @@ def test_update_data_existing_chunk(self, mocker): 
return_value=Mock(exists=Mock(return_value=True)), ) - result = Chunk.update_data( - chunk_text=chunk_text, message=mock_message, embedding=embedding, save=True - ) + result = Chunk.update_data(text=text, message=mock_message, embedding=embedding, save=True) assert result is None def test_update_data_no_save(self, mocker): """Test update_data method with save=False.""" mock_message = create_model_mock(Message) - chunk_text = "Test chunk content" + text = "Test chunk content" embedding = [0.1, 0.2, 0.3] mocker.patch( @@ -124,12 +101,12 @@ def test_update_data_no_save(self, mocker): with patch.object(Chunk, "message", create=True): result = Chunk.update_data( - chunk_text=chunk_text, message=mock_message, embedding=embedding, save=False + text=text, message=mock_message, embedding=embedding, save=False ) assert result is not None assert isinstance(result, Chunk) - assert result.chunk_text == chunk_text + assert result.text == text assert result.message == mock_message assert result.embedding == embedding patched_save.assert_not_called() @@ -137,8 +114,8 @@ def test_update_data_no_save(self, mocker): def test_meta_class_attributes(self): """Test the Meta class attributes of the Chunk model.""" assert Chunk._meta.db_table == "ai_chunks" - assert Chunk._meta.verbose_name == "Chunks" - assert ("message", "chunk_text") in Chunk._meta.unique_together + assert Chunk._meta.verbose_name == "Chunk" + assert ("message", "text") in Chunk._meta.unique_together def test_message_foreign_key_relationship(self): """Test the foreign key relationship with Message model."""