Skip to content

feat(agents-api): Add Doc sql queries #979

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions agents-api/agents_api/autogen/Docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,30 @@ class Doc(BaseModel):
"""
Embeddings for the document
"""
modality: Annotated[str | None, Field(json_schema_extra={"readOnly": True})] = None
"""
Modality of the document
"""
language: Annotated[str | None, Field(json_schema_extra={"readOnly": True})] = None
"""
Language of the document
"""
index: Annotated[int | None, Field(json_schema_extra={"readOnly": True})] = None
"""
Index of the document
"""
embedding_model: Annotated[
str | None, Field(json_schema_extra={"readOnly": True})
] = None
"""
Embedding model to use for the document
"""
embedding_dimensions: Annotated[
int | None, Field(json_schema_extra={"readOnly": True})
] = None
"""
Dimensions of the embedding model
"""


class DocOwner(BaseModel):
Expand Down
32 changes: 32 additions & 0 deletions agents-api/agents_api/queries/docs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
Module: agents_api/queries/docs

This module is responsible for managing document-related operations within the application, particularly for agents and possibly other entities. It serves as a core component of the document management system, enabling features such as document creation, listing, deletion, and embedding of snippets for enhanced search and retrieval capabilities.

Main functionalities include:
- Creating new documents and associating them with agents or users.
- Listing documents based on various criteria, including ownership and metadata filters.
- Deleting documents by their unique identifiers.
- Embedding document snippets for retrieval purposes.

The module interacts with other parts of the application, such as the agents and users modules, to provide a comprehensive document management system. Its role is crucial in enabling document search, retrieval, and management features within the context of agents and users.

This documentation aims to provide clear, concise, and sufficient context for new developers or contributors to understand the module's role without needing to dive deep into the code immediately.
"""

# ruff: noqa: F401, F403, F405

from .create_doc import create_doc
from .delete_doc import delete_doc
from .get_doc import get_doc
from .list_docs import list_docs
# from .search_docs_by_embedding import search_docs_by_embedding
# from .search_docs_by_text import search_docs_by_text

__all__ = [
"create_doc",
"delete_doc",
"get_doc",
"list_docs",
# "search_docs_by_embedding",
# "search_docs_by_text",
]
139 changes: 139 additions & 0 deletions agents-api/agents_api/queries/docs/create_doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
from typing import Literal
from uuid import UUID

import asyncpg
from beartype import beartype
from fastapi import HTTPException
from sqlglot import parse_one
from uuid_extensions import uuid7

import ast


from ...autogen.openapi_model import CreateDocRequest, Doc
from ...metrics.counters import increase_counter
from ..utils import partialclass, pg_query, rewrap_exceptions, wrap_in_class

# Base INSERT for docs
# Inserts a single row into `docs` from ten positional parameters ($1..$10,
# labelled inline below) and returns the inserted row via RETURNING *.
# The SQL text is parsed by sqlglot at module import and re-rendered with
# `.sql(pretty=True)`, so `doc_query` is a plain SQL string, not an AST.
doc_query = parse_one("""
INSERT INTO docs (
developer_id,
doc_id,
title,
content,
index,
modality,
embedding_model,
embedding_dimensions,
language,
metadata
)
VALUES (
$1, -- developer_id
$2, -- doc_id
$3, -- title
$4, -- content
$5, -- index
$6, -- modality
$7, -- embedding_model
$8, -- embedding_dimensions
$9, -- language
$10 -- metadata (JSONB)
)
RETURNING *;
""").sql(pretty=True)

# Owner association query for doc_owners
# Parameters: $1 developer_id, $2 doc_id, $3 owner_type, $4 owner_id.
# The CTE inserts the ownership row and returns its doc_id; the outer SELECT
# joins that doc_id back to `docs`, so the statement yields the doc row itself
# rather than the doc_owners row.
# NOTE(review): the join back to `docs` uses doc_id only (not developer_id);
# confirm doc_id is unique across developers.
doc_owner_query = parse_one("""
WITH inserted_owner AS (
INSERT INTO doc_owners (
developer_id,
doc_id,
owner_type,
owner_id
)
VALUES ($1, $2, $3, $4)
RETURNING doc_id
)
SELECT d.*
FROM inserted_owner io
JOIN docs d ON d.doc_id = io.doc_id;
""").sql(pretty=True)


def _created_row_to_doc(row) -> dict:
    """Map a raw ``docs`` row onto the keyword arguments used to build ``Doc``.

    ``content`` is written by ``create_doc`` as ``str(list_of_strings)``, so it
    is read back here with ``ast.literal_eval``. The stored text is parsed
    exactly once; a single-element list is collapsed to its only item,
    otherwise the whole list is returned.

    NOTE(review): ``ast.literal_eval`` does not execute code, but it raises
    ``ValueError``/``SyntaxError`` on text that is not a Python literal, so
    rows whose ``content`` was not written by ``create_doc`` will fail here.
    """
    content = ast.literal_eval(row["content"])
    return {
        **row,
        "id": row["doc_id"],
        "content": content[0] if len(content) == 1 else content,
    }


@rewrap_exceptions(
    {
        asyncpg.UniqueViolationError: partialclass(
            HTTPException,
            status_code=409,
            detail="A document with this ID already exists for this developer",
        ),
        asyncpg.NoDataFoundError: partialclass(
            HTTPException,
            status_code=404,
            detail="The specified owner does not exist",
        ),
        asyncpg.ForeignKeyViolationError: partialclass(
            HTTPException,
            status_code=404,
            detail="Developer or doc owner not found",
        ),
    }
)
@wrap_in_class(
    Doc,
    one=True,
    transform=_created_row_to_doc,
)
@increase_counter("create_doc")
@pg_query
@beartype
async def create_doc(
    *,
    developer_id: UUID,
    doc_id: UUID | None = None,
    data: CreateDocRequest,
    owner_type: Literal["user", "agent"] | None = None,
    owner_id: UUID | None = None,
    modality: Literal["text", "image", "mixed"] | None = "text",
    embedding_model: str | None = "voyage-3",
    embedding_dimensions: int | None = 1024,
    language: str | None = "english",
    index: int | None = 0,
) -> list[tuple[str, list] | tuple[str, list, str]]:
    """
    Insert a new doc record into Timescale and optionally associate it with an owner.

    Parameters:
        developer_id: Developer that owns the doc.
        doc_id: ID for the new doc; a UUIDv7 is generated when omitted.
        data: Request payload supplying ``title``, ``content`` and ``metadata``.
        owner_type: Kind of owner ("user" or "agent") to associate with the doc.
        owner_id: ID of the owner. The association query is only added when
            both ``owner_type`` and ``owner_id`` are provided.
        modality: Value stored in the ``modality`` column.
        embedding_model: Value stored in the ``embedding_model`` column.
        embedding_dimensions: Value stored in the ``embedding_dimensions`` column.
        language: Value stored in the ``language`` column.
        index: Value stored in the ``index`` column.

    Returns:
        A list of ``(sql, params)`` pairs handed to the ``pg_query`` decorator:
        the doc INSERT, followed by the owner association when requested.

    Raises:
        HTTPException: Via the ``rewrap_exceptions`` mapping above — 409 for a
            unique violation, 404 for a missing owner or a foreign-key
            violation.
    """
    # Generate a UUID if not provided
    doc_id = doc_id or uuid7()

    # Normalise content to a list in a local variable so the caller's request
    # object is left untouched (the previous code overwrote data.content).
    content = [data.content] if isinstance(data.content, str) else data.content

    # Parameter order must match $1..$10 in doc_query.
    doc_params = [
        developer_id,
        doc_id,
        data.title,
        # Stored as the list's repr; read back with ast.literal_eval.
        str(content),
        index,
        modality,
        embedding_model,
        embedding_dimensions,
        language,
        data.metadata or {},
    ]

    queries = [(doc_query, doc_params)]

    # If an owner is specified, associate it:
    if owner_type and owner_id:
        owner_params = [developer_id, doc_id, owner_type, owner_id]
        queries.append((doc_owner_query, owner_params))

    return queries
74 changes: 74 additions & 0 deletions agents-api/agents_api/queries/docs/delete_doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from typing import Literal
from uuid import UUID

import asyncpg
from beartype import beartype
from fastapi import HTTPException
from sqlglot import parse_one

from ...autogen.openapi_model import ResourceDeletedResponse
from ...common.utils.datetime import utcnow
from ..utils import partialclass, pg_query, rewrap_exceptions, wrap_in_class

# Delete doc query + ownership check
# Parameters (in the order built by delete_doc): $1 developer_id, $2 doc_id,
# $3 owner_type, $4 owner_id.
# - The CTE removes doc_owners rows for the doc: every row when both owner
#   parameters are NULL, otherwise only rows matching owner_type/owner_id.
# - The main statement deletes the docs row when no owner_type is given, or
#   when a doc_owners row for that owner exists, and returns the doc_id.
# NOTE(review): the EXISTS subquery reads doc_owners in the same statement in
# which the CTE deletes from it. PostgreSQL documents that sub-statements of
# one WITH query share a snapshot, so presumably the rows are still visible
# here — confirm against the target database.
delete_doc_query = parse_one("""
WITH deleted_owners AS (
DELETE FROM doc_owners
WHERE developer_id = $1
AND doc_id = $2
AND (
($3::text IS NULL AND $4::uuid IS NULL)
OR (owner_type = $3 AND owner_id = $4)
)
)
DELETE FROM docs
WHERE developer_id = $1
AND doc_id = $2
AND (
$3::text IS NULL OR EXISTS (
SELECT 1 FROM doc_owners
WHERE developer_id = $1
AND doc_id = $2
AND owner_type = $3
AND owner_id = $4
)
)
RETURNING doc_id;
""").sql(pretty=True)


@rewrap_exceptions(
    {
        asyncpg.NoDataFoundError: partialclass(
            HTTPException, status_code=404, detail="Doc not found"
        )
    }
)
@wrap_in_class(
    ResourceDeletedResponse,
    one=True,
    transform=lambda row: {
        "id": row["doc_id"],
        "deleted_at": utcnow(),
        "jobs": [],
    },
)
@pg_query
@beartype
async def delete_doc(
    *,
    developer_id: UUID,
    doc_id: UUID,
    owner_type: Literal["user", "agent"] | None = None,
    owner_id: UUID | None = None,
) -> tuple[str, list]:
    """
    Build the delete statement for one doc and its doc_owners rows.

    Parameters:
        developer_id: Developer the doc belongs to.
        doc_id: ID of the doc to delete.
        owner_type: Optional owner kind ("user" or "agent") used as a filter.
        owner_id: Optional owner ID used together with ``owner_type``.

    Returns:
        The ``delete_doc_query`` SQL string together with its four positional
        parameters, in ``$1..$4`` order. The decorators above turn the
        returned ``doc_id`` into a ``ResourceDeletedResponse``.
    """
    # Order must line up with $1..$4 in delete_doc_query.
    params = [developer_id, doc_id, owner_type, owner_id]
    return delete_doc_query, params
Empty file.
50 changes: 50 additions & 0 deletions agents-api/agents_api/queries/docs/get_doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from typing import Literal
from uuid import UUID

from beartype import beartype
from sqlglot import parse_one
import ast

from ...autogen.openapi_model import Doc
from ..utils import pg_query, wrap_in_class

# Fetch at most one docs row (LIMIT 1) for developer $1 and doc $2.
# The LEFT JOIN to doc_owners supports the optional owner filter: when $3
# (owner_type) and $4 (owner_id) are both NULL any matching doc is returned;
# otherwise a doc_owners row with that owner_type/owner_id must exist.
# The SQL text is parsed by sqlglot at module import and re-rendered with
# `.sql(pretty=True)`, so `doc_query` is a plain SQL string.
doc_query = parse_one("""
SELECT d.*
FROM docs d
LEFT JOIN doc_owners doc_own ON d.developer_id = doc_own.developer_id AND d.doc_id = doc_own.doc_id
WHERE d.developer_id = $1
AND d.doc_id = $2
AND (
($3::text IS NULL AND $4::uuid IS NULL)
OR (doc_own.owner_type = $3 AND doc_own.owner_id = $4)
)
LIMIT 1;
""").sql(pretty=True)


def _fetched_row_to_doc(row) -> dict:
    """Map a raw ``docs`` row onto the keyword arguments used to build ``Doc``.

    ``content`` is read back with ``ast.literal_eval`` and parsed exactly once
    (the previous inline lambda parsed the same text up to three times). A
    single-element list is collapsed to its only item; otherwise the whole
    parsed value is returned.

    NOTE(review): ``ast.literal_eval`` does not execute code, but it raises
    ``ValueError``/``SyntaxError`` when the stored text is not a Python
    literal — presumably content is always written as ``str(list)``; confirm.
    """
    content = ast.literal_eval(row["content"])
    return {
        **row,
        "id": row["doc_id"],
        "content": content[0] if len(content) == 1 else content,
        # "embeddings": row["embeddings"],
    }


@wrap_in_class(
    Doc,
    one=True,
    transform=_fetched_row_to_doc,
)
@pg_query
@beartype
async def get_doc(
    *,
    developer_id: UUID,
    doc_id: UUID,
    owner_type: Literal["user", "agent"] | None = None,
    owner_id: UUID | None = None,
) -> tuple[str, list]:
    """
    Fetch a single doc, optionally constrained to a given owner.

    Parameters:
        developer_id: Developer the doc belongs to.
        doc_id: ID of the doc to fetch.
        owner_type: Optional owner kind ("user" or "agent") used as a filter.
        owner_id: Optional owner ID used together with ``owner_type``.

    Returns:
        The ``doc_query`` SQL string and its four positional parameters, in
        ``$1..$4`` order; the decorators above convert the resulting row into
        a ``Doc``.
    """
    return (
        doc_query,
        [developer_id, doc_id, owner_type, owner_id],
    )
Loading
Loading