Skip to content

Commit 93673b7

Browse files
committed
fix: fixed the CRD doc queries + added tests
1 parent b427e38 commit 93673b7

21 files changed

+454
-326
lines changed

Diff for: agents-api/agents_api/autogen/Docs.py

+24
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,30 @@ class Doc(BaseModel):
7373
"""
7474
Embeddings for the document
7575
"""
76+
modality: Annotated[str | None, Field(json_schema_extra={"readOnly": True})] = None
77+
"""
78+
Modality of the document
79+
"""
80+
language: Annotated[str | None, Field(json_schema_extra={"readOnly": True})] = None
81+
"""
82+
Language of the document
83+
"""
84+
index: Annotated[int | None, Field(json_schema_extra={"readOnly": True})] = None
85+
"""
86+
Index of the document
87+
"""
88+
embedding_model: Annotated[
89+
str | None, Field(json_schema_extra={"readOnly": True})
90+
] = None
91+
"""
92+
Embedding model to use for the document
93+
"""
94+
embedding_dimensions: Annotated[
95+
int | None, Field(json_schema_extra={"readOnly": True})
96+
] = None
97+
"""
98+
Dimensions of the embedding model
99+
"""
76100

77101

78102
class DocOwner(BaseModel):

Diff for: agents-api/agents_api/queries/docs/__init__.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,15 @@
1818

1919
from .create_doc import create_doc
2020
from .delete_doc import delete_doc
21-
from .embed_snippets import embed_snippets
2221
from .get_doc import get_doc
2322
from .list_docs import list_docs
24-
from .search_docs_by_embedding import search_docs_by_embedding
25-
from .search_docs_by_text import search_docs_by_text
23+
# from .search_docs_by_embedding import search_docs_by_embedding
24+
# from .search_docs_by_text import search_docs_by_text
25+
26+
__all__ = [
27+
"create_doc",
28+
"delete_doc",
29+
"get_doc",
30+
"list_docs",
31+
# "search_docs_by_embedding", "search_docs_by_text",
32+
]

Diff for: agents-api/agents_api/queries/docs/create_doc.py

+22-18
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,4 @@
1-
"""
2-
Timescale-based creation of docs.
3-
4-
Mirrors the structure of create_file.py, but uses the docs/doc_owners tables.
5-
"""
6-
7-
import base64
8-
import hashlib
9-
from typing import Any, Literal
1+
from typing import Literal
102
from uuid import UUID
113

124
import asyncpg
@@ -15,6 +7,9 @@
157
from sqlglot import parse_one
168
from uuid_extensions import uuid7
179

10+
import ast
11+
12+
1813
from ...autogen.openapi_model import CreateDocRequest, Doc
1914
from ...metrics.counters import increase_counter
2015
from ..utils import partialclass, pg_query, rewrap_exceptions, wrap_in_class
@@ -91,7 +86,7 @@
9186
transform=lambda d: {
9287
**d,
9388
"id": d["doc_id"],
94-
# You could optionally return a computed hash or partial content if desired
89+
"content": ast.literal_eval(d["content"])[0] if len(ast.literal_eval(d["content"])) == 1 else ast.literal_eval(d["content"]),
9590
},
9691
)
9792
@increase_counter("create_doc")
@@ -102,26 +97,35 @@ async def create_doc(
10297
developer_id: UUID,
10398
doc_id: UUID | None = None,
10499
data: CreateDocRequest,
105-
owner_type: Literal["user", "agent", "org"] | None = None,
100+
owner_type: Literal["user", "agent"] | None = None,
106101
owner_id: UUID | None = None,
107-
) -> list[tuple[str, list]]:
102+
modality: Literal["text", "image", "mixed"] | None = "text",
103+
embedding_model: str | None = "voyage-3",
104+
embedding_dimensions: int | None = 1024,
105+
language: str | None = "english",
106+
index: int | None = 0,
107+
) -> list[tuple[str, list] | tuple[str, list, str]]:
108108
"""
109109
Insert a new doc record into Timescale and optionally associate it with an owner.
110110
"""
111111
# Generate a UUID if not provided
112112
doc_id = doc_id or uuid7()
113113

114+
# check if content is a string
115+
if isinstance(data.content, str):
116+
data.content = [data.content]
117+
114118
# Create the doc record
115119
doc_params = [
116120
developer_id,
117121
doc_id,
118122
data.title,
119-
data.content,
120-
data.index or 0, # fallback if no snippet index
121-
data.modality or "text",
122-
data.embedding_model or "none",
123-
data.embedding_dimensions or 0,
124-
data.language or "english",
123+
str(data.content),
124+
index,
125+
modality,
126+
embedding_model,
127+
embedding_dimensions,
128+
language,
125129
data.metadata or {},
126130
]
127131

Diff for: agents-api/agents_api/queries/docs/delete_doc.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
"""
2-
Timescale-based deletion of a doc record.
3-
"""
4-
51
from typing import Literal
62
from uuid import UUID
73

@@ -65,7 +61,7 @@ async def delete_doc(
6561
*,
6662
developer_id: UUID,
6763
doc_id: UUID,
68-
owner_type: Literal["user", "agent", "org"] | None = None,
64+
owner_type: Literal["user", "agent"] | None = None,
6965
owner_id: UUID | None = None,
7066
) -> tuple[str, list]:
7167
"""

Diff for: agents-api/agents_api/queries/docs/get_doc.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,22 @@
1-
"""
2-
Timescale-based retrieval of a single doc record.
3-
"""
4-
51
from typing import Literal
62
from uuid import UUID
73

8-
import asyncpg
94
from beartype import beartype
10-
from fastapi import HTTPException
115
from sqlglot import parse_one
6+
import ast
127

138
from ...autogen.openapi_model import Doc
149
from ..utils import pg_query, wrap_in_class
1510

1611
doc_query = parse_one("""
1712
SELECT d.*
1813
FROM docs d
19-
LEFT JOIN doc_owners do ON d.developer_id = do.developer_id AND d.doc_id = do.doc_id
14+
LEFT JOIN doc_owners doc_own ON d.developer_id = doc_own.developer_id AND d.doc_id = doc_own.doc_id
2015
WHERE d.developer_id = $1
2116
AND d.doc_id = $2
2217
AND (
2318
($3::text IS NULL AND $4::uuid IS NULL)
24-
OR (do.owner_type = $3 AND do.owner_id = $4)
19+
OR (doc_own.owner_type = $3 AND doc_own.owner_id = $4)
2520
)
2621
LIMIT 1;
2722
""").sql(pretty=True)
@@ -33,6 +28,8 @@
3328
transform=lambda d: {
3429
**d,
3530
"id": d["doc_id"],
31+
"content": ast.literal_eval(d["content"])[0] if len(ast.literal_eval(d["content"])) == 1 else ast.literal_eval(d["content"]),
32+
# "embeddings": d["embeddings"],
3633
},
3734
)
3835
@pg_query
@@ -41,7 +38,7 @@ async def get_doc(
4138
*,
4239
developer_id: UUID,
4340
doc_id: UUID,
44-
owner_type: Literal["user", "agent", "org"] | None = None,
41+
owner_type: Literal["user", "agent"] | None = None,
4542
owner_id: UUID | None = None,
4643
) -> tuple[str, list]:
4744
"""

Diff for: agents-api/agents_api/queries/docs/list_docs.py

+36-45
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,20 @@
1-
"""
2-
Timescale-based listing of docs with optional owner filter and pagination.
3-
"""
4-
5-
from typing import Literal
1+
from typing import Any, Literal
62
from uuid import UUID
73

8-
import asyncpg
94
from beartype import beartype
105
from fastapi import HTTPException
116
from sqlglot import parse_one
7+
import ast
128

139
from ...autogen.openapi_model import Doc
1410
from ..utils import pg_query, wrap_in_class
1511

16-
# Basic listing for all docs by developer
17-
developer_docs_query = parse_one("""
12+
# Base query for listing docs
13+
base_docs_query = parse_one("""
1814
SELECT d.*
1915
FROM docs d
20-
LEFT JOIN doc_owners do ON d.developer_id = do.developer_id AND d.doc_id = do.doc_id
16+
LEFT JOIN doc_owners doc_own ON d.developer_id = doc_own.developer_id AND d.doc_id = doc_own.doc_id
2117
WHERE d.developer_id = $1
22-
ORDER BY
23-
CASE
24-
WHEN $4 = 'created_at' AND $5 = 'asc' THEN d.created_at
25-
WHEN $4 = 'created_at' AND $5 = 'desc' THEN d.created_at
26-
WHEN $4 = 'updated_at' AND $5 = 'asc' THEN d.updated_at
27-
WHEN $4 = 'updated_at' AND $5 = 'desc' THEN d.updated_at
28-
END DESC NULLS LAST
29-
LIMIT $2
30-
OFFSET $3;
31-
""").sql(pretty=True)
32-
33-
# Listing for docs associated with a specific owner
34-
owner_docs_query = parse_one("""
35-
SELECT d.*
36-
FROM docs d
37-
JOIN doc_owners do ON d.developer_id = do.developer_id AND d.doc_id = do.doc_id
38-
WHERE do.developer_id = $1
39-
AND do.owner_id = $6
40-
AND do.owner_type = $7
41-
ORDER BY
42-
CASE
43-
WHEN $4 = 'created_at' AND $5 = 'asc' THEN d.created_at
44-
WHEN $4 = 'created_at' AND $5 = 'desc' THEN d.created_at
45-
WHEN $4 = 'updated_at' AND $5 = 'asc' THEN d.updated_at
46-
WHEN $4 = 'updated_at' AND $5 = 'desc' THEN d.updated_at
47-
END DESC NULLS LAST
48-
LIMIT $2
49-
OFFSET $3;
5018
""").sql(pretty=True)
5119

5220

@@ -56,6 +24,8 @@
5624
transform=lambda d: {
5725
**d,
5826
"id": d["doc_id"],
27+
"content": ast.literal_eval(d["content"])[0] if len(ast.literal_eval(d["content"])) == 1 else ast.literal_eval(d["content"]),
28+
# "embeddings": d["embeddings"],
5929
},
6030
)
6131
@pg_query
@@ -64,29 +34,50 @@ async def list_docs(
6434
*,
6535
developer_id: UUID,
6636
owner_id: UUID | None = None,
67-
owner_type: Literal["user", "agent", "org"] | None = None,
37+
owner_type: Literal["user", "agent"] | None = None,
6838
limit: int = 100,
6939
offset: int = 0,
7040
sort_by: Literal["created_at", "updated_at"] = "created_at",
7141
direction: Literal["asc", "desc"] = "desc",
42+
metadata_filter: dict[str, Any] = {},
43+
include_without_embeddings: bool = False,
7244
) -> tuple[str, list]:
7345
"""
7446
Lists docs with optional owner filtering, pagination, and sorting.
7547
"""
7648
if direction.lower() not in ["asc", "desc"]:
7749
raise HTTPException(status_code=400, detail="Invalid sort direction")
7850

51+
if sort_by not in ["created_at", "updated_at"]:
52+
raise HTTPException(status_code=400, detail="Invalid sort field")
53+
7954
if limit > 100 or limit < 1:
8055
raise HTTPException(status_code=400, detail="Limit must be between 1 and 100")
8156

8257
if offset < 0:
8358
raise HTTPException(status_code=400, detail="Offset must be >= 0")
8459

85-
params = [developer_id, limit, offset, sort_by, direction]
86-
if owner_id and owner_type:
87-
params.extend([owner_id, owner_type])
88-
query = owner_docs_query
89-
else:
90-
query = developer_docs_query
60+
# Start with the base query
61+
query = base_docs_query
62+
params = [developer_id]
63+
64+
# Add owner filtering
65+
if owner_type and owner_id:
66+
query += " AND doc_own.owner_type = $2 AND doc_own.owner_id = $3"
67+
params.extend([owner_type, owner_id])
68+
69+
# Add metadata filtering
70+
if metadata_filter:
71+
for key, value in metadata_filter.items():
72+
query += f" AND d.metadata->>'{key}' = ${len(params) + 1}"
73+
params.append(value)
74+
75+
# Include or exclude documents without embeddings
76+
# if not include_without_embeddings:
77+
# query += " AND d.embeddings IS NOT NULL"
78+
79+
# Add sorting and pagination
80+
query += f" ORDER BY {sort_by} {direction} LIMIT ${len(params) + 1} OFFSET ${len(params) + 2}"
81+
params.extend([limit, offset])
9182

92-
return (query, params)
83+
return query, params

Diff for: agents-api/agents_api/queries/docs/search_docs_by_embedding.py

-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from typing import List, Literal
66
from uuid import UUID
77

8-
import asyncpg
98
from beartype import beartype
109
from fastapi import HTTPException
1110
from sqlglot import parse_one

Diff for: agents-api/agents_api/queries/docs/search_docs_by_text.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from typing import Literal
66
from uuid import UUID
77

8-
import asyncpg
98
from beartype import beartype
109
from fastapi import HTTPException
1110
from sqlglot import parse_one
@@ -22,7 +21,7 @@
2221
AND d.doc_id = do.doc_id
2322
WHERE d.developer_id = $1
2423
AND (
25-
($4::text IS NULL AND $5::uuid IS NULL)
24+
($4 IS NULL AND $5 IS NULL)
2625
OR (do.owner_type = $4 AND do.owner_id = $5)
2726
)
2827
AND d.search_tsv @@ websearch_to_tsquery($3)

Diff for: agents-api/agents_api/queries/docs/search_docs_hybrid.py

-2
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,8 @@
77
from uuid import UUID
88

99
from beartype import beartype
10-
from fastapi import HTTPException
1110

1211
from ...autogen.openapi_model import Doc
13-
from ..utils import run_concurrently
1412
from .search_docs_by_embedding import search_docs_by_embedding
1513
from .search_docs_by_text import search_docs_by_text
1614

Diff for: agents-api/agents_api/queries/entries/get_history.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import json
2-
from typing import Any, List, Tuple
32
from uuid import UUID
43

54
import asyncpg

Diff for: agents-api/agents_api/queries/files/get_file.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,11 @@
66
from typing import Literal
77
from uuid import UUID
88

9-
import asyncpg
109
from beartype import beartype
11-
from fastapi import HTTPException
1210
from sqlglot import parse_one
1311

1412
from ...autogen.openapi_model import File
15-
from ..utils import partialclass, pg_query, rewrap_exceptions, wrap_in_class
13+
from ..utils import pg_query, wrap_in_class
1614

1715
# Define the raw SQL query
1816
file_query = parse_one("""
@@ -47,8 +45,8 @@
4745
File,
4846
one=True,
4947
transform=lambda d: {
50-
"id": d["file_id"],
5148
**d,
49+
"id": d["file_id"],
5250
"hash": d["hash"].hex(),
5351
"content": "DUMMY: NEED TO FETCH CONTENT FROM BLOB STORAGE",
5452
},

0 commit comments

Comments
 (0)