Skip to content

Commit 8c0a1d0

Browse files
committed
code rabbit fixes and additional tests
1 parent f022501 commit 8c0a1d0

File tree

2 files changed

+147
-17
lines changed

2 files changed

+147
-17
lines changed

src/app/endpoints/query.py

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
router = APIRouter(tags=["query"])
4444
auth_dependency = get_auth_dependency()
4545

46-
METADATA_PATTERN = re.compile(r"\nMetadata: (\{.+})\n")
46+
METADATA_PATTERN = re.compile(r"^Metadata:\s*(\{.*?\})\s*$", re.MULTILINE)
4747

4848

4949
def _process_knowledge_search_content(
@@ -78,28 +78,20 @@ def extract_referenced_documents_from_steps(steps: list) -> list[dict[str, str]]
7878
metadata_map: dict[str, dict[str, Any]] = {}
7979

8080
for step in steps:
81-
if step.step_type != "tool_execution" or not hasattr(step, "tool_responses"):
81+
if getattr(step, "step_type", "") != "tool_execution" or not hasattr(step, "tool_responses"):
8282
continue
8383

84-
for tool_response in step.tool_responses:
85-
if (
86-
tool_response.tool_name != "knowledge_search"
87-
or not tool_response.content
88-
):
84+
for tool_response in getattr(step, "tool_responses", []) or []:
85+
if getattr(tool_response, "tool_name", "") != "knowledge_search" or not getattr(tool_response, "content", []):
8986
continue
9087

9188
_process_knowledge_search_content(tool_response, metadata_map)
9289

9390
# Extract referenced documents from metadata
9491
return [
95-
{
96-
"doc_url": v["docs_url"],
97-
"doc_title": v["title"],
98-
}
99-
for v in filter(
100-
lambda v: ("docs_url" in v) and ("title" in v),
101-
metadata_map.values(),
102-
)
92+
{"doc_url": v["docs_url"], "doc_title": v["title"]}
93+
for v in metadata_map.values()
94+
if "docs_url" in v and "title" in v
10395
]
10496

10597

@@ -480,10 +472,9 @@ async def retrieve_response( # pylint: disable=too-many-locals
480472
# Check for validation errors and extract referenced documents
481473
steps = getattr(response, "steps", [])
482474
for step in steps:
483-
if step.step_type == "shield_call" and step.violation:
475+
if getattr(step, "step_type", "") == "shield_call" and getattr(step, "violation", False):
484476
# Metric for LLM validation errors
485477
metrics.llm_calls_validation_errors_total.inc()
486-
break
487478

488479
# Extract referenced documents from tool execution steps
489480
referenced_documents = extract_referenced_documents_from_steps(steps)

tests/unit/app/endpoints/test_query.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
store_transcript,
2323
get_rag_toolgroups,
2424
evaluate_model_hints,
25+
_process_knowledge_search_content,
2526
)
2627

2728
from models.requests import QueryRequest, Attachment
@@ -1583,3 +1584,141 @@ def test_evaluate_model_hints(
15831584

15841585
assert provider_id == expected_provider
15851586
assert model_id == expected_model
1587+
1588+
1589+
def test_process_knowledge_search_content_with_valid_metadata(mocker):
1590+
"""Test _process_knowledge_search_content with valid metadata."""
1591+
# Mock tool response with valid metadata
1592+
text_content_item = mocker.Mock()
1593+
text_content_item.text = """Result 1
1594+
Content: Test content
1595+
Metadata: {'docs_url': 'https://example.com/doc1', 'title': 'Test Doc', 'document_id': 'doc-1'}
1596+
"""
1597+
1598+
tool_response = mocker.Mock()
1599+
tool_response.content = [text_content_item]
1600+
1601+
metadata_map = {}
1602+
1603+
_process_knowledge_search_content(tool_response, metadata_map)
1604+
1605+
# Verify metadata was correctly parsed and added
1606+
assert "doc-1" in metadata_map
1607+
assert metadata_map["doc-1"]["docs_url"] == "https://example.com/doc1"
1608+
assert metadata_map["doc-1"]["title"] == "Test Doc"
1609+
assert metadata_map["doc-1"]["document_id"] == "doc-1"
1610+
1611+
1612+
def test_process_knowledge_search_content_with_invalid_metadata_syntax_error(mocker):
1613+
"""Test _process_knowledge_search_content handles SyntaxError from invalid metadata."""
1614+
mock_logger = mocker.patch("app.endpoints.query.logger")
1615+
1616+
# Mock tool response with invalid metadata (invalid Python syntax)
1617+
text_content_item = mocker.Mock()
1618+
text_content_item.text = """Result 1
1619+
Content: Test content
1620+
Metadata: {'docs_url': 'https://example.com/doc1' 'title': 'Test Doc', 'document_id': 'doc-1'}
1621+
""" # Missing comma between 'doc1' and 'title' - will cause SyntaxError
1622+
1623+
tool_response = mocker.Mock()
1624+
tool_response.content = [text_content_item]
1625+
1626+
metadata_map = {}
1627+
1628+
_process_knowledge_search_content(tool_response, metadata_map)
1629+
1630+
# Verify metadata_map remains empty due to exception
1631+
assert len(metadata_map) == 0
1632+
1633+
# Verify debug logging was called
1634+
mock_logger.debug.assert_called_once()
1635+
args = mock_logger.debug.call_args[0]
1636+
assert "An exception was thrown in processing" in args[0]
1637+
1638+
1639+
def test_process_knowledge_search_content_with_invalid_metadata_value_error(mocker):
1640+
"""Test _process_knowledge_search_content handles ValueError from invalid metadata."""
1641+
mock_logger = mocker.patch("app.endpoints.query.logger")
1642+
1643+
# Mock tool response with invalid metadata containing complex expressions
1644+
text_content_item = mocker.Mock()
1645+
text_content_item.text = """Result 1
1646+
Content: Test content
1647+
Metadata: {func_call(): 'value', 'title': 'Test Doc', 'document_id': 'doc-1'}
1648+
""" # Function call in dict - will cause ValueError since it's not a literal
1649+
1650+
tool_response = mocker.Mock()
1651+
tool_response.content = [text_content_item]
1652+
1653+
metadata_map = {}
1654+
1655+
_process_knowledge_search_content(tool_response, metadata_map)
1656+
1657+
# Verify metadata_map remains empty due to exception
1658+
assert len(metadata_map) == 0
1659+
1660+
# Verify debug logging was called
1661+
mock_logger.debug.assert_called_once()
1662+
args = mock_logger.debug.call_args[0]
1663+
assert "An exception was thrown in processing" in args[0]
1664+
1665+
1666+
def test_process_knowledge_search_content_with_non_dict_metadata(mocker):
1667+
"""Test _process_knowledge_search_content handles non-dict metadata gracefully."""
1668+
mock_logger = mocker.patch("app.endpoints.query.logger")
1669+
1670+
# Mock tool response with metadata that's not a dict
1671+
text_content_item = mocker.Mock()
1672+
text_content_item.text = """Result 1
1673+
Content: Test content
1674+
Metadata: "just a string"
1675+
"""
1676+
1677+
tool_response = mocker.Mock()
1678+
tool_response.content = [text_content_item]
1679+
1680+
metadata_map = {}
1681+
1682+
_process_knowledge_search_content(tool_response, metadata_map)
1683+
1684+
# Verify metadata_map remains empty (no document_id in string)
1685+
assert len(metadata_map) == 0
1686+
1687+
# No exception should be logged since string is valid literal
1688+
mock_logger.debug.assert_not_called()
1689+
1690+
1691+
def test_process_knowledge_search_content_with_metadata_missing_document_id(mocker):
1692+
"""Test _process_knowledge_search_content skips metadata without document_id."""
1693+
# Mock tool response with valid metadata but missing document_id
1694+
text_content_item = mocker.Mock()
1695+
text_content_item.text = """Result 1
1696+
Content: Test content
1697+
Metadata: {'docs_url': 'https://example.com/doc1', 'title': 'Test Doc'}
1698+
""" # No document_id field
1699+
1700+
tool_response = mocker.Mock()
1701+
tool_response.content = [text_content_item]
1702+
1703+
metadata_map = {}
1704+
1705+
_process_knowledge_search_content(tool_response, metadata_map)
1706+
1707+
# Verify metadata_map remains empty since document_id is missing
1708+
assert len(metadata_map) == 0
1709+
1710+
1711+
def test_process_knowledge_search_content_with_no_text_attribute(mocker):
1712+
"""Test _process_knowledge_search_content skips content items without text attribute."""
1713+
# Mock tool response with content item that has no text attribute
1714+
text_content_item = mocker.Mock(spec=[]) # spec=[] means no attributes
1715+
1716+
tool_response = mocker.Mock()
1717+
tool_response.content = [text_content_item]
1718+
1719+
metadata_map = {}
1720+
1721+
_process_knowledge_search_content(tool_response, metadata_map)
1722+
1723+
# Verify metadata_map remains empty since text attribute is missing
1724+
assert len(metadata_map) == 0

0 commit comments

Comments
 (0)