@@ -264,6 +264,7 @@ async def test_query_endpoint_handler_with_referenced_documents(mocker):
264264 assert response .response == llm_response
265265 assert response .conversation_id == conversation_id
266266 assert response .referenced_documents == referenced_documents
267+ assert all (isinstance (d , ReferencedDocument ) for d in response .referenced_documents )
267268 assert len (response .referenced_documents ) == 2
268269 assert response .referenced_documents [0 ].doc_title == "Doc1"
269270 assert response .referenced_documents [1 ].doc_title == "Doc2"
@@ -1727,6 +1728,71 @@ def test_process_knowledge_search_content_with_none_content(mocker):
17271728 assert len (metadata_map ) == 0
17281729
17291730
1731+ def test_process_knowledge_search_content_duplicate_document_id_last_wins (mocker ):
1732+ """The last metadata block for a given document_id should win."""
1733+ text_items = [
1734+ mocker .Mock (
1735+ text = "Metadata: {'docs_url': 'https://example.com/first', "
1736+ "'title': 'First', 'document_id': 'doc-x'}"
1737+ ),
1738+ mocker .Mock (
1739+ text = "Metadata: {'docs_url': 'https://example.com/second', "
1740+ "'title': 'Second', 'document_id': 'doc-x'}"
1741+ ),
1742+ ]
1743+ tool_response = mocker .Mock ()
1744+ tool_response .tool_name = "knowledge_search"
1745+ tool_response .content = text_items
1746+
1747+ # Process content
1748+ metadata_map = _process_knowledge_search_content (tool_response )
1749+ assert metadata_map ["doc-x" ]["docs_url" ] == "https://example.com/second"
1750+ assert metadata_map ["doc-x" ]["title" ] == "Second"
1751+
1752+ # Ensure extraction reflects last-wins as well
1753+ step = mocker .Mock ()
1754+ step .step_type = "tool_execution"
1755+ step .tool_responses = [tool_response ]
1756+ docs = extract_referenced_documents_from_steps ([step ])
1757+ assert len (docs ) == 1
1758+ assert str (docs [0 ].doc_url ) == "https://example.com/second"
1759+ assert docs [0 ].doc_title == "Second"
1760+
1761+
1762+ def test_process_knowledge_search_content_with_braces_inside_strings (mocker ):
1763+ """Test that braces inside strings are handled correctly."""
1764+ text_content_item = mocker .Mock ()
1765+ text_content_item .text = """Result 1
1766+ Content: Test content
1767+ metadata: {'document_id': 'doc-100', 'title': 'A {weird} title', "
1768+ "'docs_url': 'https://example.com/100', 'extra': {'note': 'contains {braces}'}}"""
1769+ tool_response = mocker .Mock ()
1770+ tool_response .content = [text_content_item ]
1771+
1772+ metadata_map = _process_knowledge_search_content (tool_response )
1773+ assert "doc-100" in metadata_map
1774+ assert metadata_map ["doc-100" ]["title" ] == "A {weird} title"
1775+ assert metadata_map ["doc-100" ]["docs_url" ] == "https://example.com/100"
1776+ assert metadata_map ["doc-100" ]["extra" ]["note" ] == "contains {braces}"
1777+
1778+
1779+ def test_process_knowledge_search_content_with_nested_objects (mocker ):
1780+ """Test that nested objects are parsed correctly."""
1781+ text_content_item = mocker .Mock ()
1782+ text_content_item .text = """Result 1
1783+ Content: Test content
1784+ MetaData: {"document_id": "doc-200", "title": "Nested JSON", "
1785+ ""docs_url": "https://example.com/200", "meta": {"k": {"inner": 1}}}"""
1786+ tool_response = mocker .Mock ()
1787+ tool_response .content = [text_content_item ]
1788+
1789+ metadata_map = _process_knowledge_search_content (tool_response )
1790+ assert "doc-200" in metadata_map
1791+ assert metadata_map ["doc-200" ]["title" ] == "Nested JSON"
1792+ assert metadata_map ["doc-200" ]["docs_url" ] == "https://example.com/200"
1793+ assert metadata_map ["doc-200" ]["meta" ]["k" ]["inner" ] == 1
1794+
1795+
17301796@pytest .mark .asyncio
17311797async def test_retrieve_response_with_none_content (prepare_agent_mocks , mocker ):
17321798 """Test retrieve_response handles None content gracefully."""
@@ -1880,6 +1946,9 @@ async def test_retrieve_response_with_structured_content_object(
18801946@pytest .mark .asyncio
18811947async def test_retrieve_response_skips_invalid_docs_url (prepare_agent_mocks , mocker ):
18821948 """Test that retrieve_response skips entries with invalid docs_url."""
1949+ # Mock logger to capture warning logs
1950+ mock_logger = mocker .patch ("app.endpoints.query.logger" )
1951+
18831952 mock_client , mock_agent = prepare_agent_mocks
18841953 mock_agent .create_turn .return_value .output_message .content = "LLM answer"
18851954 mock_client .shields .list .return_value = []
@@ -1940,6 +2009,13 @@ async def test_retrieve_response_skips_invalid_docs_url(prepare_agent_mocks, moc
19402009 assert str (referenced_documents [0 ].doc_url ) == "https://example.com/doc1"
19412010 assert referenced_documents [0 ].doc_title == "Valid Doc"
19422011
2012+ # Ensure we logged a warning for the invalid docs_url
2013+ assert any (
2014+ call [0 ][0 ].startswith ("Skipping invalid referenced document" )
2015+ or "Skipping invalid referenced document" in str (call )
2016+ for call in mock_logger .warning .call_args_list
2017+ )
2018+
19432019
19442020@pytest .mark .asyncio
19452021async def test_extract_referenced_documents_from_steps_handles_validation_errors (
0 commit comments