
Commit 12becc5

coderabbit
1 parent dcf4f23 commit 12becc5

File tree

4 files changed: +153 additions, -150 deletions


src/app/endpoints/query.py

Lines changed: 9 additions & 97 deletions
@@ -7,7 +7,6 @@
 from pathlib import Path
 from typing import Annotated, Any, cast
 
-import pydantic
 
 from llama_stack_client import APIConnectionError
 from llama_stack_client import AsyncLlamaStackClient  # type: ignore
@@ -43,103 +42,15 @@
 )
 from utils.mcp_headers import mcp_headers_dependency, handle_mcp_headers_with_toolgroups
 from utils.suid import get_suid
-from utils.metadata import parse_knowledge_search_metadata
+from utils.metadata import (
+    extract_referenced_documents_from_steps,
+)
 
 logger = logging.getLogger("app.endpoints.handlers")
 router = APIRouter(tags=["query"])
 auth_dependency = get_auth_dependency()
 
 
-def _process_knowledge_search_content(tool_response: Any) -> dict[str, dict[str, Any]]:
-    """Process knowledge search tool response content for metadata.
-
-    Args:
-        tool_response: Tool response object containing content to parse
-
-    Returns:
-        Dictionary mapping document_id to metadata dict
-    """
-    metadata_map: dict[str, dict[str, Any]] = {}
-
-    # Guard against missing tool_response or content
-    if not tool_response:
-        return metadata_map
-
-    content = getattr(tool_response, "content", None)
-    if not content:
-        return metadata_map
-
-    # Ensure content is iterable
-    try:
-        iter(content)
-    except TypeError:
-        return metadata_map
-
-    for text_content_item in content:
-        # Skip items that lack a non-empty "text" attribute
-        text = getattr(text_content_item, "text", None)
-        if not text:
-            continue
-
-        try:
-            parsed_metadata = parse_knowledge_search_metadata(text)
-            metadata_map.update(parsed_metadata)
-        except ValueError:
-            logger.exception(
-                "An exception was thrown in processing metadata from text: %s",
-                text[:200] + "..." if len(text) > 200 else text,
-            )
-
-    return metadata_map
-
-
-def extract_referenced_documents_from_steps(
-    steps: list[Any],
-) -> list[ReferencedDocument]:
-    """Extract referenced documents from tool execution steps.
-
-    Args:
-        steps: List of response steps from the agent
-
-    Returns:
-        List of referenced documents with doc_url and doc_title
-    """
-    metadata_map: dict[str, dict[str, Any]] = {}
-
-    for step in steps:
-        if getattr(step, "step_type", "") != "tool_execution" or not hasattr(
-            step, "tool_responses"
-        ):
-            continue
-
-        for tool_response in getattr(step, "tool_responses", []) or []:
-            if getattr(
-                tool_response, "tool_name", ""
-            ) != "knowledge_search" or not getattr(tool_response, "content", []):
-                continue
-
-            response_metadata = _process_knowledge_search_content(tool_response)
-            metadata_map.update(response_metadata)
-
-    # Extract referenced documents from metadata with error handling
-    referenced_documents = []
-    for v in metadata_map.values():
-        if "docs_url" in v and "title" in v:
-            try:
-                doc = ReferencedDocument(doc_url=v["docs_url"], doc_title=v["title"])
-                referenced_documents.append(doc)
-            except (pydantic.ValidationError, ValueError) as e:
-                logger.warning(
-                    "Skipping invalid referenced document with docs_url='%s', title='%s': %s",
-                    v.get("docs_url", "<missing>"),
-                    v.get("title", "<missing>"),
-                    str(e),
-                )
-                continue
-
-    return referenced_documents
-
-
 query_response: dict[int | str, dict[str, Any]] = {
     200: {
         "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
@@ -516,8 +427,9 @@ async def retrieve_response(  # pylint: disable=too-many-locals,too-many-branches
         mcp_headers (dict[str, dict[str, str]], optional): Headers for multi-component processing.
 
     Returns:
-        tuple[str, str]: A tuple containing the LLM or agent's response content
-            and the conversation ID.
+        tuple[str, str, list[ReferencedDocument]]: A tuple containing the response
+            content, the conversation ID, and the list of referenced documents parsed
+            from tool execution steps.
     """
     available_input_shields = [
         shield.identifier
@@ -615,12 +527,12 @@ async def retrieve_response(  # pylint: disable=too-many-locals,too-many-branches
     # Safely guard access to output_message and content
    output_message = getattr(response_obj, "output_message", None)
     if output_message and getattr(output_message, "content", None) is not None:
-        content_str = str(output_message.content)
+        response_text = str(output_message.content)
     else:
-        content_str = ""
+        response_text = ""
 
     return (
-        content_str,
+        response_text,
         conversation_id,
         referenced_documents,
     )
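
Taken together, the query.py changes move the extraction helpers into utils.metadata and widen retrieve_response to return the referenced documents. Below is a minimal caller sketch; only mcp_headers is visible in this diff, so the other retrieve_response parameters and the module-level logger are assumptions, not code from this commit:

# Hypothetical caller sketch -- parameters other than mcp_headers are
# assumptions inferred from the docstring, not taken from this commit.
async def answer_query(client, model_id, query_request, mcp_headers):
    response_text, conversation_id, referenced_documents = await retrieve_response(
        client, model_id, query_request, mcp_headers=mcp_headers
    )
    # Each ReferencedDocument carries doc_url and doc_title (see utils/metadata.py).
    for doc in referenced_documents:
        logger.info("referenced: %s (%s)", doc.doc_title, doc.doc_url)
    return response_text, conversation_id, referenced_documents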

src/app/endpoints/streaming_query.py

Lines changed: 4 additions & 8 deletions
@@ -452,17 +452,13 @@ def _handle_tool_execution_event(
                        summary = summary[:newline_pos]
                    try:
                        parsed_metadata = parse_knowledge_search_metadata(
-                            text_content_item.text
+                            text_content_item.text, strict=False
                        )
                        metadata_map.update(parsed_metadata)
-                    except ValueError:
+                    except ValueError as e:
                        logger.exception(
-                            "An exception was thrown in processing metadata from text: %s",
-                            (
-                                text_content_item.text[:200] + "..."
-                                if len(text_content_item.text) > 200
-                                else text_content_item.text
-                            ),
+                            "Error processing metadata from text; position=%s",
+                            getattr(e, "position", "unknown"),
                        )
 
     yield format_stream_data(
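
One note on the new log call: a plain ValueError has no position attribute, so getattr(e, "position", "unknown") logs "unknown" unless parse_knowledge_search_metadata raises an error that attaches one, which is an assumption about that parser's contract. A self-contained illustration of the fallback:

# Minimal sketch of the getattr fallback used in the except branch above.
try:
    raise ValueError("unparseable metadata")  # stand-in for a parse failure
except ValueError as e:
    # A bare ValueError carries no "position" attribute, so this prints "unknown".
    print(getattr(e, "position", "unknown"))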

src/utils/metadata.py

Lines changed: 109 additions & 0 deletions
@@ -1,9 +1,17 @@
 """Shared utilities for parsing metadata from knowledge search responses."""
 
 import ast
+import json
+import logging
 import re
 from typing import Any
 
+import pydantic
+
+from models.responses import ReferencedDocument
+
+logger = logging.getLogger(__name__)
+
 
 # Case-insensitive pattern to find "Metadata:" labels
 METADATA_LABEL_PATTERN = re.compile(r"^\s*metadata:\s*", re.MULTILINE | re.IGNORECASE)
@@ -94,3 +102,104 @@ def parse_knowledge_search_metadata(
            continue
 
     return metadata_map
+
+
+def process_knowledge_search_content(tool_response: Any) -> dict[str, dict[str, Any]]:
+    """Process knowledge search tool response content for metadata.
+
+    Args:
+        tool_response: Tool response object containing content to parse
+
+    Returns:
+        Dictionary mapping document_id to metadata dict
+    """
+    metadata_map: dict[str, dict[str, Any]] = {}
+
+    # Guard against missing tool_response or content
+    if not tool_response:
+        return metadata_map
+
+    content = getattr(tool_response, "content", None)
+    if not content:
+        return metadata_map
+
+    # Handle string content by attempting JSON parsing
+    if isinstance(content, str):
+        try:
+            content = json.loads(content, strict=False)
+        except (json.JSONDecodeError, TypeError):
+            # If JSON parsing fails or content is still a string, return empty
+            if isinstance(content, str):
+                return metadata_map
+
+    # Ensure content is iterable (but not a string)
+    if isinstance(content, str):
+        return metadata_map
+    try:
+        iter(content)
+    except TypeError:
+        return metadata_map
+
+    for text_content_item in content:
+        # Skip items that lack a non-empty "text" attribute
+        text = getattr(text_content_item, "text", None)
+        if not text:
+            continue
+
+        try:
+            parsed_metadata = parse_knowledge_search_metadata(text, strict=False)
+            metadata_map.update(parsed_metadata)
+        except ValueError as e:
+            logger.exception(
+                "Error processing metadata from text; position=%s",
+                getattr(e, "position", "unknown"),
+            )
+
+    return metadata_map
+
+
+def extract_referenced_documents_from_steps(
+    steps: list[Any],
+) -> list[ReferencedDocument]:
+    """Extract referenced documents from tool execution steps.
+
+    Args:
+        steps: List of response steps from the agent
+
+    Returns:
+        List of referenced documents with doc_url and doc_title, sorted deterministically
+    """
+    metadata_map: dict[str, dict[str, Any]] = {}
+
+    for step in steps:
+        if getattr(step, "step_type", "") != "tool_execution" or not hasattr(
+            step, "tool_responses"
+        ):
+            continue
+
+        for tool_response in getattr(step, "tool_responses", []) or []:
+            if getattr(
+                tool_response, "tool_name", ""
+            ) != "knowledge_search" or not getattr(tool_response, "content", []):
+                continue
+
+            response_metadata = process_knowledge_search_content(tool_response)
+            metadata_map.update(response_metadata)
+
+    # Extract referenced documents from metadata with error handling
+    referenced_documents = []
+    for v in metadata_map.values():
+        if "docs_url" in v and "title" in v:
+            try:
+                doc = ReferencedDocument(doc_url=v["docs_url"], doc_title=v["title"])
+                referenced_documents.append(doc)
+            except (pydantic.ValidationError, ValueError) as e:
+                logger.warning(
+                    "Skipping invalid referenced document with docs_url='%s', title='%s': %s",
+                    v.get("docs_url", "<missing>"),
+                    v.get("title", "<missing>"),
+                    str(e),
+                )
+                continue
+
+    return sorted(referenced_documents, key=lambda d: (d.doc_title, str(d.doc_url)))
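
To see the new helpers end to end, here is a hedged usage sketch. The SimpleNamespace objects stand in for the agent's step and tool-response objects, and the 'Metadata: {...}' text format is an assumption inferred from METADATA_LABEL_PATTERN, not confirmed by this diff:

# Hypothetical usage sketch: object shapes and the metadata text format are
# assumptions; real steps come from the llama-stack agent response.
from types import SimpleNamespace

from utils.metadata import extract_referenced_documents_from_steps

item = SimpleNamespace(
    text='Metadata: {"document_id": "doc-1", '
    '"docs_url": "https://example.com/doc", "title": "Example Doc"}'
)
steps = [
    SimpleNamespace(
        step_type="tool_execution",
        tool_responses=[
            SimpleNamespace(tool_name="knowledge_search", content=[item])
        ],
    )
]

for doc in extract_referenced_documents_from_steps(steps):
    print(doc.doc_title, doc.doc_url)  # Example Doc https://example.com/doc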
