Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
1f32952
fix(ai): redact message parts content of type blob
constantinius Dec 17, 2025
795bcea
fix(ai): skip non dict messages
constantinius Dec 17, 2025
a623e13
fix(ai): typing
constantinius Dec 17, 2025
3d3ce5b
fix(ai): content items may not be dicts
constantinius Dec 17, 2025
433bc88
fix(integrations): google-genai: reworked `gen_ai.request.messages` e…
constantinius Jan 5, 2026
4244319
fix(integrations): address cursor review comments
constantinius Jan 8, 2026
f72aa45
fix(integrations): ensure file_data returns valid blob structure only…
constantinius Jan 8, 2026
2be0419
fix(integrations): add type ignore for missing PIL.Image import
constantinius Jan 8, 2026
4abdcf8
Merge branch 'master' into constantinius/fix/integrations/google-gena…
constantinius Jan 13, 2026
86f6ecb
fix: linting issue and review comment
constantinius Jan 13, 2026
7e9335e
Merge branch 'master' into constantinius/fix/integrations/google-gena…
constantinius Jan 14, 2026
0355c63
fix(integrations): google-genai do not encode binary data that gets r…
constantinius Jan 14, 2026
910c679
fix(integrations): Use explicit None checks instead of `or {}` pattern
constantinius Jan 14, 2026
bd78165
feat(ai): Add shared content transformation functions for multimodal …
constantinius Jan 15, 2026
e7eb226
Merge shared content transformation functions
constantinius Jan 15, 2026
fc6bbfe
refactor(google-genai): Use shared transform_content_part for dict fo…
constantinius Jan 15, 2026
412b93e
refactor(ai): split transform_content_part into SDK-specific functions
constantinius Jan 15, 2026
ff7247b
Merge SDK-specific transform functions
constantinius Jan 15, 2026
b9b629e
refactor(google-genai): use transform_google_content_part directly
constantinius Jan 15, 2026
b80f6e9
test: added comprehensive tests for direct API access with various ki…
constantinius Jan 16, 2026
37b1761
fix: modality and tpe for file references
constantinius Jan 19, 2026
7d825af
fix: wrong modality and type for file references
constantinius Jan 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
351 changes: 319 additions & 32 deletions sentry_sdk/integrations/google_genai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import inspect
from functools import wraps
from .consts import ORIGIN, TOOL_ATTRIBUTES_MAP, GEN_AI_SYSTEM
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
from typing import (
cast,
TYPE_CHECKING,
Expand All @@ -12,13 +13,15 @@
Optional,
Union,
TypedDict,
Dict,
)

import sentry_sdk
from sentry_sdk.ai.utils import (
set_data_normalized,
truncate_and_annotate_messages,
normalize_message_roles,
redact_blob_message_parts,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.scope import should_send_default_pii
Expand Down Expand Up @@ -145,44 +148,314 @@ def get_model_name(model: "Union[str, Model]") -> str:
return str(model)


def extract_contents_text(contents: "ContentListUnion") -> "Optional[str]":
"""Extract text from contents parameter which can have various formats."""
def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, Any]]":
"""Extract messages from contents parameter which can have various formats.

Returns a list of message dictionaries in the format:
- System: {"role": "system", "content": "string"}
- User/Assistant: {"role": "user"|"assistant", "content": [{"text": "...", "type": "text"}, ...]}
"""
if contents is None:
return None
return []

# Simple string case
messages = []

# Handle string case
if isinstance(contents, str):
return contents
return [{"role": "user", "content": contents}]

# List of contents or parts
# Handle list case - process each item (non-recursive, flatten at top level)
if isinstance(contents, list):
texts = []
for item in contents:
# Recursively extract text from each item
extracted = extract_contents_text(item)
if extracted:
texts.append(extracted)
return " ".join(texts) if texts else None
item_messages = extract_contents_messages(item)
messages.extend(item_messages)
return messages

# Dictionary case
# Handle dictionary case (ContentDict)
if isinstance(contents, dict):
if "text" in contents:
return contents["text"]
# Try to extract from parts if present in dict
if "parts" in contents:
return extract_contents_text(contents["parts"])
role = contents.get("role", "user")
parts = contents.get("parts")

if parts:
content_parts = []
tool_messages = []

for part in parts:
part_result = _extract_part_content(part)
if part_result is None:
continue

if isinstance(part_result, dict) and part_result.get("role") == "tool":
# Tool message - add separately
tool_messages.append(part_result)
else:
# Regular content part
content_parts.append(part_result)

# Add main message if we have content parts
if content_parts:
# Normalize role: "model" -> "assistant"
normalized_role = "assistant" if role == "model" else role or "user"
messages.append({"role": normalized_role, "content": content_parts})

# Add tool messages
messages.extend(tool_messages)
elif "text" in contents:
# Simple text in dict
messages.append(
{
"role": role or "user",
"content": [{"text": contents["text"], "type": "text"}],
}
)

return messages

# Handle Content object
if hasattr(contents, "parts") and contents.parts:
role = getattr(contents, "role", None) or "user"
content_parts = []
tool_messages = []

for part in contents.parts:
part_result = _extract_part_content(part)
if part_result is None:
continue

if isinstance(part_result, dict) and part_result.get("role") == "tool":
tool_messages.append(part_result)
else:
content_parts.append(part_result)

# Content object with parts - recurse into parts
if getattr(contents, "parts", None):
return extract_contents_text(contents.parts)
if content_parts:
normalized_role = "assistant" if role == "model" else role
messages.append({"role": normalized_role, "content": content_parts})

messages.extend(tool_messages)
return messages

# Handle Part object directly
part_result = _extract_part_content(contents)
if part_result:
if isinstance(part_result, dict) and part_result.get("role") == "tool":
return [part_result]
else:
return [{"role": "user", "content": [part_result]}]

# Handle PIL.Image.Image
try:
from PIL import Image as PILImage # type: ignore[import-not-found]

if isinstance(contents, PILImage.Image):
blob_part = _extract_pil_image(contents)
if blob_part:
return [{"role": "user", "content": [blob_part]}]
except ImportError:
pass

# Handle File object
if hasattr(contents, "uri") and hasattr(contents, "mime_type"):
# File object
file_uri = getattr(contents, "uri", None)
mime_type = getattr(contents, "mime_type", None)
if file_uri and mime_type:
blob_part = {
"type": "blob",
"mime_type": mime_type,
"file_uri": file_uri,
}
return [{"role": "user", "content": [blob_part]}]
Comment thread
sentry[bot] marked this conversation as resolved.

# Handle direct text attribute
if hasattr(contents, "text") and contents.text:
return [
{"role": "user", "content": [{"text": str(contents.text), "type": "text"}]}
]

return []


def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]":
"""Extract content from a Part object or dict.

Returns:
- dict for content part (text/blob) or tool message
- None if part should be skipped
"""
if part is None:
return None

# Handle dict Part
if isinstance(part, dict):
# Check for function_response first (tool message)
if "function_response" in part:
return _extract_tool_message_from_part(part)

if part.get("text"):
return {"text": part["text"], "type": "text"}
Comment thread
constantinius marked this conversation as resolved.

if part.get("file_data"):
file_data = part["file_data"]
if isinstance(file_data, dict):
mime_type = file_data.get("mime_type")
file_uri = file_data.get("file_uri")
if mime_type and file_uri:
return {
"type": "blob",
"mime_type": mime_type,
"file_uri": file_uri,
}

if part.get("inline_data"):
inline_data = part["inline_data"]
if isinstance(inline_data, dict):
data = inline_data.get("data")
mime_type = inline_data.get("mime_type")
if data and mime_type:
if isinstance(data, bytes):
return {
"type": "blob",
"mime_type": mime_type,
"content": BLOB_DATA_SUBSTITUTE,
}

return None

# Direct text attribute
if hasattr(contents, "text"):
return contents.text
# Handle Part object
# Check for function_response (tool message)
if hasattr(part, "function_response") and part.function_response:
return _extract_tool_message_from_part(part)

# Handle text
if hasattr(part, "text") and part.text:
return {"text": part.text, "type": "text"}

# Handle file_data
if hasattr(part, "file_data") and part.file_data:
file_data = part.file_data
file_uri = getattr(file_data, "file_uri", None)
mime_type = getattr(file_data, "mime_type", None)
if file_uri and mime_type:
return {
"type": "blob",
"mime_type": mime_type,
"file_uri": file_uri,
}
Comment thread
cursor[bot] marked this conversation as resolved.
Comment thread
constantinius marked this conversation as resolved.
Comment thread
constantinius marked this conversation as resolved.

# Handle inline_data
if hasattr(part, "inline_data") and part.inline_data:
inline_data = part.inline_data
data = getattr(inline_data, "data", None)
mime_type = getattr(inline_data, "mime_type", None)
if data and mime_type:
if isinstance(data, bytes):
return {
"type": "blob",
"mime_type": mime_type,
"content": BLOB_DATA_SUBSTITUTE,
}
Comment thread
cursor[bot] marked this conversation as resolved.
Comment on lines +343 to +347
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: When processing object-based inline_data or PIL.Image objects, the returned blob dictionary is missing the modality field, creating an inconsistent data structure compared to other data types.
Severity: HIGH

Suggested Fix

In _extract_part_content, update the logic for handling object-based inline_data and PIL.Image objects. Add the modality field to the returned dictionary by calling get_modality_from_mime_type(mime_type), similar to how file_data is handled. This will ensure all blob data structures are consistent.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: sentry_sdk/integrations/google_genai/utils.py#L343-L347

Potential issue: The function `_extract_part_content` creates an inconsistent data
structure for blob data. When handling object-based `Part` objects with `inline_data`
containing bytes (lines 337-348) or `PIL.Image` objects (lines 418-422), the returned
dictionary omits the `modality` field. However, when processing dictionary-based
`inline_data` or `file_data`, the `modality` field is correctly included using the
`get_modality_from_mime_type` helper. This inconsistency can lead to downstream
processing errors if other parts of the system expect a standardized blob format that
always includes `modality`.

Did we get this right? 👍 / 👎 to inform future reviews.


return None


def _extract_tool_message_from_part(part: "Any") -> "Optional[dict[str, Any]]":
"""Extract tool message from a Part with function_response.

Returns:
{"role": "tool", "content": {"toolCallId": "...", "toolName": "...", "output": "..."}}
or None if not a valid tool message
"""
function_response = None

if isinstance(part, dict):
function_response = part.get("function_response")
elif hasattr(part, "function_response"):
function_response = part.function_response

if not function_response:
return None

# Extract fields from function_response
tool_call_id = None
tool_name = None
output = None

if isinstance(function_response, dict):
tool_call_id = function_response.get("id")
tool_name = function_response.get("name")
response_dict = function_response.get("response", {})
# Prefer "output" key if present, otherwise use entire response
output = response_dict.get("output", response_dict)
Comment thread
constantinius marked this conversation as resolved.
else:
# FunctionResponse object
tool_call_id = getattr(function_response, "id", None)
tool_name = getattr(function_response, "name", None)
response_obj = getattr(function_response, "response", None)
if response_obj is None:
response_obj = {}
if isinstance(response_obj, dict):
output = response_obj.get("output", response_obj)
else:
output = response_obj

if not tool_name:
return None

return {
"role": "tool",
"content": {
"toolCallId": str(tool_call_id) if tool_call_id else None,
"toolName": str(tool_name),
"output": safe_serialize(output) if output is not None else None,
},
}


def _extract_pil_image(image: "Any") -> "Optional[dict[str, Any]]":
"""Extract blob part from PIL.Image.Image."""
try:
from PIL import Image as PILImage

if not isinstance(image, PILImage.Image):
return None

# Get format, default to JPEG
format_str = image.format or "JPEG"
suffix = format_str.lower()
mime_type = f"image/{suffix}"

return {
"type": "blob",
"mime_type": mime_type,
"content": BLOB_DATA_SUBSTITUTE,
}
except Exception:
return None


def extract_contents_text(contents: "ContentListUnion") -> "Optional[str]":
    """Extract text from contents parameter which can have various formats.

    This is a compatibility function that extracts text from messages.
    For new code, use extract_contents_messages instead.

    Only non-empty string texts are collected: string message content and
    the ``text`` of content parts whose ``type`` is ``"text"``. Messages whose
    content is neither a string nor a list are ignored.

    Returns:
        The collected texts joined with single spaces, or None if there is
        no text at all.
    """
    texts = []
    for message in extract_contents_messages(contents):
        content = message.get("content")
        if isinstance(content, str):
            candidates = [content]
        elif isinstance(content, list):
            candidates = [
                part.get("text")
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            ]
        else:
            continue

        # Guard the join below: a text part may hold None or a non-string
        # value, and empty strings would only add stray separators.
        texts.extend(text for text in candidates if isinstance(text, str) and text)

    return " ".join(texts) if texts else None


def _format_tools_for_span(
tools: "Iterable[Tool | Callable[..., Any]]",
) -> "Optional[List[dict[str, Any]]]":
Expand Down Expand Up @@ -457,14 +730,28 @@ def set_span_data_for_request(
if config and hasattr(config, "system_instruction"):
system_instruction = config.system_instruction
if system_instruction:
system_text = extract_contents_text(system_instruction)
if system_text:
messages.append({"role": "system", "content": system_text})

# Add user message
contents_text = extract_contents_text(contents)
if contents_text:
messages.append({"role": "user", "content": contents_text})
system_messages = extract_contents_messages(system_instruction)
# System instruction should be a single system message
# Extract text from all messages and combine into one system message
system_texts = []
for msg in system_messages:
content = msg.get("content")
if isinstance(content, list):
# Extract text from content parts
for part in content:
if isinstance(part, dict) and part.get("type") == "text":
system_texts.append(part.get("text", ""))
elif isinstance(content, str):
system_texts.append(content)

if system_texts:
messages.append(
{"role": "system", "content": " ".join(system_texts)}
)

# Extract messages from contents
contents_messages = extract_contents_messages(contents)
messages.extend(contents_messages)

if messages:
normalized_messages = normalize_message_roles(messages)
Expand Down
Loading
Loading