Skip to content
Closed
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
1f32952
fix(ai): redact message parts content of type blob
constantinius Dec 17, 2025
795bcea
fix(ai): skip non dict messages
constantinius Dec 17, 2025
a623e13
fix(ai): typing
constantinius Dec 17, 2025
3d3ce5b
fix(ai): content items may not be dicts
constantinius Dec 17, 2025
ce29e47
fix(integrations): OpenAI input messages are now being converted to t…
constantinius Dec 17, 2025
7074f0b
test(integrations): add test for message conversion
constantinius Dec 17, 2025
e8a1adc
feat(integrations): add transformation functions for OpenAI Agents co…
constantinius Jan 8, 2026
c1a2239
feat(ai): implement parse_data_uri function and integrate it into Ope…
constantinius Jan 8, 2026
bd46a6a
Merge branch 'master' into constantinius/fix/integrations/openai-repo…
constantinius Jan 13, 2026
04b27f4
fix: review comment
constantinius Jan 13, 2026
f8345d0
Merge branch 'master' into constantinius/fix/integrations/openai-repo…
constantinius Jan 14, 2026
b74bdb9
fix(integrations): addressing review comments
constantinius Jan 14, 2026
8080904
fix: review comment
constantinius Jan 15, 2026
05b1a79
fix(integrations): extract text content from OpenAI responses instead…
constantinius Jan 15, 2026
bd78165
feat(ai): Add shared content transformation functions for multimodal …
constantinius Jan 15, 2026
4795c3b
Merge shared content transformation functions
constantinius Jan 15, 2026
df59f49
refactor(openai): Use shared transform_message_content from ai/utils
constantinius Jan 15, 2026
412b93e
refactor(ai): split transform_content_part into SDK-specific functions
constantinius Jan 15, 2026
b99640e
Merge SDK-specific transform functions
constantinius Jan 15, 2026
4fba982
refactor(openai): use transform_openai_content_part directly
constantinius Jan 15, 2026
a2565c1
fix: Delete uv.lock
constantinius Jan 16, 2026
2c030cf
test: skip tests if `chat_completion_message_tool_call` is not available
constantinius Jan 22, 2026
6fb6def
Merge branch 'master' into constantinius/fix/integrations/openai-repo…
constantinius Jan 22, 2026
2f51b6d
Merge branch 'master' into constantinius/fix/integrations/openai-repo…
constantinius Jan 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from sys import getsizeof
from typing import TYPE_CHECKING

from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE

if TYPE_CHECKING:
from typing import Any, Callable, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -38,6 +40,39 @@ class GEN_AI_ALLOWED_MESSAGE_ROLES:
GEN_AI_MESSAGE_ROLE_MAPPING[source_role] = target_role


def parse_data_uri(url: str) -> "Tuple[str, str]":
    """
    Parse a data URI and return (mime_type, content).

    Data URI format (RFC 2397): data:[<mediatype>][;base64],<data>

    Only the media type is extracted from the header. Header parameters such
    as ";base64" are dropped, and the payload is returned verbatim (it is NOT
    decoded).

    Examples:
        data:image/jpeg;base64,/9j/4AAQ... → ("image/jpeg", "/9j/4AAQ...")
        data:text/plain,Hello → ("text/plain", "Hello")
        data:;base64,SGVsbG8= → ("", "SGVsbG8=")

    Args:
        url: The data URI to parse.

    Returns:
        A (mime_type, content) tuple. mime_type is the empty string when the
        URI does not declare a media type.

    Raises:
        ValueError: If the URL is not a valid data URI (missing comma separator)
    """
    # Split on the FIRST comma only: the payload itself may contain commas.
    header, separator, content = url.partition(",")
    if not separator:
        raise ValueError("Invalid data URI: missing comma separator")

    # Header format: "data:<mime>[;param1][;param2]..." e.g. "data:image/jpeg;base64".
    # The "data:" scheme is stripped when present; a header without it is
    # used as is, so the scheme is not enforced here.
    mime_part = header[5:] if header.startswith("data:") else header

    # The media type is everything before the first parameter separator.
    mime_type = mime_part.split(";")[0]

    return mime_type, content


def _normalize_data(data: "Any", unpack: bool = True) -> "Any":
# convert pydantic data (e.g. OpenAI v1+) to json compatible format
if hasattr(data, "model_dump"):
Expand Down Expand Up @@ -141,6 +176,57 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
return 0


def redact_blob_message_parts(
    messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
    """
    Redact blob message parts from the messages, by replacing the value of
    their "content" key with the sensitive-data substitute.
    e.g:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "data:image/jpeg;base64,..."
            }
        ]
    }
    becomes:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "[Filtered]"
            }
        ]
    }

    The input is never modified: the message dicts and content parts handed
    in may be the very objects the application sends to the model, so
    redacting them in place would corrupt the real request. Only the
    containers on the path to a redacted part are copied; when nothing needs
    redacting the original list is returned as is.

    Non-dict messages, messages whose "content" is not a list, and non-dict
    content parts are passed through untouched.
    """
    redacted_messages = None  # created lazily, on the first redaction

    for message_index, message in enumerate(messages):
        if not isinstance(message, dict):
            continue

        content = message.get("content")
        if not isinstance(content, list):
            continue

        redacted_content = None  # created lazily, on the first blob part
        for item_index, item in enumerate(content):
            if isinstance(item, dict) and item.get("type") == "blob":
                if redacted_content is None:
                    redacted_content = list(content)
                # Copy the part so the caller's dict keeps its payload.
                redacted_content[item_index] = {
                    **item,
                    "content": SENSITIVE_DATA_SUBSTITUTE,
                }

        if redacted_content is None:
            continue

        if redacted_messages is None:
            redacted_messages = list(messages)
        redacted_messages[message_index] = {**message, "content": redacted_content}

    return messages if redacted_messages is None else redacted_messages


def truncate_messages_by_size(
messages: "List[Dict[str, Any]]",
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
Expand Down Expand Up @@ -186,6 +272,8 @@ def truncate_and_annotate_messages(
if not messages:
return None

messages = redact_blob_message_parts(messages)

truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
if removed_count > 0:
scope._gen_ai_original_message_count[span.span_id] = len(messages)
Expand Down
79 changes: 78 additions & 1 deletion sentry_sdk/integrations/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sentry_sdk.ai.utils import (
set_data_normalized,
normalize_message_roles,
parse_data_uri,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import SPANDATA
Expand All @@ -18,7 +19,7 @@
safe_serialize,
)

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Dict

if TYPE_CHECKING:
from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator
Expand Down Expand Up @@ -177,6 +178,80 @@ def _calculate_token_usage(
)


def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
"""
Convert the message parts from OpenAI format to the `gen_ai.request.messages` format.
e.g:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,...",
"detail": "high"
}
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
"""

def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
if item.get("type") == "image_url":
image_url = item.get("image_url") or {}
url = image_url.get("url", "")
Comment thread
cursor[bot] marked this conversation as resolved.
Outdated
Comment thread
sentry[bot] marked this conversation as resolved.
Outdated
if url.startswith("data:"):
try:
mime_type, content = parse_data_uri(url)
return {
"type": "blob",
"modality": "image",
"mime_type": mime_type,
"content": content,
}
except ValueError:
# If parsing fails, return as URI
return {
"type": "uri",
"modality": "image",
"uri": url,
}
else:
return {
"type": "uri",
"uri": url,
Comment thread
constantinius marked this conversation as resolved.
Outdated
}
Comment thread
constantinius marked this conversation as resolved.
Outdated
return item
Comment thread
constantinius marked this conversation as resolved.
Outdated

for message in messages:
if not isinstance(message, dict):
continue
content = message.get("content")
if isinstance(content, list):
message["content"] = [_map_item(item) for item in content]
return messages


def _set_input_data(
span: "Span",
kwargs: "dict[str, Any]",
Expand All @@ -198,6 +273,8 @@ def _set_input_data(
and integration.include_prompts
):
normalized_messages = normalize_message_roles(messages)
normalized_messages = _convert_message_parts(normalized_messages)

scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
if messages_data is not None:
Expand Down
52 changes: 40 additions & 12 deletions sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,19 @@
get_start_span_function,
set_data_normalized,
normalize_message_roles,
normalize_message_role,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import safe_serialize

from ..consts import SPAN_ORIGIN
from ..utils import _set_agent_data, _set_usage_data
from ..utils import (
_set_agent_data,
_set_usage_data,
_transform_openai_agents_message_content,
)

from typing import TYPE_CHECKING

Expand Down Expand Up @@ -49,17 +54,40 @@ def invoke_agent_span(

original_input = kwargs.get("original_input")
if original_input is not None:
message = (
original_input
if isinstance(original_input, str)
else safe_serialize(original_input)
)
messages.append(
{
"content": [{"text": message, "type": "text"}],
"role": "user",
}
)
if isinstance(original_input, str):
# String input: wrap in text block
messages.append(
{
"content": [{"text": original_input, "type": "text"}],
"role": "user",
}
)
elif isinstance(original_input, list) and len(original_input) > 0:
# Check if list contains message objects (with type="message")
# or content parts (input_text, input_image, etc.)
first_item = original_input[0]
if isinstance(first_item, dict) and first_item.get("type") == "message":
# List of message objects - process each individually
for msg in original_input:
if isinstance(msg, dict) and msg.get("type") == "message":
role = normalize_message_role(msg.get("role", "user"))
content = msg.get("content")
transformed = _transform_openai_agents_message_content(
content
)
if isinstance(transformed, str):
transformed = [{"text": transformed, "type": "text"}]
elif not isinstance(transformed, list):
transformed = [
{"text": str(transformed), "type": "text"}
]
Comment thread
cursor[bot] marked this conversation as resolved.
messages.append({"content": transformed, "role": role})
else:
# List of content parts - transform and wrap as user message
content = _transform_openai_agents_message_content(original_input)
if not isinstance(content, list):
content = [{"text": str(content), "type": "text"}]
messages.append({"content": content, "role": "user"})
Comment thread
constantinius marked this conversation as resolved.
Comment thread
cursor[bot] marked this conversation as resolved.

if len(messages) > 0:
normalized_messages = normalize_message_roles(messages)
Expand Down
Loading
Loading