Skip to content

Commit 84468d2

Browse files
author
Vamil Gandhi
committed
fix: correctly label tool result messages in OpenTelemetry events
- Label messages containing tool results as 'gen_ai.tool.message' regardless of role - Add OpenTelemetry semantic conventions v1.36.0 reference - Note that GenAI namespace is experimental - Add documentation link to semantic conventions - Convert tests to data-driven parametrized format for better maintainability - Improve test coverage with comprehensive edge cases including mixed content and multiple tool results
1 parent 8122453 commit 84468d2

File tree

2 files changed

+228
-3
lines changed

2 files changed

+228
-3
lines changed

src/strands/telemetry/tracer.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,30 @@ def _add_event(self, span: Optional[Span], event_name: str, event_attributes: Di
207207

208208
span.add_event(event_name, attributes=event_attributes)
209209

210+
def _get_event_name_for_message(self, message: Message) -> str:
211+
"""Determine the appropriate OpenTelemetry event name for a message.
212+
213+
According to OpenTelemetry semantic conventions v1.36.0, messages containing tool results
214+
should be labeled as 'gen_ai.tool.message' regardless of their role field.
215+
This ensures proper categorization of tool responses in traces.
216+
217+
Note: The GenAI namespace is experimental and may change in future versions.
218+
219+
Reference: https://github.com/open-telemetry/semantic-conventions/blob/v1.36.0/docs/gen-ai/gen-ai-events.md#event-gen_aitoolmessage
220+
221+
Args:
222+
message: The message to determine the event name for
223+
224+
Returns:
225+
The OpenTelemetry event name (e.g., 'gen_ai.user.message', 'gen_ai.tool.message')
226+
"""
227+
# Check if the message contains a tool result
228+
for content_block in message.get("content", []):
229+
if "toolResult" in content_block:
230+
return "gen_ai.tool.message"
231+
232+
return f"gen_ai.{message['role']}.message"
233+
210234
def start_model_invoke_span(
211235
self,
212236
messages: Messages,
@@ -240,7 +264,7 @@ def start_model_invoke_span(
240264
for message in messages:
241265
self._add_event(
242266
span,
243-
f"gen_ai.{message['role']}.message",
267+
self._get_event_name_for_message(message),
244268
{"content": serialize(message["content"])},
245269
)
246270
return span
@@ -379,7 +403,7 @@ def start_event_loop_cycle_span(
379403
for message in messages or []:
380404
self._add_event(
381405
span,
382-
f"gen_ai.{message['role']}.message",
406+
self._get_event_name_for_message(message),
383407
{"content": serialize(message["content"])},
384408
)
385409

@@ -456,7 +480,7 @@ def start_agent_span(
456480
for message in messages:
457481
self._add_event(
458482
span,
459-
f"gen_ai.{message['role']}.message",
483+
self._get_event_name_for_message(message),
460484
{"content": serialize(message["content"])},
461485
)
462486

tests/strands/telemetry/test_tracer.py

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,3 +746,204 @@ def test_serialize_vs_json_dumps():
746746
custom_result = serialize({"text": japanese_text})
747747
assert japanese_text in custom_result
748748
assert "\\u" not in custom_result
749+
750+
751+
@pytest.mark.parametrize(
752+
"message, expected_event_name, description",
753+
[
754+
# Regular role-based messages
755+
(
756+
{"role": "user", "content": [{"text": "Hello"}]},
757+
"gen_ai.user.message",
758+
"regular user message",
759+
),
760+
(
761+
{"role": "assistant", "content": [{"text": "Hello"}]},
762+
"gen_ai.assistant.message",
763+
"regular assistant message",
764+
),
765+
(
766+
{"role": "system", "content": [{"text": "You are a helpful assistant"}]},
767+
"gen_ai.system.message",
768+
"regular system message",
769+
),
770+
# Messages with tool results should always be labeled as tool messages
771+
(
772+
{
773+
"role": "user",
774+
"content": [
775+
{
776+
"toolResult": {
777+
"toolUseId": "123",
778+
"status": "success",
779+
"content": [{"text": "Tool response"}],
780+
}
781+
}
782+
],
783+
},
784+
"gen_ai.tool.message",
785+
"user message containing tool result",
786+
),
787+
(
788+
{
789+
"role": "assistant",
790+
"content": [
791+
{
792+
"toolResult": {
793+
"toolUseId": "123",
794+
"status": "success",
795+
"content": [{"text": "Tool response"}],
796+
}
797+
}
798+
],
799+
},
800+
"gen_ai.tool.message",
801+
"assistant message containing tool result",
802+
),
803+
# Mixed content with tool results
804+
(
805+
{
806+
"role": "user",
807+
"content": [
808+
{"text": "Here are the results:"},
809+
{
810+
"toolResult": {
811+
"toolUseId": "123",
812+
"status": "success",
813+
"content": [{"text": "Tool response"}],
814+
}
815+
},
816+
],
817+
},
818+
"gen_ai.tool.message",
819+
"message with both text and tool result",
820+
),
821+
# Multiple tool results
822+
(
823+
{
824+
"role": "user",
825+
"content": [
826+
{
827+
"toolResult": {
828+
"toolUseId": "123",
829+
"status": "success",
830+
"content": [{"text": "First tool"}],
831+
}
832+
},
833+
{
834+
"toolResult": {
835+
"toolUseId": "456",
836+
"status": "success",
837+
"content": [{"text": "Second tool"}],
838+
}
839+
},
840+
],
841+
},
842+
"gen_ai.tool.message",
843+
"message with multiple tool results",
844+
),
845+
# Edge cases
846+
(
847+
{"role": "user", "content": []},
848+
"gen_ai.user.message",
849+
"message with empty content",
850+
),
851+
(
852+
{"role": "assistant"},
853+
"gen_ai.assistant.message",
854+
"message with no content key",
855+
),
856+
],
857+
)
858+
def test_get_event_name_for_message(message, expected_event_name, description):
859+
"""Test getting event name for various message types using data-driven approach."""
860+
tracer = Tracer()
861+
862+
event_name = tracer._get_event_name_for_message(message)
863+
864+
assert event_name == expected_event_name, f"Failed for {description}"
865+
866+
867+
def test_start_model_invoke_span_with_tool_result_message(mock_tracer):
868+
"""Test that start_model_invoke_span correctly labels tool result messages."""
869+
with mock.patch("strands.telemetry.tracer.trace_api.get_tracer", return_value=mock_tracer):
870+
tracer = Tracer()
871+
tracer.tracer = mock_tracer
872+
873+
mock_span = mock.MagicMock()
874+
mock_tracer.start_span.return_value = mock_span
875+
876+
# Message that contains a tool result
877+
messages = [
878+
{
879+
"role": "user",
880+
"content": [
881+
{"toolResult": {"toolUseId": "123", "status": "success", "content": [{"text": "Weather is sunny"}]}}
882+
],
883+
}
884+
]
885+
886+
span = tracer.start_model_invoke_span(messages=messages, model_id="test-model")
887+
888+
# Should use gen_ai.tool.message event name instead of gen_ai.user.message
889+
mock_span.add_event.assert_called_with(
890+
"gen_ai.tool.message", attributes={"content": json.dumps(messages[0]["content"])}
891+
)
892+
assert span is not None
893+
894+
895+
def test_start_agent_span_with_tool_result_message(mock_tracer):
896+
"""Test that start_agent_span correctly labels tool result messages."""
897+
with mock.patch("strands.telemetry.tracer.trace_api.get_tracer", return_value=mock_tracer):
898+
tracer = Tracer()
899+
tracer.tracer = mock_tracer
900+
901+
mock_span = mock.MagicMock()
902+
mock_tracer.start_span.return_value = mock_span
903+
904+
# Message that contains a tool result
905+
messages = [
906+
{
907+
"role": "user",
908+
"content": [
909+
{"toolResult": {"toolUseId": "123", "status": "success", "content": [{"text": "Weather is sunny"}]}}
910+
],
911+
}
912+
]
913+
914+
span = tracer.start_agent_span(messages=messages, agent_name="WeatherAgent", model_id="test-model")
915+
916+
# Should use gen_ai.tool.message event name instead of gen_ai.user.message
917+
mock_span.add_event.assert_called_with(
918+
"gen_ai.tool.message", attributes={"content": json.dumps(messages[0]["content"])}
919+
)
920+
assert span is not None
921+
922+
923+
def test_start_event_loop_cycle_span_with_tool_result_message(mock_tracer):
924+
"""Test that start_event_loop_cycle_span correctly labels tool result messages."""
925+
with mock.patch("strands.telemetry.tracer.trace_api.get_tracer", return_value=mock_tracer):
926+
tracer = Tracer()
927+
tracer.tracer = mock_tracer
928+
929+
mock_span = mock.MagicMock()
930+
mock_tracer.start_span.return_value = mock_span
931+
932+
# Message that contains a tool result
933+
messages = [
934+
{
935+
"role": "user",
936+
"content": [
937+
{"toolResult": {"toolUseId": "123", "status": "success", "content": [{"text": "Weather is sunny"}]}}
938+
],
939+
}
940+
]
941+
942+
event_loop_kwargs = {"event_loop_cycle_id": "cycle-123"}
943+
span = tracer.start_event_loop_cycle_span(event_loop_kwargs, messages=messages)
944+
945+
# Should use gen_ai.tool.message event name instead of gen_ai.user.message
946+
mock_span.add_event.assert_called_with(
947+
"gen_ai.tool.message", attributes={"content": json.dumps(messages[0]["content"])}
948+
)
949+
assert span is not None

0 commit comments

Comments
 (0)