traceloop · max-deygin-traceloop · Apr 30, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026
diff --git a/...metry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_hooks.py b/...metry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_hooks.py
diff --git a/...mentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py b/...mentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py
@@ -4,6 +4,7 @@
 so we need to patch the RealtimeSession class directly to add OpenTelemetry tracing.
 """
 
+import json
 import logging
 import time
 from typing import Dict, Any, Optional, List, Tuple
@@ -119,7 +120,7 @@ def start_workflow_span(self, agent_name: str):
             kind=SpanKind.CLIENT,
             attributes={
                 SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.WORKFLOW.value,
-                GenAIAttributes.GEN_AI_SYSTEM: "openai_agents",
+                GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai",
                 SpanAttributes.TRACELOOP_WORKFLOW_NAME: "Realtime Session",
             },
         )
@@ -175,7 +176,7 @@ def start_agent_span(self, agent_name: str):
             attributes={
                 SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.AGENT.value,
                 GenAIAttributes.GEN_AI_AGENT_NAME: agent_name,
-                GenAIAttributes.GEN_AI_SYSTEM: "openai_agents",
+                GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai",
             },
         )
         self.agent_spans[agent_name] = span
@@ -202,7 +203,7 @@ def start_tool_span(self, tool_name: str, agent_name: Optional[str] = None):
                 SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.TOOL.value,
                 GenAIAttributes.GEN_AI_TOOL_NAME: tool_name,
                 GenAIAttributes.GEN_AI_TOOL_TYPE: "function",
-                GenAIAttributes.GEN_AI_SYSTEM: "openai_agents",
+                GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai",
             },
         )
         self.tool_spans[tool_name] = span
@@ -239,7 +240,7 @@ def create_handoff_span(self, from_agent: str, to_agent: str):
             context=parent_context,
             attributes={
                 SpanAttributes.TRACELOOP_SPAN_KIND: "handoff",
-                GenAIAttributes.GEN_AI_SYSTEM: "openai_agents",
+                GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai",
                 GEN_AI_HANDOFF_FROM_AGENT: from_agent,
                 GEN_AI_HANDOFF_TO_AGENT: to_agent,
             },
@@ -258,8 +259,8 @@ def start_audio_span(self, item_id: str, content_index: int):
             kind=SpanKind.CLIENT,
             context=parent_context,
             attributes={
-                SpanAttributes.LLM_REQUEST_TYPE: "realtime",
-                GenAIAttributes.GEN_AI_SYSTEM: "openai",
+                GenAIAttributes.GEN_AI_OPERATION_NAME: "realtime",
+                GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai",
             },
         )
         self.audio_spans[span_key] = span
@@ -351,9 +352,8 @@ def create_llm_span(self, completion_content: str):
             context=parent_context,
             start_time=start_time,
             attributes={
-                SpanAttributes.LLM_REQUEST_TYPE: "realtime",
-                SpanAttributes.LLM_SYSTEM: "openai",
-                GenAIAttributes.GEN_AI_SYSTEM: "openai",
+                GenAIAttributes.GEN_AI_OPERATION_NAME: "realtime",
+                GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai",
                 GenAIAttributes.GEN_AI_REQUEST_MODEL: model_name_str,
             },
         )
@@ -373,21 +373,23 @@ def create_llm_span(self, completion_content: str):
 
         if should_send_prompts():
             if prompt_content:
+                input_msg = {
+                    "role": prompt_role or "user",
+                    "parts": [{"type": "text", "content": prompt_content}],
+                }
                 span.set_attribute(
-                    f"{GenAIAttributes.GEN_AI_PROMPT}.0.role", prompt_role or "user"
-                )
-                span.set_attribute(
-                    f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", prompt_content
+                    GenAIAttributes.GEN_AI_INPUT_MESSAGES,
+                    json.dumps([input_msg]),
                 )
 
+            out_msg = {
+                "role": "assistant",
+                "parts": [{"type": "text", "content": completion_content}],
+                "finish_reason": None,
+            }
             span.set_attribute(
-                f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role", "assistant"
-            )
-            span.set_attribute(
-                f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", completion_content
-            )
-            span.set_attribute(
-                f"{GenAIAttributes.GEN_AI_COMPLETION}.0.finish_reason", "stop"
+                GenAIAttributes.GEN_AI_OUTPUT_MESSAGES,
+                json.dumps([out_msg]),
             )
 
         span.set_status(Status(StatusCode.OK))

diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_openai_agents.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_openai_agents.py
@@ -1,3 +1,4 @@
+import json
 import pytest
 from unittest.mock import MagicMock
 from opentelemetry.instrumentation.openai_agents import (
@@ -49,11 +50,11 @@ def test_dict_content_serialization(exporter):
 
     spans = exporter.get_finished_spans()
 
-    # Look for any spans with prompt/content attributes
+    # Look for any spans with message content attributes
     for span in spans:
         for attr_name, attr_value in span.attributes.items():
-            prompt_content_check = ("prompt" in attr_name and "content" in attr_name) or (
-                "gen_ai.prompt" in attr_name and "content" in attr_name
+            prompt_content_check = (
+                attr_name in ("gen_ai.input.messages", "gen_ai.output.messages")
             )
             if prompt_content_check:
                 # All content attributes should be strings, not dicts
@@ -90,43 +91,47 @@ def test_agent_spans(exporter, test_agent):
     assert agent_span.kind == agent_span.kind.CLIENT
     assert agent_span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.AGENT.value
     assert agent_span.attributes[GenAIAttributes.GEN_AI_AGENT_NAME] == "testAgent"
-    assert agent_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "openai_agents"
+    assert agent_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai"
     assert agent_span.status.status_code == StatusCode.OK
 
     # Agent span should NOT contain LLM parameters
-    assert SpanAttributes.LLM_REQUEST_TEMPERATURE not in agent_span.attributes
-    assert SpanAttributes.LLM_REQUEST_MAX_TOKENS not in agent_span.attributes
-    assert SpanAttributes.LLM_REQUEST_TOP_P not in agent_span.attributes
-    assert "openai.agent.model.frequency_penalty" not in agent_span.attributes
+    assert GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE not in agent_span.attributes
+    assert GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS not in agent_span.attributes
+    assert GenAIAttributes.GEN_AI_REQUEST_TOP_P not in agent_span.attributes
+    assert GenAIAttributes.GEN_AI_REQUEST_FREQUENCY_PENALTY not in agent_span.attributes
 
     # Find the response span (openai.response) - this should contain prompts/completions/usage
     response_spans = [s for s in spans if s.name == "openai.response"]
     assert len(response_spans) >= 1, f"Expected at least 1 openai.response span, got {len(response_spans)}"
     response_span = response_spans[0]
 
-    # Test response span attributes (should contain prompts/completions/usage)
-
     # Test proper semantic conventions
-    assert response_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "response"
-    assert response_span.attributes["gen_ai.operation.name"] == "response"
-    assert response_span.attributes["gen_ai.system"] == "openai"
+    assert response_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat"
+    assert response_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai"
 
-    # Test prompts using OpenAI semantic conventions
-    assert response_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "user"
-    assert response_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"] == "What is AI?"
+    # Test input messages (JSON array with parts-based schema)
+    input_messages = json.loads(response_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES])
+    assert input_messages[0]["role"] == "user"
+    assert "parts" in input_messages[0], "Input messages must use parts-based schema"
+    assert input_messages[0]["parts"][0]["type"] == "text"
+    assert input_messages[0]["parts"][0]["content"] == "What is AI?"
 
     # Test usage tokens
     assert response_span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] is not None
     assert response_span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] is not None
-    assert response_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] is not None
+    assert response_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] is not None
     assert response_span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] > 0
     assert response_span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] > 0
-    assert response_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] > 0
+    assert response_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] > 0
 
-    # Test completions using OpenAI semantic conventions
-    assert response_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"] is not None
-    assert len(response_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"]) > 0
-    assert response_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role"] is not None
+    # Test output messages (JSON array with parts-based schema)
+    output_messages = json.loads(response_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES])
+    assert "parts" in output_messages[0], "Output messages must use parts-based schema"
+    assert output_messages[0]["parts"][0]["type"] == "text"
+    assert output_messages[0]["parts"][0]["content"] is not None
+    assert len(output_messages[0]["parts"][0]["content"]) > 0
+    assert output_messages[0]["role"] is not None
+    assert "finish_reason" in output_messages[0], "Output messages must have finish_reason"
 
     # Test model settings are in the response span
     assert response_span.attributes["gen_ai.request.temperature"] == 0.3
@@ -444,60 +449,53 @@ async def get_city_info(city_name: str) -> str:
     second_response_span = response_spans[1]
 
     # The tool call and result appear in the SECOND response span as part of conversation history
-    # Find the assistant message with tool call
+    # Parse the input messages JSON array (parts-based schema)
+    input_messages = json.loads(
+        second_response_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]
+    )
+
     tool_call_found = False
     tool_result_found = False
 
-    for i in range(20):  # Check conversation history
-        role_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.role"
-        if role_key not in second_response_span.attributes:
-            continue
-
-        role = second_response_span.attributes[role_key]
+    for msg in input_messages:
+        role = msg.get("role")
+        parts = msg.get("parts", [])
 
         if role == "assistant" and not tool_call_found:
-            # Check if this assistant message has tool_calls
-            tool_call_name_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.tool_calls.0.name"
-            if tool_call_name_key in second_response_span.attributes:
-                tool_call_found = True
-                # Verify tool call attributes
-                assert second_response_span.attributes[tool_call_name_key] == "get_city_info", (
-                    f"Expected tool name 'get_city_info', got '{second_response_span.attributes[tool_call_name_key]}'"
-                )
-                # Verify tool call ID exists
-                tool_call_id_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.tool_calls.0.id"
-                assert tool_call_id_key in second_response_span.attributes, (
-                    f"Tool call ID not found at {tool_call_id_key}"
-                )
-                tool_call_id = second_response_span.attributes[tool_call_id_key]
-                assert len(tool_call_id) > 0, "Tool call ID should not be empty"
-
-                # Verify arguments exist and contain city name
-                tool_call_args_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.tool_calls.0.arguments"
-                assert tool_call_args_key in second_response_span.attributes, (
-                    f"Tool call arguments not found at {tool_call_args_key}"
-                )
-                arguments = second_response_span.attributes[tool_call_args_key]
-                assert "London" in arguments or "london" in arguments.lower(), (
-                    f"Expected 'London' in arguments, got: {arguments}"
-                )
+            # Look for tool_call parts
+            for part in parts:
+                if part.get("type") == "tool_call":
+                    tool_call_found = True
+                    assert part["name"] == "get_city_info", (
+                        f"Expected tool name 'get_city_info', got '{part['name']}'"
+                    )
+                    tool_call_id = part.get("id", "")
+                    assert len(tool_call_id) > 0, "Tool call ID should not be empty"
+                    arguments = part.get("arguments", "")
+                    if isinstance(arguments, dict):
+                        arguments = json.dumps(arguments)
+                    assert "London" in arguments or "london" in arguments.lower(), (
+                        f"Expected 'London' in arguments, got: {arguments}"
+                    )
+                    break
 
         elif role == "tool" and not tool_result_found:
-            tool_result_found = True
-            # Verify tool result attributes
-            content_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.content"
-            tool_call_id_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.tool_call_id"
-
-            assert content_key in second_response_span.attributes, f"Tool result content not found at {content_key}"
-            content = second_response_span.attributes[content_key]
-            assert len(content) > 0, "Tool result content should not be empty"
-            assert "London" in content or "9000000" in content or "United Kingdom" in content, (
-                f"Expected tool result to contain city info, got: {content}"
-            )
-
-            assert tool_call_id_key in second_response_span.attributes, f"Tool call ID not found at {tool_call_id_key}"
-            tool_call_id = second_response_span.attributes[tool_call_id_key]
-            assert len(tool_call_id) > 0, "Tool call ID should not be empty"
-
-    assert tool_call_found, "No assistant message with tool_calls found in second response span"
-    assert tool_result_found, "No tool message found in second response span"
+            # Look for tool_call_response parts
+            for part in parts:
+                if part.get("type") == "tool_call_response":
+                    tool_result_found = True
+                    response_text = part.get("response", "")
+                    assert len(response_text) > 0, "Tool result response should not be empty"
+                    assert (
+                        "London" in response_text
+                        or "9000000" in response_text
+                        or "United Kingdom" in response_text
+                    ), (
+                        f"Expected tool result to contain city info, got: {response_text}"
+                    )
+                    tool_call_id = part.get("id", "")
+                    assert len(tool_call_id) > 0, "Tool call ID should not be empty"
+                    break
+
+    assert tool_call_found, "No assistant message with tool_call parts found in second response span"
+    assert tool_result_found, "No tool message with tool_call_response parts found in second response span"
diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime.py
@@ -11,7 +11,6 @@
 from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
 from opentelemetry.sdk.trace.export import SimpleSpanProcessor
 from opentelemetry.trace import StatusCode
-from opentelemetry.semconv_ai import SpanAttributes
 from opentelemetry.semconv._incubating.attributes import (
     gen_ai_attributes as GenAIAttributes,
 )
@@ -117,8 +116,7 @@ def test_speech_span_start_creates_otel_span(self, tracer_provider_and_exporter)
         assert "openai.realtime.speech" in span_names
 
         speech_span = next(s for s in spans if s.name == "openai.realtime.speech")
-        assert speech_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "realtime"
-        assert speech_span.attributes["gen_ai.system"] == "openai"
+        assert speech_span.attributes["gen_ai.provider.name"] == "openai"
         assert speech_span.attributes["gen_ai.operation.name"] == "speech"
         assert speech_span.status.status_code == StatusCode.OK
 
@@ -213,8 +211,7 @@ def test_transcription_span_start_creates_otel_span(self, tracer_provider_and_ex
         assert "openai.realtime.transcription" in span_names
 
         transcription_span = next(s for s in spans if s.name == "openai.realtime.transcription")
-        assert transcription_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "realtime"
-        assert transcription_span.attributes["gen_ai.system"] == "openai"
+        assert transcription_span.attributes["gen_ai.provider.name"] == "openai"
         assert transcription_span.attributes["gen_ai.operation.name"] == "transcription"
 
     def test_transcription_span_captures_model_and_format(self, tracer_provider_and_exporter):
@@ -306,8 +303,7 @@ def test_speech_group_span_creates_otel_span(self, tracer_provider_and_exporter)
         assert "openai.realtime.speech_group" in span_names
 
         speech_group_span = next(s for s in spans if s.name == "openai.realtime.speech_group")
-        assert speech_group_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "realtime"
-        assert speech_group_span.attributes["gen_ai.system"] == "openai"
+        assert speech_group_span.attributes["gen_ai.provider.name"] == "openai"
         assert speech_group_span.attributes["gen_ai.operation.name"] == "speech_group"
         assert speech_group_span.status.status_code == StatusCode.OK