Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,20 @@

_OPENINF_TOOL_LIST_KEY = "llm.tools"

# GenAI semconv attribute keys (v0.55.0+ default format)
_GEN_AI_INPUT_MESSAGES = "gen_ai.input.messages"
_GEN_AI_OUTPUT_MESSAGES = "gen_ai.output.messages"
_GEN_AI_TOOL_DEFINITIONS = "gen_ai.tool.definitions"

# Valid enum values hoisted to module scope: built once so the membership
# checks in _extract_llm_provider_and_system are O(1) frozenset lookups
# instead of rebuilding a set on every span.
_VALID_LLM_PROVIDERS = frozenset(v.value for v in sc.OpenInferenceLLMProviderValues)
_VALID_LLM_SYSTEMS = frozenset(v.value for v in sc.OpenInferenceLLMSystemValues)

# Attribute keys that may carry the request's tool/function list, in
# priority order — the first key present on a span wins.
_TOOL_KEY_CANDIDATES = [
    SpanAttributes.LLM_REQUEST_FUNCTIONS,
    "llm.request.tools",
    _GEN_AI_TOOL_DEFINITIONS,
]


# ---------------------------------------------------------------------------
# Helpers
Expand Down Expand Up @@ -107,7 +121,7 @@ def _map_generic_span(attrs: Dict[str, Any]) -> Dict[str, Any]:
return mapped


def _collect_oi_messages(
def _parse_messages_from_attributes(
attrs: Dict[str, Any], prefix: str
) -> tuple[List[oi.Message], List[Optional[str]]]:
"""
Expand Down Expand Up @@ -170,6 +184,71 @@ def _collect_oi_messages(
return messages, finish_reasons


def _parse_messages_from_json(
    raw_json: str,
) -> tuple[List[oi.Message], List[Optional[str]]]:
    """
    Decode a ``gen_ai.input.messages`` / ``gen_ai.output.messages`` attribute
    (OTel GenAI semconv 0.5.1+) into OpenInference messages.

    The attribute is a JSON string holding a list of messages shaped like
    ``{"role": "...", "parts": [{"type": "text", "content": "..."},
    {"type": "tool_call", ...}], "finish_reason": "..."}``.

    Returns two parallel lists: the parsed messages and, per message, its
    ``finish_reason`` (``None`` when absent). Malformed JSON yields
    ``([], [])`` instead of raising.
    """
    if isinstance(raw_json, str):
        try:
            decoded = json.loads(raw_json)
        except Exception:
            # Best-effort: a broken attribute must never crash span export.
            return [], []
    else:
        # Already-deserialized input is accepted as-is.
        decoded = raw_json
    if not isinstance(decoded, list):
        decoded = [decoded]

    parsed: List[oi.Message] = []
    reasons: List[Optional[str]] = []

    for entry in decoded:
        if not isinstance(entry, dict):
            continue
        message = oi.Message(role=entry.get("role", "user"))

        segments = entry.get("parts") or []
        texts: List[str] = []
        calls: List[oi.ToolCall] = []

        for segment in segments:
            if not isinstance(segment, dict):
                continue
            seg_type = segment.get("type", "")
            if seg_type == "text":
                texts.append(segment.get("content", ""))
            elif seg_type == "tool_call":
                call = oi.ToolCall(
                    function=oi.ToolCallFunction(
                        name=segment.get("name", ""),
                        arguments=segment.get("arguments", ""),
                    )
                )
                # The tool-call id is optional in the payload; only attach
                # it when present and non-empty.
                if segment.get("id"):
                    call["id"] = segment["id"]
                calls.append(call)
            elif seg_type == "tool_call_response":
                # tool role messages carry the response as content
                texts.append(str(segment.get("response", "")))

        # If no parts array, fall back to top-level content
        if not segments and "content" in entry:
            texts.append(str(entry["content"]))

        if texts:
            message["content"] = texts[0] if len(texts) == 1 else "\n".join(texts)
        if calls:
            message["tool_calls"] = calls

        parsed.append(message)
        reasons.append(entry.get("finish_reason"))

    return parsed, reasons


def _handle_tool_list(raw: Any, dst: Dict[str, Any]) -> List[oi.Tool]:
"""
Convert OpenLLMetry functions/tools list into OpenInference tools list
Expand Down Expand Up @@ -205,11 +284,13 @@ def _extract_llm_provider_and_system(
provider_val: Optional[str] = str(
attrs.get(GenAIAttributes.GEN_AI_PROVIDER_NAME, "unknown")
).lower()
if provider_val not in {v.value for v in sc.OpenInferenceLLMProviderValues}:
if provider_val not in _VALID_LLM_PROVIDERS:
provider_val = None

# gen_ai.system is deprecated (OTel semconv v1.37.0); v0.55.0+ only emits
# gen_ai.provider.name, so system_val will be None for newer spans.
system_val: Optional[str] = str(attrs.get(GenAIAttributes.GEN_AI_SYSTEM, "unknown")).lower()
if system_val not in {v.value for v in sc.OpenInferenceLLMSystemValues}:
if system_val not in _VALID_LLM_SYSTEMS:
system_val = None

return provider_val, system_val
Expand All @@ -230,13 +311,30 @@ def on_end(self, span: Any) -> None:
attrs.update(generic)
return

# Skip if no LLM prompt data
if not any(k.startswith("gen_ai.prompt.") for k in attrs):
# Detect which message format is present.
# The JSON-based format (v0.55.0+) is the default; the legacy
# attribute-per-field format is kept only as a fallback.
has_json_messages = _GEN_AI_INPUT_MESSAGES in attrs
has_legacy_attributes = any(k.startswith("gen_ai.prompt.") for k in attrs)

# Skip if no LLM prompt data in either format
if not has_json_messages and not has_legacy_attributes:
return

# Reconstruct messages
inputs, input_finish_reasons = _collect_oi_messages(attrs, "gen_ai.prompt.")
outputs, output_finish_reasons = _collect_oi_messages(attrs, "gen_ai.completion.")
# Reconstruct messages, preferring the current format
if has_json_messages:
inputs, input_finish_reasons = _parse_messages_from_json(
attrs.get(_GEN_AI_INPUT_MESSAGES, "[]")
)
outputs, output_finish_reasons = _parse_messages_from_json(
attrs.get(_GEN_AI_OUTPUT_MESSAGES, "[]")
)
else:
# Fallback for older OpenLLMetry versions (< 0.55.0)
inputs, input_finish_reasons = _parse_messages_from_attributes(attrs, "gen_ai.prompt.")
outputs, output_finish_reasons = _parse_messages_from_attributes(
attrs, "gen_ai.completion."
)

# Token usage
prompt_toks = _safe_int(attrs.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS)) or 0
Expand All @@ -260,11 +358,7 @@ def on_end(self, span: Any) -> None:
if GenAIAttributes.GEN_AI_REQUEST_MODEL in attrs:
invocation_params.setdefault("model", attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL])
# Tools
tool_key = (
SpanAttributes.LLM_REQUEST_FUNCTIONS
if SpanAttributes.LLM_REQUEST_FUNCTIONS in attrs
else ("llm.request.tools" if "llm.request.tools" in attrs else None)
)
tool_key = next((k for k in _TOOL_KEY_CANDIDATES if k in attrs), None)
oi_tools: List[oi.Tool] = []
if tool_key:
oi_tools = _handle_tool_list(attrs[tool_key], attrs)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from typing import Any, Dict, Mapping, Optional, cast

import openai
Expand All @@ -15,6 +16,7 @@
from openinference.instrumentation.openllmetry import OpenInferenceSpanProcessor
from openinference.instrumentation.openllmetry._span_processor import (
_extract_llm_provider_and_system,
_parse_messages_from_json,
)
from openinference.semconv.trace import (
OpenInferenceLLMProviderValues,
Expand All @@ -34,7 +36,7 @@ def is_openinference_span(span: ReadableSpan) -> bool:

class TestOpenLLMetryInstrumentor:
@pytest.mark.vcr
def test_openllmetry_instrumentor(
def test_span_processor_with_legacy_message_format(
self,
openai_api_key: str,
) -> None:
Expand Down Expand Up @@ -92,7 +94,11 @@ def test_openllmetry_instrumentor(

# LLM identity
assert attributes[SpanAttributes.LLM_MODEL_NAME] == "gpt-4.1"
assert attributes[SpanAttributes.LLM_SYSTEM] == OpenInferenceLLMSystemValues.OPENAI.value
# gen_ai.system is deprecated; latest OpenLLMetry only emits gen_ai.provider.name
if SpanAttributes.LLM_SYSTEM in attributes:
assert (
attributes[SpanAttributes.LLM_SYSTEM] == OpenInferenceLLMSystemValues.OPENAI.value
)
assert isinstance(attributes[SpanAttributes.LLM_INVOCATION_PARAMETERS], str)
total_tokens = attributes.get(SpanAttributes.LLM_TOKEN_COUNT_TOTAL)
assert isinstance(total_tokens, (int, float))
Expand Down Expand Up @@ -206,3 +212,144 @@ def test_extract_llm_provider_and_system(

assert provider == expected_provider
assert system == expected_system


class TestUpdatedGenAIMessageFormat:
    """Tests for the updated gen_ai.input/output.messages format (OTel GenAI semconv 0.5.1+)."""

    def _build_pipeline(self):
        """Wire an OpenInference processor and an in-memory exporter to a fresh tracer."""
        exporter = InMemorySpanExporter()
        provider = TracerProvider()
        provider.add_span_processor(OpenInferenceSpanProcessor())
        provider.add_span_processor(SimpleSpanProcessor(exporter))
        return exporter, provider.get_tracer(__name__)

    def test_parse_messages_from_json_simple(self) -> None:
        payload = json.dumps([{"role": "user", "parts": [{"type": "text", "content": "Hello"}]}])
        msgs, reasons = _parse_messages_from_json(payload)
        assert len(msgs) == 1
        assert msgs[0]["role"] == "user"
        assert msgs[0]["content"] == "Hello"
        assert reasons == [None]

    def test_parse_messages_from_json_with_tool_calls(self) -> None:
        payload = json.dumps(
            [
                {
                    "role": "assistant",
                    "parts": [
                        {"type": "text", "content": "Let me check."},
                        {
                            "type": "tool_call",
                            "name": "get_weather",
                            "id": "call_123",
                            "arguments": {"city": "Paris"},
                        },
                    ],
                    "finish_reason": "tool_calls",
                }
            ]
        )
        msgs, reasons = _parse_messages_from_json(payload)
        assert len(msgs) == 1
        (message,) = msgs
        assert message["content"] == "Let me check."
        tool_calls = message["tool_calls"]
        assert len(tool_calls) == 1
        assert tool_calls[0]["function"]["name"] == "get_weather"
        assert tool_calls[0]["id"] == "call_123"
        assert reasons == ["tool_calls"]

    def test_span_processor_with_invalid_json_messages(self) -> None:
        """Verify on_end handles malformed JSON message attributes without crashing."""
        exporter, tracer = self._build_pipeline()

        with tracer.start_as_current_span("openai.chat") as span:
            for key, value in {
                "gen_ai.input.messages": "not valid json{{{",
                "gen_ai.output.messages": "also broken",
                "gen_ai.request.model": "gpt-4.1",
                "gen_ai.usage.input_tokens": 10,
                "gen_ai.usage.output_tokens": 5,
                "gen_ai.provider.name": "openai",
            }.items():
                span.set_attribute(key, value)

        finished = exporter.get_finished_spans()
        assert len(finished) == 1
        attributes = dict(cast(Mapping[str, AttributeValue], finished[0].attributes))
        assert (
            attributes[SpanAttributes.OPENINFERENCE_SPAN_KIND]
            == OpenInferenceSpanKindValues.LLM.value
        )
        assert attributes[SpanAttributes.LLM_MODEL_NAME] == "gpt-4.1"
        assert attributes[SpanAttributes.LLM_TOKEN_COUNT_PROMPT] == 10
        assert attributes[SpanAttributes.LLM_TOKEN_COUNT_COMPLETION] == 5
        assert attributes[SpanAttributes.LLM_TOKEN_COUNT_TOTAL] == 15
        assert (
            attributes[SpanAttributes.LLM_PROVIDER] == OpenInferenceLLMProviderValues.OPENAI.value
        )

    def test_span_processor_with_json_message_format(self) -> None:
        """Verify on_end sets OI attributes when spans use the updated message format."""
        exporter, tracer = self._build_pipeline()

        input_payload = json.dumps(
            [
                {"role": "user", "parts": [{"type": "text", "content": "What is 2+2?"}]},
                {
                    "role": "tool",
                    "parts": [
                        {
                            "type": "tool_call_response",
                            "id": "call_123",
                            "response": {"result": 4},
                        }
                    ],
                },
            ]
        )
        output_payload = json.dumps(
            [
                {
                    "role": "assistant",
                    "parts": [{"type": "text", "content": "4"}],
                    "finish_reason": "stop",
                }
            ]
        )

        with tracer.start_as_current_span("openai.chat") as span:
            for key, value in {
                "gen_ai.input.messages": input_payload,
                "gen_ai.output.messages": output_payload,
                "gen_ai.request.model": "gpt-4.1",
                "gen_ai.response.model": "gpt-4.1-2026-04-14",
                "gen_ai.usage.input_tokens": 10,
                "gen_ai.usage.output_tokens": 5,
                "gen_ai.provider.name": "openai",
            }.items():
                span.set_attribute(key, value)

        finished = exporter.get_finished_spans()
        assert len(finished) == 1

        attributes = dict(cast(Mapping[str, AttributeValue], finished[0].attributes))

        assert (
            attributes[SpanAttributes.OPENINFERENCE_SPAN_KIND]
            == OpenInferenceSpanKindValues.LLM.value
        )
        assert attributes[SpanAttributes.LLM_MODEL_NAME] == "gpt-4.1"
        assert isinstance(attributes[SpanAttributes.INPUT_VALUE], str)
        assert isinstance(attributes[SpanAttributes.OUTPUT_VALUE], str)
        assert attributes["llm.input_messages.0.message.role"] == "user"
        assert attributes["llm.input_messages.0.message.content"] == "What is 2+2?"
        assert attributes["llm.input_messages.1.message.role"] == "tool"
        assert attributes["llm.input_messages.1.message.content"] == "{'result': 4}"
        assert attributes["llm.output_messages.0.message.role"] == "assistant"
        assert attributes["llm.output_messages.0.message.content"] == "4"
        assert attributes[SpanAttributes.LLM_TOKEN_COUNT_PROMPT] == 10
        assert attributes[SpanAttributes.LLM_TOKEN_COUNT_COMPLETION] == 5
        assert attributes[SpanAttributes.LLM_TOKEN_COUNT_TOTAL] == 15
        # gen_ai.system is deprecated; v0.55.0+ only emits gen_ai.provider.name,
        # so LLM_SYSTEM is not set for new-format spans.
        assert SpanAttributes.LLM_SYSTEM not in attributes
        assert (
            attributes[SpanAttributes.LLM_PROVIDER] == OpenInferenceLLMProviderValues.OPENAI.value
        )
Loading