def _parse_messages_from_json(
    raw_json: str,
) -> tuple[List[oi.Message], List[Optional[str]]]:
    """
    Parse a ``gen_ai.input.messages`` / ``gen_ai.output.messages`` attribute
    value into OpenInference messages.

    Each message is expected to look like ``{"role": "...", "parts":
    [{"type": "text", "content": "..."}, {"type": "tool_call", ...}],
    "finish_reason": "..."}``. A message without ``parts`` may carry a
    top-level ``content`` instead.

    ``raw_json`` is normally the JSON string stored on the span; an already
    decoded value (list or single dict) is accepted as-is. A single object is
    treated as a one-element list.

    Returns a pair ``(messages, finish_reasons)`` of equal length, where
    ``finish_reasons[i]`` is the ``finish_reason`` of ``messages[i]`` or
    ``None``. Undecodable JSON yields ``([], [])``; entries that are not JSON
    objects are skipped. This function is called while a span is ending, so
    malformed payloads are tolerated rather than raised.
    """
    if isinstance(raw_json, str):
        try:
            items = json.loads(raw_json)
        except (ValueError, RecursionError):
            # JSONDecodeError is a ValueError; RecursionError covers
            # pathologically nested payloads.
            return [], []
    else:
        items = raw_json
    if not isinstance(items, list):
        items = [items]

    messages: List[oi.Message] = []
    finish_reasons: List[Optional[str]] = []

    for item in items:
        if not isinstance(item, dict):
            continue
        msg = oi.Message(role=item.get("role", "user"))

        # A malformed (non-sequence) ``parts`` value is treated as absent so
        # that iterating it cannot raise.
        parts = item.get("parts")
        if not isinstance(parts, (list, tuple)):
            parts = []

        text_parts: List[str] = []
        tool_calls: List[oi.ToolCall] = []

        for part in parts:
            if not isinstance(part, dict):
                continue
            ptype = part.get("type", "")
            if ptype == "text":
                # Null content counts as empty text; anything else is coerced
                # so the join below always sees strings.
                content = part.get("content", "")
                if content is None:
                    content = ""
                text_parts.append(content if isinstance(content, str) else str(content))
            elif ptype == "tool_call":
                tc = oi.ToolCall(
                    function=oi.ToolCallFunction(
                        name=part.get("name", ""),
                        arguments=part.get("arguments", ""),
                    )
                )
                if part.get("id"):
                    tc["id"] = part["id"]
                tool_calls.append(tc)
            elif ptype == "tool_call_response":
                # Tool-role messages carry the response as content.
                # NOTE(review): str() renders structured responses as a Python
                # repr, not JSON; kept because existing tests pin that output.
                text_parts.append(str(part.get("response", "")))

        # Without usable parts, fall back to top-level content (null = absent).
        if not parts and item.get("content") is not None:
            text_parts.append(str(item["content"]))

        if text_parts:
            msg["content"] = "\n".join(text_parts)
        if tool_calls:
            msg["tool_calls"] = tool_calls

        messages.append(msg)
        finish_reasons.append(item.get("finish_reason"))

    return messages, finish_reasons
_VALID_LLM_PROVIDERS: provider_val = None + # gen_ai.system is deprecated (OTel semconv v1.37.0); v0.55.0+ only emits + # gen_ai.provider.name, so system_val will be None for newer spans. system_val: Optional[str] = str(attrs.get(GenAIAttributes.GEN_AI_SYSTEM, "unknown")).lower() - if system_val not in {v.value for v in sc.OpenInferenceLLMSystemValues}: + if system_val not in _VALID_LLM_SYSTEMS: system_val = None return provider_val, system_val @@ -230,13 +311,30 @@ def on_end(self, span: Any) -> None: attrs.update(generic) return - # Skip if no LLM prompt data - if not any(k.startswith("gen_ai.prompt.") for k in attrs): + # Detect which message format is present. + # The JSON-based format (v0.55.0+) is the default; the legacy + # attribute-per-field format is kept only as a fallback. + has_json_messages = _GEN_AI_INPUT_MESSAGES in attrs + has_legacy_attributes = any(k.startswith("gen_ai.prompt.") for k in attrs) + + # Skip if no LLM prompt data in either format + if not has_json_messages and not has_legacy_attributes: return - # Reconstruct messages - inputs, input_finish_reasons = _collect_oi_messages(attrs, "gen_ai.prompt.") - outputs, output_finish_reasons = _collect_oi_messages(attrs, "gen_ai.completion.") + # Reconstruct messages, preferring the current format + if has_json_messages: + inputs, input_finish_reasons = _parse_messages_from_json( + attrs.get(_GEN_AI_INPUT_MESSAGES, "[]") + ) + outputs, output_finish_reasons = _parse_messages_from_json( + attrs.get(_GEN_AI_OUTPUT_MESSAGES, "[]") + ) + else: + # Fallback for older OpenLLMetry versions (< 0.55.0) + inputs, input_finish_reasons = _parse_messages_from_attributes(attrs, "gen_ai.prompt.") + outputs, output_finish_reasons = _parse_messages_from_attributes( + attrs, "gen_ai.completion." 
+ ) # Token usage prompt_toks = _safe_int(attrs.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS)) or 0 @@ -260,11 +358,7 @@ def on_end(self, span: Any) -> None: if GenAIAttributes.GEN_AI_REQUEST_MODEL in attrs: invocation_params.setdefault("model", attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL]) # Tools - tool_key = ( - SpanAttributes.LLM_REQUEST_FUNCTIONS - if SpanAttributes.LLM_REQUEST_FUNCTIONS in attrs - else ("llm.request.tools" if "llm.request.tools" in attrs else None) - ) + tool_key = next((k for k in _TOOL_KEY_CANDIDATES if k in attrs), None) oi_tools: List[oi.Tool] = [] if tool_key: oi_tools = _handle_tool_list(attrs[tool_key], attrs) diff --git a/python/instrumentation/openinference-instrumentation-openllmetry/tests/openinference/instrumentation/openllmetry/cassettes/test_instrumentor/TestOpenLLMetryInstrumentor.test_openllmetry_instrumentor.yaml b/python/instrumentation/openinference-instrumentation-openllmetry/tests/openinference/instrumentation/openllmetry/cassettes/test_instrumentor/TestOpenLLMetryInstrumentor.test_span_processor_with_legacy_message_format.yaml similarity index 100% rename from python/instrumentation/openinference-instrumentation-openllmetry/tests/openinference/instrumentation/openllmetry/cassettes/test_instrumentor/TestOpenLLMetryInstrumentor.test_openllmetry_instrumentor.yaml rename to python/instrumentation/openinference-instrumentation-openllmetry/tests/openinference/instrumentation/openllmetry/cassettes/test_instrumentor/TestOpenLLMetryInstrumentor.test_span_processor_with_legacy_message_format.yaml diff --git a/python/instrumentation/openinference-instrumentation-openllmetry/tests/openinference/instrumentation/openllmetry/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-openllmetry/tests/openinference/instrumentation/openllmetry/test_instrumentor.py index 9f802a19f3..c0d9a1e5ca 100644 --- 
class TestUpdatedGenAIMessageFormat:
    """Tests for the updated gen_ai.input/output.messages format (OTel GenAI semconv 0.5.1+)."""

    @staticmethod
    def _export_span_attributes(span_attributes: Dict[str, Any]) -> Dict[str, AttributeValue]:
        """Record one ``openai.chat`` span with the given attributes and return what was exported.

        The OpenInference processor is registered before the exporting
        processor, so the returned attributes are those present after it ran.
        """
        exporter = InMemorySpanExporter()
        provider = TracerProvider()
        provider.add_span_processor(OpenInferenceSpanProcessor())
        provider.add_span_processor(SimpleSpanProcessor(exporter))

        with provider.get_tracer(__name__).start_as_current_span("openai.chat") as span:
            for key, value in span_attributes.items():
                span.set_attribute(key, value)

        finished = exporter.get_finished_spans()
        assert len(finished) == 1
        return dict(cast(Mapping[str, AttributeValue], finished[0].attributes))

    def test_parse_messages_from_json_simple(self) -> None:
        payload = json.dumps([{"role": "user", "parts": [{"type": "text", "content": "Hello"}]}])
        parsed, reasons = _parse_messages_from_json(payload)
        assert len(parsed) == 1
        first = parsed[0]
        assert first["role"] == "user"
        assert first["content"] == "Hello"
        assert reasons == [None]

    def test_parse_messages_from_json_with_tool_calls(self) -> None:
        assistant_message = {
            "role": "assistant",
            "parts": [
                {"type": "text", "content": "Let me check."},
                {
                    "type": "tool_call",
                    "name": "get_weather",
                    "id": "call_123",
                    "arguments": {"city": "Paris"},
                },
            ],
            "finish_reason": "tool_calls",
        }
        parsed, reasons = _parse_messages_from_json(json.dumps([assistant_message]))
        assert len(parsed) == 1
        first = parsed[0]
        assert first["content"] == "Let me check."
        assert len(first["tool_calls"]) == 1
        call = first["tool_calls"][0]
        assert call["function"]["name"] == "get_weather"
        assert call["id"] == "call_123"
        assert reasons == ["tool_calls"]

    def test_span_processor_with_invalid_json_messages(self) -> None:
        """Verify on_end handles malformed JSON message attributes without crashing."""
        attributes = self._export_span_attributes(
            {
                "gen_ai.input.messages": "not valid json{{{",
                "gen_ai.output.messages": "also broken",
                "gen_ai.request.model": "gpt-4.1",
                "gen_ai.usage.input_tokens": 10,
                "gen_ai.usage.output_tokens": 5,
                "gen_ai.provider.name": "openai",
            }
        )

        llm_kind = OpenInferenceSpanKindValues.LLM.value
        assert attributes[SpanAttributes.OPENINFERENCE_SPAN_KIND] == llm_kind
        assert attributes[SpanAttributes.LLM_MODEL_NAME] == "gpt-4.1"
        assert attributes[SpanAttributes.LLM_TOKEN_COUNT_PROMPT] == 10
        assert attributes[SpanAttributes.LLM_TOKEN_COUNT_COMPLETION] == 5
        assert attributes[SpanAttributes.LLM_TOKEN_COUNT_TOTAL] == 15
        openai_provider = OpenInferenceLLMProviderValues.OPENAI.value
        assert attributes[SpanAttributes.LLM_PROVIDER] == openai_provider

    def test_span_processor_with_json_message_format(self) -> None:
        """Verify on_end sets OI attributes when spans use the updated message format."""
        user_turn = {"role": "user", "parts": [{"type": "text", "content": "What is 2+2?"}]}
        tool_turn = {
            "role": "tool",
            "parts": [
                {
                    "type": "tool_call_response",
                    "id": "call_123",
                    "response": {"result": 4},
                }
            ],
        }
        assistant_turn = {
            "role": "assistant",
            "parts": [{"type": "text", "content": "4"}],
            "finish_reason": "stop",
        }

        attributes = self._export_span_attributes(
            {
                "gen_ai.input.messages": json.dumps([user_turn, tool_turn]),
                "gen_ai.output.messages": json.dumps([assistant_turn]),
                "gen_ai.request.model": "gpt-4.1",
                "gen_ai.response.model": "gpt-4.1-2026-04-14",
                "gen_ai.usage.input_tokens": 10,
                "gen_ai.usage.output_tokens": 5,
                "gen_ai.provider.name": "openai",
            }
        )

        llm_kind = OpenInferenceSpanKindValues.LLM.value
        assert attributes[SpanAttributes.OPENINFERENCE_SPAN_KIND] == llm_kind
        assert attributes[SpanAttributes.LLM_MODEL_NAME] == "gpt-4.1"
        assert isinstance(attributes[SpanAttributes.INPUT_VALUE], str)
        assert isinstance(attributes[SpanAttributes.OUTPUT_VALUE], str)
        assert attributes["llm.input_messages.0.message.role"] == "user"
        assert attributes["llm.input_messages.0.message.content"] == "What is 2+2?"
        assert attributes["llm.input_messages.1.message.role"] == "tool"
        assert attributes["llm.input_messages.1.message.content"] == "{'result': 4}"
        assert attributes["llm.output_messages.0.message.role"] == "assistant"
        assert attributes["llm.output_messages.0.message.content"] == "4"
        assert attributes[SpanAttributes.LLM_TOKEN_COUNT_PROMPT] == 10
        assert attributes[SpanAttributes.LLM_TOKEN_COUNT_COMPLETION] == 5
        assert attributes[SpanAttributes.LLM_TOKEN_COUNT_TOTAL] == 15
        # gen_ai.system is deprecated; v0.55.0+ only emits gen_ai.provider.name,
        # so LLM_SYSTEM is not set for new-format spans.
        assert SpanAttributes.LLM_SYSTEM not in attributes
        openai_provider = OpenInferenceLLMProviderValues.OPENAI.value
        assert attributes[SpanAttributes.LLM_PROVIDER] == openai_provider