diff --git a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_hooks.py b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_hooks.py index e3805cb209..5282560386 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_hooks.py +++ b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_hooks.py @@ -24,6 +24,8 @@ dont_throw, GEN_AI_HANDOFF_FROM_AGENT, GEN_AI_HANDOFF_TO_AGENT, + GEN_AI_HANDOFF_PARENT_AGENT, + OPENAI_AGENT_HANDOFFS, ) try: @@ -45,139 +47,346 @@ SpeechGroupSpanData = None -def _extract_prompt_attributes(otel_span, input_data, trace_content: bool): +# --------------------------------------------------------------------------- +# Finish-reason mapping: OpenAI → OTel GenAI semconv +# --------------------------------------------------------------------------- +_FINISH_REASON_MAP = { + "stop": "stop", + "tool_calls": "tool_call", # plural → singular per OTel spec + "function_call": "tool_call", # legacy → OTel value + "length": "length", + "content_filter": "content_filter", + "error": "error", + # Responses API uses status instead of finish_reason + "completed": "stop", + "failed": "error", + "cancelled": "cancelled", # distinct from error; preserved as extension string + "incomplete": "incomplete", # may be content_filter or token limit; preserve semantics +} + + +def _map_finish_reason(raw): + """Map a provider-specific finish reason to the OTel enum value.""" + if raw is None: + return None + return _FINISH_REASON_MAP.get(raw, raw) + + +def _parse_arguments(args): + """Best-effort parse of tool-call arguments to a dict (object) or None. + + Per OTel spec, arguments must be objects, never raw JSON strings. + Falls back to ``{"_raw": args}`` when the string is not valid JSON + or parses to a non-dict type. """ - Extract prompt/input data from messages and set them as span attributes. + if args is None: + return None + if isinstance(args, dict): + return args + if isinstance(args, str): + if not args.strip(): + return None + try: + parsed = json.loads(args) + if isinstance(parsed, dict): + return parsed + # Parsed OK but not a dict (e.g. 
array, scalar) – wrap + return {"_raw": args} + except (json.JSONDecodeError, ValueError): + return {"_raw": args} + return {"_raw": str(args)} + + +def _normalize_tool_call(tool_call): + """Normalize a tool call (object or dict) into a flat {id, name, arguments} dict.""" + if isinstance(tool_call, dict): + tc = dict(tool_call) + if "function" in tc: + function = tc["function"] + if isinstance(function, dict): + tc = { + "id": tc.get("id"), + "name": function.get("name"), + "arguments": function.get("arguments"), + } + else: + tc = { + "id": tc.get("id"), + "name": getattr(function, "name", None), + "arguments": getattr(function, "arguments", None), + } + return tc + # Object with attributes + tc_dict: dict = {} + if hasattr(tool_call, "id"): + tc_dict["id"] = tool_call.id + if hasattr(tool_call, "function"): + func = tool_call.function + if hasattr(func, "name"): + tc_dict["name"] = func.name + if hasattr(func, "arguments"): + tc_dict["arguments"] = func.arguments + elif hasattr(tool_call, "name"): + tc_dict["name"] = tool_call.name + if hasattr(tool_call, "arguments") and "arguments" not in tc_dict: + tc_dict["arguments"] = tool_call.arguments + return tc_dict + + +_MESSAGE_ATTRS = ( + "role", "content", "tool_call_id", "tool_calls", + "type", "name", "arguments", "call_id", "output", +) + + +def _msg_to_dict(message) -> dict: + """Normalize a message (dict or SDK object) into a plain dict.""" + if isinstance(message, dict): + return message + return { + attr: getattr(message, attr) + for attr in _MESSAGE_ATTRS + if hasattr(message, attr) + } + + +def _stringify_content(content) -> str: + """Coerce non-string content to a string for simple text parts.""" + if isinstance(content, str): + return content + return json.dumps(content) + + +def _reasoning_text(s): + """Extract text from a reasoning summary item (object or dict).""" + if isinstance(s, dict): + return s.get("text", "") + return getattr(s, "text", str(s)) + + +def _content_block_to_part(block) -> dict: + """Convert a single multimodal content block to an OTel part. + + Handles dict blocks (OpenAI chat format) and SDK objects. + """ + if isinstance(block, str): + return {"type": "text", "content": block} + + if isinstance(block, dict): + return _dict_block_to_part(block) + + return _object_block_to_part(block) + + +def _url_to_part(url: str) -> dict: + """Dispatch an image URL to UriPart or BlobPart depending on scheme. + + data: URLs carry inline base64 content and must be BlobPart per OTel spec. + All other URLs (https:, http:, gs:, …) become UriPart. 
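+
+    Illustrative mapping (hypothetical values):
+        "data:image/png;base64,AAAA"  → BlobPart {"type": "blob",
+            "modality": "image", "content": "AAAA", "mime_type": "image/png"}
+        "https://example.com/cat.png" → UriPart {"type": "uri",
+            "modality": "image", "uri": "https://example.com/cat.png"}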
+ """ + if url.startswith("data:"): + # data:;base64, or data:, + header, _, content = url.partition(",") + mime: str | None = None + if header.startswith("data:"): + mime_part = header[5:] # strip "data:" + mime = mime_part.split(";")[0] or None + part: dict = {"type": "blob", "modality": "image", "content": content} + if mime: + part["mime_type"] = mime + return part + return {"type": "uri", "modality": "image", "uri": url} + + +_AUDIO_MIME: dict = { + "mp3": "audio/mpeg", + "wav": "audio/wav", + "ogg": "audio/ogg", + "flac": "audio/flac", + "webm": "audio/webm", + "m4a": "audio/mp4", +} + + +def _audio_blob_part(data: str, fmt: str | None) -> dict: + """Build a BlobPart for audio data, including mime_type when format is known.""" + part: dict = {"type": "blob", "modality": "audio", "content": data} + if fmt: + mime = _AUDIO_MIME.get(fmt) or f"audio/{fmt}" + part["mime_type"] = mime + return part + + +def _dict_block_to_part(block: dict) -> dict: + """Map a dict-based content block (OpenAI format) to an OTel part. + + Spec mapping (openllmetry-semconv-review.md §1 / Part Types): + OpenAI image_url data URL → OTel BlobPart {type:blob, modality:image, ...} + OpenAI image_url https URL → OTel UriPart {type:uri, modality:image, uri:...} + OpenAI input_audio → OTel BlobPart {type:blob, modality:audio, mime_type:...} + """ + btype = block.get("type", "text") + if btype in ("text", "input_text", "output_text"): + return {"type": "text", "content": block.get("text", "")} + if btype == "image_url": + url_info = block.get("image_url", {}) + url = ( + url_info.get("url", "") + if isinstance(url_info, dict) + else str(url_info) + ) + return _url_to_part(url) + if btype == "input_audio": + audio_info = block.get("input_audio", {}) + if isinstance(audio_info, dict): + data = audio_info.get("data", "") + fmt = audio_info.get("format") + else: + data = str(audio_info) + fmt = None + return _audio_blob_part(data, fmt) + return {"type": btype, **{k: v for k, v in block.items() if k != "type"}} + + +def _object_block_to_part(block) -> dict: + """Map an SDK-object content block via getattr.""" + btype = getattr(block, "type", "text") + if btype in ("text", "input_text", "output_text"): + return { + "type": "text", + "content": getattr(block, "text", str(block)), + } + if btype == "image_url": + url_obj = getattr(block, "image_url", None) + url = getattr(url_obj, "url", str(url_obj)) if url_obj else "" + return _url_to_part(url) + if btype == "input_audio": + audio_obj = getattr(block, "input_audio", None) + data = getattr(audio_obj, "data", str(audio_obj)) if audio_obj else "" + fmt = getattr(audio_obj, "format", None) if audio_obj else None + return _audio_blob_part(data, fmt) + return {"type": btype, "content": str(block)} + + +def _content_to_parts(content) -> list: + """Convert message content (str | list | scalar) into a list of OTel parts.""" + if isinstance(content, str): + return [{"type": "text", "content": content}] + if isinstance(content, list): + return [_content_block_to_part(block) for block in content] + return [{"type": "text", "content": str(content)}] + + +def _tool_call_to_part(tool_call) -> dict: + """Convert a single tool call to an OTel tool_call part.""" + tc = _normalize_tool_call(tool_call) + part: dict = {"type": "tool_call"} + if tc.get("id"): + part["id"] = tc["id"] + # name is required by OTel ToolCallRequestPart; fall back to "" rather than omit + part["name"] = tc.get("name") or "" + if tc.get("arguments") is not None: + part["arguments"] = _parse_arguments(tc["arguments"]) + 
return part
+
+
+def _build_tool_response_part(call_id, content) -> dict:
+    """Build a tool_call_response part from an id and optional content."""
+    part: dict = {"type": "tool_call_response"}
+    if call_id is not None:
+        part["id"] = call_id
+    if content is None:
+        part["response"] = ""
+    elif isinstance(content, (dict, list)):
+        part["response"] = content
+    else:
+        part["response"] = _stringify_content(content)
+    return part
+
+
+def _convert_chat_message(msg: dict):
+    """Convert a role-based chat message to a ``(role, parts)`` tuple."""
+    role = msg["role"]
+    content = msg.get("content")
+    tool_call_id = msg.get("tool_call_id")
+    tool_calls = msg.get("tool_calls")
+
+    if role == "tool" and tool_call_id:
+        return role, [_build_tool_response_part(tool_call_id, content)]
+
+    parts = []
+    if tool_calls:
+        if content is not None:
+            if isinstance(content, list):
+                parts.extend(_content_to_parts(content))
+            else:
+                text = _stringify_content(content)
+                if text:
+                    parts.append({"type": "text", "content": text})
+        parts.extend(_tool_call_to_part(tc) for tc in tool_calls)
+    elif content is not None:
+        parts = _content_to_parts(content)
+
+    return role, parts
+
+
+def _convert_agents_sdk_message(msg: dict):
+    """Convert an Agents SDK type-based message to ``(role, parts)``.
+
+    Returns ``(None, [])`` for unrecognized item types.
+    """
+    msg_type = msg["type"]
+    if msg_type == "function_call":
+        part: dict = {"type": "tool_call", "name": msg.get("name", "")}
+        call_id = msg.get("id")
+        if call_id:
+            part["id"] = call_id
+        if msg.get("arguments") is not None:
+            part["arguments"] = _parse_arguments(msg["arguments"])
+        return "assistant", [part]
+
+    if msg_type == "function_call_output":
+        part = _build_tool_response_part(
+            msg.get("call_id"),
+            msg.get("output"),
+        )
+        return "tool", [part]
+
+    return None, []
+
+
+def _extract_prompt_attributes(otel_span, input_data, trace_content: bool):
+    """Set ``gen_ai.input.messages`` using the OTel parts-based schema.
 
     Handles both OpenAI chat format (role/content) and Agents SDK
     format (type/function_call/function_call_output).
+
+    Only emitted when *trace_content* is True (opt-in content attribute).
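+
+    Illustrative serialization (hypothetical input, trace_content=True):
+        input_data = [{"role": "user", "content": "What is the weather in London?"}]
+        sets gen_ai.input.messages to
+        '[{"role": "user", "parts": [{"type": "text",
+          "content": "What is the weather in London?"}]}]'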
""" - if not input_data: + if not input_data or not trace_content: return - for i, message in enumerate(input_data): - prefix = f"{GenAIAttributes.GEN_AI_PROMPT}.{i}" - - # Convert message to dict for unified handling - if isinstance(message, dict): - msg = message - else: - # Convert object to dict - msg = {} - for attr in [ - "role", - "content", - "tool_call_id", - "tool_calls", - "type", - "name", - "arguments", - "call_id", - "output", - ]: - if hasattr(message, attr): - msg[attr] = getattr(message, attr) - - # Determine message format and extract data - role = None - content = None - tool_call_id = None - tool_calls = None + messages = [] + for message in input_data: + msg = _msg_to_dict(message) if "role" in msg: - # Standard OpenAI chat format - role = msg["role"] - content = msg.get("content") - tool_call_id = msg.get("tool_call_id") - tool_calls = msg.get("tool_calls") + role, parts = _convert_chat_message(msg) elif "type" in msg: - # OpenAI Agents SDK format - msg_type = msg["type"] - if msg_type == "function_call": - # Tool calls are assistant messages - role = "assistant" - # Create tool_calls structure matching OpenAI SDK format - tool_calls = [ - { - "id": msg.get("id", ""), - "name": msg.get("name", ""), - } | ( - {"arguments": msg.get("arguments", "")} - if trace_content else {} - ) - ] - elif ( - msg_type == "function_call_output" - and trace_content - ): - # Tool outputs are tool messages - role = "tool" - content = msg.get("output") - tool_call_id = msg.get("call_id") - - # Set role attribute - if role: - otel_span.set_attribute(f"{prefix}.role", role) - - # Set content attribute - if content is not None and trace_content: - if not isinstance(content, str): - content = json.dumps(content) - otel_span.set_attribute(f"{prefix}.content", content) - - # Set tool_call_id for tool result messages - if tool_call_id: - otel_span.set_attribute(f"{prefix}.tool_call_id", tool_call_id) - - # Set tool_calls for assistant messages with tool calls - if tool_calls: - for j, tool_call in enumerate(tool_calls): - # Convert to dict if needed - if not isinstance(tool_call, dict): - tc_dict = {} - if hasattr(tool_call, "id"): - tc_dict["id"] = tool_call.id - if hasattr(tool_call, "function"): - func = tool_call.function - if hasattr(func, "name"): - tc_dict["name"] = func.name - if hasattr(func, "arguments"): - tc_dict["arguments"] = func.arguments - elif hasattr(tool_call, "name"): - tc_dict["name"] = tool_call.name - if hasattr(tool_call, "arguments"): - tc_dict["arguments"] = tool_call.arguments - tool_call = tc_dict - - # Extract function details if nested (standard OpenAI format) - if "function" in tool_call: - function = tool_call["function"] - tool_call = { - "id": tool_call.get("id"), - "name": function.get("name"), - "arguments": function.get("arguments"), - } - - # Set tool call attributes - if tool_call.get("id"): - otel_span.set_attribute( - f"{prefix}.tool_calls.{j}.id", tool_call["id"] - ) - if tool_call.get("name"): - otel_span.set_attribute( - f"{prefix}.tool_calls.{j}.name", tool_call["name"] - ) - if tool_call.get("arguments") and trace_content: - args = tool_call["arguments"] - if not isinstance(args, str): - args = json.dumps(args) - otel_span.set_attribute(f"{prefix}.tool_calls.{j}.arguments", args) + role, parts = _convert_agents_sdk_message(msg) + else: + continue + + if role and parts: + messages.append({"role": role, "parts": parts}) + + if messages: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_INPUT_MESSAGES, json.dumps(messages) + ) def 
_extract_response_attributes(otel_span, response, trace_content: bool): """ Extract model settings, completions, and usage from a response object - and set them as span attributes. + and set them as span attributes using the OTel parts-based schema. Returns a dict of model_settings for potential use by parent spans. """ @@ -208,76 +417,146 @@ def _extract_response_attributes(otel_span, response, trace_content: bool): if hasattr(response, "model") and response.model: model_settings["model"] = response.model - otel_span.set_attribute(GenAIAttributes.GEN_AI_REQUEST_MODEL, response.model) + otel_span.set_attribute(GenAIAttributes.GEN_AI_RESPONSE_MODEL, response.model) + + if hasattr(response, "id") and response.id: + otel_span.set_attribute(GenAIAttributes.GEN_AI_RESPONSE_ID, response.id) if ( hasattr(response, "frequency_penalty") and response.frequency_penalty is not None ): model_settings["frequency_penalty"] = response.frequency_penalty + otel_span.set_attribute( + GenAIAttributes.GEN_AI_REQUEST_FREQUENCY_PENALTY, + response.frequency_penalty, + ) - # Extract completions from response.output + # Map finish reason (top-level fallback) + raw_finish_reason = getattr(response, "finish_reason", None) + if raw_finish_reason is None: + raw_finish_reason = getattr(response, "status", None) + mapped_finish_reason = _map_finish_reason(raw_finish_reason) + + # Extract completions from response.output. + # gen_ai.response.finish_reasons is Recommended metadata (not opt-in content), + # so we always iterate output items to collect per-item finish reasons, even + # when trace_content=False. Message content is only serialised when trace_content + # is True. if hasattr(response, "output") and response.output: - for i, output in enumerate(response.output): - if hasattr(output, "content") and output.content and trace_content: - # Text message with content array (ResponseOutputMessage) - content_text = "" - for content_item in output.content: - if hasattr(content_item, "text"): - content_text += content_item.text - - if content_text: - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content", content_text - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.role", - getattr(output, "role", "assistant"), - ) + output_messages = [] + per_item_reasons: list = [] - elif hasattr(output, "name"): - # Function/tool call (ResponseFunctionToolCall) - tool_name = getattr(output, "name", "unknown_tool") - arguments = getattr(output, "arguments", "{}") - tool_call_id = getattr(output, "call_id", f"call_{i}") + for output in response.output: + item_type = getattr(output, "type", None) + + if item_type == "function_call" or ( + item_type is None and getattr(output, "call_id", None) + ): + # Function/tool call always contributes "tool_call" regardless of + # the response-level finish_reason. 
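+                # Hypothetical example: a response whose only output item is
+                # a function_call yields finish_reasons == ("tool_call",),
+                # even when the Responses API reports status "completed".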
+ item_reason = _map_finish_reason("tool_calls") + per_item_reasons.append(item_reason) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.role", "assistant" - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.finish_reason", - "tool_calls", - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.tool_calls.0.name", - tool_name, - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.tool_calls.0.id", - tool_call_id, - ) if trace_content: - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.tool_calls.0.arguments", - arguments, - ) + tool_name = getattr(output, "name", "unknown_tool") + tool_call_id = getattr(output, "call_id", None) + part: dict = {"type": "tool_call", "name": tool_name} + if tool_call_id: + part["id"] = tool_call_id + raw_args = getattr(output, "arguments", None) + if raw_args is not None: + part["arguments"] = _parse_arguments(raw_args) + output_messages.append({ + "role": "assistant", + "parts": [part], + "finish_reason": item_reason, + }) + + elif hasattr(output, "content") and output.content: + # Text message with content array (ResponseOutputMessage) + item_reason = mapped_finish_reason or "" + per_item_reasons.append(item_reason) - elif hasattr(output, "text") and trace_content: + if trace_content: + parts = [] + for content_item in output.content: + ci_type = getattr(content_item, "type", None) + # Check known types first; use hasattr(.text) only as last resort + # to avoid misclassifying reasoning/refusal items that also carry .text + if ci_type == "output_text": + parts.append({ + "type": "text", + "content": getattr(content_item, "text", ""), + }) + elif ci_type == "refusal": + parts.append({ + "type": "refusal", + "content": getattr(content_item, "refusal", ""), + }) + elif ci_type == "reasoning": + summary = getattr(content_item, "summary", None) + text = "" + if isinstance(summary, list): + text = " ".join(_reasoning_text(s) for s in summary) + elif summary: + text = str(summary) + parts.append({"type": "reasoning", "content": text}) + elif ci_type is not None: + parts.append({ + "type": ci_type, + "content": str(content_item), + }) + elif hasattr(content_item, "text") and content_item.text: + parts.append({ + "type": "text", + "content": content_item.text, + }) + output_messages.append({ + "role": getattr(output, "role", "assistant"), + "parts": parts, + "finish_reason": item_reason, + }) + + elif hasattr(output, "text"): # Direct text content - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content", output.text - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.role", - getattr(output, "role", "assistant"), - ) + item_reason = mapped_finish_reason or "" + per_item_reasons.append(item_reason) - # Add finish reason if available (for non-tool-call cases) - if hasattr(response, "finish_reason") and not hasattr(output, "name"): - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.finish_reason", - response.finish_reason, - ) + if trace_content: + parts = [] + if output.text: + parts.append({"type": "text", "content": output.text}) + output_messages.append({ + "role": getattr(output, "role", "assistant"), + "parts": parts, + "finish_reason": item_reason, + }) + + if trace_content and output_messages: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages) + ) + + # Set top-level finish_reasons from per-item discovery; fall back to the + # response-level reason 
if no output items provided reasons. + meaningful_reasons = list(dict.fromkeys(r for r in per_item_reasons if r)) + if meaningful_reasons: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, + tuple(meaningful_reasons), + ) + elif mapped_finish_reason is not None: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, + (mapped_finish_reason,), + ) + else: + if mapped_finish_reason is not None: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, + (mapped_finish_reason,), + ) # Extract usage data if hasattr(response, "usage") and response.usage: @@ -304,12 +583,45 @@ def _extract_response_attributes(otel_span, response, trace_content: bool): if hasattr(usage, "total_tokens") and usage.total_tokens is not None: otel_span.set_attribute( - SpanAttributes.LLM_USAGE_TOTAL_TOKENS, usage.total_tokens + SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS, usage.total_tokens ) return model_settings +def _extract_tool_definitions(tools): + """Extract tool/function specs into a JSON-serializable list. + + Handles both function-wrapped tools (tool.function.name) and + direct function tools (tool.name). + """ + if not tools: + return [] + tool_defs = [] + for tool in tools: + if hasattr(tool, "function"): + function = tool.function + func_def = { + "name": getattr(function, "name", ""), + "description": getattr(function, "description", ""), + } + if hasattr(function, "parameters"): + func_def["parameters"] = function.parameters + tool_def = { + "type": getattr(tool, "type", "function"), + "function": func_def, + } + tool_defs.append(tool_def) + elif hasattr(tool, "name"): + func_def = {"name": tool.name} + if hasattr(tool, "description"): + func_def["description"] = tool.description + if hasattr(tool, "parameters"): + func_def["parameters"] = tool.parameters + tool_defs.append({"type": "function", "function": func_def}) + return tool_defs + + class OpenTelemetryTracingProcessor(TracingProcessor): """ A tracing processor that creates OpenTelemetry spans for OpenAI Agents. 
@@ -323,7 +635,6 @@ def __init__(self, tracer: Tracer): self._root_spans: Dict[str, Any] = {} # trace_id -> root span self._otel_spans: Dict[str, Any] = {} # agents span -> otel span self._span_contexts: Dict[str, Any] = {} # agents span -> context token - self._last_model_settings: Dict[str, Any] = {} self._reverse_handoffs_dict: OrderedDict[str, str] = OrderedDict() @dont_throw @@ -332,10 +643,10 @@ def on_trace_start(self, trace): # Create a root "Agent Workflow" span for the entire trace workflow_span = self.tracer.start_span( "Agent Workflow", - kind=SpanKind.CLIENT, + kind=SpanKind.INTERNAL, attributes={ SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.WORKFLOW.value, - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", SpanAttributes.TRACELOOP_WORKFLOW_NAME: "Agent Workflow", }, ) @@ -375,177 +686,30 @@ def on_span_start(self, span): otel_span = None if isinstance(span_data, AgentSpanData): - agent_name = getattr(span_data, "name", None) or "unknown_agent" - - if set_agent_name is not None: - set_agent_name(agent_name) - - handoff_parent = None - trace_id = getattr(span, "trace_id", None) - if trace_id: - handoff_key = f"{agent_name}:{trace_id}" - if parent_agent_name := self._reverse_handoffs_dict.pop( - handoff_key, None - ): - handoff_parent = parent_agent_name - - attributes = { - SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.AGENT.value, - GenAIAttributes.GEN_AI_AGENT_NAME: agent_name, - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", - } - - if handoff_parent: - attributes["gen_ai.agent.handoff_parent"] = handoff_parent - - if hasattr(span_data, "handoffs") and span_data.handoffs: - for i, handoff_agent in enumerate(span_data.handoffs): - handoff_info = { - "name": getattr(handoff_agent, "name", "unknown"), - "instructions": getattr( - handoff_agent, "instructions", "No instructions" - ), - } - attributes[f"openai.agent.handoff{i}"] = json.dumps(handoff_info) - - otel_span = self.tracer.start_span( - f"{agent_name}.agent", - kind=SpanKind.CLIENT, - context=parent_context, - attributes=attributes, - ) + otel_span = self._start_agent_span(span_data, parent_context, trace_id) elif isinstance(span_data, HandoffSpanData): - from_agent = getattr(span_data, "from_agent", None) - to_agent = getattr(span_data, "to_agent", None) - - from_agent = from_agent or "unknown" - - to_agent = to_agent or "unknown" - - trace_id = getattr(span, "trace_id", None) - if to_agent and to_agent != "unknown" and trace_id: - handoff_key = f"{to_agent}:{trace_id}" - self._reverse_handoffs_dict[handoff_key] = from_agent - - if len(self._reverse_handoffs_dict) > 1000: - self._reverse_handoffs_dict.popitem(last=False) - - from_agent_span = self._find_agent_span(from_agent) - if from_agent_span: - parent_context = set_span_in_context(from_agent_span) - - handoff_attributes = { - SpanAttributes.TRACELOOP_SPAN_KIND: "handoff", - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", - } - - if from_agent and from_agent != "unknown": - handoff_attributes[GEN_AI_HANDOFF_FROM_AGENT] = from_agent - handoff_attributes[GenAIAttributes.GEN_AI_AGENT_NAME] = from_agent - if to_agent and to_agent != "unknown": - handoff_attributes[GEN_AI_HANDOFF_TO_AGENT] = to_agent - - otel_span = self.tracer.start_span( - f"{from_agent} → {to_agent}.handoff", - kind=SpanKind.INTERNAL, - context=parent_context, - attributes=handoff_attributes, - ) + otel_span = self._start_handoff_span(span_data, parent_context, trace_id) elif isinstance(span_data, FunctionSpanData): - 
tool_name = getattr(span_data, "name", None) or "unknown_tool" - - current_agent_span = self._find_current_agent_span() - if current_agent_span: - parent_context = set_span_in_context(current_agent_span) - - tool_attributes = { - SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.TOOL.value, - GenAIAttributes.GEN_AI_TOOL_NAME: tool_name, - GenAIAttributes.GEN_AI_TOOL_TYPE: "function", - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", - f"{GenAIAttributes.GEN_AI_COMPLETION}.tool.name": tool_name, - f"{GenAIAttributes.GEN_AI_COMPLETION}.tool.type": "function", - f"{GenAIAttributes.GEN_AI_COMPLETION}.tool.strict_json_schema": True, - } + agent_ctx = self._resolve_agent_parent(parent_context) + otel_span = self._start_function_span(span_data, agent_ctx) - if hasattr(span_data, "description") and span_data.description: - # Only use description if it's not a generic class description - desc = span_data.description - if desc and not desc.startswith("Represents a Function Span"): - tool_attributes[GenAIAttributes.GEN_AI_TOOL_DESCRIPTION] = desc - - otel_span = self.tracer.start_span( - f"{tool_name}.tool", - kind=SpanKind.INTERNAL, - context=parent_context, - attributes=tool_attributes, - ) - - elif type(span_data).__name__ == "ResponseSpanData": - current_agent_span = self._find_current_agent_span() - if current_agent_span: - parent_context = set_span_in_context(current_agent_span) - - response_attributes = { - SpanAttributes.LLM_REQUEST_TYPE: "response", - GenAIAttributes.GEN_AI_SYSTEM: "openai", - GenAIAttributes.GEN_AI_OPERATION_NAME: "response", - } - - otel_span = self.tracer.start_span( - "openai.response", - kind=SpanKind.CLIENT, - context=parent_context, - attributes=response_attributes, - start_time=time.time_ns(), - ) - - elif isinstance(span_data, GenerationSpanData): - current_agent_span = self._find_current_agent_span() - if current_agent_span: - parent_context = set_span_in_context(current_agent_span) - - response_attributes = { - SpanAttributes.LLM_REQUEST_TYPE: "chat", - GenAIAttributes.GEN_AI_SYSTEM: "openai", - GenAIAttributes.GEN_AI_OPERATION_NAME: "chat", - } - - otel_span = self.tracer.start_span( - "openai.response", - kind=SpanKind.CLIENT, - context=parent_context, - attributes=response_attributes, - start_time=time.time_ns(), - ) + elif ( + type(span_data).__name__ == "ResponseSpanData" + or isinstance(span_data, GenerationSpanData) + ): + agent_ctx = self._resolve_agent_parent(parent_context) + otel_span = self._start_generation_span(agent_ctx, span_data) elif ( _has_realtime_spans and SpeechSpanData and isinstance(span_data, SpeechSpanData) ): - current_agent_span = self._find_current_agent_span() - if current_agent_span: - parent_context = set_span_in_context(current_agent_span) - - speech_attributes = { - SpanAttributes.LLM_REQUEST_TYPE: "realtime", - GenAIAttributes.GEN_AI_SYSTEM: "openai", - GenAIAttributes.GEN_AI_OPERATION_NAME: "speech", - } - - model = getattr(span_data, "model", None) - if model: - speech_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = model - - otel_span = self.tracer.start_span( - "openai.realtime.speech", - kind=SpanKind.CLIENT, - context=parent_context, - attributes=speech_attributes, - start_time=time.time_ns(), + agent_ctx = self._resolve_agent_parent(parent_context) + otel_span = self._start_realtime_span( + span_data, agent_ctx, "openai.realtime.speech", "speech", ) elif ( @@ -553,26 +717,9 @@ def on_span_start(self, span): and TranscriptionSpanData and isinstance(span_data, TranscriptionSpanData) ): - current_agent_span = 
self._find_current_agent_span() - if current_agent_span: - parent_context = set_span_in_context(current_agent_span) - - transcription_attributes = { - SpanAttributes.LLM_REQUEST_TYPE: "realtime", - GenAIAttributes.GEN_AI_SYSTEM: "openai", - GenAIAttributes.GEN_AI_OPERATION_NAME: "transcription", - } - - model = getattr(span_data, "model", None) - if model: - transcription_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = model - - otel_span = self.tracer.start_span( - "openai.realtime.transcription", - kind=SpanKind.CLIENT, - context=parent_context, - attributes=transcription_attributes, - start_time=time.time_ns(), + agent_ctx = self._resolve_agent_parent(parent_context) + otel_span = self._start_realtime_span( + span_data, agent_ctx, "openai.realtime.transcription", "transcription", ) elif ( @@ -580,22 +727,9 @@ def on_span_start(self, span): and SpeechGroupSpanData and isinstance(span_data, SpeechGroupSpanData) ): - current_agent_span = self._find_current_agent_span() - if current_agent_span: - parent_context = set_span_in_context(current_agent_span) - - speech_group_attributes = { - SpanAttributes.LLM_REQUEST_TYPE: "realtime", - GenAIAttributes.GEN_AI_SYSTEM: "openai", - GenAIAttributes.GEN_AI_OPERATION_NAME: "speech_group", - } - - otel_span = self.tracer.start_span( - "openai.realtime.speech_group", - kind=SpanKind.CLIENT, - context=parent_context, - attributes=speech_group_attributes, - start_time=time.time_ns(), + agent_ctx = self._resolve_agent_parent(parent_context) + otel_span = self._start_realtime_span( + span_data, agent_ctx, "openai.realtime.speech_group", "speech_group", ) if otel_span: @@ -607,7 +741,7 @@ def on_span_start(self, span): @dont_throw def on_span_end(self, span): """Called when a span ends - finish OpenTelemetry span.""" - from agents import GenerationSpanData + from agents import FunctionSpanData, GenerationSpanData if not span or not hasattr(span, "span_data"): return @@ -620,152 +754,18 @@ def on_span_end(self, span): type(span_data).__name__ == "ResponseSpanData" or isinstance(span_data, GenerationSpanData) ): - # Extract prompt data from input - input_data = getattr(span_data, "input", []) - _extract_prompt_attributes(otel_span, input_data, trace_content) - - # Add function/tool specifications to the request using OpenAI semantic conventions - response = getattr(span_data, "response", None) - if ( - response - and hasattr(response, "tools") - and response.tools - ): - # Extract tool specifications - for i, tool in enumerate(response.tools): - if hasattr(tool, "function"): - function = tool.function - otel_span.set_attribute( - f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.name", - getattr(function, "name", ""), - ) - otel_span.set_attribute( - f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.description", - getattr(function, "description", ""), - ) - if hasattr(function, "parameters"): - otel_span.set_attribute( - f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.parameters", - json.dumps(function.parameters), - ) - elif hasattr(tool, "name"): - # Direct function format - otel_span.set_attribute( - f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.name", - tool.name, - ) - if hasattr(tool, "description"): - otel_span.set_attribute( - f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.description", - tool.description, - ) - if hasattr(tool, "parameters"): - otel_span.set_attribute( - f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.parameters", - json.dumps(tool.parameters), - ) - - if response: - model_settings = _extract_response_attributes(otel_span, response, trace_content) - 
self._last_model_settings = model_settings - - # Legacy fallback for other span types - elif span_data: - input_data = getattr(span_data, "input", []) - _extract_prompt_attributes(otel_span, input_data, trace_content) - - response = getattr(span_data, "response", None) - if response: - model_settings = _extract_response_attributes(otel_span, response, trace_content) - self._last_model_settings = model_settings - - elif ( - _has_realtime_spans - and SpeechSpanData - and isinstance(span_data, SpeechSpanData) - and trace_content - ): - input_text = getattr(span_data, "input", None) - if input_text: - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", input_text - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.role", "user" - ) - - output_audio = getattr(span_data, "output", None) - if output_audio: - if not isinstance(output_audio, (bytes, bytearray)): - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", - str(output_audio), - ) - - elif ( - _has_realtime_spans - and TranscriptionSpanData - and isinstance(span_data, TranscriptionSpanData) - and trace_content - ): - input_audio = getattr(span_data, "input", None) - if input_audio: - if not isinstance(input_audio, (bytes, bytearray)): - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", - str(input_audio), - ) - - output_text = getattr(span_data, "output", None) - if output_text: - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", output_text - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role", "assistant" - ) + self._end_generation_span(otel_span, span_data, trace_content) - elif ( - _has_realtime_spans - and SpeechGroupSpanData - and isinstance(span_data, SpeechGroupSpanData) - and trace_content - ): - input_text = getattr(span_data, "input", None) - if input_text: - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", input_text - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.role", "user" - ) + elif span_data and isinstance(span_data, FunctionSpanData): + self._end_function_span(otel_span, span_data, trace_content) - elif span_data and type(span_data).__name__ == "AgentSpanData": - # For agent spans, add the model settings we stored from the response span - if hasattr(self, "_last_model_settings") and self._last_model_settings: - for key, value in self._last_model_settings.items(): - if key == "temperature": - otel_span.set_attribute( - GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE, value - ) - elif key == "max_tokens": - otel_span.set_attribute( - GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS, value - ) - elif key == "top_p": - otel_span.set_attribute( - GenAIAttributes.GEN_AI_REQUEST_TOP_P, value - ) - elif key == "model": - otel_span.set_attribute( - GenAIAttributes.GEN_AI_REQUEST_MODEL, value - ) - elif key == "frequency_penalty": - otel_span.set_attribute( - "openai.agent.model.frequency_penalty", value - ) - # Note: prompt_attributes, completion_attributes, and usage tokens are now - # on response spans only + elif trace_content and span_data and _has_realtime_spans: + if SpeechSpanData and isinstance(span_data, SpeechSpanData): + self._set_realtime_io_attributes(otel_span, span_data, has_output=True) + elif TranscriptionSpanData and isinstance(span_data, TranscriptionSpanData): + self._set_realtime_io_attributes(otel_span, span_data, has_output=True) + elif SpeechGroupSpanData and isinstance(span_data, SpeechGroupSpanData): + 
self._set_realtime_io_attributes(otel_span, span_data, has_output=False) if hasattr(span, "error") and span.error: otel_span.set_status(Status(StatusCode.ERROR, str(span.error))) @@ -778,6 +778,237 @@ def on_span_end(self, span): context.detach(self._span_contexts[span]) del self._span_contexts[span] + # ------------------------------------------------------------------ + # on_span_start handlers (extracted from the former if-elif chain) + # ------------------------------------------------------------------ + + def _resolve_agent_parent(self, fallback_context): + """Resolve parent context, preferring the current agent span.""" + current = self._find_current_agent_span() + if current: + return set_span_in_context(current) + return fallback_context + + def _start_agent_span(self, span_data, parent_context, trace_id): + """Create an OTel span for an AgentSpanData.""" + agent_name = getattr(span_data, "name", None) or "unknown_agent" + + if set_agent_name is not None: + set_agent_name(agent_name) + + handoff_parent = None + if trace_id: + handoff_key = f"{agent_name}:{trace_id}" + if parent_agent_name := self._reverse_handoffs_dict.pop( + handoff_key, None + ): + handoff_parent = parent_agent_name + + attributes = { + SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.AGENT.value, + GenAIAttributes.GEN_AI_AGENT_NAME: agent_name, + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "invoke_agent", + } + + if handoff_parent: + attributes[GEN_AI_HANDOFF_PARENT_AGENT] = handoff_parent + + if hasattr(span_data, "handoffs") and span_data.handoffs: + handoffs_list = [] + trace_content = should_send_prompts() + for handoff_agent in span_data.handoffs: + handoff = {"name": getattr(handoff_agent, "name", "unknown")} + if trace_content: + handoff["instructions"] = getattr( + handoff_agent, "instructions", "No instructions" + ) + handoffs_list.append(handoff) + attributes[OPENAI_AGENT_HANDOFFS] = json.dumps(handoffs_list) + + return self.tracer.start_span( + f"{agent_name}.agent", + kind=SpanKind.INTERNAL, + context=parent_context, + attributes=attributes, + ) + + def _start_handoff_span(self, span_data, parent_context, trace_id): + """Create an OTel span for a HandoffSpanData.""" + from_agent = getattr(span_data, "from_agent", None) or "unknown" + to_agent = getattr(span_data, "to_agent", None) or "unknown" + + if to_agent and to_agent != "unknown" and trace_id: + handoff_key = f"{to_agent}:{trace_id}" + self._reverse_handoffs_dict[handoff_key] = from_agent + + if len(self._reverse_handoffs_dict) > 1000: + self._reverse_handoffs_dict.popitem(last=False) + + from_agent_span = self._find_agent_span(from_agent) + if from_agent_span: + parent_context = set_span_in_context(from_agent_span) + + handoff_attributes = { + SpanAttributes.TRACELOOP_SPAN_KIND: "handoff", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "handoff", + } + + if from_agent and from_agent != "unknown": + handoff_attributes[GEN_AI_HANDOFF_FROM_AGENT] = from_agent + handoff_attributes[GenAIAttributes.GEN_AI_AGENT_NAME] = from_agent + if to_agent and to_agent != "unknown": + handoff_attributes[GEN_AI_HANDOFF_TO_AGENT] = to_agent + + return self.tracer.start_span( + f"{from_agent} → {to_agent}.handoff", + kind=SpanKind.INTERNAL, + context=parent_context, + attributes=handoff_attributes, + ) + + def _start_function_span(self, span_data, parent_context): + """Create an OTel span for a FunctionSpanData.""" + tool_name = getattr(span_data, "name", None) or 
"unknown_tool" + + tool_attributes = { + SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.TOOL.value, + GenAIAttributes.GEN_AI_TOOL_NAME: tool_name, + GenAIAttributes.GEN_AI_TOOL_TYPE: "function", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "execute_tool", + } + + if hasattr(span_data, "description") and span_data.description: + # Only use description if it's not a generic class description + desc = span_data.description + if desc and not desc.startswith("Represents a Function Span"): + tool_attributes[GenAIAttributes.GEN_AI_TOOL_DESCRIPTION] = desc + + return self.tracer.start_span( + f"{tool_name}.tool", + kind=SpanKind.INTERNAL, + context=parent_context, + attributes=tool_attributes, + ) + + def _start_generation_span(self, parent_context, span_data=None): + """Create an OTel span for a GenerationSpanData or ResponseSpanData.""" + attributes = { + GenAIAttributes.GEN_AI_OPERATION_NAME: "chat", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + } + model = getattr(span_data, "model", None) if span_data else None + if model: + attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = model + return self.tracer.start_span( + "openai.response", + kind=SpanKind.CLIENT, + context=parent_context, + attributes=attributes, + start_time=time.time_ns(), + ) + + def _start_realtime_span(self, span_data, parent_context, span_name, operation): + """Create an OTel span for a realtime span (Speech/Transcription/SpeechGroup). + + NOTE: "speech", "transcription", "speech_group" are OpenAI + Realtime API-specific operations with no well-known OTel + equivalents. Kept as custom operation names intentionally. + """ + attributes = { + GenAIAttributes.GEN_AI_OPERATION_NAME: operation, + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + } + + model = getattr(span_data, "model", None) + if model: + attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = model + + return self.tracer.start_span( + span_name, + kind=SpanKind.CLIENT, + context=parent_context, + attributes=attributes, + start_time=time.time_ns(), + ) + + # ------------------------------------------------------------------ + # on_span_end helpers (extracted from the former if-elif chain) + # ------------------------------------------------------------------ + + def _end_generation_span(self, otel_span, span_data, trace_content): + """Handle on_span_end logic for generation/response spans.""" + input_data = getattr(span_data, "input", []) + _extract_prompt_attributes(otel_span, input_data, trace_content) + + response = getattr(span_data, "response", None) + tools = getattr(span_data, "tools", None) or ( + getattr(response, "tools", None) if response else None + ) + if trace_content and tools: + tool_defs = _extract_tool_definitions(tools) + if tool_defs: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_TOOL_DEFINITIONS, json.dumps(tool_defs) + ) + + if response: + _extract_response_attributes(otel_span, response, trace_content) + + def _end_function_span(self, otel_span, span_data, trace_content): + """Handle on_span_end logic for function/tool spans. + + Sets ``gen_ai.tool.call.arguments`` and ``gen_ai.tool.call.result`` + from ``FunctionSpanData.input`` / ``.output``. Both are content + attributes and are only emitted when *trace_content* is True. 
+ """ + if not trace_content: + return + + tool_input = getattr(span_data, "input", None) + if tool_input is not None: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS, + tool_input if isinstance(tool_input, str) else json.dumps(tool_input), + ) + + tool_output = getattr(span_data, "output", None) + if tool_output is not None: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_TOOL_CALL_RESULT, + tool_output if isinstance(tool_output, str) else json.dumps(tool_output), + ) + + def _set_realtime_io_attributes(self, otel_span, span_data, has_output=True): + """Set input/output message attributes for realtime spans.""" + input_val = getattr(span_data, "input", None) + if input_val and not isinstance(input_val, (bytes, bytearray)): + otel_span.set_attribute( + GenAIAttributes.GEN_AI_INPUT_MESSAGES, + json.dumps([{"role": "user", "parts": [{"type": "text", "content": str(input_val)}]}]), + ) + + if not has_output: + return + + output_val = getattr(span_data, "output", None) + if output_val and not isinstance(output_val, (bytes, bytearray)): + out_msg = { + "role": "assistant", + "parts": [{"type": "text", "content": str(output_val)}], + "finish_reason": "", + } + otel_span.set_attribute( + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps([out_msg]), + ) + + # ------------------------------------------------------------------ + # Span lookup helpers + # ------------------------------------------------------------------ + def _find_agent_span(self, agent_name: str): """Find the OpenTelemetry span for a given agent.""" for agents_span, otel_span in self._otel_spans.items(): diff --git a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py index 108fc30701..48d6a9e442 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py +++ b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py @@ -4,8 +4,10 @@ so we need to patch the RealtimeSession class directly to add OpenTelemetry tracing. 
""" +import json import logging import time +from collections import OrderedDict from typing import Dict, Any, Optional, List, Tuple from opentelemetry.trace import Tracer, Status, StatusCode, SpanKind, Span from opentelemetry.trace import set_span_in_context @@ -108,7 +110,8 @@ def __init__(self, tracer: Tracer): self.prompt_agent_name: Optional[str] = None self.starting_agent_name: Optional[str] = None self.model_name: str = "gpt-4o-realtime-preview" - self.seen_completions: set = set() + self._seen_completions: OrderedDict = OrderedDict() + self._seen_completions_max: int = 1000 self.pending_usage: Optional[Dict[str, int]] = None def start_workflow_span(self, agent_name: str): @@ -116,10 +119,10 @@ def start_workflow_span(self, agent_name: str): self.starting_agent_name = agent_name self.workflow_span = self.tracer.start_span( "Realtime Session", - kind=SpanKind.CLIENT, + kind=SpanKind.INTERNAL, attributes={ SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.WORKFLOW.value, - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", SpanAttributes.TRACELOOP_WORKFLOW_NAME: "Realtime Session", }, ) @@ -170,12 +173,13 @@ def start_agent_span(self, agent_name: str): span = self.tracer.start_span( f"{agent_name}.agent", - kind=SpanKind.CLIENT, + kind=SpanKind.INTERNAL, context=parent_context, attributes={ SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.AGENT.value, GenAIAttributes.GEN_AI_AGENT_NAME: agent_name, - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "invoke_agent", }, ) self.agent_spans[agent_name] = span @@ -202,7 +206,8 @@ def start_tool_span(self, tool_name: str, agent_name: Optional[str] = None): SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.TOOL.value, GenAIAttributes.GEN_AI_TOOL_NAME: tool_name, GenAIAttributes.GEN_AI_TOOL_TYPE: "function", - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "execute_tool", }, ) self.tool_spans[tool_name] = span @@ -214,8 +219,9 @@ def end_tool_span( """End a tool span.""" if tool_name in self.tool_spans: span = self.tool_spans[tool_name] - if output is not None: - span.set_attribute(GenAIAttributes.GEN_AI_TOOL_CALL_RESULT, str(output)) + if output is not None and should_send_prompts(): + result = output if isinstance(output, str) else json.dumps(output, default=str) + span.set_attribute(GenAIAttributes.GEN_AI_TOOL_CALL_RESULT, result) if error: span.set_status(Status(StatusCode.ERROR, str(error))) else: @@ -239,7 +245,8 @@ def create_handoff_span(self, from_agent: str, to_agent: str): context=parent_context, attributes={ SpanAttributes.TRACELOOP_SPAN_KIND: "handoff", - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "handoff", GEN_AI_HANDOFF_FROM_AGENT: from_agent, GEN_AI_HANDOFF_TO_AGENT: to_agent, }, @@ -258,8 +265,8 @@ def start_audio_span(self, item_id: str, content_index: int): kind=SpanKind.CLIENT, context=parent_context, attributes={ - SpanAttributes.LLM_REQUEST_TYPE: "realtime", - GenAIAttributes.GEN_AI_SYSTEM: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "realtime", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", }, ) self.audio_spans[span_key] = span @@ -309,14 +316,20 @@ def record_usage(self, usage: Any): "total_tokens": getattr(usage, "total_tokens", 0) or 0, } + @property + def 
seen_completions(self): + return self._seen_completions + def record_completion(self, role: str, content: str): """Record a completion message - creates an LLM span with prompt and completion.""" if not content: return - content_hash = hash(content[:100]) - if content_hash in self.seen_completions: + content_hash = hash(content) + if content_hash in self._seen_completions: return - self.seen_completions.add(content_hash) + self._seen_completions[content_hash] = None + if len(self._seen_completions) > self._seen_completions_max: + self._seen_completions.popitem(last=False) self.create_llm_span(content) def create_llm_span(self, completion_content: str): @@ -351,13 +364,16 @@ def create_llm_span(self, completion_content: str): context=parent_context, start_time=start_time, attributes={ - SpanAttributes.LLM_REQUEST_TYPE: "realtime", - SpanAttributes.LLM_SYSTEM: "openai", - GenAIAttributes.GEN_AI_SYSTEM: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "realtime", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", GenAIAttributes.GEN_AI_REQUEST_MODEL: model_name_str, }, ) + span.set_attribute( + GenAIAttributes.GEN_AI_RESPONSE_MODEL, model_name_str, + ) + if self.pending_usage: if self.pending_usage.get("input_tokens"): span.set_attribute( @@ -369,27 +385,38 @@ def create_llm_span(self, completion_content: str): GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS, self.pending_usage["output_tokens"], ) + if self.pending_usage.get("total_tokens"): + span.set_attribute( + SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS, + self.pending_usage["total_tokens"], + ) self.pending_usage = None if should_send_prompts(): if prompt_content: + input_msg = { + "role": prompt_role or "user", + "parts": [{"type": "text", "content": prompt_content}], + } span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.role", prompt_role or "user" - ) - span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", prompt_content + GenAIAttributes.GEN_AI_INPUT_MESSAGES, + json.dumps([input_msg]), ) + out_msg = { + "role": "assistant", + "parts": [{"type": "text", "content": completion_content}], + "finish_reason": "", + } span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role", "assistant" - ) - span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", completion_content - ) - span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.finish_reason", "stop" + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps([out_msg]), ) + # Realtime API does not provide finish reasons; set top-level + # attribute only when a meaningful value is available (consistent + # with _hooks.py which omits the attribute when mapped value is None). 
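+        # A hypothetical serialized gen_ai.output.messages value from above:
+        # [{"role": "assistant", "parts": [{"type": "text",
+        #   "content": "Hello!"}], "finish_reason": ""}]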
+ span.set_status(Status(StatusCode.OK)) span.end() diff --git a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/utils.py b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/utils.py index 92b665b724..a8b1fdc60b 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/utils.py +++ b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/utils.py @@ -10,6 +10,8 @@ # Handoff span attribute names GEN_AI_HANDOFF_FROM_AGENT = "gen_ai.handoff.from_agent" GEN_AI_HANDOFF_TO_AGENT = "gen_ai.handoff.to_agent" +GEN_AI_HANDOFF_PARENT_AGENT = "gen_ai.agent.handoff_parent" +OPENAI_AGENT_HANDOFFS = "openai.agent.handoffs" _TRACELOOP_TRACE_CONTENT = "TRACELOOP_TRACE_CONTENT" diff --git a/packages/opentelemetry-instrumentation-openai-agents/pyproject.toml b/packages/opentelemetry-instrumentation-openai-agents/pyproject.toml index 551683bada..dc33fb9794 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/pyproject.toml +++ b/packages/opentelemetry-instrumentation-openai-agents/pyproject.toml @@ -34,7 +34,7 @@ dev = [ ] test = [ "litellm>=1.71.2,<2", - "openai-agents>=0.6.9", + "openai-agents>=0.14.2", "opentelemetry-sdk>=1.38.0,<2", "pytest-asyncio>=1.0.0,<2", "pytest-recording>=0.13.1,<0.14.0", diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/cassettes/test_openai_agents/test_handoff_span_operation_name.yaml b/packages/opentelemetry-instrumentation-openai-agents/tests/cassettes/test_openai_agents/test_handoff_span_operation_name.yaml new file mode 100644 index 0000000000..dcf2387b8f --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/cassettes/test_openai_agents/test_handoff_span_operation_name.yaml @@ -0,0 +1,224 @@ +interactions: +- request: + body: '{"include":[],"input":[{"content":"Please handle this task by delegating + to another agent.","role":"user"}],"instructions":"You decide which agent to + handoff to.","model":"gpt-4.1","stream":false,"tools":[{"name":"handoff_to_agent_a","parameters":{"properties":{"input":{"title":"Input","type":"string"}},"required":["input"],"title":"handoff_to_agent_a_args","type":"object","additionalProperties":false},"strict":true,"type":"function","description":"Handoff + to Agent A for specific tasks"},{"name":"handoff_to_agent_b","parameters":{"properties":{"input":{"title":"Input","type":"string"}},"required":["input"],"title":"handoff_to_agent_b_args","type":"object","additionalProperties":false},"strict":true,"type":"function","description":"Handoff + to Agent B for different tasks"},{"name":"transfer_to_agenta","parameters":{"additionalProperties":false,"type":"object","properties":{},"required":[]},"strict":true,"type":"function","description":"Handoff + to the AgentA agent to handle the request. "},{"name":"transfer_to_agentb","parameters":{"additionalProperties":false,"type":"object","properties":{},"required":[]},"strict":true,"type":"function","description":"Handoff + to the AgentB agent to handle the request. 
"}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '1223' + content-type: + - application/json + cookie: + - __cf_bm=WwDHl7j6.dqwOcLIJAXqGOLTR6ZUq3JCq47vW3LBIBs-1755280559-1.0.1.1-na9dmQo.4u4zv1vUQ7SN457JVcBR1ifes3cOUutsLuVtLSfo_sZ1I8fRayi6NDR2VKiwUFBhrUYM85dJ8BB7Ior2pM9Ng5MfNJwvGRd3lgE; + _cfuvid=PWHn6CD5_OXbE3jv9HT7E4FDlSvoTN5AciqTl4Chslg-1755280559217-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - Agents/Python 0.2.7 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.99.9 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.13 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//5FbLbts6EN37Kwitk0Cy5fixS4qivbt7VxdFEQgjcmizpkiVHCZNA/97 + IUrWI482QIsCRTeGPIcznDlnSM7DjLFEiWTLEoe+Li7XG7nKRbbYLNfrbJNBfjlHyHGV5pfLNF2k + KwmcCyFFXpabfJ6cNQFs+Qk5nYJY47G1c4dAKAposGy1XM7X6XKziJgnoOAbH26rWiOhaJ1K4Ied + s8E0WUnQHqMZnbMu2TITtI4GZU6OhUACpf0U9eQCJ2VN3OSDDUwgVwLZ3V7xPYMdGmJk2R6MsFIy + shdtAhV8KWygOlBB9oBmErcByVpdcNDTHSsrUDdb7Wo6zy+y83k6X56n+XmWdyzFmMmWfZwxxthD + /O3pl7wnvyyXPJJ/WcpFLuVilS3yfJ0/S34MQfc1xiDBxJJjdgP8EtcRBLcLFRqK+MNxAJoYRZtc + /HxH9PXdytzd0+GN/S/c5eu35u3/7tPgYaCKaZAD4yW6gmwReYYkLjnOGLuJXNTgQGvUUy7JhVbs + 2uGtssEXp35q8+i5rp2taio48D0WB7wfYw7BW6PMLtl2HCcopXU0WtRwEqoK3MlzxtixbUuQSPeF + EmhISYWTlvPobhXHglp7IlBC0JR0DW0djosgrGp0QCGas4u0s36hITNpXQXD/5GUcV3LWpfxLbrS + ekVNzkmFQoUq6fNuedxbxaM3BLJJD/inTfe4YwYRBXruVB2NW5a8748Hu4pH5opJ65ivkSupOCPw + B/+0BbpT1XdAAcOaRvwKCZ0fFd6qWqMjhVN7PM3tyRkbmyIU6bjbPxE/e4R2JXpyTTeMwGP/fRx8 + Eoefg3IoerImm/eWm5FLn8DTcgtwO5+M13bpdJflCAEhVEM36H/HDMS7b/Yoz1hNvGubPhv3xy+Q + 9jpKK5SU6OLt+Epty79L2/LP0pb22Op7NX30NEaooQY9XbBX3eM/FPp7Bb+KsWmnvCTize8m7/qn + ySv/LPL6t5psXWi7q50tm6hpb6zHz5oLhsOJQKE8lPo00QUPOxzevHjmhtEqW27OngKjme5hGEj4 + HsXgmU6ex8cjW7Z4Dngubj8xvBSaLIEeRV7N+3c3+OmIUCGBAIJI/+z4DQAA//8DAFuxG79gCwAA + headers: + CF-RAY: + - 96fa91bb0eaf09c9-HFA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 15 Aug 2025 17:56:33 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - traceloop + openai-processing-ms: + - '595' + openai-project: + - proj_tzz1TbPPOXaf6j9tEkVUBIAa + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '597' + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '30000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '29999624' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_ac5a5da3776763a1e744e75310bb345e + status: + code: 200 + message: OK +- request: + body: '{"include":[],"input":[{"content":"Please handle this task by delegating + to another agent.","role":"user"},{"arguments":"{}","call_id":"call_GttzG7nwytkCoQuw48EnEWrj","name":"transfer_to_agenta","type":"function_call","id":"fc_689f74d1bb5c8191a6bf34ff3713448407faccddfd4bb942","status":"completed"},{"call_id":"call_GttzG7nwytkCoQuw48EnEWrj","output":"{\"assistant\": + 
\"AgentA\"}","type":"function_call_output"}],"instructions":"Agent A does something.","model":"gpt-4.1","stream":false,"tools":[]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '498' + content-type: + - application/json + cookie: + - __cf_bm=WwDHl7j6.dqwOcLIJAXqGOLTR6ZUq3JCq47vW3LBIBs-1755280559-1.0.1.1-na9dmQo.4u4zv1vUQ7SN457JVcBR1ifes3cOUutsLuVtLSfo_sZ1I8fRayi6NDR2VKiwUFBhrUYM85dJ8BB7Ior2pM9Ng5MfNJwvGRd3lgE; + _cfuvid=PWHn6CD5_OXbE3jv9HT7E4FDlSvoTN5AciqTl4Chslg-1755280559217-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - Agents/Python 0.2.7 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.99.9 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.13 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//dFTBjts4DL3PV7C69DITOKkziXPrce+9FQuDlmhHG1n0StS0QTH/vrCc + OHZ3egkcPvKJfHzSrycAZY06gQoUh/r1WLWH0uwK0sfjtto2B10dd01F2KKhQheHFrU2pjVl01Tl + Tj2PBNz8Q1ruJOwjTXEdCIVMjSO2Pez3u2Oxr8qMRUFJcazR3A+OhMxU1KC+dIGTH7tq0UXKYQqB + gzqBT87lgPX3wtqQoHVxjUYJSYtlnw/52pEX+AqGKULknuRsfbeZjuzxZ81JhiS18IX8imkEhdnV + Gt36jJ4NuZG8G+Sl3GxfdsVu/1KUL9vypkvmVCf4/gQA8Cv/zoL3sZv1PlCb9cZtcdTFa4klVs2u + On6od+aQ60CZhWLEjh7An4TNoGYv5B8tLdta0d7loJ8yV+cE9J4F77p+/3sFOu6GwM0HSCY6gfqG + 8QJnjNAQeTDkqBstAsJwW9Hm/gE/rHNwRm8cgZwJAv2bKMoG/mrhygnO+EbQcyBYbhs4gCcygB7S + YFAI2Od6wXj5HGEI3AWK8RkGRxgJHAn0BBfPPz6puen329c8hwrssjYYo42CXqbkMTEnqQEDOkdu + 7RcJabLwEOjNcor1/ZbU2Qizn4bA/SC1Rn2m+kLXJRYII3vrO3W6LUxR23KQRdK4/NT3GO6VTwDv + 02XDluRaW0NebGtpdZEihTerqZYprgy1mNy0dhWFAy2HEOoHCigph7eb4hbN67111nLo8fF/Yauc + N6l26/iNQsPRynUys7GpV3Pfk45ntnoSPgmrGXi4TAkP9cJ7xRwclj2G5HV2bp7SRmzc/dFJ+Q7N + A1i/egv2h+f/xxevzjxmXp15FBarUX9/Yr68fgR8xDtv/0/UwoLuAVZfZglTXG+7J0GDgiP9+9P7 + fwAAAP//AwCfc+CEAQYAAA== + headers: + CF-RAY: + - 96fa91c02a0f09c9-HFA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 15 Aug 2025 17:56:35 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - traceloop + openai-processing-ms: + - '1282' + openai-project: + - proj_tzz1TbPPOXaf6j9tEkVUBIAa + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '1284' + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '30000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '29999924' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_d1c98a5aeb66ec1b124d4c4d4ad0605e + status: + code: 200 + message: OK +version: 1 diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/cassettes/test_openai_agents/test_tool_span_operation_name.yaml b/packages/opentelemetry-instrumentation-openai-agents/tests/cassettes/test_openai_agents/test_tool_span_operation_name.yaml new file mode 100644 index 0000000000..4062e28876 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/cassettes/test_openai_agents/test_tool_span_operation_name.yaml @@ -0,0 +1,283 @@ +interactions: +- request: + body: 
'{"include":[],"input":[{"content":"What is the weather in London?","role":"user"}],"instructions":"You + get the weather for a city using the get_weather tool.","model":"gpt-4.1","stream":false,"tools":[{"name":"get_weather","parameters":{"properties":{"city":{"title":"City","type":"string"}},"required":["city"],"title":"get_weather_args","type":"object","additionalProperties":false},"strict":true,"type":"function","description":"Gets + the current weather for a specified city."}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '482' + content-type: + - application/json + cookie: + - __cf_bm=WwDHl7j6.dqwOcLIJAXqGOLTR6ZUq3JCq47vW3LBIBs-1755280559-1.0.1.1-na9dmQo.4u4zv1vUQ7SN457JVcBR1ifes3cOUutsLuVtLSfo_sZ1I8fRayi6NDR2VKiwUFBhrUYM85dJ8BB7Ior2pM9Ng5MfNJwvGRd3lgE; + _cfuvid=PWHn6CD5_OXbE3jv9HT7E4FDlSvoTN5AciqTl4Chslg-1755280559217-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - Agents/Python 0.2.7 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.99.9 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.13 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA3xUyW7bMBC9+ysInuNAcqxY9q0LWqAo0Fx6KJpAGJMjmQ1FquQwbRD43wtSthY3 + 7cWQ53G2N2/mZcEYV5LvGHfou+q23Nab9V6u8kyU+TYDLGElsnWONeaykNkWV1mey1Uh1vu63vCr + GMDuf6CgcxBrPPZ24RAIZQURyzdFsSqzYnOTME9AwUcfYdtOI6HsnfYgHhtng4lV1aA9JjM6Zx3f + MRO0TgZlzo6VRAKl/Rz15IIgZU1K8s0G1iAxOiD7hUAHdKy2jgETip5Z8Mo0CWyQqvMDslZf91W1 + 8LuygbpAFdlHNLNkEYxvKwF6XkZrJeqYv+loub7Ol6tsVSyz9TJfn6hLMfmOfV8wxthL+h1mUoth + InWZZXEi2zy/ucnK7W2Ry1LC9tWJpBD03GEKEkziIVU3wv8aQALBNaFFQwl/ueeRpHu+u+efrZHW + 3PPj+DaGrfp60+fbkpq7rc6+fvn0Y/v+w3qDb/bF+ks7ehhoU2UTrnnCjgvGHhIvHTjQGvWcV3Kh + V0Pn8EnZ4Kuz4PoCBt47Z9uOKgHigNUjPk8xh+CtUabhuxPfHOvaOpo8ivyEtgV39lwwdux1CzXS + c6UkGlK1wpkmPbonJbCi3s4l1hA08ZPircNpE4Rthw4oJHN+nZ2sv2msrLauhfH/ZKzpXc/aqeIn + dHvrFcWaeYtShZYPdfc8HqwSyRsCWT4A/m8BXqpnnJ5EL5zqknHH+EcknzZHBOfQ0MV6+Q5F5Emm + Rbv+vwgGMI6/RULnJ633c+3QkcK5Paqw73tqi10o0inLuwhfXYCnFj25qIYJeBy+j6MPd/gzKIdy + IGuaejA8TDyG9JMmK3CN59NHpzJOV3SCgJQq0gz6btp3OoqLi/pSF+kIR31dbBPZrtK26ZzdxwDZ + YOymwnPBCDiPVSoPe30+ysFDg6MqlZkdws3q6m/75Cq/jKdCHFCOjtlMv5f3NS9eA16LO6z0v0KT + JdAjWG6GvQh+vsItEkggiOGPi+MfAAAA//8DAFEaBBMhBwAA + headers: + CF-RAY: + - 96fa913b3a9209c9-HFA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 15 Aug 2025 17:56:14 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - traceloop + openai-processing-ms: + - '1154' + openai-project: + - proj_tzz1TbPPOXaf6j9tEkVUBIAa + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '1156' + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '30000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '29999711' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_4e4374bb248e1a33239c0d7d3ad949e0 + status: + code: 200 + message: OK +- request: + body: 
"{\"data\":[{\"object\":\"trace.span\",\"id\":\"span_06251ea089b64345a73bd1fa\",\"trace_id\":\"trace_6a430ad653c745b78c89622b8e61fccc\",\"parent_id\":\"span_0e985f83783e4a9facb13938\",\"started_at\":\"2025-08-15T17:56:08.287798+00:00\",\"ended_at\":\"2025-08-15T17:56:12.476921+00:00\",\"span_data\":{\"type\":\"response\",\"response_id\":\"resp_689f74b98be8819ead89b1c91c2819b20a8b48c839804f1e\"},\"error\":null},{\"object\":\"trace.span\",\"id\":\"span_0e985f83783e4a9facb13938\",\"trace_id\":\"trace_6a430ad653c745b78c89622b8e61fccc\",\"parent_id\":null,\"started_at\":\"2025-08-15T17:56:08.287474+00:00\",\"ended_at\":\"2025-08-15T17:56:12.477388+00:00\",\"span_data\":{\"type\":\"agent\",\"name\":\"testAgent\",\"handoffs\":[],\"tools\":[],\"output_type\":\"str\"},\"error\":null},{\"object\":\"trace\",\"id\":\"trace_6549cb4b93ea47c8967199b27a04d7c0\",\"workflow_name\":\"Agent + workflow\",\"group_id\":null,\"metadata\":null},{\"object\":\"trace.span\",\"id\":\"span_dd1aefc5a00e419d8f568f10\",\"trace_id\":\"trace_6549cb4b93ea47c8967199b27a04d7c0\",\"parent_id\":\"span_50bdc65df957417a9c596a17\",\"started_at\":\"2025-08-15T17:56:12.490029+00:00\",\"ended_at\":\"2025-08-15T17:56:14.226669+00:00\",\"span_data\":{\"type\":\"response\",\"response_id\":\"resp_689f74bd210c8190ae8a2c041efe1d5d09e2011d25c4bff7\"},\"error\":null},{\"object\":\"trace.span\",\"id\":\"span_41eb9236408240b8a88180c6\",\"trace_id\":\"trace_6549cb4b93ea47c8967199b27a04d7c0\",\"parent_id\":\"span_50bdc65df957417a9c596a17\",\"started_at\":\"2025-08-15T17:56:14.226963+00:00\",\"ended_at\":\"2025-08-15T17:56:14.227365+00:00\",\"span_data\":{\"type\":\"function\",\"name\":\"get_weather\",\"input\":\"{\\\"city\\\":\\\"London\\\"}\",\"output\":\"It's + cloudy with 15\xB0C\",\"mcp_data\":null},\"error\":null}]}" + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '1601' + content-type: + - application/json + cookie: + - __cf_bm=UhrfEFws9O_ZBKuSryCKFovrTxciXL8p2WJuM1K2dN8-1755280562-1.0.1.1-dIIsnsWKGJtA9W6u0MbXjq7UUseSGAthIGNSZMriLzkecTBUlPjjJFr6r0QnteF8Ul.liPTWhJI6mlCKQBREwPTAAOYdCC2ZirAu9ZrwIWA; + _cfuvid=zDtlMy4g5CGjInt8L2ecM4HeWcHtz0bFgxVbfE5vSqk-1755280562683-0.0.1.1-604800000 + host: + - api.openai.com + openai-beta: + - traces=v1 + user-agent: + - python-httpx/0.28.1 + method: POST + uri: https://api.openai.com/v1/traces/ingest + response: + body: + string: '' + headers: + CF-RAY: + - 96fa9147be745bf5-FRA + Connection: + - keep-alive + Date: + - Fri, 15 Aug 2025 17:56:15 GMT + Server: + - cloudflare + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - traceloop + openai-processing-ms: + - '201' + openai-project: + - proj_tzz1TbPPOXaf6j9tEkVUBIAa + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '205' + x-request-id: + - req_05380352daaa71309ccb911d54f73031 + status: + code: 204 + message: No Content +- request: + body: "{\"include\":[],\"input\":[{\"content\":\"What is the weather in London?\",\"role\":\"user\"},{\"arguments\":\"{\\\"city\\\":\\\"London\\\"}\",\"call_id\":\"call_B8tgP9l0UOJj9DF47eAb54Om\",\"name\":\"get_weather\",\"type\":\"function_call\",\"id\":\"fc_689f74bdf800819091133089651d8da909e2011d25c4bff7\",\"status\":\"completed\"},{\"call_id\":\"call_B8tgP9l0UOJj9DF47eAb54Om\",\"output\":\"It's + cloudy with 15\xB0C\",\"type\":\"function_call_output\"}],\"instructions\":\"You + 
get the weather for a city using the get_weather tool.\",\"model\":\"gpt-4.1\",\"stream\":false,\"tools\":[{\"name\":\"get_weather\",\"parameters\":{\"properties\":{\"city\":{\"title\":\"City\",\"type\":\"string\"}},\"required\":[\"city\"],\"title\":\"get_weather_args\",\"type\":\"object\",\"additionalProperties\":false},\"strict\":true,\"type\":\"function\",\"description\":\"Gets + the current weather for a specified city.\"}]}" + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '794' + content-type: + - application/json + cookie: + - __cf_bm=WwDHl7j6.dqwOcLIJAXqGOLTR6ZUq3JCq47vW3LBIBs-1755280559-1.0.1.1-na9dmQo.4u4zv1vUQ7SN457JVcBR1ifes3cOUutsLuVtLSfo_sZ1I8fRayi6NDR2VKiwUFBhrUYM85dJ8BB7Ior2pM9Ng5MfNJwvGRd3lgE; + _cfuvid=PWHn6CD5_OXbE3jv9HT7E4FDlSvoTN5AciqTl4Chslg-1755280559217-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - Agents/Python 0.2.7 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.99.9 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.13 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//dFU9c+M2EO31KzCobQ0pk5al9oo0KVKkyVwynCWwpBCDAAMsfKe50X/P + ABRJUOdrPPK+/d73wB87xriS/My4Qz82r2+n7li1KE519VaeirdXWR7xWLUv3ctJ1K/FCQ9FWcpD + Laq26478KSaw7b8oaE5ijcfJLhwCoWwgYuWxrg9vRX2sEuYJKPgYI+wwaiSUU1AL4r13NpjYVQfa + YzKjc9bxMzNB62RQZg5sJBIo7beoJxcEKWtSkb9sYD0Sowuybwh0Qcc66xgwoejKglemT2CP1MwO + ZK3eT10N8L2xgcZADdl3NJtiEYy+jQC9bWOwEnWs34/0XO3L50NxqJ+L6rms7qtLOfmZfd0xxtiP + 9He5yeD7+STdC4p0krZrq7rCrqoPh0qWL5+eJOWg64gpC3oPPa7Ar3afQGENoVlbytvapJ3Xgd9p + iU4OYIwlmFf/9Z8NqG0/Ott+gqREZ8b/zE6kDPvdGmkNU56J4Bwa0lcmtA3yyr4pujBghMOIDig4 + ZLZjZf13KIq2+LLnS/rb/ddSkTur0xTgvfIEhibn6Jic+AgOtEa9vSy5MPFxdPihbPDNTPkmnWy5 + /OjsMFIjQFywecdrjjkEb40yPT/fV8ux66yjzCmeKQwDuDlyx9htUg50SNdGSTSkOoUbVXh0H0pg + Q5OdS+wg6OlA3JN1mA+RbS4qdF/crekQ98466wZY/88IkPymrd07/kDXWq/oOtFOqjDwpe9pjxer + xLT4QJYvgP9ZAnOZLpik45WiEr1wakzGM+O/Ifmk3TtBHgTuRxRxTzJJfb+mMTCkApnkVzCef0BC + 57PRp7uO6Ejh1h6FM82d2+IUiiaifYnw0wN4H9GTi2zIwNtK3TWGO/wvKIdyI8659GLIZLWWz4Zs + wPWe506zoqd3PENAShXXDPqPfO70LO8e+ktTpM9A5NeDmsiOTSb9YjGOOfFcMALms0rlodXzZyGk + J2xhpTKbp7gsyqefgezDsLAqCVKukcWGwI9PfHn8DPgs76LpX6UmS6CzzOXboozgtyIekEACQcx/ + 293+BwAA//8DAAdiPY2lBwAA + headers: + CF-RAY: + - 96fa9146097f09c9-HFA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 15 Aug 2025 17:56:15 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - traceloop + openai-processing-ms: + - '776' + openai-project: + - proj_tzz1TbPPOXaf6j9tEkVUBIAa + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '779' + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '30000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '29999682' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_f46f76aed514a020bb0ff7dc5c8f3600 + status: + code: 200 + message: OK +version: 1 diff --git 
a/packages/opentelemetry-instrumentation-openai-agents/tests/test_content_parts.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_content_parts.py new file mode 100644 index 0000000000..a6d4b35e39 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_content_parts.py @@ -0,0 +1,131 @@ +""" +Unit tests for multimodal content block → OTel part conversion. + +Covers _dict_block_to_part and _object_block_to_part helpers in _hooks.py: + - data: URLs in image_url must produce BlobPart, not UriPart + - input_audio must include mime_type when format is provided +""" + +from types import SimpleNamespace + + +def _dict_block(block: dict): + from opentelemetry.instrumentation.openai_agents._hooks import _dict_block_to_part + return _dict_block_to_part(block) + + +def _object_block(block): + from opentelemetry.instrumentation.openai_agents._hooks import _object_block_to_part + return _object_block_to_part(block) + + +class TestImageUrlDataUrlBecomesBlob: + """image_url with a data: URL must produce BlobPart, not UriPart.""" + + def test_data_url_png_produces_blob_part(self): + block = {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc123=="}} + part = _dict_block(block) + assert part["type"] == "blob", f"Expected blob, got {part['type']}" + assert part["modality"] == "image" + assert part["content"] == "abc123==" + assert part.get("mime_type") == "image/png" + + def test_data_url_jpeg_produces_blob_part(self): + block = {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ=="}} + part = _dict_block(block) + assert part["type"] == "blob" + assert part["content"] == "/9j/4AAQ==" + assert part.get("mime_type") == "image/jpeg" + + def test_data_url_no_mime_produces_blob_without_mime_type(self): + """data: URL with no detectable MIME must still be BlobPart (no mime_type key).""" + block = {"type": "image_url", "image_url": {"url": "data:base64,abc123"}} + part = _dict_block(block) + assert part["type"] == "blob" + # mime_type may be absent or empty — must NOT be a URI + assert "uri" not in part + + def test_https_url_still_produces_uri_part(self): + """Regular https: URLs must remain UriPart.""" + block = {"type": "image_url", "image_url": {"url": "https://example.com/image.png"}} + part = _dict_block(block) + assert part["type"] == "uri" + assert part["modality"] == "image" + assert part["uri"] == "https://example.com/image.png" + + def test_http_url_still_produces_uri_part(self): + block = {"type": "image_url", "image_url": {"url": "http://cdn.example.com/photo.jpg"}} + part = _dict_block(block) + assert part["type"] == "uri" + assert part["uri"] == "http://cdn.example.com/photo.jpg" + + def test_object_block_data_url_produces_blob_part(self): + """SDK object path (_object_block_to_part) must also handle data: URLs.""" + image_url_obj = SimpleNamespace(url="data:image/png;base64,xyz789==") + block = SimpleNamespace(type="image_url", image_url=image_url_obj) + part = _object_block(block) + assert part["type"] == "blob", f"Expected blob, got {part['type']}" + assert part["modality"] == "image" + assert part["content"] == "xyz789==" + assert part.get("mime_type") == "image/png" + + def test_object_block_https_url_produces_uri_part(self): + image_url_obj = SimpleNamespace(url="https://example.com/photo.png") + block = SimpleNamespace(type="image_url", image_url=image_url_obj) + part = _object_block(block) + assert part["type"] == "uri" + assert part["uri"] == "https://example.com/photo.png" + + +class 
TestInputAudioMimeType: + """input_audio BlobPart must include mime_type when format is provided.""" + + def test_wav_format_produces_audio_wav_mime_type(self): + block = {"type": "input_audio", "input_audio": {"data": "audiobase64==", "format": "wav"}} + part = _dict_block(block) + assert part["type"] == "blob" + assert part["modality"] == "audio" + assert part["content"] == "audiobase64==" + assert part.get("mime_type") == "audio/wav", ( + f"Expected audio/wav, got {part.get('mime_type')!r}" + ) + + def test_mp3_format_produces_audio_mpeg_mime_type(self): + """MP3 format maps to audio/mpeg per IANA media types.""" + block = {"type": "input_audio", "input_audio": {"data": "mp3base64==", "format": "mp3"}} + part = _dict_block(block) + assert part["type"] == "blob" + assert part.get("mime_type") == "audio/mpeg", ( + f"Expected audio/mpeg, got {part.get('mime_type')!r}" + ) + + def test_ogg_format_produces_audio_ogg_mime_type(self): + block = {"type": "input_audio", "input_audio": {"data": "oggdata==", "format": "ogg"}} + part = _dict_block(block) + assert part.get("mime_type") == "audio/ogg" + + def test_no_format_omits_mime_type(self): + """When format is absent, mime_type should not be present (don't fabricate it).""" + block = {"type": "input_audio", "input_audio": {"data": "rawdata=="}} + part = _dict_block(block) + assert part["type"] == "blob" + assert part["modality"] == "audio" + assert "mime_type" not in part + + def test_object_block_wav_format_produces_mime_type(self): + """SDK object path (_object_block_to_part) must also include mime_type.""" + audio_obj = SimpleNamespace(data="audiobase64==", format="wav") + block = SimpleNamespace(type="input_audio", input_audio=audio_obj) + part = _object_block(block) + assert part["type"] == "blob" + assert part["modality"] == "audio" + assert part["content"] == "audiobase64==" + assert part.get("mime_type") == "audio/wav", ( + f"Expected audio/wav, got {part.get('mime_type')!r}" + ) + + def test_object_block_mp3_format_produces_mpeg_mime_type(self): + audio_obj = SimpleNamespace(data="mp3data==", format="mp3") + block = SimpleNamespace(type="input_audio", input_audio=audio_obj) + part = _object_block(block) + assert part.get("mime_type") == "audio/mpeg" diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_finish_reasons.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_finish_reasons.py new file mode 100644 index 0000000000..d669857ca6 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_finish_reasons.py @@ -0,0 +1,418 @@ +""" +Unit tests for finish_reason attribution on multi-output Responses API spans. + +Tests _extract_response_attributes directly with mock response objects +to verify per-message finish_reason mapping and top-level dedup. 
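+
+The mappings exercised below: tool_calls → tool_call, completed → stop,
+failed → error; cancelled and incomplete are preserved verbatim so consumers
+can tell user-initiated cancels and content-filter truncation apart from
+generic errors and length stops.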
+""" + +import json +import pytest +from types import SimpleNamespace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, +) + + +@pytest.fixture +def span(): + provider = TracerProvider() + tracer = provider.get_tracer("test") + otel_span = tracer.start_span("test-span") + yield otel_span + otel_span.end() + + +def _make_response(output, finish_reason="stop", status=None, model="gpt-4o"): + kwargs = dict( + output=output, + model=model, + id="resp-test", + temperature=None, + max_output_tokens=None, + top_p=None, + frequency_penalty=None, + usage=None, + ) + if status is not None: + kwargs["status"] = status + else: + kwargs["finish_reason"] = finish_reason + return SimpleNamespace(**kwargs) + + +def _text_message(*texts, role="assistant"): + content = [ + SimpleNamespace(type="output_text", text=t) for t in texts + ] + return SimpleNamespace(type="message", content=content, role=role) + + +def _reasoning_and_text_message(reasoning_summary, text, role="assistant"): + content = [ + SimpleNamespace(type="reasoning", summary=[SimpleNamespace(text=reasoning_summary)]), + SimpleNamespace(type="output_text", text=text), + ] + return SimpleNamespace(type="message", content=content, role=role) + + +def _tool_call(name, arguments="{}", call_id="call_0"): + return SimpleNamespace( + type="function_call", name=name, arguments=arguments, call_id=call_id, + ) + + +def _extract(span, response, trace_content=True): + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + return _extract_response_attributes(span, response, trace_content) + + +def _get_output_messages(span): + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + return json.loads(raw) if raw else [] + + +def _get_finish_reasons(span): + return span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + + +class TestMultiOutputFinishReasons: + """Verify per-message and top-level finish_reasons when a Responses-API + reply contains reasoning + text + tool_call outputs.""" + + def test_reasoning_text_and_tool_call(self, span): + response = _make_response( + output=[ + _reasoning_and_text_message("let me think", "Here's what I found"), + _tool_call("get_weather", '{"city": "NYC"}', "call_abc"), + ], + finish_reason="stop", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 2 + + # Message 0: text+reasoning → mapped "stop" + assert msgs[0]["role"] == "assistant" + assert msgs[0]["finish_reason"] == "stop" + part_types = [p["type"] for p in msgs[0]["parts"]] + assert "reasoning" in part_types + assert "text" in part_types + + # Message 1: tool call → mapped "tool_call" (singular, per OTel spec) + assert msgs[1]["role"] == "assistant" + assert msgs[1]["finish_reason"] == "tool_call" + assert msgs[1]["parts"][0]["type"] == "tool_call" + assert msgs[1]["parts"][0]["name"] == "get_weather" + + # Top-level: deduped, order-preserved + assert _get_finish_reasons(span) == ("stop", "tool_call") + + def test_tool_calls_mapped_to_singular(self, span): + """'tool_calls' (OpenAI) must map to 'tool_call' (OTel singular).""" + response = _make_response( + output=[_tool_call("search", '{"q": "test"}', "call_1")], + finish_reason="stop", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert msgs[0]["finish_reason"] == "tool_call" + assert _get_finish_reasons(span) == ("tool_call",) + + def 
test_status_completed_mapped_to_stop(self, span): + """Responses API status='completed' must map to 'stop'.""" + response = _make_response( + output=[_text_message("Done")], + status="completed", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert msgs[0]["finish_reason"] == "stop" + assert _get_finish_reasons(span) == ("stop",) + + def test_multiple_text_messages_dedup_finish_reason(self, span): + """Two text outputs with the same finish_reason should dedup to one top-level entry.""" + response = _make_response( + output=[ + _text_message("Part 1"), + _text_message("Part 2"), + ], + finish_reason="stop", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 2 + assert msgs[0]["finish_reason"] == "stop" + assert msgs[1]["finish_reason"] == "stop" + # Deduped: only one "stop" + assert _get_finish_reasons(span) == ("stop",) + + def test_text_and_tool_call_distinct_reasons(self, span): + """Text ('stop') + tool call ('tool_call') → both in top-level tuple.""" + response = _make_response( + output=[ + _text_message("Here you go"), + _tool_call("lookup", '{"id": 1}', "call_2"), + ], + finish_reason="stop", + ) + + _extract(span, response) + + assert _get_finish_reasons(span) == ("stop", "tool_call") + + def test_trace_content_false_still_sets_finish_reasons(self, span): + """When trace_content=False, output messages are omitted but top-level + finish_reasons must still reflect all output item types, including tool calls.""" + response = _make_response( + output=[ + _text_message("secret"), + _tool_call("get_weather", '{"city": "NYC"}', "call_abc"), + ], + finish_reason="stop", + ) + + _extract(span, response, trace_content=False) + + # No output messages (content suppressed) + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in span.attributes + # finish_reasons must reflect both the text (stop) and the tool call (tool_call) + assert _get_finish_reasons(span) == ("stop", "tool_call") + + def test_incomplete_response_preserves_incomplete_finish_reason(self, span): + """Responses API status='incomplete' must preserve 'incomplete', not remap to 'length'. + + 'incomplete' can be caused by a content filter, not just token limits — mapping + to 'length' would misrepresent the reason and lose information. + """ + response = _make_response( + output=[_text_message("Partial...")], + status="incomplete", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert msgs[0]["finish_reason"] == "incomplete" + assert _get_finish_reasons(span) == ("incomplete",) + + def test_cancelled_response_preserves_cancelled_finish_reason(self, span): + """Responses API status='cancelled' must preserve 'cancelled', not remap to 'error'. + + Cancellation is a distinct lifecycle event from an error; conflating the two + prevents consumers from distinguishing user-initiated cancels from failures. 
+ """ + response = _make_response( + output=[_text_message("Partial")], + status="cancelled", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert msgs[0]["finish_reason"] == "cancelled" + assert _get_finish_reasons(span) == ("cancelled",) + + def test_failed_status_mapped_to_error(self, span): + """Responses API status='failed' must map to 'error'.""" + response = _make_response( + output=[_text_message("Oops")], + status="failed", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert msgs[0]["finish_reason"] == "error" + assert _get_finish_reasons(span) == ("error",) + + +class TestFinishReasonsWithoutContent: + """finish_reason granularity must be preserved when trace_content=False. + + gen_ai.response.finish_reasons is Recommended metadata, not opt-in content. + The code must iterate output items for their inherent finish reasons even + when message content is suppressed. + """ + + def test_tool_call_only_output_with_responses_api_completed_status(self, span): + """Responses API status='completed' maps to 'stop' at the response level. + But when the output contains only a function_call item, the top-level + finish_reasons must show 'tool_call', not the response-level 'stop'.""" + response = _make_response( + output=[_tool_call("get_weather", '{"city": "NYC"}', "call_abc")], + status="completed", + ) + + _extract(span, response, trace_content=False) + + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in span.attributes + assert _get_finish_reasons(span) == ("tool_call",) + + def test_tool_call_only_without_content_trace_enabled(self, span): + """Same response with trace_content=True must also yield only 'tool_call'.""" + response = _make_response( + output=[_tool_call("search", '{"q": "test"}', "call_1")], + status="completed", + ) + + _extract(span, response, trace_content=True) + + msgs = _get_output_messages(span) + assert msgs[0]["finish_reason"] == "tool_call" + assert _get_finish_reasons(span) == ("tool_call",) + + def test_no_output_falls_back_to_response_level_finish_reason(self, span): + """When the response has no output items, fall back to the response-level reason.""" + response = _make_response(output=[], finish_reason="stop") + + _extract(span, response, trace_content=False) + + assert _get_finish_reasons(span) == ("stop",) + + +class TestToolCallPartOptionalFields: + """P2: Optional fields on tool_call parts must be omitted (not set to '' or null) + when not present in the source data. + + OTel ToolCallRequestPart schema: id is optional (default null), arguments is optional. + Emitting empty-string id or null arguments causes consumer correlation failures. 
+ """ + + def test_function_call_no_call_id_omits_id_from_part(self, span): + """Responses API function_call with no call_id must omit 'id' from the part.""" + output = SimpleNamespace(type="function_call", name="search", arguments='{"q": "test"}') + # No call_id attribute — getattr fallback was "", which is wrong + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 1 + part = msgs[0]["parts"][0] + assert part["type"] == "tool_call" + assert "id" not in part or part["id"], ( + f"id must be absent or non-empty when call_id not provided, got: {part}" + ) + + def test_function_call_none_arguments_omits_arguments_key(self, span): + """Responses API function_call with no arguments must omit 'arguments' from the part.""" + output = SimpleNamespace(type="function_call", name="noop", call_id="c1") + # No arguments attribute + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 1 + part = msgs[0]["parts"][0] + assert part["type"] == "tool_call" + assert "arguments" not in part, ( + f"arguments must be omitted when None, got: {part}" + ) + + def test_function_call_with_call_id_and_arguments_still_included(self, span): + """Sanity: when call_id and arguments are present, both must be emitted.""" + output = SimpleNamespace( + type="function_call", name="get_weather", + call_id="call_99", arguments='{"city": "NYC"}', + ) + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + part = msgs[0]["parts"][0] + assert part.get("id") == "call_99" + assert isinstance(part.get("arguments"), dict) + assert part["arguments"]["city"] == "NYC" + + +class TestContentItemTypeClassification: + """Content items inside output.content must be dispatched by their 'type' field first. + + The hasattr(.text) fallback must NOT shadow typed items like 'reasoning' or 'refusal' + that happen to also carry a .text attribute. 
+ """ + + def test_reasoning_item_with_text_attribute_not_misclassified_as_text(self, span): + """A content item with type='reasoning' that also has a .text attribute + must produce a 'reasoning' part, not a 'text' part.""" + from types import SimpleNamespace + + reasoning_item = SimpleNamespace( + type="reasoning", + text="This shadows the type if hasattr fires first", + summary=[SimpleNamespace(text="actual chain-of-thought")], + ) + output = SimpleNamespace(type="message", content=[reasoning_item], role="assistant") + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 1 + part = msgs[0]["parts"][0] + assert part["type"] == "reasoning", ( + f"type='reasoning' item with .text was misclassified as '{part['type']}'" + ) + + def test_refusal_item_with_text_attribute_not_misclassified_as_text(self, span): + """A content item with type='refusal' that also has a .text attribute + must produce a 'refusal' part, not a 'text' part.""" + from types import SimpleNamespace + + refusal_item = SimpleNamespace( + type="refusal", + text="I cannot do that", + refusal="I cannot do that", + ) + output = SimpleNamespace(type="message", content=[refusal_item], role="assistant") + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 1 + part = msgs[0]["parts"][0] + assert part["type"] == "refusal", ( + f"type='refusal' item with .text was misclassified as '{part['type']}'" + ) + + def test_output_text_item_still_produces_text_part(self, span): + """Sanity check: type='output_text' must still produce a 'text' part.""" + from types import SimpleNamespace + + text_item = SimpleNamespace(type="output_text", text="Hello!") + output = SimpleNamespace(type="message", content=[text_item], role="assistant") + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + assert msgs[0]["parts"][0]["type"] == "text" + assert msgs[0]["parts"][0]["content"] == "Hello!" 
+ + def test_unknown_typed_item_without_text_still_handled(self, span): + """An item with an unknown type and no .text must fall through to the generic handler.""" + from types import SimpleNamespace + + unknown_item = SimpleNamespace(type="image_file", file_id="file_abc") + output = SimpleNamespace(type="message", content=[unknown_item], role="assistant") + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 1 + assert msgs[0]["parts"][0]["type"] == "image_file" diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_openai_agents.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_openai_agents.py index 0c030334eb..7a0fce3ef1 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/tests/test_openai_agents.py +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_openai_agents.py @@ -1,3 +1,4 @@ +import json import pytest from unittest.mock import MagicMock from opentelemetry.instrumentation.openai_agents import ( @@ -49,24 +50,19 @@ def test_dict_content_serialization(exporter): spans = exporter.get_finished_spans() - # Look for any spans with prompt/content attributes + # Look for any spans with message content attributes for span in spans: for attr_name, attr_value in span.attributes.items(): - prompt_content_check = ("prompt" in attr_name and "content" in attr_name) or ( - "gen_ai.prompt" in attr_name and "content" in attr_name + prompt_content_check = ( + attr_name in ("gen_ai.input.messages", "gen_ai.output.messages") ) if prompt_content_check: # All content attributes should be strings, not dicts error_msg = f"Attribute {attr_name} should be a string, got {type(attr_value)}: {attr_value}" assert isinstance(attr_value, str), error_msg - # If it looks like JSON, verify it can be parsed - if attr_value.startswith("{") and attr_value.endswith("}"): - try: - json.loads(attr_value) - except json.JSONDecodeError: - # If it fails to parse, that's still fine - just not JSON - pass + # Message attributes must be valid JSON (arrays of message objects) + json.loads(attr_value) # The test passes if no dict type warnings occurred (all content attributes are strings) @@ -87,52 +83,57 @@ def test_agent_spans(exporter, test_agent): # Test agent span attributes (should NOT contain prompts/completions/usage/llm_params) assert agent_span.name == "testAgent.agent" - assert agent_span.kind == agent_span.kind.CLIENT + assert agent_span.kind == agent_span.kind.INTERNAL assert agent_span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.AGENT.value assert agent_span.attributes[GenAIAttributes.GEN_AI_AGENT_NAME] == "testAgent" - assert agent_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "openai_agents" + assert agent_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" assert agent_span.status.status_code == StatusCode.OK # Agent span should NOT contain LLM parameters - assert SpanAttributes.LLM_REQUEST_TEMPERATURE not in agent_span.attributes - assert SpanAttributes.LLM_REQUEST_MAX_TOKENS not in agent_span.attributes - assert SpanAttributes.LLM_REQUEST_TOP_P not in agent_span.attributes - assert "openai.agent.model.frequency_penalty" not in agent_span.attributes + assert GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE not in agent_span.attributes + assert GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS not in agent_span.attributes + assert GenAIAttributes.GEN_AI_REQUEST_TOP_P not in agent_span.attributes + assert 
GenAIAttributes.GEN_AI_REQUEST_FREQUENCY_PENALTY not in agent_span.attributes # Find the response span (openai.response) - this should contain prompts/completions/usage response_spans = [s for s in spans if s.name == "openai.response"] assert len(response_spans) >= 1, f"Expected at least 1 openai.response span, got {len(response_spans)}" response_span = response_spans[0] - # Test response span attributes (should contain prompts/completions/usage) - # Test proper semantic conventions - assert response_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "response" - assert response_span.attributes["gen_ai.operation.name"] == "response" - assert response_span.attributes["gen_ai.system"] == "openai" + assert response_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat" + assert response_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" - # Test prompts using OpenAI semantic conventions - assert response_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "user" - assert response_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"] == "What is AI?" + # Test input messages (JSON array with parts-based schema) + input_messages = json.loads(response_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["role"] == "user" + assert "parts" in input_messages[0], "Input messages must use parts-based schema" + assert input_messages[0]["parts"][0]["type"] == "text" + assert input_messages[0]["parts"][0]["content"] == "What is AI?" # Test usage tokens assert response_span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] is not None assert response_span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] is not None - assert response_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] is not None + assert response_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] is not None assert response_span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] > 0 assert response_span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] > 0 - assert response_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] > 0 + assert response_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] > 0 - # Test completions using OpenAI semantic conventions - assert response_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"] is not None - assert len(response_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"]) > 0 - assert response_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role"] is not None + # Test output messages (JSON array with parts-based schema) + output_messages = json.loads(response_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) + assert "parts" in output_messages[0], "Output messages must use parts-based schema" + assert output_messages[0]["parts"][0]["type"] == "text" + assert output_messages[0]["parts"][0]["content"] is not None + assert len(output_messages[0]["parts"][0]["content"]) > 0 + assert output_messages[0]["role"] is not None + assert "finish_reason" in output_messages[0], "Output messages must have finish_reason" # Test model settings are in the response span assert response_span.attributes["gen_ai.request.temperature"] == 0.3 assert response_span.attributes["gen_ai.request.max_tokens"] == 1024 assert response_span.attributes["gen_ai.request.top_p"] == 0.2 - assert response_span.attributes["gen_ai.request.model"] is not None + assert response_span.attributes.get("gen_ai.request.model") is not None or \ + response_span.attributes.get("gen_ai.response.model") 
is not None # Test proper duration (should be > 0) duration_ms = (response_span.end_time - response_span.start_time) / 1_000_000 @@ -444,60 +445,97 @@ async def get_city_info(city_name: str) -> str: second_response_span = response_spans[1] # The tool call and result appear in the SECOND response span as part of conversation history - # Find the assistant message with tool call + # Parse the input messages JSON array (parts-based schema) + input_messages = json.loads( + second_response_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] + ) + tool_call_found = False tool_result_found = False - for i in range(20): # Check conversation history - role_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.role" - if role_key not in second_response_span.attributes: - continue - - role = second_response_span.attributes[role_key] + for msg in input_messages: + role = msg.get("role") + parts = msg.get("parts", []) if role == "assistant" and not tool_call_found: - # Check if this assistant message has tool_calls - tool_call_name_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.tool_calls.0.name" - if tool_call_name_key in second_response_span.attributes: - tool_call_found = True - # Verify tool call attributes - assert second_response_span.attributes[tool_call_name_key] == "get_city_info", ( - f"Expected tool name 'get_city_info', got '{second_response_span.attributes[tool_call_name_key]}'" - ) - # Verify tool call ID exists - tool_call_id_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.tool_calls.0.id" - assert tool_call_id_key in second_response_span.attributes, ( - f"Tool call ID not found at {tool_call_id_key}" - ) - tool_call_id = second_response_span.attributes[tool_call_id_key] - assert len(tool_call_id) > 0, "Tool call ID should not be empty" - - # Verify arguments exist and contain city name - tool_call_args_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.tool_calls.0.arguments" - assert tool_call_args_key in second_response_span.attributes, ( - f"Tool call arguments not found at {tool_call_args_key}" - ) - arguments = second_response_span.attributes[tool_call_args_key] - assert "London" in arguments or "london" in arguments.lower(), ( - f"Expected 'London' in arguments, got: {arguments}" - ) + # Look for tool_call parts + for part in parts: + if part.get("type") == "tool_call": + tool_call_found = True + assert part["name"] == "get_city_info", ( + f"Expected tool name 'get_city_info', got '{part['name']}'" + ) + tool_call_id = part.get("id", "") + assert len(tool_call_id) > 0, "Tool call ID should not be empty" + arguments = part.get("arguments", "") + if isinstance(arguments, dict): + arguments = json.dumps(arguments) + assert "London" in arguments or "london" in arguments.lower(), ( + f"Expected 'London' in arguments, got: {arguments}" + ) + break elif role == "tool" and not tool_result_found: - tool_result_found = True - # Verify tool result attributes - content_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.content" - tool_call_id_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.tool_call_id" - - assert content_key in second_response_span.attributes, f"Tool result content not found at {content_key}" - content = second_response_span.attributes[content_key] - assert len(content) > 0, "Tool result content should not be empty" - assert "London" in content or "9000000" in content or "United Kingdom" in content, ( - f"Expected tool result to contain city info, got: {content}" - ) + # Look for tool_call_response parts + for part in parts: + if part.get("type") == "tool_call_response": + tool_result_found = True + response_text = 
part.get("response", "") + assert len(response_text) > 0, "Tool result response should not be empty" + assert ( + "London" in response_text + or "9000000" in response_text + or "United Kingdom" in response_text + ), ( + f"Expected tool result to contain city info, got: {response_text}" + ) + tool_call_id = part.get("id", "") + assert len(tool_call_id) > 0, "Tool call ID should not be empty" + break + + assert tool_call_found, "No assistant message with tool_call parts found in second response span" + assert tool_result_found, "No tool message with tool_call_response parts found in second response span" - assert tool_call_id_key in second_response_span.attributes, f"Tool call ID not found at {tool_call_id_key}" - tool_call_id = second_response_span.attributes[tool_call_id_key] - assert len(tool_call_id) > 0, "Tool call ID should not be empty" - assert tool_call_found, "No assistant message with tool_calls found in second response span" - assert tool_result_found, "No tool message found in second response span" +@pytest.mark.vcr +def test_tool_span_operation_name(exporter, function_tool_agent): + """Test that tool/function spans have gen_ai.operation.name set to 'execute_tool'.""" + query = "What is the weather in London?" + + Runner.run_sync(function_tool_agent, query) + + spans = exporter.get_finished_spans() + tool_spans = [s for s in spans if s.name.endswith(".tool")] + + assert len(tool_spans) >= 1, f"Expected at least 1 tool span, found {len(tool_spans)}" + + for tool_span in tool_spans: + assert GenAIAttributes.GEN_AI_OPERATION_NAME in tool_span.attributes, ( + f"Tool span '{tool_span.name}' missing gen_ai.operation.name attribute" + ) + assert tool_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == "execute_tool", ( + f"Tool span '{tool_span.name}' has incorrect gen_ai.operation.name: " + f"{tool_span.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}, expected 'execute_tool'" + ) + + +@pytest.mark.vcr +def test_handoff_span_operation_name(exporter, handoff_agent): + """Test that handoff spans have gen_ai.operation.name set to 'handoff'.""" + query = "Please handle this task by delegating to another agent." 
+ + Runner.run_sync(handoff_agent, query) + + spans = exporter.get_finished_spans() + handoff_spans = [s for s in spans if ".handoff" in s.name] + + assert len(handoff_spans) >= 1, f"Expected at least 1 handoff span, found {len(handoff_spans)}" + + for handoff_span in handoff_spans: + assert GenAIAttributes.GEN_AI_OPERATION_NAME in handoff_span.attributes, ( + f"Handoff span '{handoff_span.name}' missing gen_ai.operation.name attribute" + ) + assert handoff_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == "handoff", ( + f"Handoff span '{handoff_span.name}' has incorrect gen_ai.operation.name: " + f"{handoff_span.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}, expected 'handoff'" + ) diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime.py index 02a030a72f..2b27104d3a 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime.py +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime.py @@ -11,7 +11,6 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry.trace import StatusCode -from opentelemetry.semconv_ai import SpanAttributes from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) @@ -117,8 +116,7 @@ def test_speech_span_start_creates_otel_span(self, tracer_provider_and_exporter) assert "openai.realtime.speech" in span_names speech_span = next(s for s in spans if s.name == "openai.realtime.speech") - assert speech_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "realtime" - assert speech_span.attributes["gen_ai.system"] == "openai" + assert speech_span.attributes["gen_ai.provider.name"] == "openai" assert speech_span.attributes["gen_ai.operation.name"] == "speech" assert speech_span.status.status_code == StatusCode.OK @@ -213,8 +211,7 @@ def test_transcription_span_start_creates_otel_span(self, tracer_provider_and_ex assert "openai.realtime.transcription" in span_names transcription_span = next(s for s in spans if s.name == "openai.realtime.transcription") - assert transcription_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "realtime" - assert transcription_span.attributes["gen_ai.system"] == "openai" + assert transcription_span.attributes["gen_ai.provider.name"] == "openai" assert transcription_span.attributes["gen_ai.operation.name"] == "transcription" def test_transcription_span_captures_model_and_format(self, tracer_provider_and_exporter): @@ -306,8 +303,7 @@ def test_speech_group_span_creates_otel_span(self, tracer_provider_and_exporter) assert "openai.realtime.speech_group" in span_names speech_group_span = next(s for s in spans if s.name == "openai.realtime.speech_group") - assert speech_group_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "realtime" - assert speech_group_span.attributes["gen_ai.system"] == "openai" + assert speech_group_span.attributes["gen_ai.provider.name"] == "openai" assert speech_group_span.attributes["gen_ai.operation.name"] == "speech_group" assert speech_group_span.status.status_code == StatusCode.OK diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime_session.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime_session.py index 43d80525dc..b04b983e9e 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime_session.py +++ 
b/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime_session.py @@ -1,5 +1,6 @@ """Tests for realtime session instrumentation via wrapper patching.""" +import json import pytest from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter @@ -199,10 +200,14 @@ def test_record_completion_creates_llm_span(self, tracer, tracer_provider): assert len(llm_spans) == 1 llm_span = llm_spans[0] - assert llm_span.attributes.get("gen_ai.prompt.0.role") == "user" - assert llm_span.attributes.get("gen_ai.prompt.0.content") == "What is the weather?" - assert llm_span.attributes.get("gen_ai.completion.0.role") == "assistant" - assert llm_span.attributes.get("gen_ai.completion.0.content") == "The weather is sunny." + input_msgs = json.loads(llm_span.attributes.get("gen_ai.input.messages")) + assert input_msgs[0]["role"] == "user" + assert input_msgs[0]["parts"][0]["type"] == "text" + assert input_msgs[0]["parts"][0]["content"] == "What is the weather?" + output_msgs = json.loads(llm_span.attributes.get("gen_ai.output.messages")) + assert output_msgs[0]["role"] == "assistant" + assert output_msgs[0]["parts"][0]["type"] == "text" + assert output_msgs[0]["parts"][0]["content"] == "The weather is sunny." def test_multiple_llm_spans(self, tracer, tracer_provider): """Test that multiple prompt/completion pairs create multiple LLM spans.""" @@ -229,12 +234,16 @@ def test_multiple_llm_spans(self, tracer, tracer_provider): assert len(llm_spans) == 2 # First span should have "Hello" and "Hi there!" - assert llm_spans[0].attributes.get("gen_ai.prompt.0.content") == "Hello" - assert llm_spans[0].attributes.get("gen_ai.completion.0.content") == "Hi there!" + first_in = json.loads(llm_spans[0].attributes.get("gen_ai.input.messages")) + first_out = json.loads(llm_spans[0].attributes.get("gen_ai.output.messages")) + assert first_in[0]["parts"][0]["content"] == "Hello" + assert first_out[0]["parts"][0]["content"] == "Hi there!" # Second span should have "What is the weather?" and "It's sunny." - assert llm_spans[1].attributes.get("gen_ai.prompt.0.content") == "What is the weather?" - assert llm_spans[1].attributes.get("gen_ai.completion.0.content") == "It's sunny." + second_in = json.loads(llm_spans[1].attributes.get("gen_ai.input.messages")) + second_out = json.loads(llm_spans[1].attributes.get("gen_ai.output.messages")) + assert second_in[0]["parts"][0]["content"] == "What is the weather?" + assert second_out[0]["parts"][0]["content"] == "It's sunny." 
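+        # Note: the flat gen_ai.prompt.N.* / gen_ai.completion.N.* attributes
+        # are replaced by JSON-serialized arrays (gen_ai.input.messages /
+        # gen_ai.output.messages), so tests json.loads() once and index into
+        # the parsed list instead of probing numbered attribute keys.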
def test_cleanup_ends_all_spans(self, tracer, tracer_provider): """Test that cleanup ends all remaining spans.""" @@ -331,6 +340,58 @@ def test_duplicate_completion_ignored(self, tracer, tracer_provider): llm_spans = [s for s in spans if s.name == "openai.realtime"] assert len(llm_spans) == 1 + def test_agent_span_has_invoke_agent_operation_name(self, tracer, tracer_provider): + """Agent spans must set gen_ai.operation.name='invoke_agent' per OTel spec.""" + _, exporter = tracer_provider + state = RealtimeTracingState(tracer) + state.start_workflow_span("Test Agent") + state.start_agent_span("Voice Assistant") + + span = state.agent_spans["Voice Assistant"] + span.end() + + finished = exporter.get_finished_spans() + agent = next(s for s in finished if s.name == "Voice Assistant.agent") + assert agent.attributes.get("gen_ai.operation.name") == "invoke_agent" + + state.cleanup() + + def test_tool_result_structured_output_serialized_as_json(self, tracer, tracer_provider): + """Structured tool output must be JSON, not Python repr (str()).""" + import json as json_mod + + _, exporter = tracer_provider + state = RealtimeTracingState(tracer) + state.start_workflow_span("Agent") + state.start_agent_span("Agent") + state.start_tool_span("my_tool", "Agent") + state.end_tool_span("my_tool", output={"key": "value", "num": 42}) + + spans = exporter.get_finished_spans() + tool_span = next(s for s in spans if s.name == "my_tool.tool") + result = tool_span.attributes.get("gen_ai.tool.call.result") + if result is not None: + assert "'" not in result, f"Python repr detected: {result}" + parsed = json_mod.loads(result) + assert parsed == {"key": "value", "num": 42} + + state.cleanup() + + def test_seen_completions_bounded_at_1000(self, tracer, tracer_provider): + """seen_completions must not grow without bound in long sessions.""" + _, exporter = tracer_provider + state = RealtimeTracingState(tracer) + state.start_workflow_span("Agent") + state.start_agent_span("Agent") + state.record_prompt("user", "hello") + + for i in range(2000): + state.record_completion("assistant", f"unique response {i}") + + assert len(state.seen_completions) <= 1000 + + state.cleanup() + class TestRealtimeSessionWrapping: """Tests for the session wrapping functionality.""" @@ -584,7 +645,8 @@ def __init__(self, role, content): spans = exporter.get_finished_spans() llm_spans = [s for s in spans if s.name == "openai.realtime"] assert len(llm_spans) == 1 - assert llm_spans[0].attributes.get("gen_ai.completion.0.content") == "Hi there!" + out_msgs = json.loads(llm_spans[0].attributes.get("gen_ai.output.messages")) + assert out_msgs[0]["parts"][0]["content"] == "Hi there!" def test_response_done_dict_captures_usage_and_completion(self, tracer, tracer_provider): """Test that response.done with dict data captures usage and completions.""" @@ -646,7 +708,8 @@ def test_response_done_dict_captures_usage_and_completion(self, tracer, tracer_p llm_span = llm_spans[0] assert llm_span.attributes.get("gen_ai.usage.input_tokens") == 42 assert llm_span.attributes.get("gen_ai.usage.output_tokens") == 18 - assert llm_span.attributes.get("gen_ai.completion.0.content") == "It is sunny today." + out_msgs = json.loads(llm_span.attributes.get("gen_ai.output.messages")) + assert out_msgs[0]["parts"][0]["content"] == "It is sunny today." 
def test_response_done_without_usage_still_captures_completion(self, tracer, tracer_provider): """Test that completions are captured even when usage is absent from response.done.""" @@ -694,5 +757,6 @@ def test_response_done_without_usage_still_captures_completion(self, tracer, tra spans = exporter.get_finished_spans() llm_spans = [s for s in spans if s.name == "openai.realtime"] assert len(llm_spans) == 1 - assert llm_spans[0].attributes.get("gen_ai.completion.0.content") == "Why did the chicken cross the road?" + output = json.loads(llm_spans[0].attributes.get("gen_ai.output.messages")) + assert output[0]["parts"][0]["content"] == "Why did the chicken cross the road?" assert llm_spans[0].attributes.get("gen_ai.usage.input_tokens") is None diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_recipe_agents_hierarchy.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_recipe_agents_hierarchy.py index f80a908e36..a1d7f15a4f 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/tests/test_recipe_agents_hierarchy.py +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_recipe_agents_hierarchy.py @@ -272,30 +272,30 @@ async def test_recipe_agents_hierarchy(exporter, recipe_agents): # Verify each response span has prompts, completions, and usage for i, response_span in enumerate(response_spans): - # Check for prompts - has_prompt = any(key.startswith("gen_ai.prompt.") for key in response_span.attributes.keys()) + # Check for input messages (new JSON array format) + has_prompt = "gen_ai.input.messages" in response_span.attributes assert has_prompt, ( f"Response span {i} should have prompt attributes, attributes: {dict(response_span.attributes)}" ) - # Check for completions - has_completion = any(key.startswith("gen_ai.completion.") for key in response_span.attributes.keys()) + # Check for output messages (new JSON array format) + has_completion = "gen_ai.output.messages" in response_span.attributes assert has_completion, ( f"Response span {i} should have completion attributes, attributes: {dict(response_span.attributes)}" ) # Check for usage has_usage = any( - key.startswith("gen_ai.usage.") or key.startswith("llm.usage.") for key in response_span.attributes.keys() + key.startswith("gen_ai.usage.") for key in response_span.attributes.keys() ) assert has_usage, ( f"Response span {i} should have usage attributes, attributes: {dict(response_span.attributes)}" ) # Check specific expected attributes - assert "gen_ai.system" in response_span.attributes, f"Response span {i} should have gen_ai.system" - assert response_span.attributes["gen_ai.system"] == "openai", ( - f"Response span {i} gen_ai.system should be 'openai'" + assert "gen_ai.provider.name" in response_span.attributes, f"Response span {i} should have gen_ai.provider.name" + assert response_span.attributes["gen_ai.provider.name"] == "openai", ( + f"Response span {i} gen_ai.provider.name should be 'openai'" ) pass # Validation passed diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_semconv_compliance.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_semconv_compliance.py new file mode 100644 index 0000000000..35a01e3380 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_semconv_compliance.py @@ -0,0 +1,8 @@ +# ruff: noqa: F401, F403 +""" +Semconv compliance tests re-used from opentelemetry-semantic-conventions-ai. + +Ensures the installed semconv package has the expected constant values. 
+To add more compliance checks, update _testing.py in that package — not here. +""" +from opentelemetry.semconv_ai._testing import * diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_semconv_messages.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_semconv_messages.py new file mode 100644 index 0000000000..35fb7dd5c4 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_semconv_messages.py @@ -0,0 +1,3635 @@ +""" +Comprehensive OTel GenAI semconv compliance tests for openai-agents instrumentation. + +Tests validate that all message formatting, attribute names, and values conform to +the OTel GenAI semantic conventions (parts-based schema, v1.40.0+). + +Reference schemas: semconv-schemas/gen-ai-input-messages.json, gen-ai-output-messages.json +""" + +import json +import pytest +from unittest.mock import MagicMock, patch +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def tracer_and_exporter(): + """Create a tracer provider with in-memory exporter for unit tests.""" + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + return provider.get_tracer("test"), exporter + + +@pytest.fixture +def processor(tracer_and_exporter): + """Create an OpenTelemetryTracingProcessor with a fresh tracer.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + + tracer, _ = tracer_and_exporter + return OpenTelemetryTracingProcessor(tracer) + + +# --------------------------------------------------------------------------- +# Helper: mock span data objects +# --------------------------------------------------------------------------- + +class MockAgentSpan: + def __init__(self, span_data, trace_id="test-trace", error=None): + self.span_data = span_data + self.trace_id = trace_id + self.error = error + + +class MockGenerationSpanData: + """Mock for agents.GenerationSpanData.""" + + def __init__(self, input=None, response=None): + self.input = input or [] + self.response = response + + +class ResponseSpanData: + """Lightweight stub whose __name__ is 'ResponseSpanData' (no MagicMock mutation).""" + + def __init__(self, input=None, response=None): + self.input = input or [] + self.response = response + + +class MockResponseOutput: + """Mock for a response output item with text content.""" + + def __init__(self, role="assistant", content=None, text=None, name=None, + call_id=None, arguments=None, type=None): + self.role = role + self.content = content + self.text = text + self.name = name + self.call_id = call_id + self.arguments = arguments + if type is None and content is not None: + self.type = "message" + elif type is None and call_id is not None: + self.type = "function_call" + else: + self.type = type + + +class MockContentItem: + """Mock for a content item inside ResponseOutputMessage.""" + + def __init__(self, text=None): + self.text = text + + +class MockUsage: + def __init__(self, input_tokens=10, output_tokens=20, total_tokens=30): + self.input_tokens = 
input_tokens + self.output_tokens = output_tokens + self.total_tokens = total_tokens + self.prompt_tokens = None + self.completion_tokens = None + + +class MockResponse: + """Mock for the response object from GenerationSpanData.""" + + def __init__(self, output=None, model=None, temperature=None, + max_output_tokens=None, top_p=None, frequency_penalty=None, + usage=None, finish_reason=None, id=None, tools=None): + self.output = output or [] + self.model = model + self.temperature = temperature + self.max_output_tokens = max_output_tokens + self.top_p = top_p + self.frequency_penalty = frequency_penalty + self.usage = usage + self.finish_reason = finish_reason + self.id = id + self.tools = tools or [] + + +class MockFunction: + """Mock for a tool function definition.""" + + def __init__(self, name="", description="", parameters=None): + self.name = name + self.description = description + self.parameters = parameters + + +class MockTool: + """Mock for a tool definition with function wrapper.""" + + def __init__(self, function=None, type="function"): + self.function = function + self.type = type + + +# --------------------------------------------------------------------------- +# P1-1: gen_ai.provider.name replaces gen_ai.system +# --------------------------------------------------------------------------- + +class TestProviderName: + """Verify gen_ai.provider.name is used instead of deprecated gen_ai.system.""" + + def test_generation_span_uses_provider_name(self, tracer_and_exporter): + """GenerationSpanData spans must use gen_ai.provider.name, not gen_ai.system.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-pn-1" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o", model_config={}) + span = MockAgentSpan(gen_data, trace_id="test-pn-1") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + response_span = next((s for s in spans if s.name == "openai.response"), None) + assert response_span is not None, "Expected openai.response span" + + attrs = dict(response_span.attributes) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs, ( + f"Expected gen_ai.provider.name attribute, got keys: {list(attrs.keys())}" + ) + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + def test_agent_span_uses_provider_name_openai(self, tracer_and_exporter): + """Agent spans must use gen_ai.provider.name = 'openai', NOT 'openai_agents'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import AgentSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-pn-2" + proc.on_trace_start(mock_trace) + + agent_data = AgentSpanData(name="TestAgent", handoffs=[], tools=[], output_type="") + span = MockAgentSpan(agent_data, trace_id="test-pn-2") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + agent_span = next((s for s in spans if s.name == "TestAgent.agent"), None) + assert agent_span is not None, "Expected TestAgent.agent span" + + attrs = dict(agent_span.attributes) + assert 
GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai", ( + f"Agent span provider name should be 'openai', got '{attrs.get(GenAIAttributes.GEN_AI_PROVIDER_NAME)}'" + ) + + def test_workflow_span_uses_provider_name(self, tracer_and_exporter): + """Workflow spans must use gen_ai.provider.name.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-pn-3" + proc.on_trace_start(mock_trace) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + wf_span = next((s for s in spans if s.name == "Agent Workflow"), None) + assert wf_span is not None + + attrs = dict(wf_span.attributes) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + def test_tool_span_uses_provider_name(self, tracer_and_exporter): + """Tool spans must use gen_ai.provider.name.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import FunctionSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-pn-4" + proc.on_trace_start(mock_trace) + + func_data = FunctionSpanData(name="get_weather", input="", output="") + span = MockAgentSpan(func_data, trace_id="test-pn-4") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + tool_span = next((s for s in spans if s.name == "get_weather.tool"), None) + assert tool_span is not None + + attrs = dict(tool_span.attributes) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + def test_handoff_span_uses_provider_name(self, tracer_and_exporter): + """Handoff spans must use gen_ai.provider.name.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import HandoffSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-pn-5" + proc.on_trace_start(mock_trace) + + handoff_data = HandoffSpanData(from_agent="AgentA", to_agent="AgentB") + span = MockAgentSpan(handoff_data, trace_id="test-pn-5") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + handoff_span = next((s for s in spans if "handoff" in s.name), None) + assert handoff_span is not None + + attrs = dict(handoff_span.attributes) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + +# --------------------------------------------------------------------------- +# P1-2 / P1-3: Input & Output messages use parts-based schema +# --------------------------------------------------------------------------- + +class TestInputMessagePartsFormat: + """Verify gen_ai.input.messages uses {role, parts} schema.""" + + def test_text_message_has_parts(self, tracer_and_exporter): + """Simple text message must have parts: [{type: 'text', content: '...'}].""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, exporter = tracer_and_exporter + 
span = tracer.start_span("test") + + input_data = [{"role": "user", "content": "Hello world"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None, "gen_ai.input.messages should be set" + messages = json.loads(raw) + + assert len(messages) == 1 + msg = messages[0] + assert msg["role"] == "user" + assert "parts" in msg, f"Message must have 'parts' key, got keys: {list(msg.keys())}" + assert "content" not in msg, "Top-level 'content' key should NOT be present (use parts instead)" + + parts = msg["parts"] + assert len(parts) == 1 + assert parts[0]["type"] == "text" + assert parts[0]["content"] == "Hello world" + + span.end() + + def test_tool_call_message_has_parts(self, tracer_and_exporter): + """Assistant tool call message must use parts with type 'tool_call'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{ + "id": "call_123", + "function": { + "name": "get_weather", + "arguments": '{"city": "NYC"}' + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + msg = messages[0] + + assert msg["role"] == "assistant" + assert "parts" in msg + assert "tool_calls" not in msg, "Top-level 'tool_calls' must NOT be present (use parts)" + + tool_part = msg["parts"][0] + assert tool_part["type"] == "tool_call" + assert tool_part["id"] == "call_123" + assert tool_part["name"] == "get_weather" + # Arguments must be parsed object, not string + assert isinstance(tool_part["arguments"], dict), ( + f"arguments must be dict (parsed object), got {type(tool_part['arguments'])}" + ) + assert tool_part["arguments"] == {"city": "NYC"} + + span.end() + + def test_tool_result_message_has_parts(self, tracer_and_exporter): + """Tool result message must use parts with type 'tool_call_response'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "tool", + "tool_call_id": "call_123", + "content": "72°F, sunny" + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + msg = messages[0] + + assert msg["role"] == "tool" + assert "parts" in msg + assert "content" not in msg, "Top-level 'content' must NOT be present for tool messages" + assert "tool_call_id" not in msg, "Top-level 'tool_call_id' must NOT be present" + + tool_part = msg["parts"][0] + assert tool_part["type"] == "tool_call_response" + assert tool_part["id"] == "call_123" + assert tool_part["response"] == "72°F, sunny" + + span.end() + + def test_agents_sdk_function_call_format(self, tracer_and_exporter): + """Agents SDK function_call type messages must convert to tool_call parts.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + # Agents SDK format uses dict with 'type' key, no 'role' + input_data = [{ + "type": "function_call", + "id": "fc_1", + "name": "search", + "arguments": '{"q": "test"}', + }] + + _extract_prompt_attributes(span, input_data, 
trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + msg = messages[0] + + assert msg["role"] == "assistant" + assert "parts" in msg + tool_part = msg["parts"][0] + assert tool_part["type"] == "tool_call" + assert tool_part["name"] == "search" + assert isinstance(tool_part["arguments"], dict) + assert tool_part["arguments"]["q"] == "test" + + span.end() + + def test_agents_sdk_function_call_output_format(self, tracer_and_exporter): + """Agents SDK function_call_output type must convert to tool_call_response parts.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + # Agents SDK format uses dict with 'type' key, no 'role' + input_data = [{ + "type": "function_call_output", + "call_id": "fc_1", + "output": "Result data", + }] + + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + msg = messages[0] + + assert msg["role"] == "tool" + assert "parts" in msg + tool_resp_part = msg["parts"][0] + assert tool_resp_part["type"] == "tool_call_response" + assert tool_resp_part["id"] == "fc_1" + assert tool_resp_part["response"] == "Result data" + + span.end() + + def test_list_content_with_tool_calls_preserves_structure(self, tracer_and_exporter): + """List content + tool_calls must preserve structured parts, not stringify.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "content": [ + {"type": "text", "text": "Let me check"}, + {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}}, + ], + "tool_calls": [{ + "id": "call_1", + "function": { + "name": "get_weather", + "arguments": '{"city": "NYC"}' + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + msg = messages[0] + + assert msg["role"] == "assistant" + assert "parts" in msg + + parts = msg["parts"] + # Expect: text part, uri part (image), tool_call part + assert len(parts) == 3, f"Expected 3 parts (text + image + tool_call), got {len(parts)}: {parts}" + + text_part = parts[0] + assert text_part["type"] == "text" + assert text_part["content"] == "Let me check" + + image_part = parts[1] + assert image_part["type"] == "uri", ( + f"image_url must map to 'uri' part, got type '{image_part['type']}'" + ) + assert image_part["modality"] == "image" + assert image_part["uri"] == "https://example.com/img.png" + + tool_part = parts[2] + assert tool_part["type"] == "tool_call" + assert tool_part["name"] == "get_weather" + + span.end() + + def test_string_content_with_tool_calls(self, tracer_and_exporter): + """String content + tool_calls should produce text part + tool_call part.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "content": "Let me look that up", + "tool_calls": [{ + "id": "call_2", + "function": { + "name": "search", + "arguments": '{"q": "test"}' + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = 
span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + msg = messages[0] + + parts = msg["parts"] + assert len(parts) == 2, f"Expected 2 parts (text + tool_call), got {len(parts)}" + assert parts[0]["type"] == "text" + assert parts[0]["content"] == "Let me look that up" + assert parts[1]["type"] == "tool_call" + + span.end() + + def test_none_content_message(self, tracer_and_exporter): + """Messages with None content should still produce valid parts.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"role": "assistant", "content": None}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + if raw: + messages = json.loads(raw) + if messages: + msg = messages[0] + assert "parts" in msg + + span.end() + + def test_empty_input_data(self, tracer_and_exporter): + """Empty input data should not set the attribute.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + _extract_prompt_attributes(span, [], trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is None, "Empty input should not set gen_ai.input.messages" + + span.end() + + +class TestOutputMessagePartsFormat: + """Verify gen_ai.output.messages uses {role, parts, finish_reason} schema.""" + + def test_text_output_has_parts(self, tracer_and_exporter): + """Text output must be wrapped in parts: [{type: 'text', content: '...'}].""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + content_item = MockContentItem(text="Hello!") + output_item = MockResponseOutput(role="assistant", content=[content_item]) + response = MockResponse( + output=[output_item], + model="gpt-4o", + usage=MockUsage(), + finish_reason="stop", + id="resp_123", + ) + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + + msg = messages[0] + assert msg["role"] == "assistant" + assert "parts" in msg, f"Output message must have 'parts', got keys: {list(msg.keys())}" + assert "content" not in msg, "Top-level 'content' must NOT be present" + + parts = msg["parts"] + assert len(parts) >= 1 + assert parts[0]["type"] == "text" + assert parts[0]["content"] == "Hello!" 
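+        # Pinned shape, for reference (illustrative; "stop" comes straight
+        # from this test's MockResponse, not from any default):
+        #
+        #   {"role": "assistant",
+        #    "parts": [{"type": "text", "content": "Hello!"}],
+        #    "finish_reason": "stop"}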
+ + span.end() + + def test_tool_call_output_has_parts(self, tracer_and_exporter): + """Tool call output must use parts with type 'tool_call'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + output_item = MockResponseOutput( + name="get_weather", call_id="call_456", arguments='{"city": "London"}' + ) + response = MockResponse( + output=[output_item], + model="gpt-4o", + usage=MockUsage(), + ) + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + + msg = messages[0] + assert msg["role"] == "assistant" + assert "parts" in msg + assert "tool_calls" not in msg, "Top-level 'tool_calls' must NOT be present" + + tool_part = msg["parts"][0] + assert tool_part["type"] == "tool_call" + assert tool_part["name"] == "get_weather" + assert tool_part["id"] == "call_456" + + span.end() + + def test_output_finish_reason_present(self, tracer_and_exporter): + """Output messages must have finish_reason at message level.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + content_item = MockContentItem(text="Done") + output_item = MockResponseOutput(role="assistant", content=[content_item]) + response = MockResponse( + output=[output_item], + model="gpt-4o", + usage=MockUsage(), + finish_reason="stop", + ) + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + + msg = messages[0] + assert "finish_reason" in msg, "finish_reason is required per schema" + assert msg["finish_reason"] == "stop" + + span.end() + + def test_output_finish_reason_empty_when_unknown(self, tracer_and_exporter): + """finish_reason must be '' (not fabricated 'stop') when unknown.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + content_item = MockContentItem(text="Done") + output_item = MockResponseOutput(role="assistant", content=[content_item]) + response = MockResponse( + output=[output_item], + model="gpt-4o", + usage=MockUsage(), + finish_reason=None, + ) + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + + msg = messages[0] + assert "finish_reason" in msg, "finish_reason must always be present (required by schema)" + # When finish_reason is unknown, it should be empty string, NOT fabricated "stop" + assert msg["finish_reason"] == "", ( + f"finish_reason should be '' when unknown, got '{msg['finish_reason']}'" + ) + + span.end() + + def test_message_with_empty_content_and_name_not_tool_call(self, tracer_and_exporter): + """ResponseOutputMessage with empty content + participant name must not become a tool call. + + Semconv: ToolCallRequestPart.name MUST identify a tool, not a participant. 
+ """ + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + from types import SimpleNamespace + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + output = SimpleNamespace( + type="message", content=[], name="CustomerServiceBot", role="assistant", + ) + response = SimpleNamespace( + temperature=None, max_output_tokens=None, top_p=None, + model=None, id=None, frequency_penalty=None, + finish_reason=None, status="completed", + output=[output], usage=None, + ) + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + if raw: + messages = json.loads(raw) + for msg in messages: + for part in msg.get("parts", []): + assert part.get("type") != "tool_call", ( + "Participant name was misclassified as tool call" + ) + + span.end() + + +# --------------------------------------------------------------------------- +# P1-4: Arguments parsed as objects +# --------------------------------------------------------------------------- + +class TestArgumentsParsing: + """Verify tool call arguments are parsed to objects, not kept as strings.""" + + def test_string_arguments_parsed_to_dict(self, tracer_and_exporter): + """JSON string arguments must be parsed to dict.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{ + "id": "call_1", + "function": { + "name": "search", + "arguments": '{"query": "weather", "limit": 5}' + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + tool_part = messages[0]["parts"][0] + + assert isinstance(tool_part["arguments"], dict), ( + f"Arguments should be parsed to dict, got {type(tool_part['arguments'])}" + ) + assert tool_part["arguments"]["query"] == "weather" + assert tool_part["arguments"]["limit"] == 5 + + span.end() + + def test_dict_arguments_kept_as_dict(self, tracer_and_exporter): + """Dict arguments should stay as dict.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{ + "id": "call_1", + "function": { + "name": "search", + "arguments": {"query": "test"} + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + tool_part = messages[0]["parts"][0] + + assert isinstance(tool_part["arguments"], dict) + assert tool_part["arguments"]["query"] == "test" + + span.end() + + def test_invalid_json_arguments_fallback(self, tracer_and_exporter): + """Invalid JSON string arguments should have best-effort fallback.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{ + "id": "call_1", + "function": { + "name": "search", + "arguments": "not valid json {" + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + tool_part 
= messages[0]["parts"][0]
+
+        # Must not crash; arguments must still be present in some form
+        assert "arguments" in tool_part
+
+        span.end()
+
+
+# ---------------------------------------------------------------------------
+# P1-5 / P1-6: Finish reasons
+# ---------------------------------------------------------------------------
+
+class TestFinishReasons:
+    """Verify finish reason mapping and top-level attribute."""
+
+    def test_finish_reasons_top_level_attribute(self, tracer_and_exporter):
+        """gen_ai.response.finish_reasons must be set as top-level span array."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            _extract_response_attributes,
+        )
+
+        tracer, _ = tracer_and_exporter
+        span = tracer.start_span("test")
+
+        content_item = MockContentItem(text="Done")
+        output_item = MockResponseOutput(role="assistant", content=[content_item])
+        response = MockResponse(
+            output=[output_item],
+            model="gpt-4o",
+            usage=MockUsage(),
+            finish_reason="stop",
+        )
+
+        _extract_response_attributes(span, response, trace_content=True)
+
+        finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS)
+        assert finish_reasons is not None, (
+            "gen_ai.response.finish_reasons must be set as top-level span attribute"
+        )
+        assert isinstance(finish_reasons, (list, tuple))
+        assert "stop" in finish_reasons
+
+        span.end()
+
+    def test_finish_reasons_tool_calls_mapped_to_singular(self, tracer_and_exporter):
+        """OpenAI 'tool_calls' (plural) must map to 'tool_call' (singular)."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            _extract_response_attributes,
+        )
+
+        tracer, _ = tracer_and_exporter
+        span = tracer.start_span("test")
+
+        output_item = MockResponseOutput(name="search", call_id="c1", arguments="{}")
+        response = MockResponse(
+            output=[output_item],
+            model="gpt-4o",
+            usage=MockUsage(),
+            finish_reason="tool_calls",
+        )
+
+        _extract_response_attributes(span, response, trace_content=True)
+
+        finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS)
+        assert finish_reasons is not None
+
+        # Must be singular "tool_call", not plural "tool_calls"
+        assert "tool_call" in finish_reasons, (
+            f"Expected 'tool_call' (singular), got {finish_reasons}"
+        )
+        assert "tool_calls" not in finish_reasons
+
+        span.end()
+
+    def test_finish_reasons_none_omits_attribute(self, tracer_and_exporter):
+        """When finish_reason is None, the top-level attribute should be omitted (not fabricated)."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            _extract_response_attributes,
+        )
+
+        tracer, _ = tracer_and_exporter
+        span = tracer.start_span("test")
+
+        content_item = MockContentItem(text="Done")
+        output_item = MockResponseOutput(role="assistant", content=[content_item])
+        response = MockResponse(
+            output=[output_item],
+            model="gpt-4o",
+            usage=MockUsage(),
+            finish_reason=None,
+        )
+
+        _extract_response_attributes(span, response, trace_content=True)
+
+        finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS)
+        # Ideally omitted entirely; at minimum it must never contain a fabricated "stop"
+        if finish_reasons is not None:
+            assert "stop" not in finish_reasons, "Must NOT fabricate 'stop' when finish_reason is None"
+
+        span.end()
+
+    def test_finish_reasons_set_without_prompts(self, tracer_and_exporter):
+        """finish_reasons must be set even when should_send_prompts() is False."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            _extract_response_attributes,
+        )
+
+        tracer, _ = tracer_and_exporter
+        span
= tracer.start_span("test") + + content_item = MockContentItem(text="Done") + output_item = MockResponseOutput(role="assistant", content=[content_item]) + response = MockResponse( + output=[output_item], + model="gpt-4o", + usage=MockUsage(), + finish_reason="stop", + ) + + # trace_content=False simulates should_send_prompts() returning False + _extract_response_attributes(span, response, trace_content=False) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert finish_reasons is not None, ( + "gen_ai.response.finish_reasons must be set even when content tracing is disabled" + ) + + span.end() + + def test_tool_call_top_level_matches_per_message(self, tracer_and_exporter): + """Top-level finish_reasons must say 'tool_call' when output contains tool calls. + + Semconv: gen_ai.response.finish_reasons corresponds to each generation. + If the model stopped to emit a tool call, both levels must agree. + """ + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + from types import SimpleNamespace + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + tool_output = SimpleNamespace( + type="function_call", + content=None, + name="get_weather", + arguments='{"city": "London"}', + call_id="call_123", + ) + response = SimpleNamespace( + temperature=None, max_output_tokens=None, top_p=None, + model=None, id=None, frequency_penalty=None, + finish_reason=None, status="completed", + output=[tool_output], usage=None, + ) + + _extract_response_attributes(span, response, trace_content=True) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert finish_reasons is not None, "Missing gen_ai.response.finish_reasons" + assert "tool_call" in finish_reasons, ( + f"Expected 'tool_call' in finish_reasons, got {finish_reasons}" + ) + + span.end() + + +# --------------------------------------------------------------------------- +# P1-7: Operation name +# --------------------------------------------------------------------------- + +class TestOperationName: + """Verify gen_ai.operation.name uses well-known OTel values.""" + + def test_generation_span_operation_name_is_chat(self, tracer_and_exporter): + """GenerationSpanData must use operation name 'chat'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-op-1" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o", model_config={}) + span = MockAgentSpan(gen_data, trace_id="test-op-1") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + resp_span = next((s for s in spans if s.name == "openai.response"), None) + assert resp_span is not None + + assert resp_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat", ( + f"Expected 'chat', got '{resp_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]}'" + ) + + def test_response_span_data_operation_name_is_chat(self, tracer_and_exporter): + """ResponseSpanData (Responses API) must use 'chat', same as GenerationSpanData. + + 'generate_content' is the GCP/Gemini well-known value and must not be used for + OpenAI's Responses API, which is a chat completion surface. 
+ """ + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-op-2" + proc.on_trace_start(mock_trace) + + response_data = ResponseSpanData(input=[], response=None) + + span = MockAgentSpan(response_data, trace_id="test-op-2") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + resp_span = next((s for s in spans if s.name == "openai.response"), None) + assert resp_span is not None + + op_name = resp_span.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME) + assert op_name == "chat", ( + f"ResponseSpanData must emit 'chat', got '{op_name}'" + ) + + +# --------------------------------------------------------------------------- +# P2-1 / P2-2: Response model and ID +# --------------------------------------------------------------------------- + +class TestResponseAttributes: + """Verify recommended response attributes are set.""" + + def test_response_model_set(self, tracer_and_exporter): + """gen_ai.response.model should be set from response.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + model="gpt-4o-2024-08-06", + usage=MockUsage(), + ) + + _extract_response_attributes(span, response, trace_content=True) + + assert GenAIAttributes.GEN_AI_RESPONSE_MODEL in span.attributes, ( + "gen_ai.response.model should be set" + ) + assert span.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] == "gpt-4o-2024-08-06" + + span.end() + + def test_response_id_set(self, tracer_and_exporter): + """gen_ai.response.id should be set from response.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + model="gpt-4o", + usage=MockUsage(), + id="resp_abc123", + ) + + _extract_response_attributes(span, response, trace_content=True) + + assert GenAIAttributes.GEN_AI_RESPONSE_ID in span.attributes, ( + "gen_ai.response.id should be set" + ) + assert span.attributes[GenAIAttributes.GEN_AI_RESPONSE_ID] == "resp_abc123" + + span.end() + + def test_frequency_penalty_set_on_span(self, tracer_and_exporter): + """gen_ai.request.frequency_penalty should be set as span attribute.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + model="gpt-4o", + frequency_penalty=0.5, + usage=MockUsage(), + ) + + _extract_response_attributes(span, response, trace_content=True) + + assert GenAIAttributes.GEN_AI_REQUEST_FREQUENCY_PENALTY in span.attributes, ( + "gen_ai.request.frequency_penalty should be set on span" + ) + assert span.attributes[GenAIAttributes.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + + span.end() + + def test_response_model_does_not_overwrite_request_model(self, tracer_and_exporter): + """response.model must only set gen_ai.response.model, not gen_ai.request.model. + + Semconv: gen_ai.request.model (alias, e.g. 'gpt-4o') and + gen_ai.response.model (served, e.g. 'gpt-4o-2024-08-06') are distinct. 
+ """ + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + from types import SimpleNamespace + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + span.set_attribute(GenAIAttributes.GEN_AI_REQUEST_MODEL, "gpt-4o") + + response = SimpleNamespace( + temperature=None, max_output_tokens=None, top_p=None, + model="gpt-4o-2024-08-06", id=None, frequency_penalty=None, + finish_reason=None, status="completed", output=[], usage=None, + ) + + _extract_response_attributes(span, response, trace_content=True) + + assert span.attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL) == "gpt-4o", ( + "response.model must not overwrite gen_ai.request.model" + ) + assert span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_MODEL) == "gpt-4o-2024-08-06" + + span.end() + + +# --------------------------------------------------------------------------- +# P2-7: Tool definitions preserve full format +# --------------------------------------------------------------------------- + +class TestToolDefinitions: + """Verify tool definitions preserve the source system's representation.""" + + def test_tool_definitions_preserve_type_wrapper(self, tracer_and_exporter): + """Tool definitions should preserve the 'type: function' wrapper.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-td-1" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o", model_config={}) + gen_data.input = [] + + # Create response with tools + func = MockFunction(name="search", description="Search for data", parameters={"type": "object"}) + tool = MockTool(function=func, type="function") + gen_data.response = MockResponse( + model="gpt-4o", + tools=[tool], + usage=MockUsage(), + ) + + span = MockAgentSpan(gen_data, trace_id="test-td-1") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + resp_span = next((s for s in spans if s.name == "openai.response"), None) + assert resp_span is not None + + raw_defs = resp_span.attributes.get(GenAIAttributes.GEN_AI_TOOL_DEFINITIONS) + assert raw_defs is not None, "gen_ai.tool.definitions must be set when tools are present" + defs = json.loads(raw_defs) + assert len(defs) >= 1 + tool_def = defs[0] + # Per spec: preserve source system's representation + assert "type" in tool_def, "Tool definition should preserve 'type' field" + assert tool_def["type"] == "function" + assert "function" in tool_def, "Tool definition should preserve 'function' wrapper" + + +# --------------------------------------------------------------------------- +# P2-5: Realtime messages parts format +# --------------------------------------------------------------------------- + +class TestRealtimeMessageFormat: + """Verify realtime LLM span messages use parts-based format.""" + + def test_realtime_llm_span_input_uses_parts(self): + """Realtime input messages must use parts-based format.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + 
state.start_workflow_span("TestAgent") + state.start_agent_span("TestAgent") + + state.record_prompt("user", "What is the weather?") + + with patch( + "opentelemetry.instrumentation.openai_agents._realtime_wrappers.should_send_prompts", + return_value=True, + ): + state.create_llm_span("It's sunny!") + + state.cleanup() + state.end_workflow_span() + + spans = exporter.get_finished_spans() + llm_span = next((s for s in spans if s.name == "openai.realtime"), None) + assert llm_span is not None + + raw_input = llm_span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw_input is not None, "gen_ai.input.messages must be set on realtime LLM span" + messages = json.loads(raw_input) + msg = messages[0] + assert "parts" in msg, f"Realtime input must use parts format, got keys: {list(msg.keys())}" + + def test_realtime_llm_span_output_uses_parts(self): + """Realtime output messages must use parts-based format.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + state.start_workflow_span("TestAgent") + state.start_agent_span("TestAgent") + + state.record_prompt("user", "Hello") + + with patch( + "opentelemetry.instrumentation.openai_agents._realtime_wrappers.should_send_prompts", + return_value=True, + ): + state.create_llm_span("Hi there!") + + state.cleanup() + state.end_workflow_span() + + spans = exporter.get_finished_spans() + llm_span = next((s for s in spans if s.name == "openai.realtime"), None) + assert llm_span is not None + + raw_output = llm_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw_output is not None, "gen_ai.output.messages must be set on realtime LLM span" + messages = json.loads(raw_output) + msg = messages[0] + assert "parts" in msg, f"Realtime output must use parts format, got keys: {list(msg.keys())}" + + def test_realtime_does_not_fabricate_stop(self): + """Realtime must NOT fabricate finish_reason 'stop'.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + state.start_workflow_span("TestAgent") + state.start_agent_span("TestAgent") + + state.record_prompt("user", "Test") + + with patch( + "opentelemetry.instrumentation.openai_agents._realtime_wrappers.should_send_prompts", + return_value=True, + ): + state.create_llm_span("Response") + + state.cleanup() + state.end_workflow_span() + + spans = exporter.get_finished_spans() + llm_span = next((s for s in spans if s.name == "openai.realtime"), None) + assert llm_span is not None + + raw_output = llm_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw_output is not None, "gen_ai.output.messages must be set on realtime LLM span" + messages = json.loads(raw_output) + msg = messages[0] + # finish_reason should be empty string, not fabricated "stop" + fr = msg.get("finish_reason") + assert fr == "", ( + f"Realtime should not fabricate finish_reason, got '{fr}'" + ) + + +# --------------------------------------------------------------------------- +# Realtime operation name +# 
--------------------------------------------------------------------------- + +class TestRealtimeOperationName: + """Verify realtime spans set gen_ai.operation.name.""" + + def test_realtime_llm_span_operation_name(self): + """Realtime LLM span must set gen_ai.operation.name.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + state.start_workflow_span("TestAgent") + state.start_agent_span("TestAgent") + + state.record_prompt("user", "Hello") + + with patch( + "opentelemetry.instrumentation.openai_agents._realtime_wrappers.should_send_prompts", + return_value=True, + ): + state.create_llm_span("Hi there!") + + state.cleanup() + state.end_workflow_span() + + spans = exporter.get_finished_spans() + llm_span = next((s for s in spans if s.name == "openai.realtime"), None) + assert llm_span is not None + + op_name = llm_span.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME) + assert op_name is not None, "gen_ai.operation.name must be set on realtime LLM span" + # "realtime" is a custom extension (no well-known OTel equivalent); + # lock the current value so changes are intentional. + assert op_name == "realtime", ( + f"Expected 'realtime' operation name, got '{op_name}'" + ) + + def test_realtime_audio_span_operation_name(self): + """Realtime audio span must set gen_ai.operation.name.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + state.start_workflow_span("TestAgent") + state.start_agent_span("TestAgent") + + state.start_audio_span("item-1", 0) + state.end_audio_span("item-1", 0) + + state.cleanup() + state.end_workflow_span() + + spans = exporter.get_finished_spans() + audio_span = next( + (s for s in spans if s.name == "openai.realtime" and + s.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "realtime"), + None, + ) + assert audio_span is not None, "Audio span must exist with operation name 'realtime'" + + +# --------------------------------------------------------------------------- +# Realtime provider name +# --------------------------------------------------------------------------- + +class TestRealtimeProviderName: + """Verify realtime spans use gen_ai.provider.name.""" + + def test_realtime_workflow_uses_provider_name(self): + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + state.start_workflow_span("TestAgent") + state.end_workflow_span() + + spans = exporter.get_finished_spans() + wf_span = next((s for s in spans if s.name == "Realtime Session"), None) + assert wf_span is not None + + attrs = dict(wf_span.attributes) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + def test_realtime_agent_span_uses_provider_name(self): + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + 
RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + state.start_workflow_span("TestAgent") + state.start_agent_span("TestAgent") + state.cleanup() + state.end_workflow_span() + + spans = exporter.get_finished_spans() + agent_span = next((s for s in spans if s.name == "TestAgent.agent"), None) + assert agent_span is not None + + attrs = dict(agent_span.attributes) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + +# --------------------------------------------------------------------------- +# No deprecated gen_ai.system anywhere +# --------------------------------------------------------------------------- + +class TestNoDeprecatedAttributes: + """Ensure no span uses the deprecated gen_ai.system attribute.""" + + def test_no_gen_ai_system_in_generation_span(self, tracer_and_exporter): + """Spans must not contain the deprecated gen_ai.system attribute.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-dep-1" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o", model_config={}) + span = MockAgentSpan(gen_data, trace_id="test-dep-1") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + for s in spans: + attrs = dict(s.attributes) + assert "gen_ai.system" not in attrs, ( + f"Span '{s.name}' uses deprecated 'gen_ai.system' attribute. " + f"Must use 'gen_ai.provider.name' instead." 
+ ) + + +# --------------------------------------------------------------------------- +# P3: Content gating – trace_content=False must suppress content attributes +# --------------------------------------------------------------------------- + +class TestContentGating: + """Verify opt-in content attributes are not emitted when tracing is disabled.""" + + def test_input_messages_suppressed_when_tracing_disabled(self, tracer_and_exporter): + """gen_ai.input.messages must NOT be set when trace_content=False.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"role": "user", "content": "secret prompt"}] + _extract_prompt_attributes(span, input_data, trace_content=False) + + assert GenAIAttributes.GEN_AI_INPUT_MESSAGES not in span.attributes + span.end() + + def test_output_messages_suppressed_when_tracing_disabled(self, tracer_and_exporter): + """gen_ai.output.messages must NOT be set when trace_content=False.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = "stop" + + content_item = MagicMock() + content_item.type = "output_text" + content_item.text = "secret output" + + output_msg = MagicMock() + output_msg.type = "message" + output_msg.content = [content_item] + output_msg.role = "assistant" + output_msg.name = None # Not a tool call + + response.output = [output_msg] + response.usage = None + response.tools = None + + _extract_response_attributes(span, response, trace_content=False) + + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in span.attributes + # finish_reasons should still be set (not content-gated) + assert GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS in span.attributes + span.end() + + def test_tool_definitions_suppressed_when_tracing_disabled( + self, tracer_and_exporter + ): + """gen_ai.tool.definitions must NOT be set when trace_content=False.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-gate-tools" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o", model_config={}) + span_obj = MockAgentSpan(gen_data, trace_id="test-gate-tools") + + func_mock = MagicMock() + func_mock.name = "lookup" + func_mock.description = "Look something up" + func_mock.parameters = {"type": "object"} + + tool_mock = MagicMock() + tool_mock.function = func_mock + tool_mock.type = "function" + + response_mock = MagicMock() + response_mock.tools = [tool_mock] + response_mock.output = [] + response_mock.usage = None + response_mock.temperature = None + response_mock.max_output_tokens = None + response_mock.top_p = None + response_mock.model = "gpt-4o" + response_mock.id = "resp_1" + response_mock.frequency_penalty = None + response_mock.finish_reason = None + gen_data.response = response_mock + + with patch( + "opentelemetry.instrumentation.openai_agents._hooks.should_send_prompts", + return_value=False, + ): + 
proc.on_span_start(span_obj) + proc.on_span_end(span_obj) + + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + response_spans = [s for s in spans if "response" in s.name or "chat" in s.name] + for s in response_spans: + assert GenAIAttributes.GEN_AI_TOOL_DEFINITIONS not in s.attributes, ( + f"Span '{s.name}' should not have tool definitions when tracing disabled" + ) + + +# --------------------------------------------------------------------------- +# P3: Invalid tool arguments fallback – must always be object or null +# --------------------------------------------------------------------------- + +class TestInvalidToolArgumentsFallback: + """Ensure _parse_arguments never returns a raw string.""" + + def test_invalid_json_returns_wrapped_object(self): + """Invalid JSON string must produce {_raw: ...} object.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _parse_arguments, + ) + + result = _parse_arguments("not valid json {{{") + assert isinstance(result, dict), f"Expected dict, got {type(result)}" + assert "_raw" in result + assert result["_raw"] == "not valid json {{{" + + def test_json_array_returns_wrapped_object(self): + """JSON array string must produce {_raw: ...} object (not a list).""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _parse_arguments, + ) + + result = _parse_arguments('[1, 2, 3]') + assert isinstance(result, dict), f"Expected dict, got {type(result)}" + assert "_raw" in result + + def test_empty_string_returns_none(self): + """Empty/whitespace string must return None.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _parse_arguments, + ) + + assert _parse_arguments("") is None + assert _parse_arguments(" ") is None + + def test_none_returns_none(self): + """None input must return None.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _parse_arguments, + ) + + assert _parse_arguments(None) is None + + def test_numeric_arg_returns_wrapped_object(self): + """Non-string non-dict input must produce {_raw: ...} object.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _parse_arguments, + ) + + result = _parse_arguments(42) + assert isinstance(result, dict) + assert "_raw" in result + + def test_valid_json_dict_returns_dict(self): + """Valid JSON dict string must parse normally.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _parse_arguments, + ) + + result = _parse_arguments('{"city": "NYC"}') + assert isinstance(result, dict) + assert result == {"city": "NYC"} + + +# --------------------------------------------------------------------------- +# Spec §1: Multimodal content mapping — lock OTel part types +# Ref: openllmetry-semconv-review.md §1 "Provider-Specific Content Block Mapping" +# OpenAI image_url → OTel UriPart {type: "uri", modality: "image", uri: "..."} +# OpenAI input_audio → OTel BlobPart {type: "blob", modality: "audio", ...} +# --------------------------------------------------------------------------- + +class TestMultimodalInputMapping: + """Lock multimodal content blocks to OTel part types per spec.""" + + def test_image_url_maps_to_uri_part(self, tracer_and_exporter): + """Spec §1: OpenAI image_url MUST map to UriPart, NOT 'image_url' type.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "user", + "content": [ + {"type": "text", "text": 
"What is in this image?"}, + { + "type": "image_url", + "image_url": {"url": "https://example.com/img.png"}, + }, + ], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 2 + assert parts[0] == {"type": "text", "content": "What is in this image?"} + # Spec: UriPart — NOT {"type": "image_url", ...} + assert parts[1]["type"] == "uri", ( + f"image_url must map to UriPart (type='uri'), got type='{parts[1]['type']}'" + ) + assert parts[1]["modality"] == "image" + assert parts[1]["uri"] == "https://example.com/img.png" + + span.end() + + def test_input_audio_maps_to_blob_part(self, tracer_and_exporter): + """Spec §1: OpenAI input_audio MUST map to BlobPart, NOT 'input_audio' type.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "user", + "content": [ + { + "type": "input_audio", + "input_audio": {"data": "base64audiodata==", "format": "wav"}, + }, + ], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 1 + # Spec: BlobPart — NOT {"type": "input_audio", ...} + assert parts[0]["type"] == "blob", ( + f"input_audio must map to BlobPart (type='blob'), got type='{parts[0]['type']}'" + ) + assert parts[0]["modality"] == "audio" + assert parts[0]["content"] == "base64audiodata==" + + span.end() + + def test_mixed_text_blocks_mapped(self, tracer_and_exporter): + """Spec §1: Multiple text blocks → multiple TextPart objects.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "user", + "content": [ + {"type": "text", "text": "First paragraph."}, + {"type": "text", "text": "Second paragraph."}, + ], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 2 + assert parts[0] == {"type": "text", "content": "First paragraph."} + assert parts[1] == {"type": "text", "content": "Second paragraph."} + + span.end() + + def test_plain_string_content_produces_text_part(self, tracer_and_exporter): + """Spec §1: Plain string content → single TextPart.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"role": "user", "content": "Hello"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 1 + assert parts[0] == {"type": "text", "content": "Hello"} + + span.end() + + def test_text_key_is_content_not_text(self, tracer_and_exporter): + """Spec §1: TextPart key is 'content', NOT 'text'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"role": "user", "content": 
"Check key name"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + part = messages[0]["parts"][0] + + assert "content" in part, "TextPart must use 'content' key" + assert "text" not in part, ( + "TextPart must NOT use 'text' key — spec requires 'content'" + ) + + span.end() + + def test_unknown_block_type_preserved_as_generic_part(self, tracer_and_exporter): + """Spec §1: Unknown block types → GenericPart with type preserved.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "user", + "content": [ + {"type": "custom_widget", "widget_id": "w1"}, + ], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + part = messages[0]["parts"][0] + + assert part["type"] == "custom_widget", "Unknown type must be preserved" + + span.end() + + def test_sdk_object_image_url_maps_to_uri_part(self, tracer_and_exporter): + """Spec §1: SDK-object image_url blocks also map to UriPart.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _content_block_to_part, + ) + + url_obj = MagicMock() + url_obj.url = "https://example.com/photo.jpg" + block = MagicMock() + block.type = "image_url" + block.image_url = url_obj + + result = _content_block_to_part(block) + + assert result["type"] == "uri" + assert result["modality"] == "image" + assert result["uri"] == "https://example.com/photo.jpg" + + def test_sdk_object_input_audio_maps_to_blob_part(self, tracer_and_exporter): + """Spec §1: SDK-object input_audio blocks also map to BlobPart.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _content_block_to_part, + ) + + audio_obj = MagicMock() + audio_obj.data = "base64data==" + block = MagicMock() + block.type = "input_audio" + block.input_audio = audio_obj + + result = _content_block_to_part(block) + + assert result["type"] == "blob" + assert result["modality"] == "audio" + assert result["content"] == "base64data==" + + def test_unknown_block_preserves_per_field_structure(self): + """Unknown block types must preserve per-field structure, not json.dumps the whole block.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _dict_block_to_part, + ) + + block = {"type": "file", "file_id": "file_abc123", "filename": "data.csv"} + part = _dict_block_to_part(block) + + assert part["type"] == "file" + assert "file_id" in part, f"Expected 'file_id' in part, got: {part}" + assert part["file_id"] == "file_abc123" + + +# --------------------------------------------------------------------------- +# Spec §1: Assistant text + tool_calls combined +# Ref: "Messages can include both text and tool_call parts" +# --------------------------------------------------------------------------- + +class TestAssistantTextWithToolCalls: + """Lock: assistant messages with both text and tool_calls emit both parts.""" + + def test_text_and_tool_call_both_present(self, tracer_and_exporter): + """Spec §1: text content alongside tool_calls → text + tool_call parts.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "content": "Let me look that 
up.", + "tool_calls": [{ + "id": "call_1", + "function": { + "name": "search", + "arguments": '{"q": "weather"}', + }, + }], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + types = [p["type"] for p in parts] + assert "text" in types, "Missing text part alongside tool_call" + assert "tool_call" in types, "Missing tool_call part" + + text_part = next(p for p in parts if p["type"] == "text") + assert text_part["content"] == "Let me look that up." + + tc_part = next(p for p in parts if p["type"] == "tool_call") + assert tc_part["name"] == "search" + assert isinstance(tc_part["arguments"], dict) + + span.end() + + def test_tool_calls_without_content(self, tracer_and_exporter): + """Spec §1: tool_calls with no text content → only tool_call parts.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "content": None, + "tool_calls": [{ + "id": "call_1", + "function": {"name": "search", "arguments": "{}"}, + }], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 1 + assert parts[0]["type"] == "tool_call" + + span.end() + + +# --------------------------------------------------------------------------- +# Spec §1/§4: Output messages — non-text parts, finish_reason always present +# Ref: "finish_reason in output JSON: required per schema — always set" +# --------------------------------------------------------------------------- + +class TestOutputNonTextParts: + """Lock: output messages handle refusal, reasoning, and finish_reason.""" + + def test_refusal_content_mapped(self, tracer_and_exporter): + """Spec §1: Refusal content → {type: 'text', content: '...'} (standard TextPart).""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = "stop" + + content_item = MagicMock() + content_item.type = "refusal" + content_item.refusal = "I cannot help with that." + content_item.text = None + + output_msg = MagicMock() + output_msg.type = "message" + output_msg.content = [content_item] + output_msg.role = "assistant" + output_msg.name = None + + response.output = [output_msg] + response.usage = None + response.tools = None + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 1 + assert parts[0]["type"] == "refusal" + assert parts[0]["content"] == "I cannot help with that." 
+ + span.end() + + def test_output_finish_reason_always_present_in_json(self, tracer_and_exporter): + """Spec §4: finish_reason key MUST always exist in output JSON (even if unknown).""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = None # Unknown/absent + response.status = None + + content_item = MagicMock() + content_item.type = "output_text" + content_item.text = "Hello" + + output_msg = MagicMock() + output_msg.type = "message" + output_msg.content = [content_item] + output_msg.role = "assistant" + output_msg.name = None + + response.output = [output_msg] + response.usage = None + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + + assert "finish_reason" in messages[0], ( + "finish_reason key must always be present in output JSON per schema" + ) + assert messages[0]["finish_reason"] == "" + + span.end() + + def test_output_finish_reason_mapped_value(self, tracer_and_exporter): + """Spec §4: finish_reason in JSON uses mapped OTel value.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = "tool_calls" # OpenAI plural + + content_item = MagicMock() + content_item.type = "output_text" + content_item.text = "Calling tool" + + output_msg = MagicMock() + output_msg.type = "message" + output_msg.content = [content_item] + output_msg.role = "assistant" + output_msg.name = None + + response.output = [output_msg] + response.usage = None + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + + # Spec §4: tool_calls → tool_call (singular) + assert messages[0]["finish_reason"] == "tool_call" + + span.end() + + def test_reasoning_content_mapped(self, tracer_and_exporter): + """Spec §1: Reasoning content → {type: 'reasoning', content: '...'}.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = "stop" + + content_item = MagicMock() + content_item.type = "reasoning" + content_item.text = None + + summary_item = MagicMock() + summary_item.text = "The user wants weather info" + content_item.summary = [summary_item] + + output_msg = MagicMock() + output_msg.type = "message" + output_msg.content = [content_item] + output_msg.role = "assistant" + output_msg.name = None + + response.output = [output_msg] + response.usage = None + + _extract_response_attributes(span, response, trace_content=True) + + raw = 
span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 1 + assert parts[0]["type"] == "reasoning" + assert "weather" in parts[0]["content"] + + span.end() + + def test_reasoning_summary_dict_items_extract_text(self, tracer_and_exporter): + """Dict-form reasoning summary items must extract 'text' field, not dump repr.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + from types import SimpleNamespace + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + content_item = SimpleNamespace( + type="reasoning", + summary=[{"text": "The model considered options."}], + ) + output = SimpleNamespace( + type="message", content=[content_item], role="assistant", + ) + response = SimpleNamespace( + temperature=None, max_output_tokens=None, top_p=None, + model=None, id=None, frequency_penalty=None, + finish_reason=None, status="completed", + output=[output], usage=None, + ) + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + reasoning_parts = [ + p for msg in messages for p in msg.get("parts", []) + if p.get("type") == "reasoning" + ] + assert len(reasoning_parts) >= 1 + assert "{'text'" not in reasoning_parts[0]["content"], ( + "Dict repr leaked into reasoning content" + ) + assert "The model considered options" in reasoning_parts[0]["content"] + + span.end() + + +# --------------------------------------------------------------------------- +# Spec §2: Roles — only OTel-valid roles emitted +# --------------------------------------------------------------------------- + +class TestRoles: + """Lock: only valid OTel roles (system, user, assistant, tool) emitted.""" + + def test_system_role_preserved(self, tracer_and_exporter): + """Spec §2: system role kept inline in input messages for OpenAI.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "Hi"}, + ] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + + roles = [m["role"] for m in messages] + assert "system" in roles + assert "user" in roles + + span.end() + + def test_developer_role_preserved(self, tracer_and_exporter): + """Spec §2: provider-specific roles like 'developer' are allowed.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"role": "developer", "content": "Be concise."}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + + assert messages[0]["role"] == "developer" + + span.end() + + +# --------------------------------------------------------------------------- +# Spec §4: finish_reasons top-level span attribute — comprehensive +# --------------------------------------------------------------------------- + +class TestFinishReasonTopLevel: + """Lock: gen_ai.response.finish_reasons as top-level span attribute.""" + + def test_finish_reasons_not_gated_by_content(self, 
tracer_and_exporter): + """Spec §4: finish_reasons set even when should_send_prompts()=False.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = "stop" + response.output = [] + response.usage = None + + _extract_response_attributes(span, response, trace_content=False) + + # finish_reasons is metadata, NOT content — must be set + assert GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS in span.attributes + assert span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ( + "stop", + ) + # But output messages must NOT be set + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in span.attributes + + span.end() + + def test_none_finish_reason_omits_attribute(self, tracer_and_exporter): + """Spec §4: None finish_reason → attribute omitted, NOT fabricated.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = None + response.output = [] + response.usage = None + + _extract_response_attributes(span, response, trace_content=True) + + assert GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS not in span.attributes + + span.end() + + +# --------------------------------------------------------------------------- +# Spec §1: _msg_to_dict with SDK objects (not just dicts) +# --------------------------------------------------------------------------- + +class TestMsgToDict: + """Lock: _msg_to_dict normalizes SDK objects to plain dicts.""" + + def test_sdk_object_normalized(self): + """Spec §1: SDK objects with attributes are normalized to dicts.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _msg_to_dict, + ) + + obj = MagicMock() + obj.role = "user" + obj.content = "Hello" + # Only set some attrs + del obj.tool_call_id + del obj.tool_calls + + result = _msg_to_dict(obj) + assert isinstance(result, dict) + assert result["role"] == "user" + assert result["content"] == "Hello" + + def test_dict_passed_through(self): + """Spec §1: dict messages are returned as-is.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _msg_to_dict, + ) + + msg = {"role": "user", "content": "Hello"} + result = _msg_to_dict(msg) + assert result is msg # Same reference, not a copy + + +# --------------------------------------------------------------------------- +# Spec §1: Tool call round-trip (request → response) +# --------------------------------------------------------------------------- + +class TestToolCallRoundTrip: + """Lock: tool_call → tool_call_response forms a complete round trip.""" + + def test_full_round_trip(self, tracer_and_exporter): + """Spec §1: tool_call request and response correlate via id.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [ + { + "role": "assistant", + "tool_calls": [{ + "id": "call_abc", + 
"function": { + "name": "get_weather", + "arguments": '{"city": "NYC"}', + }, + }], + }, + { + "role": "tool", + "tool_call_id": "call_abc", + "content": '{"temp": 72}', + }, + ] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + + # Message 1: assistant with tool_call + assert messages[0]["role"] == "assistant" + tc = messages[0]["parts"][0] + assert tc["type"] == "tool_call" + assert tc["id"] == "call_abc" + assert tc["name"] == "get_weather" + assert tc["arguments"] == {"city": "NYC"} + + # Message 2: tool response correlating via same id + assert messages[1]["role"] == "tool" + resp = messages[1]["parts"][0] + assert resp["type"] == "tool_call_response" + assert resp["id"] == "call_abc" + assert resp["response"] == '{"temp": 72}' + + span.end() + + +# --------------------------------------------------------------------------- +# Spec: _convert_agents_sdk_message unknown type returns (None, []) +# --------------------------------------------------------------------------- + +class TestAgentsSdkUnknownType: + """Lock: unknown Agents SDK message types are silently skipped.""" + + def test_unknown_type_skipped(self, tracer_and_exporter): + """Unknown Agents SDK type must not produce a message.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"type": "unknown_sdk_type", "data": "foo"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + # No messages should be set (unknown type skipped) + assert GenAIAttributes.GEN_AI_INPUT_MESSAGES not in span.attributes + + span.end() + + +# --------------------------------------------------------------------------- +# P1-1: input_text / output_text blocks must map to TextPart in input path +# --------------------------------------------------------------------------- + +class TestInputTextOutputTextMapping: + """Verify Responses API input_text/output_text blocks map to TextPart.""" + + def test_dict_input_text_maps_to_text_part(self): + """input_text dict block must produce {type: 'text', content: '...'}.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _dict_block_to_part, + ) + + block = {"type": "input_text", "text": "Hello from user"} + result = _dict_block_to_part(block) + + assert result["type"] == "text", ( + f"input_text should map to type='text', got '{result['type']}'" + ) + assert result["content"] == "Hello from user", ( + f"input_text content should be the text value, got '{result.get('content')}'" + ) + assert "data" not in result, ( + "input_text should NOT fall through to generic path with 'data' key" + ) + + def test_dict_output_text_maps_to_text_part(self): + """output_text dict block must produce {type: 'text', content: '...'}.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _dict_block_to_part, + ) + + block = {"type": "output_text", "text": "Here is my response"} + result = _dict_block_to_part(block) + + assert result["type"] == "text", ( + f"output_text should map to type='text', got '{result['type']}'" + ) + assert result["content"] == "Here is my response" + assert "data" not in result + + def test_object_input_text_maps_to_text_part(self): + """input_text SDK object must produce {type: 'text', content: '...'}.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _object_block_to_part, + ) + 
+ block = MagicMock() + block.type = "input_text" + block.text = "Hello from user" + + result = _object_block_to_part(block) + + assert result["type"] == "text", ( + f"input_text object should map to type='text', got '{result['type']}'" + ) + assert result["content"] == "Hello from user" + assert "data" not in result + + def test_object_output_text_maps_to_text_part(self): + """output_text SDK object must produce {type: 'text', content: '...'}.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _object_block_to_part, + ) + + block = MagicMock() + block.type = "output_text" + block.text = "Here is my response" + + result = _object_block_to_part(block) + + assert result["type"] == "text", ( + f"output_text object should map to type='text', got '{result['type']}'" + ) + assert result["content"] == "Here is my response" + assert "data" not in result + + def test_input_text_in_full_input_message_pipeline(self, tracer_and_exporter): + """input_text blocks in chat messages must produce valid TextPart in gen_ai.input.messages.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [ + {"role": "user", "content": [{"type": "input_text", "text": "Hello, can you help me?"}]}, + {"role": "assistant", "content": [{"type": "output_text", "text": "Of course!"}]}, + ] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + + # User message: input_text → TextPart + user_parts = messages[0]["parts"] + assert user_parts[0]["type"] == "text", ( + f"input_text in pipeline should be type='text', got '{user_parts[0]['type']}'" + ) + assert user_parts[0]["content"] == "Hello, can you help me?" + + # Assistant message: output_text → TextPart + assistant_parts = messages[1]["parts"] + assert assistant_parts[0]["type"] == "text", ( + f"output_text in pipeline should be type='text', got '{assistant_parts[0]['type']}'" + ) + assert assistant_parts[0]["content"] == "Of course!" 
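+        # Both Responses API variants (input_text / output_text) collapse to
+        # the same TextPart shape, so neither raw type name leaks into parts.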
+ + span.end() + + +# --------------------------------------------------------------------------- +# P2 (was P1-2): gen_ai.request.model set at span start from span_data.model +# --------------------------------------------------------------------------- + +class TestRequestModelAtSpanStart: + """Verify gen_ai.request.model is set at span creation from span_data.""" + + def test_request_model_set_from_span_data(self, tracer_and_exporter): + """gen_ai.request.model must be set at span start from span_data.model.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-reqmodel-1" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o-mini", model_config={}) + span = MockAgentSpan(gen_data, trace_id="test-reqmodel-1") + + proc.on_span_start(span) + # Don't call on_span_end — check span attributes right after creation + otel_span = proc._otel_spans.get(span) + assert otel_span is not None, "OTel span should exist after on_span_start" + + attrs = dict(otel_span.attributes) + assert GenAIAttributes.GEN_AI_REQUEST_MODEL in attrs, ( + f"gen_ai.request.model should be set at span start, got keys: {list(attrs.keys())}" + ) + assert attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "gpt-4o-mini" + + # Clean up + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + def test_request_model_fallback_when_response_model_missing(self, tracer_and_exporter): + """gen_ai.request.model must persist even if response.model is None.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-reqmodel-2" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o", model_config={}) + # Simulate a response with no model + gen_data.response = MagicMock() + gen_data.response.model = None + gen_data.response.id = None + gen_data.response.temperature = None + gen_data.response.max_output_tokens = None + gen_data.response.top_p = None + gen_data.response.frequency_penalty = None + gen_data.response.finish_reason = None + gen_data.response.output = [] + gen_data.response.usage = None + gen_data.response.tools = [] + + span = MockAgentSpan(gen_data, trace_id="test-reqmodel-2") + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + response_span = next((s for s in spans if s.name == "openai.response"), None) + assert response_span is not None + + attrs = dict(response_span.attributes) + assert GenAIAttributes.GEN_AI_REQUEST_MODEL in attrs, ( + "gen_ai.request.model should be set even when response.model is None" + ) + assert attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "gpt-4o" + + +# --------------------------------------------------------------------------- +# P2-1: Tool-call response parts include response key even when content=None +# --------------------------------------------------------------------------- + +class TestToolResponseNoneContent: + """Verify tool_call_response includes response key when content is None.""" + + def test_tool_response_part_has_response_key_when_none(self): + """tool_call_response must include 
'response' key even when content is None.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _build_tool_response_part, + ) + + part = _build_tool_response_part("call_123", None) + + assert part["type"] == "tool_call_response" + assert part["id"] == "call_123" + assert "response" in part, ( + "tool_call_response must include 'response' key even when content is None" + ) + assert part["response"] == "" + + def test_tool_response_part_has_response_key_when_present(self): + """tool_call_response with content must include 'response' key.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _build_tool_response_part, + ) + + part = _build_tool_response_part("call_456", "72 degrees") + + assert part["type"] == "tool_call_response" + assert part["id"] == "call_456" + assert part["response"] == "72 degrees" + + def test_tool_response_none_content_in_full_pipeline(self, tracer_and_exporter): + """Tool message with content=None must still produce response key in gen_ai.input.messages.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [ + {"role": "tool", "tool_call_id": "call_789", "content": None}, + ] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + + tool_part = messages[0]["parts"][0] + assert tool_part["type"] == "tool_call_response" + assert "response" in tool_part, ( + "tool_call_response must include 'response' key even with None content" + ) + + span.end() + + def test_structured_dict_result_preserved(self): + """Dict tool result should be kept as-is, not stringified.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _build_tool_response_part, + ) + + part = _build_tool_response_part("call_1", {"status": "ok", "count": 5}) + assert isinstance(part["response"], dict) + assert part["response"] == {"status": "ok", "count": 5} + + def test_structured_list_result_preserved(self): + """List tool result should be kept as-is, not stringified.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _build_tool_response_part, + ) + + part = _build_tool_response_part("call_2", [1, 2, 3]) + assert isinstance(part["response"], list) + + +# --------------------------------------------------------------------------- +# P2-2: Realtime LLM spans set response metadata +# --------------------------------------------------------------------------- + +class TestRealtimeResponseMetadata: + """Verify realtime LLM spans set recommended response attributes.""" + + def test_realtime_span_sets_response_model(self, tracer_and_exporter): + """Realtime LLM spans should set gen_ai.response.model.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + tracer, exporter = tracer_and_exporter + state = RealtimeTracingState(tracer) + state.model_name = "gpt-4o-realtime-preview-2024-12-17" + + # Create a parent span for context + parent = tracer.start_span("parent") + state.pending_prompts.append(("user", "Hello")) + state.prompt_start_time = 1000 + + state.create_llm_span("Hi there!") + parent.end() + + finished = exporter.get_finished_spans() + rt_span = next((s for s in finished if s.name == "openai.realtime"), None) + assert rt_span is not None + + attrs = dict(rt_span.attributes) + assert 
GenAIAttributes.GEN_AI_RESPONSE_MODEL in attrs, ( + "Realtime LLM span should set gen_ai.response.model" + ) + + def test_realtime_span_sets_finish_reason_empty(self, tracer_and_exporter): + """Realtime LLM spans should use '' finish_reason, NOT fabricate 'stop'.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + tracer, exporter = tracer_and_exporter + state = RealtimeTracingState(tracer) + state.model_name = "gpt-4o-realtime-preview" + + parent = tracer.start_span("parent") + state.pending_prompts.append(("user", "Hello")) + state.prompt_start_time = 1000 + + with patch( + "opentelemetry.instrumentation.openai_agents._realtime_wrappers.should_send_prompts", + return_value=True, + ): + state.create_llm_span("Hi there!") + + parent.end() + + finished = exporter.get_finished_spans() + rt_span = next((s for s in finished if s.name == "openai.realtime"), None) + assert rt_span is not None + + raw = rt_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + assert messages[0]["finish_reason"] == "", ( + f"Realtime finish_reason should be '' (not fabricated), got '{messages[0].get('finish_reason')}'" + ) + + +# --------------------------------------------------------------------------- +# F1: BlobPart must use "content" key, NOT "data" +# OTel spec: BlobPart.required = ["type", "modality", "content"] +# Upstream refs: opentelemetry-python-contrib Blob dataclass uses "content", +# Bedrock/OpenAI instrumentations use "content" for blob parts. +# --------------------------------------------------------------------------- + +class TestBlobPartContentKey: + """F1: BlobPart must use 'content' key per OTel GenAI semconv.""" + + def test_dict_input_audio_blob_uses_content_key(self, tracer_and_exporter): + """Dict input_audio block must produce BlobPart with 'content', NOT 'data'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _dict_block_to_part, + ) + + block = { + "type": "input_audio", + "input_audio": {"data": "base64audiodata==", "format": "wav"}, + } + result = _dict_block_to_part(block) + + assert result["type"] == "blob" + assert result["modality"] == "audio" + assert "content" in result, ( + "BlobPart must use 'content' key per OTel spec, not 'data'" + ) + assert "data" not in result, ( + "BlobPart must NOT use 'data' key — spec requires 'content'" + ) + assert result["content"] == "base64audiodata==" + + def test_object_input_audio_blob_uses_content_key(self, tracer_and_exporter): + """SDK-object input_audio block must produce BlobPart with 'content', NOT 'data'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _object_block_to_part, + ) + + audio_obj = MagicMock() + audio_obj.data = "base64data==" + block = MagicMock() + block.type = "input_audio" + block.input_audio = audio_obj + + result = _object_block_to_part(block) + + assert result["type"] == "blob" + assert result["modality"] == "audio" + assert "content" in result, ( + "BlobPart must use 'content' key per OTel spec, not 'data'" + ) + assert "data" not in result, ( + "BlobPart must NOT use 'data' key — spec requires 'content'" + ) + assert result["content"] == "base64data==" + + def test_blob_content_key_in_full_pipeline(self, tracer_and_exporter): + """BlobPart 'content' key must survive through the full input message pipeline.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter 
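+        # "YXVkaW9kYXRh" below is base64 for "audiodata"; it must reach the
+        # exported BlobPart unchanged under the "content" key.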
+
+        span = tracer.start_span("test")
+
+        input_data = [{
+            "role": "user",
+            "content": [
+                {
+                    "type": "input_audio",
+                    "input_audio": {"data": "YXVkaW9kYXRh", "format": "mp3"},
+                },
+            ],
+        }]
+        _extract_prompt_attributes(span, input_data, trace_content=True)
+
+        raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES)
+        messages = json.loads(raw)
+        blob_part = messages[0]["parts"][0]
+
+        assert blob_part["type"] == "blob"
+        assert "content" in blob_part, "BlobPart must use 'content' in pipeline output"
+        assert "data" not in blob_part, "BlobPart must NOT use 'data' in pipeline output"
+        assert blob_part["content"] == "YXVkaW9kYXRh"
+
+        span.end()
+
+
+# ---------------------------------------------------------------------------
+# F2: gen_ai.tool.call.arguments/result must use json.dumps(), NOT str()
+# str(dict) produces Python repr with single quotes — not valid JSON.
+# All other structured attributes in this package use json.dumps().
+# ---------------------------------------------------------------------------
+
+class TestToolCallArgumentsSerialization:
+    """F2: Tool call arguments/result must be valid JSON, not Python repr."""
+
+    def test_dict_input_serialized_as_json(self, tracer_and_exporter):
+        """Dict tool input must be serialized with json.dumps(), not str()."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            OpenTelemetryTracingProcessor,
+        )
+        from agents import FunctionSpanData
+
+        tracer, _ = tracer_and_exporter
+        proc = OpenTelemetryTracingProcessor(tracer)
+
+        func_data = FunctionSpanData(
+            name="get_weather",
+            input={"city": "London"},
+            output="72F",
+        )
+        otel_span = proc._start_function_span(func_data, parent_context=None)
+        proc._end_function_span(otel_span, func_data, trace_content=True)
+        otel_span.end()
+
+        from opentelemetry.semconv._incubating.attributes import (
+            gen_ai_attributes as GenAIAttributes,
+        )
+        raw_args = otel_span.attributes[GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS]
+        # Must be valid JSON (double quotes), NOT Python repr (single quotes)
+        assert '"city"' in raw_args, (
+            f"Expected JSON with double quotes, got: {raw_args}"
+        )
+        assert "'" not in raw_args, (
+            f"str() produces single quotes; expected json.dumps(): {raw_args}"
+        )
+        # Must parse as valid JSON
+        parsed = json.loads(raw_args)
+        assert parsed == {"city": "London"}
+
+    def test_dict_output_serialized_as_json(self, tracer_and_exporter):
+        """Dict tool output must be serialized with json.dumps(), not str()."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            OpenTelemetryTracingProcessor,
+        )
+        from agents import FunctionSpanData
+
+        tracer, _ = tracer_and_exporter
+        proc = OpenTelemetryTracingProcessor(tracer)
+
+        func_data = FunctionSpanData(
+            name="get_weather",
+            input="query",
+            output={"temp": 72, "unit": "F"},
+        )
+        otel_span = proc._start_function_span(func_data, parent_context=None)
+        proc._end_function_span(otel_span, func_data, trace_content=True)
+        otel_span.end()
+
+        from opentelemetry.semconv._incubating.attributes import (
+            gen_ai_attributes as GenAIAttributes,
+        )
+        raw_result = otel_span.attributes[GenAIAttributes.GEN_AI_TOOL_CALL_RESULT]
+        parsed = json.loads(raw_result)
+        assert parsed == {"temp": 72, "unit": "F"}
+
+    def test_string_input_kept_as_is(self, tracer_and_exporter):
+        """String tool input must be kept as-is (already a string)."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            OpenTelemetryTracingProcessor,
+        )
+        from agents import FunctionSpanData
+
+        
tracer, _ = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + func_data = FunctionSpanData( + name="echo", + input='{"already": "json"}', + output="done", + ) + otel_span = proc._start_function_span(func_data, parent_context=None) + proc._end_function_span(otel_span, func_data, trace_content=True) + otel_span.end() + + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + raw_args = otel_span.attributes[GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS] + assert raw_args == '{"already": "json"}' + + def test_list_output_serialized_as_json(self, tracer_and_exporter): + """List tool output must be serialized with json.dumps().""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import FunctionSpanData + + tracer, _ = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + func_data = FunctionSpanData( + name="search", + input="query", + output=["result1", "result2"], + ) + otel_span = proc._start_function_span(func_data, parent_context=None) + proc._end_function_span(otel_span, func_data, trace_content=True) + otel_span.end() + + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + raw_result = otel_span.attributes[GenAIAttributes.GEN_AI_TOOL_CALL_RESULT] + parsed = json.loads(raw_result) + assert parsed == ["result1", "result2"] + + +# --------------------------------------------------------------------------- +# F3: Responses API status → finish_reason mapping +# The Responses API uses "status" ("completed"/"failed"/"cancelled"/"incomplete"), +# NOT "finish_reason". Must map status to OTel finish reasons. +# Upstream ref: opentelemetry-python-contrib _finish_reason_from_status() +# --------------------------------------------------------------------------- + +class TestResponsesApiStatusMapping: + """F3: Map Responses API 'status' to finish_reason when finish_reason absent.""" + + def test_completed_status_maps_to_stop(self, tracer_and_exporter): + """Responses API status='completed' must map to finish_reason='stop'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + output=[], + model="gpt-4o", + usage=MockUsage(), + ) + # Responses API: no finish_reason, but has status + del response.finish_reason + response.status = "completed" + + _extract_response_attributes(span, response, trace_content=True) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert finish_reasons is not None, ( + "status='completed' must produce gen_ai.response.finish_reasons" + ) + assert "stop" in finish_reasons, ( + f"status='completed' must map to 'stop', got {finish_reasons}" + ) + + span.end() + + def test_failed_status_maps_to_error(self, tracer_and_exporter): + """Responses API status='failed' must map to OTel finish_reason='error'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + output=[], + model="gpt-4o", + usage=MockUsage(), + ) + del response.finish_reason + response.status = "failed" + + _extract_response_attributes(span, response, trace_content=True) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert 
finish_reasons is not None + assert "error" in finish_reasons + + span.end() + + def test_cancelled_response_preserves_cancelled_finish_reason(self, tracer_and_exporter): + """Responses API status='cancelled' must preserve 'cancelled', not remap to 'error'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + output=[], + model="gpt-4o", + usage=MockUsage(), + ) + del response.finish_reason + response.status = "cancelled" + + _extract_response_attributes(span, response, trace_content=True) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert finish_reasons is not None + assert "cancelled" in finish_reasons + + span.end() + + def test_incomplete_response_preserves_incomplete_finish_reason(self, tracer_and_exporter): + """Responses API status='incomplete' must preserve 'incomplete', not remap to 'length'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + output=[], + model="gpt-4o", + usage=MockUsage(), + ) + del response.finish_reason + response.status = "incomplete" + + _extract_response_attributes(span, response, trace_content=True) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert finish_reasons is not None + assert "incomplete" in finish_reasons + + span.end() + + def test_status_not_used_when_finish_reason_present(self, tracer_and_exporter): + """When finish_reason is present, status must NOT override it.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + output=[], + model="gpt-4o", + usage=MockUsage(), + finish_reason="stop", + ) + response.status = "completed" # Both present — finish_reason wins + + _extract_response_attributes(span, response, trace_content=True) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert finish_reasons is not None + assert "stop" in finish_reasons + + span.end() + + def test_completed_status_maps_to_stop_in_output_messages(self, tracer_and_exporter): + """status='completed' → output message finish_reason='stop'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + content_item = MockContentItem(text="Done") + output_item = MockResponseOutput(role="assistant", content=[content_item]) + response = MockResponse( + output=[output_item], + model="gpt-4o", + usage=MockUsage(), + ) + del response.finish_reason + response.status = "completed" + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + assert messages[0]["finish_reason"] == "stop", ( + f"status='completed' should produce finish_reason='stop' in output, " + f"got '{messages[0]['finish_reason']}'" + ) + + span.end() + + +# --------------------------------------------------------------------------- +# Missing finish_reason mapping tests +# --------------------------------------------------------------------------- + +class TestFinishReasonMappingCompleteness: + """Cover 
finish_reason mappings missing from original test suite.""" + + def test_length_mapping(self): + """'length' must map to 'length'.""" + from opentelemetry.instrumentation.openai_agents._hooks import _map_finish_reason + assert _map_finish_reason("length") == "length" + + def test_content_filter_mapping(self): + """'content_filter' must map to 'content_filter'.""" + from opentelemetry.instrumentation.openai_agents._hooks import _map_finish_reason + assert _map_finish_reason("content_filter") == "content_filter" + + def test_error_mapping(self): + """'error' must map to 'error'.""" + from opentelemetry.instrumentation.openai_agents._hooks import _map_finish_reason + assert _map_finish_reason("error") == "error" + + def test_unknown_finish_reason_passes_through(self): + """Unknown/new finish reason values must pass through unchanged.""" + from opentelemetry.instrumentation.openai_agents._hooks import _map_finish_reason + assert _map_finish_reason("some_new_reason") == "some_new_reason" + + def test_function_call_maps_to_tool_call(self): + """Legacy 'function_call' must map to 'tool_call'.""" + from opentelemetry.instrumentation.openai_agents._hooks import _map_finish_reason + assert _map_finish_reason("function_call") == "tool_call" + + +# --------------------------------------------------------------------------- +# P2-3: Agents SDK function_call id — omit when absent +# --------------------------------------------------------------------------- + +class TestAgentsSdkFunctionCallIdOmitted: + """P2-3: Agents SDK function_call with no 'id' must emit no 'id' key in the part. + + The id field is optional (OTel ToolCallRequestPart.id defaults null). + Emitting empty-string id breaks tool_call / tool_call_response correlation. + """ + + def test_agents_sdk_function_call_no_id_omits_id(self, tracer_and_exporter): + """Agents SDK function_call without an 'id' key must not emit 'id' in the part.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "type": "function_call", + "name": "search", + "arguments": '{"q": "test"}', + # Intentionally no "id" key + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + part = messages[0]["parts"][0] + + assert part["type"] == "tool_call" + assert "id" not in part or part["id"], ( + f"id must be absent or non-empty when no id in source, got: {part}" + ) + span.end() + + def test_agents_sdk_function_call_with_id_still_included(self, tracer_and_exporter): + """Sanity: when 'id' is present, it must appear in the part.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "type": "function_call", + "id": "fc_1", + "name": "search", + "arguments": '{"q": "test"}', + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + part = messages[0]["parts"][0] + + assert part.get("id") == "fc_1" + span.end() + + +# --------------------------------------------------------------------------- +# P3-4 (updated): ResponseSpanData → gen_ai.operation.name = "chat" +# --------------------------------------------------------------------------- + +class 
TestResponseSpanDataOperationName: + """ResponseSpanData (Responses API) must use operation.name='chat'. + + Both GenerationSpanData (Chat Completions) and ResponseSpanData (Responses + API) are chat completion surfaces. Using 'generate_content' (a GCP/Gemini + well-known value) for an OpenAI span is incorrect. + """ + + def test_response_span_data_uses_chat(self, tracer_and_exporter): + """_start_generation_span with ResponseSpanData must emit 'chat'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + span_data = ResponseSpanData(input=[], response=None) + span_data.model = "gpt-4o" + + otel_span = proc._start_generation_span(parent_context=None, span_data=span_data) + attrs = dict(otel_span.attributes) + otel_span.end() + + assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat", ( + f"ResponseSpanData must use 'chat', got '{attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}'" + ) + + def test_generation_span_data_keeps_chat(self, tracer_and_exporter): + """_start_generation_span with GenerationSpanData (or no span_data) must keep 'chat'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + # No span_data → Chat Completions default + otel_span = proc._start_generation_span(parent_context=None, span_data=None) + attrs = dict(otel_span.attributes) + otel_span.end() + + assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat" + + +# --------------------------------------------------------------------------- +# P3-6: tool_call part with empty name — name key must still be emitted +# --------------------------------------------------------------------------- + +class TestToolCallPartEmptyName: + """P3-6: OTel ToolCallRequestPart.name is required. + + The prior `if tc.get("name"):` guard silently dropped the key when name="" + (empty string is falsy). The key must be emitted even for empty strings + so that downstream consumers can observe the malformed call rather than + getting a part with no name at all. + """ + + def test_empty_string_tool_name_emits_name_key(self, tracer_and_exporter): + """Tool call with name='' must produce a part with name key present.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{ + "id": "call_x", + "function": { + "name": "", + "arguments": "{}", + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + part = messages[0]["parts"][0] + + assert part["type"] == "tool_call" + assert "name" in part, ( + "name key must be present even when tool name is empty string — " + "required field per OTel ToolCallRequestPart schema" + ) + + def test_missing_tool_name_falls_back_to_empty_string(self, tracer_and_exporter): + """Tool call with no name must emit name='' rather than omitting the key. + + OTel ToolCallRequestPart requires 'name'; omitting it produces a schema-invalid part. 
+ """ + from opentelemetry.instrumentation.openai_agents._hooks import _tool_call_to_part + + part = _tool_call_to_part({"type": "tool_call"}) # no name key in source + assert "name" in part, "name must always be present on tool_call parts" + assert part["name"] == "" + + +# --------------------------------------------------------------------------- +# tool_call_response parts must omit 'id' when call_id is absent +# --------------------------------------------------------------------------- + +class TestToolResponsePartIdOmitted: + """tool_call_response parts must omit 'id' when call_id is absent or None. + + OTel ToolCallResponsePart: id is optional (absent vs. null are distinct in + JSON Schema). Emitting "id": null causes schema violations and can break + consumer correlation logic that checks for key presence. + """ + + def test_build_tool_response_part_omits_id_when_call_id_is_none(self): + """_build_tool_response_part(None, ...) must not include an 'id' key.""" + from opentelemetry.instrumentation.openai_agents._hooks import _build_tool_response_part + + part = _build_tool_response_part(None, "tool result") + + assert "id" not in part, ( + f"'id' must be absent when call_id is None, got {part!r}" + ) + assert part["type"] == "tool_call_response" + assert part["response"] == "tool result" + + def test_build_tool_response_part_includes_id_when_call_id_present(self): + """_build_tool_response_part with a real call_id must include 'id'.""" + from opentelemetry.instrumentation.openai_agents._hooks import _build_tool_response_part + + part = _build_tool_response_part("call_abc", "output") + + assert part["id"] == "call_abc" + + def test_agents_sdk_function_call_output_without_call_id_omits_id(self, tracer_and_exporter): + """Agents SDK function_call_output message with no call_id must omit id from the part.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"type": "function_call_output", "output": "the result"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + msgs = json.loads(raw) + assert len(msgs) == 1 + part = msgs[0]["parts"][0] + assert part["type"] == "tool_call_response" + assert "id" not in part, ( + f"'id' must be absent (not null) when call_id is missing, got {part!r}" + ) + span.end() + + def test_agents_sdk_function_call_output_with_call_id_includes_id(self, tracer_and_exporter): + """Agents SDK function_call_output with a call_id must include 'id' in the part.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"type": "function_call_output", "call_id": "call_99", "output": "done"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + msgs = json.loads(raw) + part = msgs[0]["parts"][0] + assert part["id"] == "call_99" + span.end() + + +# --------------------------------------------------------------------------- +# Input messages with empty parts must be excluded +# --------------------------------------------------------------------------- + +class TestEmptyPartsExcluded: + """Messages that produce no parts must not appear in gen_ai.input.messages. 
+ + A message with a role but no content and no tool_calls yields parts=[]. + Emitting {"role": "assistant", "parts": []} adds noise and may confuse + consumers that assume each message carries at least one part. + """ + + def test_message_with_no_content_and_no_tool_calls_excluded(self, tracer_and_exporter): + """A role-only message (no content, no tool_calls) must be excluded.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"role": "assistant"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + if raw is not None: + msgs = json.loads(raw) + for msg in msgs: + assert msg.get("parts"), ( + f"Message with empty parts must be excluded, got {msg!r}" + ) + span.end() + + def test_empty_message_excluded_valid_message_kept(self, tracer_and_exporter): + """Only the valid message is emitted when mixed with an empty-parts message.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [ + {"role": "assistant"}, # no content → parts=[] + {"role": "user", "content": "Hello"}, # valid + ] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + msgs = json.loads(raw) + assert len(msgs) == 1, f"Expected 1 message (empty-parts excluded), got {len(msgs)}: {msgs}" + assert msgs[0]["role"] == "user" + span.end() + + def test_content_none_assistant_with_tool_calls_still_emitted(self, tracer_and_exporter): + """An assistant message with tool_calls but no text content must still be emitted.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "content": None, + "tool_calls": [{"id": "c1", "function": {"name": "search", "arguments": "{}"}}], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + msgs = json.loads(raw) + assert len(msgs) == 1 + assert msgs[0]["parts"][0]["type"] == "tool_call" + + +# --------------------------------------------------------------------------- +# tool_call part must always carry a name (required by OTel ToolCallRequestPart) +# --------------------------------------------------------------------------- + +class TestToolCallNameAlwaysPresent: + """Every tool_call part in gen_ai.input.messages must include a 'name' field. + + OTel ToolCallRequestPart requires 'name'. When the upstream SDK object or + dict does not provide one, the instrumentation must fall back to an empty + string rather than silently omitting the key. 
+ """ + + def test_tool_call_with_no_name_in_function_wrapper_gets_empty_name(self, tracer_and_exporter): + """A tool_call whose function wrapper has no name must produce name='' not a missing key.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{"id": "c1", "function": {"arguments": '{"x": 1}'}}], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + msgs = json.loads(raw) + part = msgs[0]["parts"][0] + assert part["type"] == "tool_call" + assert "name" in part, f"'name' must always be present on tool_call parts, got: {part}" + span.end() + + def test_tool_call_with_none_name_attribute_gets_empty_name(self, tracer_and_exporter): + """A tool_call object whose name attribute resolves to None must produce name=''.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + from types import SimpleNamespace + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + tool_call = SimpleNamespace(id="c2", name=None, arguments="{}") + input_data = [{"role": "assistant", "tool_calls": [tool_call]}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + msgs = json.loads(raw) + part = msgs[0]["parts"][0] + assert part["type"] == "tool_call" + assert "name" in part, f"'name' must always be present on tool_call parts, got: {part}" + span.end() + + def test_tool_call_with_valid_name_preserves_name(self, tracer_and_exporter): + """Sanity: a tool_call with a proper name must still emit that name.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{"id": "c3", "function": {"name": "get_weather", "arguments": '{"city": "NYC"}'}}], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + msgs = json.loads(raw) + part = msgs[0]["parts"][0] + assert part["name"] == "get_weather" + span.end() + span.end() diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_tracing_processor.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_tracing_processor.py new file mode 100644 index 0000000000..442a98d595 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_tracing_processor.py @@ -0,0 +1,1060 @@ +""" +TDD tests for on_span_start / on_span_end refactoring. + +Tests target the extracted helper methods: + on_span_start handlers: + _start_agent_span, _start_handoff_span, _start_function_span, + _start_generation_span, _start_realtime_span + on_span_end helpers: + _extract_tool_definitions (pure function) + _end_generation_span (method) + _set_realtime_io_attributes (method) + +These tests are written BEFORE the implementation (TDD). 
+""" + +import json +import pytest +from unittest.mock import MagicMock +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.trace import SpanKind +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, +) +from opentelemetry.semconv_ai import SpanAttributes, TraceloopSpanKindValues + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def tracer_and_exporter(): + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + return provider.get_tracer("test-refactor"), exporter + + +@pytest.fixture +def processor(tracer_and_exporter): + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + tracer, _ = tracer_and_exporter + return OpenTelemetryTracingProcessor(tracer) + + +# --------------------------------------------------------------------------- +# Helpers: mock SDK span_data objects +# --------------------------------------------------------------------------- + +class MockAgentSpan: + """Minimal mock of an Agents SDK span object.""" + def __init__(self, span_data, trace_id="test-trace", error=None): + self.span_data = span_data + self.trace_id = trace_id + self.error = error + + +# --------------------------------------------------------------------------- +# Tests: _start_agent_span +# --------------------------------------------------------------------------- + +class TestStartAgentSpan: + """Unit tests for the extracted _start_agent_span handler.""" + + def test_returns_span_with_agent_attributes(self, tracer_and_exporter, processor): + """Must return a span named '{name}.agent' with correct attributes.""" + from agents import AgentSpanData + + tracer, exporter = tracer_and_exporter + agent_data = AgentSpanData(name="MyAgent", handoffs=[], tools=[], output_type="") + + otel_span = processor._start_agent_span(agent_data, parent_context=None, trace_id="t1") + + assert otel_span is not None + assert otel_span.name == "MyAgent.agent" + attrs = dict(otel_span.attributes) + assert attrs[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.AGENT.value + assert attrs[GenAIAttributes.GEN_AI_AGENT_NAME] == "MyAgent" + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + otel_span.end() + + def test_unknown_agent_name_defaults(self, tracer_and_exporter, processor): + """Agent with no name → 'unknown_agent'.""" + from agents import AgentSpanData + + agent_data = AgentSpanData(name=None, handoffs=[], tools=[], output_type="") + + otel_span = processor._start_agent_span(agent_data, parent_context=None, trace_id="t2") + + assert otel_span.name == "unknown_agent.agent" + assert otel_span.attributes[GenAIAttributes.GEN_AI_AGENT_NAME] == "unknown_agent" + + otel_span.end() + + def test_handoff_parent_attribute_set(self, tracer_and_exporter, processor): + """When a reverse handoff exists, handoff_parent must be set.""" + from agents import AgentSpanData + + # Pre-seed the reverse handoff dict + processor._reverse_handoffs_dict["TargetAgent:t3"] = "SourceAgent" + + agent_data = AgentSpanData(name="TargetAgent", handoffs=[], tools=[], output_type="") + otel_span = processor._start_agent_span(agent_data, 
parent_context=None, trace_id="t3") + + attrs = dict(otel_span.attributes) + assert attrs.get("gen_ai.agent.handoff_parent") == "SourceAgent" + # Consumed from the dict + assert "TargetAgent:t3" not in processor._reverse_handoffs_dict + + otel_span.end() + + def test_handoffs_list_serialized(self, tracer_and_exporter, processor): + """Handoff targets should be serialized as JSON attributes.""" + from agents import AgentSpanData + + mock_handoff_agent = MagicMock() + mock_handoff_agent.name = "AgentB" + mock_handoff_agent.instructions = "Help the user" + + agent_data = AgentSpanData(name="AgentA", handoffs=[mock_handoff_agent], tools=[], output_type="") + otel_span = processor._start_agent_span(agent_data, parent_context=None, trace_id="t4") + + attrs = dict(otel_span.attributes) + handoffs = json.loads(attrs["openai.agent.handoffs"]) + assert isinstance(handoffs, list) + assert handoffs[0]["name"] == "AgentB" + assert handoffs[0]["instructions"] == "Help the user" + + otel_span.end() + + def test_span_kind_is_internal(self, tracer_and_exporter, processor): + """Agent spans must be INTERNAL kind (in-process orchestration, not a remote call).""" + from agents import AgentSpanData + + agent_data = AgentSpanData(name="Agent", handoffs=[], tools=[], output_type="") + otel_span = processor._start_agent_span(agent_data, parent_context=None, trace_id="t5") + + assert otel_span.kind == SpanKind.INTERNAL + + otel_span.end() + + def test_agent_span_has_invoke_agent_operation_name(self, tracer_and_exporter, processor): + """Agent spans must set gen_ai.operation.name='invoke_agent' per OTel Agent Spans spec.""" + from agents import AgentSpanData + + agent_data = AgentSpanData(name="Agent", handoffs=[], tools=[], output_type="") + otel_span = processor._start_agent_span(agent_data, parent_context=None, trace_id="t6") + + attrs = dict(otel_span.attributes) + assert attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "invoke_agent" + + otel_span.end() + + def test_handoffs_collapsed_to_single_json_array(self, tracer_and_exporter, processor): + """Handoffs must be a single 'openai.agent.handoffs' JSON array, not indexed attributes.""" + from agents import AgentSpanData + + mock_a = MagicMock() + mock_a.name = "AgentA" + mock_a.instructions = "Does A" + mock_b = MagicMock() + mock_b.name = "AgentB" + mock_b.instructions = "Does B" + + agent_data = AgentSpanData(name="Router", handoffs=[mock_a, mock_b], tools=[], output_type="") + otel_span = processor._start_agent_span(agent_data, parent_context=None, trace_id="t7") + + attrs = dict(otel_span.attributes) + assert "openai.agent.handoff0" not in attrs + assert "openai.agent.handoff1" not in attrs + + handoffs = json.loads(attrs["openai.agent.handoffs"]) + assert isinstance(handoffs, list) and len(handoffs) == 2 + assert handoffs[0]["name"] == "AgentA" + assert handoffs[1]["name"] == "AgentB" + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Tests: _start_handoff_span +# --------------------------------------------------------------------------- + +class TestStartHandoffSpan: + """Unit tests for the extracted _start_handoff_span handler.""" + + def test_returns_span_with_handoff_attributes(self, tracer_and_exporter, processor): + """Must create a span named '{from} → {to}.handoff'.""" + from agents import HandoffSpanData + + handoff_data = HandoffSpanData(from_agent="AgentA", to_agent="AgentB") + + otel_span = processor._start_handoff_span( + handoff_data, parent_context=None, trace_id="t1", + ) + + 
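+        # Expected result, per the assertions just below: a span named
+        # "AgentA → AgentB.handoff". A sketch of the assumed name
+        # construction (the 'unknown' fallback is pinned by
+        # test_unknown_agents_fallback):
+        #   f"{from_agent or 'unknown'} → {to_agent or 'unknown'}.handoff"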
assert otel_span is not None + assert otel_span.name == "AgentA → AgentB.handoff" + attrs = dict(otel_span.attributes) + assert attrs[SpanAttributes.TRACELOOP_SPAN_KIND] == "handoff" + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + otel_span.end() + + def test_from_and_to_agent_attributes(self, tracer_and_exporter, processor): + """from_agent and to_agent must be set as attributes.""" + from agents import HandoffSpanData + from opentelemetry.instrumentation.openai_agents.utils import ( + GEN_AI_HANDOFF_FROM_AGENT, + GEN_AI_HANDOFF_TO_AGENT, + ) + + handoff_data = HandoffSpanData(from_agent="AgentA", to_agent="AgentB") + + otel_span = processor._start_handoff_span( + handoff_data, parent_context=None, trace_id="t2", + ) + + attrs = dict(otel_span.attributes) + assert attrs[GEN_AI_HANDOFF_FROM_AGENT] == "AgentA" + assert attrs[GEN_AI_HANDOFF_TO_AGENT] == "AgentB" + + otel_span.end() + + def test_registers_reverse_handoff(self, tracer_and_exporter, processor): + """Must register reverse handoff for the target agent.""" + from agents import HandoffSpanData + + handoff_data = HandoffSpanData(from_agent="AgentA", to_agent="AgentB") + + processor._start_handoff_span( + handoff_data, parent_context=None, trace_id="trace-123", + ) + + assert processor._reverse_handoffs_dict.get("AgentB:trace-123") == "AgentA" + + def test_unknown_agents_fallback(self, tracer_and_exporter, processor): + """None agent names → 'unknown' in span name.""" + from agents import HandoffSpanData + + handoff_data = HandoffSpanData(from_agent=None, to_agent=None) + + otel_span = processor._start_handoff_span( + handoff_data, parent_context=None, trace_id="t3", + ) + + assert "unknown" in otel_span.name + + otel_span.end() + + def test_span_kind_is_internal(self, tracer_and_exporter, processor): + """Handoff spans must be INTERNAL kind.""" + from agents import HandoffSpanData + + handoff_data = HandoffSpanData(from_agent="A", to_agent="B") + otel_span = processor._start_handoff_span( + handoff_data, parent_context=None, trace_id="t4", + ) + + assert otel_span.kind == SpanKind.INTERNAL + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Tests: _start_function_span +# --------------------------------------------------------------------------- + +class TestStartFunctionSpan: + """Unit tests for the extracted _start_function_span handler.""" + + def test_returns_span_with_tool_attributes(self, tracer_and_exporter, processor): + """Must return a span named '{tool}.tool' with tool attributes.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name="get_weather", input="", output="") + + otel_span = processor._start_function_span(func_data, parent_context=None) + + assert otel_span is not None + assert otel_span.name == "get_weather.tool" + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_TOOL_NAME] == "get_weather" + assert attrs[GenAIAttributes.GEN_AI_TOOL_TYPE] == "function" + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + assert attrs[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.TOOL.value + + otel_span.end() + + def test_unknown_tool_name_defaults(self, tracer_and_exporter, processor): + """Tool with no name → 'unknown_tool'.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name=None, input="", output="") + + otel_span = processor._start_function_span(func_data, parent_context=None) + + assert otel_span.name == "unknown_tool.tool" + + otel_span.end() 
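+
+    # A minimal sketch of the contract this class pins down (illustrative
+    # only, not the shipped implementation; `self._tracer` is an assumed
+    # attribute name). The name fallback, the "Represents a Function Span"
+    # filter, and the attributes below all come from the assertions in
+    # these tests:
+    #
+    #     def _start_function_span(self, span_data, parent_context):
+    #         name = getattr(span_data, "name", None) or "unknown_tool"
+    #         span = self._tracer.start_span(
+    #             f"{name}.tool", context=parent_context, kind=SpanKind.INTERNAL
+    #         )
+    #         span.set_attribute(GenAIAttributes.GEN_AI_TOOL_NAME, name)
+    #         span.set_attribute(GenAIAttributes.GEN_AI_TOOL_TYPE, "function")
+    #         span.set_attribute(GenAIAttributes.GEN_AI_PROVIDER_NAME, "openai")
+    #         desc = getattr(span_data, "description", None)
+    #         if desc and not desc.startswith("Represents a Function Span"):
+    #             span.set_attribute(GenAIAttributes.GEN_AI_TOOL_DESCRIPTION, desc)
+    #         return span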
+ + def test_description_attribute_set(self, tracer_and_exporter, processor): + """Non-generic descriptions must appear as GEN_AI_TOOL_DESCRIPTION.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name="search", input="", output="") + func_data.description = "Search the web for information" + + otel_span = processor._start_function_span(func_data, parent_context=None) + + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_TOOL_DESCRIPTION] == "Search the web for information" + + otel_span.end() + + def test_generic_description_filtered_out(self, tracer_and_exporter, processor): + """Descriptions starting with 'Represents a Function Span' must be ignored.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name="search", input="", output="") + func_data.description = "Represents a Function Span for search" + + otel_span = processor._start_function_span(func_data, parent_context=None) + + attrs = dict(otel_span.attributes) + assert GenAIAttributes.GEN_AI_TOOL_DESCRIPTION not in attrs + + otel_span.end() + + def test_span_kind_is_internal(self, tracer_and_exporter, processor): + """Function/tool spans must be INTERNAL kind.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name="tool", input="", output="") + otel_span = processor._start_function_span(func_data, parent_context=None) + + assert otel_span.kind == SpanKind.INTERNAL + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Tests: _end_function_span +# --------------------------------------------------------------------------- + +class TestEndFunctionSpan: + """Unit tests for _end_function_span — sets tool call arguments/result.""" + + def test_sets_tool_call_arguments_and_result(self, tracer_and_exporter, processor): + """Must set gen_ai.tool.call.arguments and gen_ai.tool.call.result.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData( + name="get_weather", input='{"city": "NYC"}', output='{"temp": 72}' + ) + otel_span = processor._start_function_span(func_data, parent_context=None) + processor._end_function_span(otel_span, func_data, trace_content=True) + otel_span.end() + + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS] == '{"city": "NYC"}' + assert attrs[GenAIAttributes.GEN_AI_TOOL_CALL_RESULT] == '{"temp": 72}' + + def test_content_gated_when_false(self, tracer_and_exporter, processor): + """Must NOT set arguments/result when trace_content is False.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData( + name="get_weather", input='{"city": "NYC"}', output='{"temp": 72}' + ) + otel_span = processor._start_function_span(func_data, parent_context=None) + processor._end_function_span(otel_span, func_data, trace_content=False) + otel_span.end() + + attrs = dict(otel_span.attributes) + assert GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS not in attrs + assert GenAIAttributes.GEN_AI_TOOL_CALL_RESULT not in attrs + + def test_none_input_output_omitted(self, tracer_and_exporter, processor): + """None input/output must not produce attributes.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name="noop", input=None, output=None) + otel_span = processor._start_function_span(func_data, parent_context=None) + processor._end_function_span(otel_span, func_data, trace_content=True) + otel_span.end() + + attrs = dict(otel_span.attributes) + assert GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS not 
in attrs + assert GenAIAttributes.GEN_AI_TOOL_CALL_RESULT not in attrs + + def test_non_string_output_coerced(self, tracer_and_exporter, processor): + """Non-string output must be str()-converted.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name="calc", input="2+2", output=4) + otel_span = processor._start_function_span(func_data, parent_context=None) + processor._end_function_span(otel_span, func_data, trace_content=True) + otel_span.end() + + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS] == "2+2" + assert attrs[GenAIAttributes.GEN_AI_TOOL_CALL_RESULT] == "4" + + +# --------------------------------------------------------------------------- +# Tests: _start_generation_span +# --------------------------------------------------------------------------- + +class TestStartGenerationSpan: + """Unit tests for the extracted _start_generation_span handler.""" + + def test_returns_span_with_chat_attributes(self, tracer_and_exporter, processor): + """GenerationSpanData (no span_data) → operation_name=chat.""" + otel_span = processor._start_generation_span(parent_context=None) + + assert otel_span is not None + assert otel_span.name == "openai.response" + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat" + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + otel_span.end() + + def test_response_span_data_uses_chat_operation(self, tracer_and_exporter, processor): + """ResponseSpanData (OpenAI Responses API) must emit 'chat' as operation name. + + 'generate_content' is the GCP/Gemini well-known value; it must not be used for + an OpenAI Responses API span, which is a chat completion surface. + """ + class ResponseSpanData: + model = "gpt-4o" + + otel_span = processor._start_generation_span( + parent_context=None, span_data=ResponseSpanData() + ) + + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat", ( + f"ResponseSpanData (OpenAI Responses API) must emit 'chat', " + f"got '{attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}'" + ) + + otel_span.end() + + def test_span_kind_is_client(self, tracer_and_exporter, processor): + """Generation/response spans must be CLIENT kind.""" + otel_span = processor._start_generation_span(parent_context=None) + + assert otel_span.kind == SpanKind.CLIENT + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Tests: _start_realtime_span +# --------------------------------------------------------------------------- + +class TestStartRealtimeSpan: + """Unit tests for the extracted _start_realtime_span handler.""" + + def test_speech_span_attributes(self, tracer_and_exporter, processor): + """Speech span must have correct name and operation.""" + span_data = MagicMock() + span_data.model = "gpt-4o-realtime-preview" + + otel_span = processor._start_realtime_span( + span_data, parent_context=None, + span_name="openai.realtime.speech", operation="speech", + ) + + assert otel_span is not None + assert otel_span.name == "openai.realtime.speech" + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "speech" + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + assert attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "gpt-4o-realtime-preview" + + otel_span.end() + + def test_transcription_span_attributes(self, tracer_and_exporter, processor): + """Transcription span must have correct name and 
operation.""" + span_data = MagicMock() + span_data.model = "whisper-1" + + otel_span = processor._start_realtime_span( + span_data, parent_context=None, + span_name="openai.realtime.transcription", operation="transcription", + ) + + assert otel_span.name == "openai.realtime.transcription" + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "transcription" + assert attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "whisper-1" + + otel_span.end() + + def test_speech_group_span_no_model(self, tracer_and_exporter, processor): + """Speech group span with no model → model attribute omitted.""" + span_data = MagicMock(spec=[]) # no attributes at all + + otel_span = processor._start_realtime_span( + span_data, parent_context=None, + span_name="openai.realtime.speech_group", operation="speech_group", + ) + + assert otel_span.name == "openai.realtime.speech_group" + attrs = dict(otel_span.attributes) + assert GenAIAttributes.GEN_AI_REQUEST_MODEL not in attrs + + otel_span.end() + + def test_span_kind_is_client(self, tracer_and_exporter, processor): + """All realtime spans must be CLIENT kind.""" + span_data = MagicMock(spec=[]) + + otel_span = processor._start_realtime_span( + span_data, parent_context=None, + span_name="openai.realtime.speech", operation="speech", + ) + + assert otel_span.kind == SpanKind.CLIENT + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Tests: _extract_tool_definitions (pure function) +# --------------------------------------------------------------------------- + +class TestExtractToolDefinitions: + """Unit tests for the extracted _extract_tool_definitions helper.""" + + def test_function_wrapped_tool(self): + """Tool with .function wrapper → {type, function: {name, description, parameters}}.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_tool_definitions, + ) + + func = MagicMock() + func.name = "get_weather" + func.description = "Get weather data" + func.parameters = {"type": "object", "properties": {"city": {"type": "string"}}} + + tool = MagicMock() + tool.function = func + tool.type = "function" + + result = _extract_tool_definitions([tool]) + + assert len(result) == 1 + assert result[0]["type"] == "function" + assert result[0]["function"]["name"] == "get_weather" + assert result[0]["function"]["description"] == "Get weather data" + assert "properties" in result[0]["function"]["parameters"] + + def test_direct_function_tool(self): + """Tool with direct .name (no .function wrapper) → wrapped {type, function} shape.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_tool_definitions, + ) + + tool = MagicMock(spec=["name", "description", "parameters"]) + tool.name = "search" + tool.description = "Search the web" + tool.parameters = {"type": "object"} + + result = _extract_tool_definitions([tool]) + + assert len(result) == 1 + assert result[0]["type"] == "function" + assert result[0]["function"]["name"] == "search" + assert result[0]["function"]["description"] == "Search the web" + + def test_empty_tools_list(self): + """Empty tools list → empty result.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_tool_definitions, + ) + + result = _extract_tool_definitions([]) + assert result == [] + + def test_none_tools(self): + """None tools → empty result.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_tool_definitions, + ) + + result = 
_extract_tool_definitions(None) + assert result == [] + + def test_mixed_tool_formats(self): + """Mix of function-wrapped and direct tools.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_tool_definitions, + ) + + func = MagicMock() + func.name = "tool_a" + func.description = "Tool A" + func.parameters = {} + + wrapped = MagicMock() + wrapped.function = func + wrapped.type = "function" + + direct = MagicMock(spec=["name", "description"]) + direct.name = "tool_b" + direct.description = "Tool B" + + result = _extract_tool_definitions([wrapped, direct]) + assert len(result) == 2 + for d in result: + assert d["type"] == "function" + assert "function" in d + names = {r["function"]["name"] for r in result} + assert names == {"tool_a", "tool_b"} + + def test_both_branches_produce_consistent_wrapped_shape(self): + """Both function-wrapped and direct-function tools must produce the same shape. + + Semconv note [14]: gen_ai.tool.definitions should use source system's + representation. For OpenAI that's always {type:'function', function:{...}}. + """ + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_tool_definitions, + ) + from types import SimpleNamespace + + wrapped = SimpleNamespace( + type="function", + function=SimpleNamespace( + name="search", description="Search things", parameters={"type": "object"} + ), + ) + direct = SimpleNamespace( + name="lookup", description="Look up things", parameters={"type": "object"} + ) + + defs = _extract_tool_definitions([wrapped, direct]) + + assert len(defs) == 2 + for d in defs: + assert d["type"] == "function", f"Missing type wrapper: {d}" + assert "function" in d, f"Missing function wrapper: {d}" + assert "name" in d["function"] + + +# --------------------------------------------------------------------------- +# Tests: _end_generation_span +# --------------------------------------------------------------------------- + +class TestEndGenerationSpan: + """Unit tests for the extracted _end_generation_span method.""" + + def test_extracts_prompt_attributes(self, tracer_and_exporter, processor): + """Must call _extract_prompt_attributes with input data.""" + tracer, exporter = tracer_and_exporter + otel_span = tracer.start_span("test-gen") + + span_data = MagicMock() + span_data.input = [{"role": "user", "content": "Hello"}] + span_data.response = None + + processor._end_generation_span(otel_span, span_data, trace_content=True) + + raw = otel_span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + assert messages[0]["role"] == "user" + + otel_span.end() + + def test_extracts_tool_definitions(self, tracer_and_exporter, processor): + """Must extract and set tool definitions from response.tools.""" + tracer, exporter = tracer_and_exporter + otel_span = tracer.start_span("test-gen") + + func = MagicMock() + func.name = "search" + func.description = "Search" + func.parameters = {} + tool = MagicMock() + tool.function = func + tool.type = "function" + + response = MagicMock() + response.tools = [tool] + response.output = [] + response.model = "gpt-4o" + response.id = "resp-1" + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.frequency_penalty = None + response.finish_reason = "stop" + response.usage = None + + span_data = MagicMock() + span_data.input = [] + span_data.response = response + span_data.tools = None + + processor._end_generation_span(otel_span, span_data, trace_content=True) + + raw = 
otel_span.attributes.get(GenAIAttributes.GEN_AI_TOOL_DEFINITIONS) + assert raw is not None + defs = json.loads(raw) + assert len(defs) == 1 + assert defs[0]["function"]["name"] == "search" + + otel_span.end() + + def test_no_tool_definitions_when_content_gated(self, tracer_and_exporter, processor): + """Tool definitions must NOT be set when trace_content=False.""" + tracer, exporter = tracer_and_exporter + otel_span = tracer.start_span("test-gen") + + func = MagicMock() + func.name = "search" + func.description = "Search" + func.parameters = {} + tool = MagicMock() + tool.function = func + tool.type = "function" + + response = MagicMock() + response.tools = [tool] + response.output = [] + response.model = "gpt-4o" + response.id = "resp-1" + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.frequency_penalty = None + response.finish_reason = "stop" + response.usage = None + + span_data = MagicMock() + span_data.input = [] + span_data.response = response + + processor._end_generation_span(otel_span, span_data, trace_content=False) + + assert GenAIAttributes.GEN_AI_TOOL_DEFINITIONS not in otel_span.attributes + + otel_span.end() + + def test_extracts_response_attributes(self, tracer_and_exporter, processor): + """Must extract response model, id, etc.""" + tracer, exporter = tracer_and_exporter + otel_span = tracer.start_span("test-gen") + + content_item = MagicMock() + content_item.type = "output_text" + content_item.text = "Hello!" + + output_msg = MagicMock() + output_msg.type = "message" + output_msg.content = [content_item] + output_msg.role = "assistant" + output_msg.name = None + + response = MagicMock() + response.tools = [] + response.output = [output_msg] + response.model = "gpt-4o-mini" + response.id = "resp-abc" + response.temperature = 0.7 + response.max_output_tokens = 100 + response.top_p = 1.0 + response.frequency_penalty = None + response.finish_reason = "stop" + response.usage = None + + span_data = MagicMock() + span_data.input = [] + span_data.response = response + + processor._end_generation_span(otel_span, span_data, trace_content=True) + + assert otel_span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_MODEL) == "gpt-4o-mini" + assert otel_span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_ID) == "resp-abc" + + otel_span.end() + + def test_no_response_no_crash(self, tracer_and_exporter, processor): + """span_data.response=None must not raise.""" + tracer, exporter = tracer_and_exporter + otel_span = tracer.start_span("test-gen") + + span_data = MagicMock() + span_data.input = [] + span_data.response = None + + # Should not raise + processor._end_generation_span(otel_span, span_data, trace_content=True) + + otel_span.end() + + def test_tools_sourced_from_span_data_over_response(self, tracer_and_exporter, processor): + """Tool definitions should come from span_data (request), not response. + + Tools are request metadata; the response may not always echo them. 
+ """ + from types import SimpleNamespace + + tracer, exporter = tracer_and_exporter + otel_span = tracer.start_span("test-gen") + + request_tool = SimpleNamespace( + type="function", + function=SimpleNamespace(name="from_request", description="Request", parameters={}), + ) + response_tool = SimpleNamespace( + type="function", + function=SimpleNamespace(name="from_response", description="Response", parameters={}), + ) + + span_data = SimpleNamespace( + input=[], + response=SimpleNamespace( + temperature=None, max_output_tokens=None, top_p=None, + model="gpt-4o", id="resp_1", frequency_penalty=None, + finish_reason=None, status="completed", output=[], usage=None, + tools=[response_tool], + ), + tools=[request_tool], + model="gpt-4o", + ) + + processor._end_generation_span(otel_span, span_data, trace_content=True) + + raw = otel_span.attributes.get(GenAIAttributes.GEN_AI_TOOL_DEFINITIONS) + assert raw is not None + defs = json.loads(raw) + tool_names = [d["function"]["name"] for d in defs] + assert "from_request" in tool_names, f"Expected request tool, got: {tool_names}" + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Tests: _set_realtime_io_attributes +# --------------------------------------------------------------------------- + +class TestSetRealtimeIOAttributes: + """Unit tests for the extracted _set_realtime_io_attributes method.""" + + def test_speech_span_input_and_output(self, tracer_and_exporter, processor): + """SpeechSpanData with input text and output text → both messages set.""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = "What is the weather?" + span_data.output = "It's sunny." + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=True) + + raw_in = otel_span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw_in is not None + in_msgs = json.loads(raw_in) + assert in_msgs[0]["role"] == "user" + assert in_msgs[0]["parts"][0]["content"] == "What is the weather?" + + raw_out = otel_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw_out is not None + out_msgs = json.loads(raw_out) + assert out_msgs[0]["role"] == "assistant" + assert out_msgs[0]["parts"][0]["content"] == "It's sunny." 
+ + otel_span.end() + + def test_transcription_span_input_and_output(self, tracer_and_exporter, processor): + """TranscriptionSpanData with audio input (non-binary) and text output.""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = "audio-description-text" + span_data.output = "Transcribed text here" + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=True) + + raw_in = otel_span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw_in is not None + + raw_out = otel_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw_out is not None + + otel_span.end() + + def test_binary_input_skipped(self, tracer_and_exporter, processor): + """Binary input (bytes/bytearray) must NOT be set as input message.""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = b"\x00\x01\x02" + span_data.output = "Transcribed" + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=True) + + assert GenAIAttributes.GEN_AI_INPUT_MESSAGES not in otel_span.attributes + + otel_span.end() + + def test_binary_output_skipped(self, tracer_and_exporter, processor): + """Binary output (bytes/bytearray) must NOT be set as output message.""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = "Hello" + span_data.output = b"\x00\x01\x02" + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=True) + + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in otel_span.attributes + + otel_span.end() + + def test_speech_group_no_output(self, tracer_and_exporter, processor): + """SpeechGroupSpanData with has_output=False → only input set.""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = "Group input" + span_data.output = None + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=False) + + raw_in = otel_span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw_in is not None + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in otel_span.attributes + + otel_span.end() + + def test_none_input_skipped(self, tracer_and_exporter, processor): + """None input → no input message attribute.""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = None + span_data.output = "Output text" + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=True) + + assert GenAIAttributes.GEN_AI_INPUT_MESSAGES not in otel_span.attributes + + otel_span.end() + + def test_output_has_finish_reason_empty(self, tracer_and_exporter, processor): + """Realtime output messages must include finish_reason: '' (empty string).""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = None + span_data.output = "Some output" + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=True) + + raw_out = otel_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + out_msgs = json.loads(raw_out) + assert out_msgs[0]["finish_reason"] == "" + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Integration: on_span_start/on_span_end still work end-to-end +# (These confirm refactoring doesn't break the public API) +# 
--------------------------------------------------------------------------- + +class TestOnSpanStartEndToEnd: + """Verify on_span_start dispatches correctly after refactoring.""" + + def _run_span(self, processor, exporter, span_data, trace_id="e2e-trace"): + mock_trace = MagicMock() + mock_trace.trace_id = trace_id + processor.on_trace_start(mock_trace) + + span = MockAgentSpan(span_data, trace_id=trace_id) + processor.on_span_start(span) + processor.on_span_end(span) + processor.on_trace_end(mock_trace) + + return exporter.get_finished_spans() + + def test_agent_span_created(self, tracer_and_exporter, processor): + from agents import AgentSpanData + _, exporter = tracer_and_exporter + + spans = self._run_span( + processor, exporter, + AgentSpanData(name="TestAgent", handoffs=[], tools=[], output_type=""), + ) + names = [s.name for s in spans] + assert "TestAgent.agent" in names + + def test_handoff_span_created(self, tracer_and_exporter, processor): + from agents import HandoffSpanData + _, exporter = tracer_and_exporter + + spans = self._run_span( + processor, exporter, + HandoffSpanData(from_agent="A", to_agent="B"), + ) + names = [s.name for s in spans] + assert any("handoff" in n for n in names) + + def test_function_span_created(self, tracer_and_exporter, processor): + from agents import FunctionSpanData + _, exporter = tracer_and_exporter + + spans = self._run_span( + processor, exporter, + FunctionSpanData(name="my_tool", input="", output=""), + ) + names = [s.name for s in spans] + assert "my_tool.tool" in names + + def test_generation_span_created(self, tracer_and_exporter, processor): + from agents import GenerationSpanData + _, exporter = tracer_and_exporter + + spans = self._run_span( + processor, exporter, + GenerationSpanData(model="gpt-4o", model_config={}), + ) + names = [s.name for s in spans] + assert "openai.response" in names + + def test_error_status_propagated(self, tracer_and_exporter, processor): + from agents import FunctionSpanData + _, exporter = tracer_and_exporter + + mock_trace = MagicMock() + mock_trace.trace_id = "err-trace" + processor.on_trace_start(mock_trace) + + span_data = FunctionSpanData(name="fail_tool", input="", output="") + span = MockAgentSpan(span_data, trace_id="err-trace", error=RuntimeError("boom")) + processor.on_span_start(span) + processor.on_span_end(span) + processor.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + tool_span = next(s for s in spans if s.name == "fail_tool.tool") + assert tool_span.status.status_code.name == "ERROR" diff --git a/packages/opentelemetry-instrumentation-openai-agents/uv.lock b/packages/opentelemetry-instrumentation-openai-agents/uv.lock index 0714339f61..a91d806b71 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/uv.lock +++ b/packages/opentelemetry-instrumentation-openai-agents/uv.lock @@ -698,15 +698,12 @@ wheels = [ ] [[package]] -name = "griffe" -version = "1.15.0" +name = "griffelib" +version = "2.0.2" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0d/0c/3a471b6e31951dce2360477420d0a8d1e00dea6cf33b70f3e8c3ab6e28e1/griffe-1.15.0.tar.gz", hash = "sha256:7726e3afd6f298fbc3696e67958803e7ac843c1cfe59734b6251a40cdbfb5eea", size = 424112, upload-time = "2025-11-10T15:03:15.52Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/82/74f4a3310cdabfbb10da554c3a672847f1ed33c6f61dd472681ce7f1fe67/griffelib-2.0.2.tar.gz", hash = 
"sha256:3cf20b3bc470e83763ffbf236e0076b1211bac1bc67de13daf494640f2de707e", size = 166461, upload-time = "2026-03-27T11:34:51.091Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9c/83/3b1d03d36f224edded98e9affd0467630fc09d766c0e56fb1498cbb04a9b/griffe-1.15.0-py3-none-any.whl", hash = "sha256:6f6762661949411031f5fcda9593f586e6ce8340f0ba88921a0f2ef7a81eb9a3", size = 150705, upload-time = "2025-11-10T15:03:13.549Z" }, + { url = "https://files.pythonhosted.org/packages/11/8c/c9138d881c79aa0ea9ed83cbd58d5ca75624378b38cee225dcf5c42cc91f/griffelib-2.0.2-py3-none-any.whl", hash = "sha256:925c857658fb1ba40c0772c37acbc2ab650bd794d9c1b9726922e36ea4117ea1", size = 142357, upload-time = "2026-03-27T11:34:46.275Z" }, ] [[package]] @@ -1244,7 +1241,7 @@ wheels = [ [[package]] name = "openai" -version = "2.20.0" +version = "2.32.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1256,27 +1253,28 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6e/5a/f495777c02625bfa18212b6e3b73f1893094f2bf660976eb4bc6f43a1ca2/openai-2.20.0.tar.gz", hash = "sha256:2654a689208cd0bf1098bb9462e8d722af5cbe961e6bba54e6f19fb843d88db1", size = 642355, upload-time = "2026-02-10T19:02:54.145Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/59/bdcc6b759b8c42dd73afaf5bf8f902c04b37987a5514dbc1c64dba390fef/openai-2.32.0.tar.gz", hash = "sha256:c54b27a9e4cb8d51f0dd94972ffd1a04437efeb259a9e60d8922b8bd26fe55e0", size = 693286, upload-time = "2026-04-15T22:28:19.434Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/a0/cf4297aa51bbc21e83ef0ac018947fa06aea8f2364aad7c96cbf148590e6/openai-2.20.0-py3-none-any.whl", hash = "sha256:38d989c4b1075cd1f76abc68364059d822327cf1a932531d429795f4fc18be99", size = 1098479, upload-time = "2026-02-10T19:02:52.157Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c1/d6e64ccd0536bf616556f0cad2b6d94a8125f508d25cfd814b1d2db4e2f1/openai-2.32.0-py3-none-any.whl", hash = "sha256:4dcc9badeb4bf54ad0d187453742f290226d30150890b7890711bda4f32f192f", size = 1162570, upload-time = "2026-04-15T22:28:17.714Z" }, ] [[package]] name = "openai-agents" -version = "0.8.4" +version = "0.14.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "griffe" }, + { name = "griffelib" }, { name = "mcp" }, { name = "openai" }, { name = "pydantic" }, { name = "requests" }, { name = "types-requests" }, { name = "typing-extensions" }, + { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ed/e0/9fa9eac9baf2816bc63cee28967d35a7ed9dc2f25e9fd2004f48ed6c8820/openai_agents-0.8.4.tar.gz", hash = "sha256:5d4c4861aedd56a82b15c6ddf6c53031a39859a222f08bbd5645d5967efa05e8", size = 2389744, upload-time = "2026-02-11T19:14:30.75Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e2/2c/74fea5c1b20f107d19742af400045542e17eb449d044f950758a6f68f32f/openai_agents-0.14.2.tar.gz", hash = "sha256:fd4e1ae84e00aef4339ad39913468a0da34cba8ff5382925d38028ac3b435ef2", size = 5288888, upload-time = "2026-04-18T00:17:39.709Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/55/dc/10df015aebb0797a8367aab65200ac4f5221df20bbae76930f5b6ac8e001/openai_agents-0.8.4-py3-none-any.whl", hash = "sha256:2383c6e8e59ed4146b89d1b6f53e34e55caf94bc14ae3fd704e7aad5021f4ff1", size = 380662, upload-time = "2026-02-11T19:14:28.864Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/02/bb3ddff9bca543cbcca0d362a645b03a0708b6c2cd6eb620d5f3de810bb3/openai_agents-0.14.2-py3-none-any.whl", hash = "sha256:acd75132f2ec70413a4e95680b629ccbc6f4f5d0d63414980153de09d409e695", size = 807364, upload-time = "2026-04-18T00:17:37.505Z" }, ] [[package]] @@ -1309,7 +1307,7 @@ wheels = [ [[package]] name = "opentelemetry-instrumentation-openai-agents" -version = "0.53.3" +version = "0.59.2" source = { editable = "." } dependencies = [ { name = "opentelemetry-api" }, @@ -1360,7 +1358,7 @@ dev = [ ] test = [ { name = "litellm", specifier = ">=1.71.2,<2" }, - { name = "openai-agents", specifier = ">=0.6.9" }, + { name = "openai-agents", specifier = ">=0.14.2" }, { name = "opentelemetry-sdk", specifier = ">=1.38.0,<2" }, { name = "pytest", specifier = ">=8.2.2,<9" }, { name = "pytest-asyncio", specifier = ">=1.0.0,<2" }, @@ -2491,6 +2489,65 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/d7/f79b05a5d728f8786876a7d75dfb0c5cae27e428081b2d60152fb52f155f/vcrpy-8.1.1-py3-none-any.whl", hash = "sha256:2d16f31ad56493efb6165182dd99767207031b0da3f68b18f975545ede8ac4b9", size = 42445, upload-time = "2026-01-04T19:22:02.532Z" }, ] +[[package]] +name = "websockets" +version = "15.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016, upload-time = "2025-03-05T20:03:41.606Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/da/6462a9f510c0c49837bbc9345aca92d767a56c1fb2939e1579df1e1cdcf7/websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b", size = 175423, upload-time = "2025-03-05T20:01:35.363Z" }, + { url = "https://files.pythonhosted.org/packages/1c/9f/9d11c1a4eb046a9e106483b9ff69bce7ac880443f00e5ce64261b47b07e7/websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205", size = 173080, upload-time = "2025-03-05T20:01:37.304Z" }, + { url = "https://files.pythonhosted.org/packages/d5/4f/b462242432d93ea45f297b6179c7333dd0402b855a912a04e7fc61c0d71f/websockets-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a", size = 173329, upload-time = "2025-03-05T20:01:39.668Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0c/6afa1f4644d7ed50284ac59cc70ef8abd44ccf7d45850d989ea7310538d0/websockets-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e", size = 182312, upload-time = "2025-03-05T20:01:41.815Z" }, + { url = "https://files.pythonhosted.org/packages/dd/d4/ffc8bd1350b229ca7a4db2a3e1c482cf87cea1baccd0ef3e72bc720caeec/websockets-15.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf", size = 181319, upload-time = "2025-03-05T20:01:43.967Z" }, + { url = "https://files.pythonhosted.org/packages/97/3a/5323a6bb94917af13bbb34009fac01e55c51dfde354f63692bf2533ffbc2/websockets-15.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb", size = 181631, upload-time = "2025-03-05T20:01:46.104Z" }, + { url = "https://files.pythonhosted.org/packages/a6/cc/1aeb0f7cee59ef065724041bb7ed667b6ab1eeffe5141696cccec2687b66/websockets-15.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d", size = 182016, upload-time = "2025-03-05T20:01:47.603Z" }, + { url = "https://files.pythonhosted.org/packages/79/f9/c86f8f7af208e4161a7f7e02774e9d0a81c632ae76db2ff22549e1718a51/websockets-15.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9", size = 181426, upload-time = "2025-03-05T20:01:48.949Z" }, + { url = "https://files.pythonhosted.org/packages/c7/b9/828b0bc6753db905b91df6ae477c0b14a141090df64fb17f8a9d7e3516cf/websockets-15.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c", size = 181360, upload-time = "2025-03-05T20:01:50.938Z" }, + { url = "https://files.pythonhosted.org/packages/89/fb/250f5533ec468ba6327055b7d98b9df056fb1ce623b8b6aaafb30b55d02e/websockets-15.0.1-cp310-cp310-win32.whl", hash = "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256", size = 176388, upload-time = "2025-03-05T20:01:52.213Z" }, + { url = "https://files.pythonhosted.org/packages/1c/46/aca7082012768bb98e5608f01658ff3ac8437e563eca41cf068bd5849a5e/websockets-15.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41", size = 176830, upload-time = "2025-03-05T20:01:53.922Z" }, + { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423, upload-time = "2025-03-05T20:01:56.276Z" }, + { url = "https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082, upload-time = "2025-03-05T20:01:57.563Z" }, + { url = "https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330, upload-time = "2025-03-05T20:01:59.063Z" }, + { url = "https://files.pythonhosted.org/packages/a5/90/1c37ae8b8a113d3daf1065222b6af61cc44102da95388ac0018fcb7d93d9/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562", size = 182878, upload-time = "2025-03-05T20:02:00.305Z" }, + { url = "https://files.pythonhosted.org/packages/8e/8d/96e8e288b2a41dffafb78e8904ea7367ee4f891dafc2ab8d87e2124cb3d3/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792", size = 181883, upload-time = "2025-03-05T20:02:03.148Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/1f/5d6dbf551766308f6f50f8baf8e9860be6182911e8106da7a7f73785f4c4/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413", size = 182252, upload-time = "2025-03-05T20:02:05.29Z" }, + { url = "https://files.pythonhosted.org/packages/d4/78/2d4fed9123e6620cbf1706c0de8a1632e1a28e7774d94346d7de1bba2ca3/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8", size = 182521, upload-time = "2025-03-05T20:02:07.458Z" }, + { url = "https://files.pythonhosted.org/packages/e7/3b/66d4c1b444dd1a9823c4a81f50231b921bab54eee2f69e70319b4e21f1ca/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3", size = 181958, upload-time = "2025-03-05T20:02:09.842Z" }, + { url = "https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918, upload-time = "2025-03-05T20:02:11.968Z" }, + { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388, upload-time = "2025-03-05T20:02:13.32Z" }, + { url = "https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828, upload-time = "2025-03-05T20:02:14.585Z" }, + { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437, upload-time = "2025-03-05T20:02:16.706Z" }, + { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096, upload-time = "2025-03-05T20:02:18.832Z" }, + { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332, upload-time = "2025-03-05T20:02:20.187Z" }, + { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152, upload-time = "2025-03-05T20:02:22.286Z" }, + { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096, upload-time = 
"2025-03-05T20:02:24.368Z" }, + { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523, upload-time = "2025-03-05T20:02:25.669Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790, upload-time = "2025-03-05T20:02:26.99Z" }, + { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165, upload-time = "2025-03-05T20:02:30.291Z" }, + { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160, upload-time = "2025-03-05T20:02:31.634Z" }, + { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395, upload-time = "2025-03-05T20:02:33.017Z" }, + { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841, upload-time = "2025-03-05T20:02:34.498Z" }, + { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440, upload-time = "2025-03-05T20:02:36.695Z" }, + { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098, upload-time = "2025-03-05T20:02:37.985Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329, upload-time = "2025-03-05T20:02:39.298Z" }, + { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111, upload-time = "2025-03-05T20:02:40.595Z" }, + { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054, upload-time = "2025-03-05T20:02:41.926Z" }, + { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496, upload-time = "2025-03-05T20:02:43.304Z" }, + { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829, upload-time = "2025-03-05T20:02:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217, upload-time = "2025-03-05T20:02:50.14Z" }, + { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195, upload-time = "2025-03-05T20:02:51.561Z" }, + { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393, upload-time = "2025-03-05T20:02:53.814Z" }, + { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837, upload-time = "2025-03-05T20:02:55.237Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/d40f779fa16f74d3468357197af8d6ad07e7c5a27ea1ca74ceb38986f77a/websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3", size = 173109, upload-time = "2025-03-05T20:03:17.769Z" }, + { url = "https://files.pythonhosted.org/packages/bc/cd/5b887b8585a593073fd92f7c23ecd3985cd2c3175025a91b0d69b0551372/websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1", size = 173343, upload-time = "2025-03-05T20:03:19.094Z" }, + { url = "https://files.pythonhosted.org/packages/fe/ae/d34f7556890341e900a95acf4886833646306269f899d58ad62f588bf410/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475", size = 174599, upload-time = "2025-03-05T20:03:21.1Z" }, + { url = "https://files.pythonhosted.org/packages/71/e6/5fd43993a87db364ec60fc1d608273a1a465c0caba69176dd160e197ce42/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9", size = 174207, upload-time = "2025-03-05T20:03:23.221Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/fb/c492d6daa5ec067c2988ac80c61359ace5c4c674c532985ac5a123436cec/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04", size = 174155, upload-time = "2025-03-05T20:03:25.321Z" }, + { url = "https://files.pythonhosted.org/packages/68/a1/dcb68430b1d00b698ae7a7e0194433bce4f07ded185f0ee5fb21e2a2e91e/websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122", size = 176884, upload-time = "2025-03-05T20:03:27.934Z" }, + { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" }, +] + [[package]] name = "wrapt" version = "1.17.3" diff --git a/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/_testing.py b/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/_testing.py index 453e34118d..4650a19e1e 100644 --- a/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/_testing.py +++ b/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/_testing.py @@ -377,6 +377,45 @@ def test_core_metrics_unchanged(self): assert Meters.LLM_OPERATION_DURATION == "gen_ai.client.operation.duration" +# --------------------------------------------------------------------------- +# Upstream OTel GenAI constants — message & tool attributes +# --------------------------------------------------------------------------- + + +class TestUpstreamGenAIMessageAttributes: + """Verify upstream OTel constants for messages/tools are importable and correct.""" + + def test_gen_ai_input_messages(self): + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + assert GenAIAttributes.GEN_AI_INPUT_MESSAGES == "gen_ai.input.messages" + + def test_gen_ai_output_messages(self): + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES == "gen_ai.output.messages" + + def test_gen_ai_tool_definitions(self): + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + assert GenAIAttributes.GEN_AI_TOOL_DEFINITIONS == "gen_ai.tool.definitions" + + def test_gen_ai_provider_name(self): + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME == "gen_ai.provider.name" + + def test_gen_ai_operation_name(self): + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + assert GenAIAttributes.GEN_AI_OPERATION_NAME == "gen_ai.operation.name" + + class TestMetersVendorNamespacesKept: """ Vendor-qualified metric names (llm.openai.*, llm.anthropic.*, llm.watsonx.*)