diff --git a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_hooks.py b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_hooks.py index e3805cb209..5282560386 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_hooks.py +++ b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_hooks.py @@ -24,6 +24,8 @@ dont_throw, GEN_AI_HANDOFF_FROM_AGENT, GEN_AI_HANDOFF_TO_AGENT, + GEN_AI_HANDOFF_PARENT_AGENT, + OPENAI_AGENT_HANDOFFS, ) try: @@ -45,139 +47,346 @@ SpeechGroupSpanData = None -def _extract_prompt_attributes(otel_span, input_data, trace_content: bool): +# --------------------------------------------------------------------------- +# Finish-reason mapping: OpenAI → OTel GenAI semconv +# --------------------------------------------------------------------------- +_FINISH_REASON_MAP = { + "stop": "stop", + "tool_calls": "tool_call", # plural → singular per OTel spec + "function_call": "tool_call", # legacy → OTel value + "length": "length", + "content_filter": "content_filter", + "error": "error", + # Responses API uses status instead of finish_reason + "completed": "stop", + "failed": "error", + "cancelled": "cancelled", # distinct from error; preserved as extension string + "incomplete": "incomplete", # may be content_filter or token limit; preserve semantics +} + + +def _map_finish_reason(raw): + """Map a provider-specific finish reason to the OTel enum value.""" + if raw is None: + return None + return _FINISH_REASON_MAP.get(raw, raw) + + +def _parse_arguments(args): + """Best-effort parse of tool-call arguments to a dict (object) or None. + + Per OTel spec, arguments must be objects, never raw JSON strings. + Falls back to ``{"_raw": args}`` when the string is not valid JSON + or parses to a non-dict type. """ - Extract prompt/input data from messages and set them as span attributes. + if args is None: + return None + if isinstance(args, dict): + return args + if isinstance(args, str): + if not args.strip(): + return None + try: + parsed = json.loads(args) + if isinstance(parsed, dict): + return parsed + # Parsed OK but not a dict (e.g. 
array, scalar) – wrap + return {"_raw": args} + except (json.JSONDecodeError, ValueError): + return {"_raw": args} + return {"_raw": str(args)} + + +def _normalize_tool_call(tool_call): + """Normalize a tool call (object or dict) into a flat {id, name, arguments} dict.""" + if isinstance(tool_call, dict): + tc = dict(tool_call) + if "function" in tc: + function = tc["function"] + if isinstance(function, dict): + tc = { + "id": tc.get("id"), + "name": function.get("name"), + "arguments": function.get("arguments"), + } + else: + tc = { + "id": tc.get("id"), + "name": getattr(function, "name", None), + "arguments": getattr(function, "arguments", None), + } + return tc + # Object with attributes + tc_dict: dict = {} + if hasattr(tool_call, "id"): + tc_dict["id"] = tool_call.id + if hasattr(tool_call, "function"): + func = tool_call.function + if hasattr(func, "name"): + tc_dict["name"] = func.name + if hasattr(func, "arguments"): + tc_dict["arguments"] = func.arguments + elif hasattr(tool_call, "name"): + tc_dict["name"] = tool_call.name + if hasattr(tool_call, "arguments") and "arguments" not in tc_dict: + tc_dict["arguments"] = tool_call.arguments + return tc_dict + + +_MESSAGE_ATTRS = ( + "role", "content", "tool_call_id", "tool_calls", + "type", "name", "arguments", "call_id", "output", +) + + +def _msg_to_dict(message) -> dict: + """Normalize a message (dict or SDK object) into a plain dict.""" + if isinstance(message, dict): + return message + return { + attr: getattr(message, attr) + for attr in _MESSAGE_ATTRS + if hasattr(message, attr) + } + + +def _stringify_content(content) -> str: + """Coerce non-string content to a string for simple text parts.""" + if isinstance(content, str): + return content + return json.dumps(content) + + +def _reasoning_text(s): + """Extract text from a reasoning summary item (object or dict).""" + if isinstance(s, dict): + return s.get("text", "") + return getattr(s, "text", str(s)) + + +def _content_block_to_part(block) -> dict: + """Convert a single multimodal content block to an OTel part. + + Handles dict blocks (OpenAI chat format) and SDK objects. + """ + if isinstance(block, str): + return {"type": "text", "content": block} + + if isinstance(block, dict): + return _dict_block_to_part(block) + + return _object_block_to_part(block) + + +def _url_to_part(url: str) -> dict: + """Dispatch an image URL to UriPart or BlobPart depending on scheme. + + data: URLs carry inline base64 content and must be BlobPart per OTel spec. + All other URLs (https:, http:, gs:, …) become UriPart. 
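+
+    Illustrative mapping (hypothetical values):
+        "data:image/png;base64,AAAA"  → BlobPart {"type": "blob",
+            "modality": "image", "content": "AAAA", "mime_type": "image/png"}
+        "https://example.com/cat.png" → UriPart {"type": "uri",
+            "modality": "image", "uri": "https://example.com/cat.png"}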
+ """ + if url.startswith("data:"): + # data:;base64, or data:, + header, _, content = url.partition(",") + mime: str | None = None + if header.startswith("data:"): + mime_part = header[5:] # strip "data:" + mime = mime_part.split(";")[0] or None + part: dict = {"type": "blob", "modality": "image", "content": content} + if mime: + part["mime_type"] = mime + return part + return {"type": "uri", "modality": "image", "uri": url} + + +_AUDIO_MIME: dict = { + "mp3": "audio/mpeg", + "wav": "audio/wav", + "ogg": "audio/ogg", + "flac": "audio/flac", + "webm": "audio/webm", + "m4a": "audio/mp4", +} + + +def _audio_blob_part(data: str, fmt: str | None) -> dict: + """Build a BlobPart for audio data, including mime_type when format is known.""" + part: dict = {"type": "blob", "modality": "audio", "content": data} + if fmt: + mime = _AUDIO_MIME.get(fmt) or f"audio/{fmt}" + part["mime_type"] = mime + return part + + +def _dict_block_to_part(block: dict) -> dict: + """Map a dict-based content block (OpenAI format) to an OTel part. + + Spec mapping (openllmetry-semconv-review.md §1 / Part Types): + OpenAI image_url data URL → OTel BlobPart {type:blob, modality:image, ...} + OpenAI image_url https URL → OTel UriPart {type:uri, modality:image, uri:...} + OpenAI input_audio → OTel BlobPart {type:blob, modality:audio, mime_type:...} + """ + btype = block.get("type", "text") + if btype in ("text", "input_text", "output_text"): + return {"type": "text", "content": block.get("text", "")} + if btype == "image_url": + url_info = block.get("image_url", {}) + url = ( + url_info.get("url", "") + if isinstance(url_info, dict) + else str(url_info) + ) + return _url_to_part(url) + if btype == "input_audio": + audio_info = block.get("input_audio", {}) + if isinstance(audio_info, dict): + data = audio_info.get("data", "") + fmt = audio_info.get("format") + else: + data = str(audio_info) + fmt = None + return _audio_blob_part(data, fmt) + return {"type": btype, **{k: v for k, v in block.items() if k != "type"}} + + +def _object_block_to_part(block) -> dict: + """Map an SDK-object content block via getattr.""" + btype = getattr(block, "type", "text") + if btype in ("text", "input_text", "output_text"): + return { + "type": "text", + "content": getattr(block, "text", str(block)), + } + if btype == "image_url": + url_obj = getattr(block, "image_url", None) + url = getattr(url_obj, "url", str(url_obj)) if url_obj else "" + return _url_to_part(url) + if btype == "input_audio": + audio_obj = getattr(block, "input_audio", None) + data = getattr(audio_obj, "data", str(audio_obj)) if audio_obj else "" + fmt = getattr(audio_obj, "format", None) if audio_obj else None + return _audio_blob_part(data, fmt) + return {"type": btype, "content": str(block)} + + +def _content_to_parts(content) -> list: + """Convert message content (str | list | scalar) into a list of OTel parts.""" + if isinstance(content, str): + return [{"type": "text", "content": content}] + if isinstance(content, list): + return [_content_block_to_part(block) for block in content] + return [{"type": "text", "content": str(content)}] + + +def _tool_call_to_part(tool_call) -> dict: + """Convert a single tool call to an OTel tool_call part.""" + tc = _normalize_tool_call(tool_call) + part: dict = {"type": "tool_call"} + if tc.get("id"): + part["id"] = tc["id"] + # name is required by OTel ToolCallRequestPart; fall back to "" rather than omit + part["name"] = tc.get("name") or "" + if tc.get("arguments") is not None: + part["arguments"] = _parse_arguments(tc["arguments"]) + 
return part
+
+
+def _build_tool_response_part(call_id, content) -> dict:
+    """Build a tool_call_response part from an id and optional content."""
+    part: dict = {"type": "tool_call_response"}
+    if call_id is not None:
+        part["id"] = call_id
+    if content is None:
+        part["response"] = ""
+    elif isinstance(content, (dict, list)):
+        part["response"] = content
+    else:
+        part["response"] = _stringify_content(content)
+    return part
+
+
+def _convert_chat_message(msg: dict):
+    """Convert a role-based chat message to a ``(role, parts)`` tuple."""
+    role = msg["role"]
+    content = msg.get("content")
+    tool_call_id = msg.get("tool_call_id")
+    tool_calls = msg.get("tool_calls")
+
+    if role == "tool" and tool_call_id:
+        return role, [_build_tool_response_part(tool_call_id, content)]
+
+    parts = []
+    if tool_calls:
+        if content is not None:
+            if isinstance(content, list):
+                parts.extend(_content_to_parts(content))
+            else:
+                text = _stringify_content(content)
+                if text:
+                    parts.append({"type": "text", "content": text})
+        parts.extend(_tool_call_to_part(tc) for tc in tool_calls)
+    elif content is not None:
+        parts = _content_to_parts(content)
+
+    return role, parts
+
+
+def _convert_agents_sdk_message(msg: dict):
+    """Convert an Agents SDK type-based message to ``(role, parts)``.
+
+    Returns ``(None, [])`` for unrecognized item types.
+    """
+    msg_type = msg["type"]
+    if msg_type == "function_call":
+        part: dict = {"type": "tool_call", "name": msg.get("name", "")}
+        call_id = msg.get("id")
+        if call_id:
+            part["id"] = call_id
+        if msg.get("arguments") is not None:
+            part["arguments"] = _parse_arguments(msg["arguments"])
+        return "assistant", [part]
+
+    if msg_type == "function_call_output":
+        part = _build_tool_response_part(
+            msg.get("call_id"),
+            msg.get("output"),
+        )
+        return "tool", [part]
+
+    return None, []
+
+
+def _extract_prompt_attributes(otel_span, input_data, trace_content: bool):
+    """Set ``gen_ai.input.messages`` using the OTel parts-based schema.
 
     Handles both OpenAI chat format (role/content) and Agents SDK
     format (type/function_call/function_call_output).
+
+    Only emitted when *trace_content* is True (opt-in content attribute).
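+
+    Illustrative serialization (hypothetical input, trace_content=True):
+        input_data = [{"role": "user", "content": "What is the weather in London?"}]
+        sets gen_ai.input.messages to
+        '[{"role": "user", "parts": [{"type": "text",
+          "content": "What is the weather in London?"}]}]'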
""" - if not input_data: + if not input_data or not trace_content: return - for i, message in enumerate(input_data): - prefix = f"{GenAIAttributes.GEN_AI_PROMPT}.{i}" - - # Convert message to dict for unified handling - if isinstance(message, dict): - msg = message - else: - # Convert object to dict - msg = {} - for attr in [ - "role", - "content", - "tool_call_id", - "tool_calls", - "type", - "name", - "arguments", - "call_id", - "output", - ]: - if hasattr(message, attr): - msg[attr] = getattr(message, attr) - - # Determine message format and extract data - role = None - content = None - tool_call_id = None - tool_calls = None + messages = [] + for message in input_data: + msg = _msg_to_dict(message) if "role" in msg: - # Standard OpenAI chat format - role = msg["role"] - content = msg.get("content") - tool_call_id = msg.get("tool_call_id") - tool_calls = msg.get("tool_calls") + role, parts = _convert_chat_message(msg) elif "type" in msg: - # OpenAI Agents SDK format - msg_type = msg["type"] - if msg_type == "function_call": - # Tool calls are assistant messages - role = "assistant" - # Create tool_calls structure matching OpenAI SDK format - tool_calls = [ - { - "id": msg.get("id", ""), - "name": msg.get("name", ""), - } | ( - {"arguments": msg.get("arguments", "")} - if trace_content else {} - ) - ] - elif ( - msg_type == "function_call_output" - and trace_content - ): - # Tool outputs are tool messages - role = "tool" - content = msg.get("output") - tool_call_id = msg.get("call_id") - - # Set role attribute - if role: - otel_span.set_attribute(f"{prefix}.role", role) - - # Set content attribute - if content is not None and trace_content: - if not isinstance(content, str): - content = json.dumps(content) - otel_span.set_attribute(f"{prefix}.content", content) - - # Set tool_call_id for tool result messages - if tool_call_id: - otel_span.set_attribute(f"{prefix}.tool_call_id", tool_call_id) - - # Set tool_calls for assistant messages with tool calls - if tool_calls: - for j, tool_call in enumerate(tool_calls): - # Convert to dict if needed - if not isinstance(tool_call, dict): - tc_dict = {} - if hasattr(tool_call, "id"): - tc_dict["id"] = tool_call.id - if hasattr(tool_call, "function"): - func = tool_call.function - if hasattr(func, "name"): - tc_dict["name"] = func.name - if hasattr(func, "arguments"): - tc_dict["arguments"] = func.arguments - elif hasattr(tool_call, "name"): - tc_dict["name"] = tool_call.name - if hasattr(tool_call, "arguments"): - tc_dict["arguments"] = tool_call.arguments - tool_call = tc_dict - - # Extract function details if nested (standard OpenAI format) - if "function" in tool_call: - function = tool_call["function"] - tool_call = { - "id": tool_call.get("id"), - "name": function.get("name"), - "arguments": function.get("arguments"), - } - - # Set tool call attributes - if tool_call.get("id"): - otel_span.set_attribute( - f"{prefix}.tool_calls.{j}.id", tool_call["id"] - ) - if tool_call.get("name"): - otel_span.set_attribute( - f"{prefix}.tool_calls.{j}.name", tool_call["name"] - ) - if tool_call.get("arguments") and trace_content: - args = tool_call["arguments"] - if not isinstance(args, str): - args = json.dumps(args) - otel_span.set_attribute(f"{prefix}.tool_calls.{j}.arguments", args) + role, parts = _convert_agents_sdk_message(msg) + else: + continue + + if role and parts: + messages.append({"role": role, "parts": parts}) + + if messages: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_INPUT_MESSAGES, json.dumps(messages) + ) def 
_extract_response_attributes(otel_span, response, trace_content: bool): """ Extract model settings, completions, and usage from a response object - and set them as span attributes. + and set them as span attributes using the OTel parts-based schema. Returns a dict of model_settings for potential use by parent spans. """ @@ -208,76 +417,146 @@ def _extract_response_attributes(otel_span, response, trace_content: bool): if hasattr(response, "model") and response.model: model_settings["model"] = response.model - otel_span.set_attribute(GenAIAttributes.GEN_AI_REQUEST_MODEL, response.model) + otel_span.set_attribute(GenAIAttributes.GEN_AI_RESPONSE_MODEL, response.model) + + if hasattr(response, "id") and response.id: + otel_span.set_attribute(GenAIAttributes.GEN_AI_RESPONSE_ID, response.id) if ( hasattr(response, "frequency_penalty") and response.frequency_penalty is not None ): model_settings["frequency_penalty"] = response.frequency_penalty + otel_span.set_attribute( + GenAIAttributes.GEN_AI_REQUEST_FREQUENCY_PENALTY, + response.frequency_penalty, + ) - # Extract completions from response.output + # Map finish reason (top-level fallback) + raw_finish_reason = getattr(response, "finish_reason", None) + if raw_finish_reason is None: + raw_finish_reason = getattr(response, "status", None) + mapped_finish_reason = _map_finish_reason(raw_finish_reason) + + # Extract completions from response.output. + # gen_ai.response.finish_reasons is Recommended metadata (not opt-in content), + # so we always iterate output items to collect per-item finish reasons, even + # when trace_content=False. Message content is only serialised when trace_content + # is True. if hasattr(response, "output") and response.output: - for i, output in enumerate(response.output): - if hasattr(output, "content") and output.content and trace_content: - # Text message with content array (ResponseOutputMessage) - content_text = "" - for content_item in output.content: - if hasattr(content_item, "text"): - content_text += content_item.text - - if content_text: - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content", content_text - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.role", - getattr(output, "role", "assistant"), - ) + output_messages = [] + per_item_reasons: list = [] - elif hasattr(output, "name"): - # Function/tool call (ResponseFunctionToolCall) - tool_name = getattr(output, "name", "unknown_tool") - arguments = getattr(output, "arguments", "{}") - tool_call_id = getattr(output, "call_id", f"call_{i}") + for output in response.output: + item_type = getattr(output, "type", None) + + if item_type == "function_call" or ( + item_type is None and getattr(output, "call_id", None) + ): + # Function/tool call always contributes "tool_call" regardless of + # the response-level finish_reason. 
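+                # Hypothetical example: a response whose only output item is
+                # a function_call yields finish_reasons == ("tool_call",),
+                # even when the Responses API reports status "completed".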
+ item_reason = _map_finish_reason("tool_calls") + per_item_reasons.append(item_reason) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.role", "assistant" - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.finish_reason", - "tool_calls", - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.tool_calls.0.name", - tool_name, - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.tool_calls.0.id", - tool_call_id, - ) if trace_content: - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.tool_calls.0.arguments", - arguments, - ) + tool_name = getattr(output, "name", "unknown_tool") + tool_call_id = getattr(output, "call_id", None) + part: dict = {"type": "tool_call", "name": tool_name} + if tool_call_id: + part["id"] = tool_call_id + raw_args = getattr(output, "arguments", None) + if raw_args is not None: + part["arguments"] = _parse_arguments(raw_args) + output_messages.append({ + "role": "assistant", + "parts": [part], + "finish_reason": item_reason, + }) + + elif hasattr(output, "content") and output.content: + # Text message with content array (ResponseOutputMessage) + item_reason = mapped_finish_reason or "" + per_item_reasons.append(item_reason) - elif hasattr(output, "text") and trace_content: + if trace_content: + parts = [] + for content_item in output.content: + ci_type = getattr(content_item, "type", None) + # Check known types first; use hasattr(.text) only as last resort + # to avoid misclassifying reasoning/refusal items that also carry .text + if ci_type == "output_text": + parts.append({ + "type": "text", + "content": getattr(content_item, "text", ""), + }) + elif ci_type == "refusal": + parts.append({ + "type": "refusal", + "content": getattr(content_item, "refusal", ""), + }) + elif ci_type == "reasoning": + summary = getattr(content_item, "summary", None) + text = "" + if isinstance(summary, list): + text = " ".join(_reasoning_text(s) for s in summary) + elif summary: + text = str(summary) + parts.append({"type": "reasoning", "content": text}) + elif ci_type is not None: + parts.append({ + "type": ci_type, + "content": str(content_item), + }) + elif hasattr(content_item, "text") and content_item.text: + parts.append({ + "type": "text", + "content": content_item.text, + }) + output_messages.append({ + "role": getattr(output, "role", "assistant"), + "parts": parts, + "finish_reason": item_reason, + }) + + elif hasattr(output, "text"): # Direct text content - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content", output.text - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.role", - getattr(output, "role", "assistant"), - ) + item_reason = mapped_finish_reason or "" + per_item_reasons.append(item_reason) - # Add finish reason if available (for non-tool-call cases) - if hasattr(response, "finish_reason") and not hasattr(output, "name"): - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.finish_reason", - response.finish_reason, - ) + if trace_content: + parts = [] + if output.text: + parts.append({"type": "text", "content": output.text}) + output_messages.append({ + "role": getattr(output, "role", "assistant"), + "parts": parts, + "finish_reason": item_reason, + }) + + if trace_content and output_messages: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages) + ) + + # Set top-level finish_reasons from per-item discovery; fall back to the + # response-level reason 
if no output items provided reasons. + meaningful_reasons = list(dict.fromkeys(r for r in per_item_reasons if r)) + if meaningful_reasons: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, + tuple(meaningful_reasons), + ) + elif mapped_finish_reason is not None: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, + (mapped_finish_reason,), + ) + else: + if mapped_finish_reason is not None: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, + (mapped_finish_reason,), + ) # Extract usage data if hasattr(response, "usage") and response.usage: @@ -304,12 +583,45 @@ def _extract_response_attributes(otel_span, response, trace_content: bool): if hasattr(usage, "total_tokens") and usage.total_tokens is not None: otel_span.set_attribute( - SpanAttributes.LLM_USAGE_TOTAL_TOKENS, usage.total_tokens + SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS, usage.total_tokens ) return model_settings +def _extract_tool_definitions(tools): + """Extract tool/function specs into a JSON-serializable list. + + Handles both function-wrapped tools (tool.function.name) and + direct function tools (tool.name). + """ + if not tools: + return [] + tool_defs = [] + for tool in tools: + if hasattr(tool, "function"): + function = tool.function + func_def = { + "name": getattr(function, "name", ""), + "description": getattr(function, "description", ""), + } + if hasattr(function, "parameters"): + func_def["parameters"] = function.parameters + tool_def = { + "type": getattr(tool, "type", "function"), + "function": func_def, + } + tool_defs.append(tool_def) + elif hasattr(tool, "name"): + func_def = {"name": tool.name} + if hasattr(tool, "description"): + func_def["description"] = tool.description + if hasattr(tool, "parameters"): + func_def["parameters"] = tool.parameters + tool_defs.append({"type": "function", "function": func_def}) + return tool_defs + + class OpenTelemetryTracingProcessor(TracingProcessor): """ A tracing processor that creates OpenTelemetry spans for OpenAI Agents. 
@@ -323,7 +635,6 @@ def __init__(self, tracer: Tracer): self._root_spans: Dict[str, Any] = {} # trace_id -> root span self._otel_spans: Dict[str, Any] = {} # agents span -> otel span self._span_contexts: Dict[str, Any] = {} # agents span -> context token - self._last_model_settings: Dict[str, Any] = {} self._reverse_handoffs_dict: OrderedDict[str, str] = OrderedDict() @dont_throw @@ -332,10 +643,10 @@ def on_trace_start(self, trace): # Create a root "Agent Workflow" span for the entire trace workflow_span = self.tracer.start_span( "Agent Workflow", - kind=SpanKind.CLIENT, + kind=SpanKind.INTERNAL, attributes={ SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.WORKFLOW.value, - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", SpanAttributes.TRACELOOP_WORKFLOW_NAME: "Agent Workflow", }, ) @@ -375,177 +686,30 @@ def on_span_start(self, span): otel_span = None if isinstance(span_data, AgentSpanData): - agent_name = getattr(span_data, "name", None) or "unknown_agent" - - if set_agent_name is not None: - set_agent_name(agent_name) - - handoff_parent = None - trace_id = getattr(span, "trace_id", None) - if trace_id: - handoff_key = f"{agent_name}:{trace_id}" - if parent_agent_name := self._reverse_handoffs_dict.pop( - handoff_key, None - ): - handoff_parent = parent_agent_name - - attributes = { - SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.AGENT.value, - GenAIAttributes.GEN_AI_AGENT_NAME: agent_name, - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", - } - - if handoff_parent: - attributes["gen_ai.agent.handoff_parent"] = handoff_parent - - if hasattr(span_data, "handoffs") and span_data.handoffs: - for i, handoff_agent in enumerate(span_data.handoffs): - handoff_info = { - "name": getattr(handoff_agent, "name", "unknown"), - "instructions": getattr( - handoff_agent, "instructions", "No instructions" - ), - } - attributes[f"openai.agent.handoff{i}"] = json.dumps(handoff_info) - - otel_span = self.tracer.start_span( - f"{agent_name}.agent", - kind=SpanKind.CLIENT, - context=parent_context, - attributes=attributes, - ) + otel_span = self._start_agent_span(span_data, parent_context, trace_id) elif isinstance(span_data, HandoffSpanData): - from_agent = getattr(span_data, "from_agent", None) - to_agent = getattr(span_data, "to_agent", None) - - from_agent = from_agent or "unknown" - - to_agent = to_agent or "unknown" - - trace_id = getattr(span, "trace_id", None) - if to_agent and to_agent != "unknown" and trace_id: - handoff_key = f"{to_agent}:{trace_id}" - self._reverse_handoffs_dict[handoff_key] = from_agent - - if len(self._reverse_handoffs_dict) > 1000: - self._reverse_handoffs_dict.popitem(last=False) - - from_agent_span = self._find_agent_span(from_agent) - if from_agent_span: - parent_context = set_span_in_context(from_agent_span) - - handoff_attributes = { - SpanAttributes.TRACELOOP_SPAN_KIND: "handoff", - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", - } - - if from_agent and from_agent != "unknown": - handoff_attributes[GEN_AI_HANDOFF_FROM_AGENT] = from_agent - handoff_attributes[GenAIAttributes.GEN_AI_AGENT_NAME] = from_agent - if to_agent and to_agent != "unknown": - handoff_attributes[GEN_AI_HANDOFF_TO_AGENT] = to_agent - - otel_span = self.tracer.start_span( - f"{from_agent} → {to_agent}.handoff", - kind=SpanKind.INTERNAL, - context=parent_context, - attributes=handoff_attributes, - ) + otel_span = self._start_handoff_span(span_data, parent_context, trace_id) elif isinstance(span_data, FunctionSpanData): - 
tool_name = getattr(span_data, "name", None) or "unknown_tool" - - current_agent_span = self._find_current_agent_span() - if current_agent_span: - parent_context = set_span_in_context(current_agent_span) - - tool_attributes = { - SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.TOOL.value, - GenAIAttributes.GEN_AI_TOOL_NAME: tool_name, - GenAIAttributes.GEN_AI_TOOL_TYPE: "function", - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", - f"{GenAIAttributes.GEN_AI_COMPLETION}.tool.name": tool_name, - f"{GenAIAttributes.GEN_AI_COMPLETION}.tool.type": "function", - f"{GenAIAttributes.GEN_AI_COMPLETION}.tool.strict_json_schema": True, - } + agent_ctx = self._resolve_agent_parent(parent_context) + otel_span = self._start_function_span(span_data, agent_ctx) - if hasattr(span_data, "description") and span_data.description: - # Only use description if it's not a generic class description - desc = span_data.description - if desc and not desc.startswith("Represents a Function Span"): - tool_attributes[GenAIAttributes.GEN_AI_TOOL_DESCRIPTION] = desc - - otel_span = self.tracer.start_span( - f"{tool_name}.tool", - kind=SpanKind.INTERNAL, - context=parent_context, - attributes=tool_attributes, - ) - - elif type(span_data).__name__ == "ResponseSpanData": - current_agent_span = self._find_current_agent_span() - if current_agent_span: - parent_context = set_span_in_context(current_agent_span) - - response_attributes = { - SpanAttributes.LLM_REQUEST_TYPE: "response", - GenAIAttributes.GEN_AI_SYSTEM: "openai", - GenAIAttributes.GEN_AI_OPERATION_NAME: "response", - } - - otel_span = self.tracer.start_span( - "openai.response", - kind=SpanKind.CLIENT, - context=parent_context, - attributes=response_attributes, - start_time=time.time_ns(), - ) - - elif isinstance(span_data, GenerationSpanData): - current_agent_span = self._find_current_agent_span() - if current_agent_span: - parent_context = set_span_in_context(current_agent_span) - - response_attributes = { - SpanAttributes.LLM_REQUEST_TYPE: "chat", - GenAIAttributes.GEN_AI_SYSTEM: "openai", - GenAIAttributes.GEN_AI_OPERATION_NAME: "chat", - } - - otel_span = self.tracer.start_span( - "openai.response", - kind=SpanKind.CLIENT, - context=parent_context, - attributes=response_attributes, - start_time=time.time_ns(), - ) + elif ( + type(span_data).__name__ == "ResponseSpanData" + or isinstance(span_data, GenerationSpanData) + ): + agent_ctx = self._resolve_agent_parent(parent_context) + otel_span = self._start_generation_span(agent_ctx, span_data) elif ( _has_realtime_spans and SpeechSpanData and isinstance(span_data, SpeechSpanData) ): - current_agent_span = self._find_current_agent_span() - if current_agent_span: - parent_context = set_span_in_context(current_agent_span) - - speech_attributes = { - SpanAttributes.LLM_REQUEST_TYPE: "realtime", - GenAIAttributes.GEN_AI_SYSTEM: "openai", - GenAIAttributes.GEN_AI_OPERATION_NAME: "speech", - } - - model = getattr(span_data, "model", None) - if model: - speech_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = model - - otel_span = self.tracer.start_span( - "openai.realtime.speech", - kind=SpanKind.CLIENT, - context=parent_context, - attributes=speech_attributes, - start_time=time.time_ns(), + agent_ctx = self._resolve_agent_parent(parent_context) + otel_span = self._start_realtime_span( + span_data, agent_ctx, "openai.realtime.speech", "speech", ) elif ( @@ -553,26 +717,9 @@ def on_span_start(self, span): and TranscriptionSpanData and isinstance(span_data, TranscriptionSpanData) ): - current_agent_span = 
self._find_current_agent_span() - if current_agent_span: - parent_context = set_span_in_context(current_agent_span) - - transcription_attributes = { - SpanAttributes.LLM_REQUEST_TYPE: "realtime", - GenAIAttributes.GEN_AI_SYSTEM: "openai", - GenAIAttributes.GEN_AI_OPERATION_NAME: "transcription", - } - - model = getattr(span_data, "model", None) - if model: - transcription_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = model - - otel_span = self.tracer.start_span( - "openai.realtime.transcription", - kind=SpanKind.CLIENT, - context=parent_context, - attributes=transcription_attributes, - start_time=time.time_ns(), + agent_ctx = self._resolve_agent_parent(parent_context) + otel_span = self._start_realtime_span( + span_data, agent_ctx, "openai.realtime.transcription", "transcription", ) elif ( @@ -580,22 +727,9 @@ def on_span_start(self, span): and SpeechGroupSpanData and isinstance(span_data, SpeechGroupSpanData) ): - current_agent_span = self._find_current_agent_span() - if current_agent_span: - parent_context = set_span_in_context(current_agent_span) - - speech_group_attributes = { - SpanAttributes.LLM_REQUEST_TYPE: "realtime", - GenAIAttributes.GEN_AI_SYSTEM: "openai", - GenAIAttributes.GEN_AI_OPERATION_NAME: "speech_group", - } - - otel_span = self.tracer.start_span( - "openai.realtime.speech_group", - kind=SpanKind.CLIENT, - context=parent_context, - attributes=speech_group_attributes, - start_time=time.time_ns(), + agent_ctx = self._resolve_agent_parent(parent_context) + otel_span = self._start_realtime_span( + span_data, agent_ctx, "openai.realtime.speech_group", "speech_group", ) if otel_span: @@ -607,7 +741,7 @@ def on_span_start(self, span): @dont_throw def on_span_end(self, span): """Called when a span ends - finish OpenTelemetry span.""" - from agents import GenerationSpanData + from agents import FunctionSpanData, GenerationSpanData if not span or not hasattr(span, "span_data"): return @@ -620,152 +754,18 @@ def on_span_end(self, span): type(span_data).__name__ == "ResponseSpanData" or isinstance(span_data, GenerationSpanData) ): - # Extract prompt data from input - input_data = getattr(span_data, "input", []) - _extract_prompt_attributes(otel_span, input_data, trace_content) - - # Add function/tool specifications to the request using OpenAI semantic conventions - response = getattr(span_data, "response", None) - if ( - response - and hasattr(response, "tools") - and response.tools - ): - # Extract tool specifications - for i, tool in enumerate(response.tools): - if hasattr(tool, "function"): - function = tool.function - otel_span.set_attribute( - f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.name", - getattr(function, "name", ""), - ) - otel_span.set_attribute( - f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.description", - getattr(function, "description", ""), - ) - if hasattr(function, "parameters"): - otel_span.set_attribute( - f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.parameters", - json.dumps(function.parameters), - ) - elif hasattr(tool, "name"): - # Direct function format - otel_span.set_attribute( - f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.name", - tool.name, - ) - if hasattr(tool, "description"): - otel_span.set_attribute( - f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.description", - tool.description, - ) - if hasattr(tool, "parameters"): - otel_span.set_attribute( - f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.parameters", - json.dumps(tool.parameters), - ) - - if response: - model_settings = _extract_response_attributes(otel_span, response, trace_content) - 
self._last_model_settings = model_settings - - # Legacy fallback for other span types - elif span_data: - input_data = getattr(span_data, "input", []) - _extract_prompt_attributes(otel_span, input_data, trace_content) - - response = getattr(span_data, "response", None) - if response: - model_settings = _extract_response_attributes(otel_span, response, trace_content) - self._last_model_settings = model_settings - - elif ( - _has_realtime_spans - and SpeechSpanData - and isinstance(span_data, SpeechSpanData) - and trace_content - ): - input_text = getattr(span_data, "input", None) - if input_text: - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", input_text - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.role", "user" - ) - - output_audio = getattr(span_data, "output", None) - if output_audio: - if not isinstance(output_audio, (bytes, bytearray)): - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", - str(output_audio), - ) - - elif ( - _has_realtime_spans - and TranscriptionSpanData - and isinstance(span_data, TranscriptionSpanData) - and trace_content - ): - input_audio = getattr(span_data, "input", None) - if input_audio: - if not isinstance(input_audio, (bytes, bytearray)): - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", - str(input_audio), - ) - - output_text = getattr(span_data, "output", None) - if output_text: - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", output_text - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role", "assistant" - ) + self._end_generation_span(otel_span, span_data, trace_content) - elif ( - _has_realtime_spans - and SpeechGroupSpanData - and isinstance(span_data, SpeechGroupSpanData) - and trace_content - ): - input_text = getattr(span_data, "input", None) - if input_text: - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", input_text - ) - otel_span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.role", "user" - ) + elif span_data and isinstance(span_data, FunctionSpanData): + self._end_function_span(otel_span, span_data, trace_content) - elif span_data and type(span_data).__name__ == "AgentSpanData": - # For agent spans, add the model settings we stored from the response span - if hasattr(self, "_last_model_settings") and self._last_model_settings: - for key, value in self._last_model_settings.items(): - if key == "temperature": - otel_span.set_attribute( - GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE, value - ) - elif key == "max_tokens": - otel_span.set_attribute( - GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS, value - ) - elif key == "top_p": - otel_span.set_attribute( - GenAIAttributes.GEN_AI_REQUEST_TOP_P, value - ) - elif key == "model": - otel_span.set_attribute( - GenAIAttributes.GEN_AI_REQUEST_MODEL, value - ) - elif key == "frequency_penalty": - otel_span.set_attribute( - "openai.agent.model.frequency_penalty", value - ) - # Note: prompt_attributes, completion_attributes, and usage tokens are now - # on response spans only + elif trace_content and span_data and _has_realtime_spans: + if SpeechSpanData and isinstance(span_data, SpeechSpanData): + self._set_realtime_io_attributes(otel_span, span_data, has_output=True) + elif TranscriptionSpanData and isinstance(span_data, TranscriptionSpanData): + self._set_realtime_io_attributes(otel_span, span_data, has_output=True) + elif SpeechGroupSpanData and isinstance(span_data, SpeechGroupSpanData): + 
self._set_realtime_io_attributes(otel_span, span_data, has_output=False) if hasattr(span, "error") and span.error: otel_span.set_status(Status(StatusCode.ERROR, str(span.error))) @@ -778,6 +778,237 @@ def on_span_end(self, span): context.detach(self._span_contexts[span]) del self._span_contexts[span] + # ------------------------------------------------------------------ + # on_span_start handlers (extracted from the former if-elif chain) + # ------------------------------------------------------------------ + + def _resolve_agent_parent(self, fallback_context): + """Resolve parent context, preferring the current agent span.""" + current = self._find_current_agent_span() + if current: + return set_span_in_context(current) + return fallback_context + + def _start_agent_span(self, span_data, parent_context, trace_id): + """Create an OTel span for an AgentSpanData.""" + agent_name = getattr(span_data, "name", None) or "unknown_agent" + + if set_agent_name is not None: + set_agent_name(agent_name) + + handoff_parent = None + if trace_id: + handoff_key = f"{agent_name}:{trace_id}" + if parent_agent_name := self._reverse_handoffs_dict.pop( + handoff_key, None + ): + handoff_parent = parent_agent_name + + attributes = { + SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.AGENT.value, + GenAIAttributes.GEN_AI_AGENT_NAME: agent_name, + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "invoke_agent", + } + + if handoff_parent: + attributes[GEN_AI_HANDOFF_PARENT_AGENT] = handoff_parent + + if hasattr(span_data, "handoffs") and span_data.handoffs: + handoffs_list = [] + trace_content = should_send_prompts() + for handoff_agent in span_data.handoffs: + handoff = {"name": getattr(handoff_agent, "name", "unknown")} + if trace_content: + handoff["instructions"] = getattr( + handoff_agent, "instructions", "No instructions" + ) + handoffs_list.append(handoff) + attributes[OPENAI_AGENT_HANDOFFS] = json.dumps(handoffs_list) + + return self.tracer.start_span( + f"{agent_name}.agent", + kind=SpanKind.INTERNAL, + context=parent_context, + attributes=attributes, + ) + + def _start_handoff_span(self, span_data, parent_context, trace_id): + """Create an OTel span for a HandoffSpanData.""" + from_agent = getattr(span_data, "from_agent", None) or "unknown" + to_agent = getattr(span_data, "to_agent", None) or "unknown" + + if to_agent and to_agent != "unknown" and trace_id: + handoff_key = f"{to_agent}:{trace_id}" + self._reverse_handoffs_dict[handoff_key] = from_agent + + if len(self._reverse_handoffs_dict) > 1000: + self._reverse_handoffs_dict.popitem(last=False) + + from_agent_span = self._find_agent_span(from_agent) + if from_agent_span: + parent_context = set_span_in_context(from_agent_span) + + handoff_attributes = { + SpanAttributes.TRACELOOP_SPAN_KIND: "handoff", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "handoff", + } + + if from_agent and from_agent != "unknown": + handoff_attributes[GEN_AI_HANDOFF_FROM_AGENT] = from_agent + handoff_attributes[GenAIAttributes.GEN_AI_AGENT_NAME] = from_agent + if to_agent and to_agent != "unknown": + handoff_attributes[GEN_AI_HANDOFF_TO_AGENT] = to_agent + + return self.tracer.start_span( + f"{from_agent} → {to_agent}.handoff", + kind=SpanKind.INTERNAL, + context=parent_context, + attributes=handoff_attributes, + ) + + def _start_function_span(self, span_data, parent_context): + """Create an OTel span for a FunctionSpanData.""" + tool_name = getattr(span_data, "name", None) or 
"unknown_tool" + + tool_attributes = { + SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.TOOL.value, + GenAIAttributes.GEN_AI_TOOL_NAME: tool_name, + GenAIAttributes.GEN_AI_TOOL_TYPE: "function", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "execute_tool", + } + + if hasattr(span_data, "description") and span_data.description: + # Only use description if it's not a generic class description + desc = span_data.description + if desc and not desc.startswith("Represents a Function Span"): + tool_attributes[GenAIAttributes.GEN_AI_TOOL_DESCRIPTION] = desc + + return self.tracer.start_span( + f"{tool_name}.tool", + kind=SpanKind.INTERNAL, + context=parent_context, + attributes=tool_attributes, + ) + + def _start_generation_span(self, parent_context, span_data=None): + """Create an OTel span for a GenerationSpanData or ResponseSpanData.""" + attributes = { + GenAIAttributes.GEN_AI_OPERATION_NAME: "chat", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + } + model = getattr(span_data, "model", None) if span_data else None + if model: + attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = model + return self.tracer.start_span( + "openai.response", + kind=SpanKind.CLIENT, + context=parent_context, + attributes=attributes, + start_time=time.time_ns(), + ) + + def _start_realtime_span(self, span_data, parent_context, span_name, operation): + """Create an OTel span for a realtime span (Speech/Transcription/SpeechGroup). + + NOTE: "speech", "transcription", "speech_group" are OpenAI + Realtime API-specific operations with no well-known OTel + equivalents. Kept as custom operation names intentionally. + """ + attributes = { + GenAIAttributes.GEN_AI_OPERATION_NAME: operation, + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + } + + model = getattr(span_data, "model", None) + if model: + attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = model + + return self.tracer.start_span( + span_name, + kind=SpanKind.CLIENT, + context=parent_context, + attributes=attributes, + start_time=time.time_ns(), + ) + + # ------------------------------------------------------------------ + # on_span_end helpers (extracted from the former if-elif chain) + # ------------------------------------------------------------------ + + def _end_generation_span(self, otel_span, span_data, trace_content): + """Handle on_span_end logic for generation/response spans.""" + input_data = getattr(span_data, "input", []) + _extract_prompt_attributes(otel_span, input_data, trace_content) + + response = getattr(span_data, "response", None) + tools = getattr(span_data, "tools", None) or ( + getattr(response, "tools", None) if response else None + ) + if trace_content and tools: + tool_defs = _extract_tool_definitions(tools) + if tool_defs: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_TOOL_DEFINITIONS, json.dumps(tool_defs) + ) + + if response: + _extract_response_attributes(otel_span, response, trace_content) + + def _end_function_span(self, otel_span, span_data, trace_content): + """Handle on_span_end logic for function/tool spans. + + Sets ``gen_ai.tool.call.arguments`` and ``gen_ai.tool.call.result`` + from ``FunctionSpanData.input`` / ``.output``. Both are content + attributes and are only emitted when *trace_content* is True. 
+ """ + if not trace_content: + return + + tool_input = getattr(span_data, "input", None) + if tool_input is not None: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS, + tool_input if isinstance(tool_input, str) else json.dumps(tool_input), + ) + + tool_output = getattr(span_data, "output", None) + if tool_output is not None: + otel_span.set_attribute( + GenAIAttributes.GEN_AI_TOOL_CALL_RESULT, + tool_output if isinstance(tool_output, str) else json.dumps(tool_output), + ) + + def _set_realtime_io_attributes(self, otel_span, span_data, has_output=True): + """Set input/output message attributes for realtime spans.""" + input_val = getattr(span_data, "input", None) + if input_val and not isinstance(input_val, (bytes, bytearray)): + otel_span.set_attribute( + GenAIAttributes.GEN_AI_INPUT_MESSAGES, + json.dumps([{"role": "user", "parts": [{"type": "text", "content": str(input_val)}]}]), + ) + + if not has_output: + return + + output_val = getattr(span_data, "output", None) + if output_val and not isinstance(output_val, (bytes, bytearray)): + out_msg = { + "role": "assistant", + "parts": [{"type": "text", "content": str(output_val)}], + "finish_reason": "", + } + otel_span.set_attribute( + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps([out_msg]), + ) + + # ------------------------------------------------------------------ + # Span lookup helpers + # ------------------------------------------------------------------ + def _find_agent_span(self, agent_name: str): """Find the OpenTelemetry span for a given agent.""" for agents_span, otel_span in self._otel_spans.items(): diff --git a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py index 108fc30701..48d6a9e442 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py +++ b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py @@ -4,8 +4,10 @@ so we need to patch the RealtimeSession class directly to add OpenTelemetry tracing. 
""" +import json import logging import time +from collections import OrderedDict from typing import Dict, Any, Optional, List, Tuple from opentelemetry.trace import Tracer, Status, StatusCode, SpanKind, Span from opentelemetry.trace import set_span_in_context @@ -108,7 +110,8 @@ def __init__(self, tracer: Tracer): self.prompt_agent_name: Optional[str] = None self.starting_agent_name: Optional[str] = None self.model_name: str = "gpt-4o-realtime-preview" - self.seen_completions: set = set() + self._seen_completions: OrderedDict = OrderedDict() + self._seen_completions_max: int = 1000 self.pending_usage: Optional[Dict[str, int]] = None def start_workflow_span(self, agent_name: str): @@ -116,10 +119,10 @@ def start_workflow_span(self, agent_name: str): self.starting_agent_name = agent_name self.workflow_span = self.tracer.start_span( "Realtime Session", - kind=SpanKind.CLIENT, + kind=SpanKind.INTERNAL, attributes={ SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.WORKFLOW.value, - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", SpanAttributes.TRACELOOP_WORKFLOW_NAME: "Realtime Session", }, ) @@ -170,12 +173,13 @@ def start_agent_span(self, agent_name: str): span = self.tracer.start_span( f"{agent_name}.agent", - kind=SpanKind.CLIENT, + kind=SpanKind.INTERNAL, context=parent_context, attributes={ SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.AGENT.value, GenAIAttributes.GEN_AI_AGENT_NAME: agent_name, - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "invoke_agent", }, ) self.agent_spans[agent_name] = span @@ -202,7 +206,8 @@ def start_tool_span(self, tool_name: str, agent_name: Optional[str] = None): SpanAttributes.TRACELOOP_SPAN_KIND: TraceloopSpanKindValues.TOOL.value, GenAIAttributes.GEN_AI_TOOL_NAME: tool_name, GenAIAttributes.GEN_AI_TOOL_TYPE: "function", - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "execute_tool", }, ) self.tool_spans[tool_name] = span @@ -214,8 +219,9 @@ def end_tool_span( """End a tool span.""" if tool_name in self.tool_spans: span = self.tool_spans[tool_name] - if output is not None: - span.set_attribute(GenAIAttributes.GEN_AI_TOOL_CALL_RESULT, str(output)) + if output is not None and should_send_prompts(): + result = output if isinstance(output, str) else json.dumps(output, default=str) + span.set_attribute(GenAIAttributes.GEN_AI_TOOL_CALL_RESULT, result) if error: span.set_status(Status(StatusCode.ERROR, str(error))) else: @@ -239,7 +245,8 @@ def create_handoff_span(self, from_agent: str, to_agent: str): context=parent_context, attributes={ SpanAttributes.TRACELOOP_SPAN_KIND: "handoff", - GenAIAttributes.GEN_AI_SYSTEM: "openai_agents", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "handoff", GEN_AI_HANDOFF_FROM_AGENT: from_agent, GEN_AI_HANDOFF_TO_AGENT: to_agent, }, @@ -258,8 +265,8 @@ def start_audio_span(self, item_id: str, content_index: int): kind=SpanKind.CLIENT, context=parent_context, attributes={ - SpanAttributes.LLM_REQUEST_TYPE: "realtime", - GenAIAttributes.GEN_AI_SYSTEM: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "realtime", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", }, ) self.audio_spans[span_key] = span @@ -309,14 +316,20 @@ def record_usage(self, usage: Any): "total_tokens": getattr(usage, "total_tokens", 0) or 0, } + @property + def 
seen_completions(self): + return self._seen_completions + def record_completion(self, role: str, content: str): """Record a completion message - creates an LLM span with prompt and completion.""" if not content: return - content_hash = hash(content[:100]) - if content_hash in self.seen_completions: + content_hash = hash(content) + if content_hash in self._seen_completions: return - self.seen_completions.add(content_hash) + self._seen_completions[content_hash] = None + if len(self._seen_completions) > self._seen_completions_max: + self._seen_completions.popitem(last=False) self.create_llm_span(content) def create_llm_span(self, completion_content: str): @@ -351,13 +364,16 @@ def create_llm_span(self, completion_content: str): context=parent_context, start_time=start_time, attributes={ - SpanAttributes.LLM_REQUEST_TYPE: "realtime", - SpanAttributes.LLM_SYSTEM: "openai", - GenAIAttributes.GEN_AI_SYSTEM: "openai", + GenAIAttributes.GEN_AI_OPERATION_NAME: "realtime", + GenAIAttributes.GEN_AI_PROVIDER_NAME: "openai", GenAIAttributes.GEN_AI_REQUEST_MODEL: model_name_str, }, ) + span.set_attribute( + GenAIAttributes.GEN_AI_RESPONSE_MODEL, model_name_str, + ) + if self.pending_usage: if self.pending_usage.get("input_tokens"): span.set_attribute( @@ -369,27 +385,38 @@ def create_llm_span(self, completion_content: str): GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS, self.pending_usage["output_tokens"], ) + if self.pending_usage.get("total_tokens"): + span.set_attribute( + SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS, + self.pending_usage["total_tokens"], + ) self.pending_usage = None if should_send_prompts(): if prompt_content: + input_msg = { + "role": prompt_role or "user", + "parts": [{"type": "text", "content": prompt_content}], + } span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.role", prompt_role or "user" - ) - span.set_attribute( - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", prompt_content + GenAIAttributes.GEN_AI_INPUT_MESSAGES, + json.dumps([input_msg]), ) + out_msg = { + "role": "assistant", + "parts": [{"type": "text", "content": completion_content}], + "finish_reason": "", + } span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role", "assistant" - ) - span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", completion_content - ) - span.set_attribute( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.finish_reason", "stop" + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps([out_msg]), ) + # Realtime API does not provide finish reasons; set top-level + # attribute only when a meaningful value is available (consistent + # with _hooks.py which omits the attribute when mapped value is None). 
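+        # A hypothetical serialized gen_ai.output.messages value from above:
+        # [{"role": "assistant", "parts": [{"type": "text",
+        #   "content": "Hello!"}], "finish_reason": ""}]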
+ span.set_status(Status(StatusCode.OK)) span.end() diff --git a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/utils.py b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/utils.py index 92b665b724..a8b1fdc60b 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/utils.py +++ b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/utils.py @@ -10,6 +10,8 @@ # Handoff span attribute names GEN_AI_HANDOFF_FROM_AGENT = "gen_ai.handoff.from_agent" GEN_AI_HANDOFF_TO_AGENT = "gen_ai.handoff.to_agent" +GEN_AI_HANDOFF_PARENT_AGENT = "gen_ai.agent.handoff_parent" +OPENAI_AGENT_HANDOFFS = "openai.agent.handoffs" _TRACELOOP_TRACE_CONTENT = "TRACELOOP_TRACE_CONTENT" diff --git a/packages/opentelemetry-instrumentation-openai-agents/pyproject.toml b/packages/opentelemetry-instrumentation-openai-agents/pyproject.toml index 551683bada..dc33fb9794 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/pyproject.toml +++ b/packages/opentelemetry-instrumentation-openai-agents/pyproject.toml @@ -34,7 +34,7 @@ dev = [ ] test = [ "litellm>=1.71.2,<2", - "openai-agents>=0.6.9", + "openai-agents>=0.14.2", "opentelemetry-sdk>=1.38.0,<2", "pytest-asyncio>=1.0.0,<2", "pytest-recording>=0.13.1,<0.14.0", diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/cassettes/test_openai_agents/test_handoff_span_operation_name.yaml b/packages/opentelemetry-instrumentation-openai-agents/tests/cassettes/test_openai_agents/test_handoff_span_operation_name.yaml new file mode 100644 index 0000000000..dcf2387b8f --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/cassettes/test_openai_agents/test_handoff_span_operation_name.yaml @@ -0,0 +1,224 @@ +interactions: +- request: + body: '{"include":[],"input":[{"content":"Please handle this task by delegating + to another agent.","role":"user"}],"instructions":"You decide which agent to + handoff to.","model":"gpt-4.1","stream":false,"tools":[{"name":"handoff_to_agent_a","parameters":{"properties":{"input":{"title":"Input","type":"string"}},"required":["input"],"title":"handoff_to_agent_a_args","type":"object","additionalProperties":false},"strict":true,"type":"function","description":"Handoff + to Agent A for specific tasks"},{"name":"handoff_to_agent_b","parameters":{"properties":{"input":{"title":"Input","type":"string"}},"required":["input"],"title":"handoff_to_agent_b_args","type":"object","additionalProperties":false},"strict":true,"type":"function","description":"Handoff + to Agent B for different tasks"},{"name":"transfer_to_agenta","parameters":{"additionalProperties":false,"type":"object","properties":{},"required":[]},"strict":true,"type":"function","description":"Handoff + to the AgentA agent to handle the request. "},{"name":"transfer_to_agentb","parameters":{"additionalProperties":false,"type":"object","properties":{},"required":[]},"strict":true,"type":"function","description":"Handoff + to the AgentB agent to handle the request. 
"}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '1223' + content-type: + - application/json + cookie: + - __cf_bm=WwDHl7j6.dqwOcLIJAXqGOLTR6ZUq3JCq47vW3LBIBs-1755280559-1.0.1.1-na9dmQo.4u4zv1vUQ7SN457JVcBR1ifes3cOUutsLuVtLSfo_sZ1I8fRayi6NDR2VKiwUFBhrUYM85dJ8BB7Ior2pM9Ng5MfNJwvGRd3lgE; + _cfuvid=PWHn6CD5_OXbE3jv9HT7E4FDlSvoTN5AciqTl4Chslg-1755280559217-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - Agents/Python 0.2.7 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.99.9 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.13 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//5FbLbts6EN37Kwitk0Cy5fixS4qivbt7VxdFEQgjcmizpkiVHCZNA/97 + IUrWI482QIsCRTeGPIcznDlnSM7DjLFEiWTLEoe+Li7XG7nKRbbYLNfrbJNBfjlHyHGV5pfLNF2k + KwmcCyFFXpabfJ6cNQFs+Qk5nYJY47G1c4dAKAposGy1XM7X6XKziJgnoOAbH26rWiOhaJ1K4Ied + s8E0WUnQHqMZnbMu2TITtI4GZU6OhUACpf0U9eQCJ2VN3OSDDUwgVwLZ3V7xPYMdGmJk2R6MsFIy + shdtAhV8KWygOlBB9oBmErcByVpdcNDTHSsrUDdb7Wo6zy+y83k6X56n+XmWdyzFmMmWfZwxxthD + /O3pl7wnvyyXPJJ/WcpFLuVilS3yfJ0/S34MQfc1xiDBxJJjdgP8EtcRBLcLFRqK+MNxAJoYRZtc + /HxH9PXdytzd0+GN/S/c5eu35u3/7tPgYaCKaZAD4yW6gmwReYYkLjnOGLuJXNTgQGvUUy7JhVbs + 2uGtssEXp35q8+i5rp2taio48D0WB7wfYw7BW6PMLtl2HCcopXU0WtRwEqoK3MlzxtixbUuQSPeF + EmhISYWTlvPobhXHglp7IlBC0JR0DW0djosgrGp0QCGas4u0s36hITNpXQXD/5GUcV3LWpfxLbrS + ekVNzkmFQoUq6fNuedxbxaM3BLJJD/inTfe4YwYRBXruVB2NW5a8748Hu4pH5opJ65ivkSupOCPw + B/+0BbpT1XdAAcOaRvwKCZ0fFd6qWqMjhVN7PM3tyRkbmyIU6bjbPxE/e4R2JXpyTTeMwGP/fRx8 + Eoefg3IoerImm/eWm5FLn8DTcgtwO5+M13bpdJflCAEhVEM36H/HDMS7b/Yoz1hNvGubPhv3xy+Q + 9jpKK5SU6OLt+Epty79L2/LP0pb22Op7NX30NEaooQY9XbBX3eM/FPp7Bb+KsWmnvCTize8m7/qn + ySv/LPL6t5psXWi7q50tm6hpb6zHz5oLhsOJQKE8lPo00QUPOxzevHjmhtEqW27OngKjme5hGEj4 + HsXgmU6ex8cjW7Z4Dngubj8xvBSaLIEeRV7N+3c3+OmIUCGBAIJI/+z4DQAA//8DAFuxG79gCwAA + headers: + CF-RAY: + - 96fa91bb0eaf09c9-HFA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 15 Aug 2025 17:56:33 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - traceloop + openai-processing-ms: + - '595' + openai-project: + - proj_tzz1TbPPOXaf6j9tEkVUBIAa + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '597' + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '30000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '29999624' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_ac5a5da3776763a1e744e75310bb345e + status: + code: 200 + message: OK +- request: + body: '{"include":[],"input":[{"content":"Please handle this task by delegating + to another agent.","role":"user"},{"arguments":"{}","call_id":"call_GttzG7nwytkCoQuw48EnEWrj","name":"transfer_to_agenta","type":"function_call","id":"fc_689f74d1bb5c8191a6bf34ff3713448407faccddfd4bb942","status":"completed"},{"call_id":"call_GttzG7nwytkCoQuw48EnEWrj","output":"{\"assistant\": + 
\"AgentA\"}","type":"function_call_output"}],"instructions":"Agent A does something.","model":"gpt-4.1","stream":false,"tools":[]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '498' + content-type: + - application/json + cookie: + - __cf_bm=WwDHl7j6.dqwOcLIJAXqGOLTR6ZUq3JCq47vW3LBIBs-1755280559-1.0.1.1-na9dmQo.4u4zv1vUQ7SN457JVcBR1ifes3cOUutsLuVtLSfo_sZ1I8fRayi6NDR2VKiwUFBhrUYM85dJ8BB7Ior2pM9Ng5MfNJwvGRd3lgE; + _cfuvid=PWHn6CD5_OXbE3jv9HT7E4FDlSvoTN5AciqTl4Chslg-1755280559217-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - Agents/Python 0.2.7 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.99.9 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.13 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//dFTBjts4DL3PV7C69DITOKkziXPrce+9FQuDlmhHG1n0StS0QTH/vrCc + OHZ3egkcPvKJfHzSrycAZY06gQoUh/r1WLWH0uwK0sfjtto2B10dd01F2KKhQheHFrU2pjVl01Tl + Tj2PBNz8Q1ruJOwjTXEdCIVMjSO2Pez3u2Oxr8qMRUFJcazR3A+OhMxU1KC+dIGTH7tq0UXKYQqB + gzqBT87lgPX3wtqQoHVxjUYJSYtlnw/52pEX+AqGKULknuRsfbeZjuzxZ81JhiS18IX8imkEhdnV + Gt36jJ4NuZG8G+Sl3GxfdsVu/1KUL9vypkvmVCf4/gQA8Cv/zoL3sZv1PlCb9cZtcdTFa4klVs2u + On6od+aQ60CZhWLEjh7An4TNoGYv5B8tLdta0d7loJ8yV+cE9J4F77p+/3sFOu6GwM0HSCY6gfqG + 8QJnjNAQeTDkqBstAsJwW9Hm/gE/rHNwRm8cgZwJAv2bKMoG/mrhygnO+EbQcyBYbhs4gCcygB7S + YFAI2Od6wXj5HGEI3AWK8RkGRxgJHAn0BBfPPz6puen329c8hwrssjYYo42CXqbkMTEnqQEDOkdu + 7RcJabLwEOjNcor1/ZbU2Qizn4bA/SC1Rn2m+kLXJRYII3vrO3W6LUxR23KQRdK4/NT3GO6VTwDv + 02XDluRaW0NebGtpdZEihTerqZYprgy1mNy0dhWFAy2HEOoHCigph7eb4hbN67111nLo8fF/Yauc + N6l26/iNQsPRynUys7GpV3Pfk45ntnoSPgmrGXi4TAkP9cJ7xRwclj2G5HV2bp7SRmzc/dFJ+Q7N + A1i/egv2h+f/xxevzjxmXp15FBarUX9/Yr68fgR8xDtv/0/UwoLuAVZfZglTXG+7J0GDgiP9+9P7 + fwAAAP//AwCfc+CEAQYAAA== + headers: + CF-RAY: + - 96fa91c02a0f09c9-HFA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 15 Aug 2025 17:56:35 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - traceloop + openai-processing-ms: + - '1282' + openai-project: + - proj_tzz1TbPPOXaf6j9tEkVUBIAa + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '1284' + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '30000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '29999924' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_d1c98a5aeb66ec1b124d4c4d4ad0605e + status: + code: 200 + message: OK +version: 1 diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/cassettes/test_openai_agents/test_tool_span_operation_name.yaml b/packages/opentelemetry-instrumentation-openai-agents/tests/cassettes/test_openai_agents/test_tool_span_operation_name.yaml new file mode 100644 index 0000000000..4062e28876 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/cassettes/test_openai_agents/test_tool_span_operation_name.yaml @@ -0,0 +1,283 @@ +interactions: +- request: + body: 
'{"include":[],"input":[{"content":"What is the weather in London?","role":"user"}],"instructions":"You + get the weather for a city using the get_weather tool.","model":"gpt-4.1","stream":false,"tools":[{"name":"get_weather","parameters":{"properties":{"city":{"title":"City","type":"string"}},"required":["city"],"title":"get_weather_args","type":"object","additionalProperties":false},"strict":true,"type":"function","description":"Gets + the current weather for a specified city."}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '482' + content-type: + - application/json + cookie: + - __cf_bm=WwDHl7j6.dqwOcLIJAXqGOLTR6ZUq3JCq47vW3LBIBs-1755280559-1.0.1.1-na9dmQo.4u4zv1vUQ7SN457JVcBR1ifes3cOUutsLuVtLSfo_sZ1I8fRayi6NDR2VKiwUFBhrUYM85dJ8BB7Ior2pM9Ng5MfNJwvGRd3lgE; + _cfuvid=PWHn6CD5_OXbE3jv9HT7E4FDlSvoTN5AciqTl4Chslg-1755280559217-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - Agents/Python 0.2.7 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.99.9 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.13 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA3xUyW7bMBC9+ysInuNAcqxY9q0LWqAo0Fx6KJpAGJMjmQ1FquQwbRD43wtSthY3 + 7cWQ53G2N2/mZcEYV5LvGHfou+q23Nab9V6u8kyU+TYDLGElsnWONeaykNkWV1mey1Uh1vu63vCr + GMDuf6CgcxBrPPZ24RAIZQURyzdFsSqzYnOTME9AwUcfYdtOI6HsnfYgHhtng4lV1aA9JjM6Zx3f + MRO0TgZlzo6VRAKl/Rz15IIgZU1K8s0G1iAxOiD7hUAHdKy2jgETip5Z8Mo0CWyQqvMDslZf91W1 + 8LuygbpAFdlHNLNkEYxvKwF6XkZrJeqYv+loub7Ol6tsVSyz9TJfn6hLMfmOfV8wxthL+h1mUoth + InWZZXEi2zy/ucnK7W2Ry1LC9tWJpBD03GEKEkziIVU3wv8aQALBNaFFQwl/ueeRpHu+u+efrZHW + 3PPj+DaGrfp60+fbkpq7rc6+fvn0Y/v+w3qDb/bF+ks7ehhoU2UTrnnCjgvGHhIvHTjQGvWcV3Kh + V0Pn8EnZ4Kuz4PoCBt47Z9uOKgHigNUjPk8xh+CtUabhuxPfHOvaOpo8ivyEtgV39lwwdux1CzXS + c6UkGlK1wpkmPbonJbCi3s4l1hA08ZPircNpE4Rthw4oJHN+nZ2sv2msrLauhfH/ZKzpXc/aqeIn + dHvrFcWaeYtShZYPdfc8HqwSyRsCWT4A/m8BXqpnnJ5EL5zqknHH+EcknzZHBOfQ0MV6+Q5F5Emm + Rbv+vwgGMI6/RULnJ633c+3QkcK5Paqw73tqi10o0inLuwhfXYCnFj25qIYJeBy+j6MPd/gzKIdy + IGuaejA8TDyG9JMmK3CN59NHpzJOV3SCgJQq0gz6btp3OoqLi/pSF+kIR31dbBPZrtK26ZzdxwDZ + YOymwnPBCDiPVSoPe30+ysFDg6MqlZkdws3q6m/75Cq/jKdCHFCOjtlMv5f3NS9eA16LO6z0v0KT + JdAjWG6GvQh+vsItEkggiOGPi+MfAAAA//8DAFEaBBMhBwAA + headers: + CF-RAY: + - 96fa913b3a9209c9-HFA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 15 Aug 2025 17:56:14 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - traceloop + openai-processing-ms: + - '1154' + openai-project: + - proj_tzz1TbPPOXaf6j9tEkVUBIAa + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '1156' + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '30000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '29999711' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_4e4374bb248e1a33239c0d7d3ad949e0 + status: + code: 200 + message: OK +- request: + body: 
"{\"data\":[{\"object\":\"trace.span\",\"id\":\"span_06251ea089b64345a73bd1fa\",\"trace_id\":\"trace_6a430ad653c745b78c89622b8e61fccc\",\"parent_id\":\"span_0e985f83783e4a9facb13938\",\"started_at\":\"2025-08-15T17:56:08.287798+00:00\",\"ended_at\":\"2025-08-15T17:56:12.476921+00:00\",\"span_data\":{\"type\":\"response\",\"response_id\":\"resp_689f74b98be8819ead89b1c91c2819b20a8b48c839804f1e\"},\"error\":null},{\"object\":\"trace.span\",\"id\":\"span_0e985f83783e4a9facb13938\",\"trace_id\":\"trace_6a430ad653c745b78c89622b8e61fccc\",\"parent_id\":null,\"started_at\":\"2025-08-15T17:56:08.287474+00:00\",\"ended_at\":\"2025-08-15T17:56:12.477388+00:00\",\"span_data\":{\"type\":\"agent\",\"name\":\"testAgent\",\"handoffs\":[],\"tools\":[],\"output_type\":\"str\"},\"error\":null},{\"object\":\"trace\",\"id\":\"trace_6549cb4b93ea47c8967199b27a04d7c0\",\"workflow_name\":\"Agent + workflow\",\"group_id\":null,\"metadata\":null},{\"object\":\"trace.span\",\"id\":\"span_dd1aefc5a00e419d8f568f10\",\"trace_id\":\"trace_6549cb4b93ea47c8967199b27a04d7c0\",\"parent_id\":\"span_50bdc65df957417a9c596a17\",\"started_at\":\"2025-08-15T17:56:12.490029+00:00\",\"ended_at\":\"2025-08-15T17:56:14.226669+00:00\",\"span_data\":{\"type\":\"response\",\"response_id\":\"resp_689f74bd210c8190ae8a2c041efe1d5d09e2011d25c4bff7\"},\"error\":null},{\"object\":\"trace.span\",\"id\":\"span_41eb9236408240b8a88180c6\",\"trace_id\":\"trace_6549cb4b93ea47c8967199b27a04d7c0\",\"parent_id\":\"span_50bdc65df957417a9c596a17\",\"started_at\":\"2025-08-15T17:56:14.226963+00:00\",\"ended_at\":\"2025-08-15T17:56:14.227365+00:00\",\"span_data\":{\"type\":\"function\",\"name\":\"get_weather\",\"input\":\"{\\\"city\\\":\\\"London\\\"}\",\"output\":\"It's + cloudy with 15\xB0C\",\"mcp_data\":null},\"error\":null}]}" + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '1601' + content-type: + - application/json + cookie: + - __cf_bm=UhrfEFws9O_ZBKuSryCKFovrTxciXL8p2WJuM1K2dN8-1755280562-1.0.1.1-dIIsnsWKGJtA9W6u0MbXjq7UUseSGAthIGNSZMriLzkecTBUlPjjJFr6r0QnteF8Ul.liPTWhJI6mlCKQBREwPTAAOYdCC2ZirAu9ZrwIWA; + _cfuvid=zDtlMy4g5CGjInt8L2ecM4HeWcHtz0bFgxVbfE5vSqk-1755280562683-0.0.1.1-604800000 + host: + - api.openai.com + openai-beta: + - traces=v1 + user-agent: + - python-httpx/0.28.1 + method: POST + uri: https://api.openai.com/v1/traces/ingest + response: + body: + string: '' + headers: + CF-RAY: + - 96fa9147be745bf5-FRA + Connection: + - keep-alive + Date: + - Fri, 15 Aug 2025 17:56:15 GMT + Server: + - cloudflare + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - traceloop + openai-processing-ms: + - '201' + openai-project: + - proj_tzz1TbPPOXaf6j9tEkVUBIAa + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '205' + x-request-id: + - req_05380352daaa71309ccb911d54f73031 + status: + code: 204 + message: No Content +- request: + body: "{\"include\":[],\"input\":[{\"content\":\"What is the weather in London?\",\"role\":\"user\"},{\"arguments\":\"{\\\"city\\\":\\\"London\\\"}\",\"call_id\":\"call_B8tgP9l0UOJj9DF47eAb54Om\",\"name\":\"get_weather\",\"type\":\"function_call\",\"id\":\"fc_689f74bdf800819091133089651d8da909e2011d25c4bff7\",\"status\":\"completed\"},{\"call_id\":\"call_B8tgP9l0UOJj9DF47eAb54Om\",\"output\":\"It's + cloudy with 15\xB0C\",\"type\":\"function_call_output\"}],\"instructions\":\"You + 
get the weather for a city using the get_weather tool.\",\"model\":\"gpt-4.1\",\"stream\":false,\"tools\":[{\"name\":\"get_weather\",\"parameters\":{\"properties\":{\"city\":{\"title\":\"City\",\"type\":\"string\"}},\"required\":[\"city\"],\"title\":\"get_weather_args\",\"type\":\"object\",\"additionalProperties\":false},\"strict\":true,\"type\":\"function\",\"description\":\"Gets + the current weather for a specified city.\"}]}" + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '794' + content-type: + - application/json + cookie: + - __cf_bm=WwDHl7j6.dqwOcLIJAXqGOLTR6ZUq3JCq47vW3LBIBs-1755280559-1.0.1.1-na9dmQo.4u4zv1vUQ7SN457JVcBR1ifes3cOUutsLuVtLSfo_sZ1I8fRayi6NDR2VKiwUFBhrUYM85dJ8BB7Ior2pM9Ng5MfNJwvGRd3lgE; + _cfuvid=PWHn6CD5_OXbE3jv9HT7E4FDlSvoTN5AciqTl4Chslg-1755280559217-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - Agents/Python 0.2.7 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.99.9 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.13 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//dFU9c+M2EO31KzCobQ0pk5al9oo0KVKkyVwynCWwpBCDAAMsfKe50X/P + ABRJUOdrPPK+/d73wB87xriS/My4Qz82r2+n7li1KE519VaeirdXWR7xWLUv3ctJ1K/FCQ9FWcpD + Laq26478KSaw7b8oaE5ijcfJLhwCoWwgYuWxrg9vRX2sEuYJKPgYI+wwaiSUU1AL4r13NpjYVQfa + YzKjc9bxMzNB62RQZg5sJBIo7beoJxcEKWtSkb9sYD0Sowuybwh0Qcc66xgwoejKglemT2CP1MwO + ZK3eT10N8L2xgcZADdl3NJtiEYy+jQC9bWOwEnWs34/0XO3L50NxqJ+L6rms7qtLOfmZfd0xxtiP + 9He5yeD7+STdC4p0krZrq7rCrqoPh0qWL5+eJOWg64gpC3oPPa7Ar3afQGENoVlbytvapJ3Xgd9p + iU4OYIwlmFf/9Z8NqG0/Ott+gqREZ8b/zE6kDPvdGmkNU56J4Bwa0lcmtA3yyr4pujBghMOIDig4 + ZLZjZf13KIq2+LLnS/rb/ddSkTur0xTgvfIEhibn6Jic+AgOtEa9vSy5MPFxdPihbPDNTPkmnWy5 + /OjsMFIjQFywecdrjjkEb40yPT/fV8ux66yjzCmeKQwDuDlyx9htUg50SNdGSTSkOoUbVXh0H0pg + Q5OdS+wg6OlA3JN1mA+RbS4qdF/crekQ98466wZY/88IkPymrd07/kDXWq/oOtFOqjDwpe9pjxer + xLT4QJYvgP9ZAnOZLpik45WiEr1wakzGM+O/Ifmk3TtBHgTuRxRxTzJJfb+mMTCkApnkVzCef0BC + 57PRp7uO6Ejh1h6FM82d2+IUiiaifYnw0wN4H9GTi2zIwNtK3TWGO/wvKIdyI8659GLIZLWWz4Zs + wPWe506zoqd3PENAShXXDPqPfO70LO8e+ktTpM9A5NeDmsiOTSb9YjGOOfFcMALms0rlodXzZyGk + J2xhpTKbp7gsyqefgezDsLAqCVKukcWGwI9PfHn8DPgs76LpX6UmS6CzzOXboozgtyIekEACQcx/ + 293+BwAA//8DAAdiPY2lBwAA + headers: + CF-RAY: + - 96fa9146097f09c9-HFA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 15 Aug 2025 17:56:15 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - traceloop + openai-processing-ms: + - '776' + openai-project: + - proj_tzz1TbPPOXaf6j9tEkVUBIAa + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '779' + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '30000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '29999682' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_f46f76aed514a020bb0ff7dc5c8f3600 + status: + code: 200 + message: OK +version: 1 diff --git 
a/packages/opentelemetry-instrumentation-openai-agents/tests/test_content_parts.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_content_parts.py new file mode 100644 index 0000000000..a6d4b35e39 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_content_parts.py @@ -0,0 +1,131 @@ +""" +Unit tests for multimodal content block → OTel part conversion. + +Covers _dict_block_to_part and _object_block_to_part helpers in _hooks.py: + - data: URLs in image_url must produce BlobPart, not UriPart + - input_audio must include mime_type when format is provided +""" + +from types import SimpleNamespace + + +def _dict_block(block: dict): + from opentelemetry.instrumentation.openai_agents._hooks import _dict_block_to_part + return _dict_block_to_part(block) + + +def _object_block(block): + from opentelemetry.instrumentation.openai_agents._hooks import _object_block_to_part + return _object_block_to_part(block) + + +class TestImageUrlDataUrlBecomesBlob: + """image_url with a data: URL must produce BlobPart, not UriPart.""" + + def test_data_url_png_produces_blob_part(self): + block = {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc123=="}} + part = _dict_block(block) + assert part["type"] == "blob", f"Expected blob, got {part['type']}" + assert part["modality"] == "image" + assert part["content"] == "abc123==" + assert part.get("mime_type") == "image/png" + + def test_data_url_jpeg_produces_blob_part(self): + block = {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ=="}} + part = _dict_block(block) + assert part["type"] == "blob" + assert part["content"] == "/9j/4AAQ==" + assert part.get("mime_type") == "image/jpeg" + + def test_data_url_no_mime_produces_blob_without_mime_type(self): + """data: URL with no detectable MIME must still be BlobPart (no mime_type key).""" + block = {"type": "image_url", "image_url": {"url": "data:base64,abc123"}} + part = _dict_block(block) + assert part["type"] == "blob" + # mime_type may be absent or empty — must NOT be a URI + assert "uri" not in part + + def test_https_url_still_produces_uri_part(self): + """Regular https: URLs must remain UriPart.""" + block = {"type": "image_url", "image_url": {"url": "https://example.com/image.png"}} + part = _dict_block(block) + assert part["type"] == "uri" + assert part["modality"] == "image" + assert part["uri"] == "https://example.com/image.png" + + def test_http_url_still_produces_uri_part(self): + block = {"type": "image_url", "image_url": {"url": "http://cdn.example.com/photo.jpg"}} + part = _dict_block(block) + assert part["type"] == "uri" + assert part["uri"] == "http://cdn.example.com/photo.jpg" + + def test_object_block_data_url_produces_blob_part(self): + """SDK object path (_object_block_to_part) must also handle data: URLs.""" + image_url_obj = SimpleNamespace(url="data:image/png;base64,xyz789==") + block = SimpleNamespace(type="image_url", image_url=image_url_obj) + part = _object_block(block) + assert part["type"] == "blob", f"Expected blob, got {part['type']}" + assert part["modality"] == "image" + assert part["content"] == "xyz789==" + assert part.get("mime_type") == "image/png" + + def test_object_block_https_url_produces_uri_part(self): + image_url_obj = SimpleNamespace(url="https://example.com/photo.png") + block = SimpleNamespace(type="image_url", image_url=image_url_obj) + part = _object_block(block) + assert part["type"] == "uri" + assert part["uri"] == "https://example.com/photo.png" + + +class 
TestInputAudioMimeType: + """input_audio BlobPart must include mime_type when format is provided.""" + + def test_wav_format_produces_audio_wav_mime_type(self): + block = {"type": "input_audio", "input_audio": {"data": "audiobase64==", "format": "wav"}} + part = _dict_block(block) + assert part["type"] == "blob" + assert part["modality"] == "audio" + assert part["content"] == "audiobase64==" + assert part.get("mime_type") == "audio/wav", ( + f"Expected audio/wav, got {part.get('mime_type')!r}" + ) + + def test_mp3_format_produces_audio_mpeg_mime_type(self): + """MP3 format maps to audio/mpeg per IANA media types.""" + block = {"type": "input_audio", "input_audio": {"data": "mp3base64==", "format": "mp3"}} + part = _dict_block(block) + assert part["type"] == "blob" + assert part.get("mime_type") == "audio/mpeg", ( + f"Expected audio/mpeg, got {part.get('mime_type')!r}" + ) + + def test_ogg_format_produces_audio_ogg_mime_type(self): + block = {"type": "input_audio", "input_audio": {"data": "oggdata==", "format": "ogg"}} + part = _dict_block(block) + assert part.get("mime_type") == "audio/ogg" + + def test_no_format_omits_mime_type(self): + """When format is absent, mime_type should not be present (don't fabricate it).""" + block = {"type": "input_audio", "input_audio": {"data": "rawdata=="}} + part = _dict_block(block) + assert part["type"] == "blob" + assert part["modality"] == "audio" + assert "mime_type" not in part + + def test_object_block_wav_format_produces_mime_type(self): + """SDK object path (_object_block_to_part) must also include mime_type.""" + audio_obj = SimpleNamespace(data="audiobase64==", format="wav") + block = SimpleNamespace(type="input_audio", input_audio=audio_obj) + part = _object_block(block) + assert part["type"] == "blob" + assert part["modality"] == "audio" + assert part["content"] == "audiobase64==" + assert part.get("mime_type") == "audio/wav", ( + f"Expected audio/wav, got {part.get('mime_type')!r}" + ) + + def test_object_block_mp3_format_produces_mpeg_mime_type(self): + audio_obj = SimpleNamespace(data="mp3data==", format="mp3") + block = SimpleNamespace(type="input_audio", input_audio=audio_obj) + part = _object_block(block) + assert part.get("mime_type") == "audio/mpeg" diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_finish_reasons.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_finish_reasons.py new file mode 100644 index 0000000000..d669857ca6 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_finish_reasons.py @@ -0,0 +1,418 @@ +""" +Unit tests for finish_reason attribution on multi-output Responses API spans. + +Tests _extract_response_attributes directly with mock response objects +to verify per-message finish_reason mapping and top-level dedup. 
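+
+The mappings exercised below: tool_calls → tool_call, completed → stop,
+failed → error; cancelled and incomplete are preserved verbatim so consumers
+can tell user-initiated cancels and content-filter truncation apart from
+generic errors and length stops.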
+""" + +import json +import pytest +from types import SimpleNamespace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, +) + + +@pytest.fixture +def span(): + provider = TracerProvider() + tracer = provider.get_tracer("test") + otel_span = tracer.start_span("test-span") + yield otel_span + otel_span.end() + + +def _make_response(output, finish_reason="stop", status=None, model="gpt-4o"): + kwargs = dict( + output=output, + model=model, + id="resp-test", + temperature=None, + max_output_tokens=None, + top_p=None, + frequency_penalty=None, + usage=None, + ) + if status is not None: + kwargs["status"] = status + else: + kwargs["finish_reason"] = finish_reason + return SimpleNamespace(**kwargs) + + +def _text_message(*texts, role="assistant"): + content = [ + SimpleNamespace(type="output_text", text=t) for t in texts + ] + return SimpleNamespace(type="message", content=content, role=role) + + +def _reasoning_and_text_message(reasoning_summary, text, role="assistant"): + content = [ + SimpleNamespace(type="reasoning", summary=[SimpleNamespace(text=reasoning_summary)]), + SimpleNamespace(type="output_text", text=text), + ] + return SimpleNamespace(type="message", content=content, role=role) + + +def _tool_call(name, arguments="{}", call_id="call_0"): + return SimpleNamespace( + type="function_call", name=name, arguments=arguments, call_id=call_id, + ) + + +def _extract(span, response, trace_content=True): + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + return _extract_response_attributes(span, response, trace_content) + + +def _get_output_messages(span): + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + return json.loads(raw) if raw else [] + + +def _get_finish_reasons(span): + return span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + + +class TestMultiOutputFinishReasons: + """Verify per-message and top-level finish_reasons when a Responses-API + reply contains reasoning + text + tool_call outputs.""" + + def test_reasoning_text_and_tool_call(self, span): + response = _make_response( + output=[ + _reasoning_and_text_message("let me think", "Here's what I found"), + _tool_call("get_weather", '{"city": "NYC"}', "call_abc"), + ], + finish_reason="stop", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 2 + + # Message 0: text+reasoning → mapped "stop" + assert msgs[0]["role"] == "assistant" + assert msgs[0]["finish_reason"] == "stop" + part_types = [p["type"] for p in msgs[0]["parts"]] + assert "reasoning" in part_types + assert "text" in part_types + + # Message 1: tool call → mapped "tool_call" (singular, per OTel spec) + assert msgs[1]["role"] == "assistant" + assert msgs[1]["finish_reason"] == "tool_call" + assert msgs[1]["parts"][0]["type"] == "tool_call" + assert msgs[1]["parts"][0]["name"] == "get_weather" + + # Top-level: deduped, order-preserved + assert _get_finish_reasons(span) == ("stop", "tool_call") + + def test_tool_calls_mapped_to_singular(self, span): + """'tool_calls' (OpenAI) must map to 'tool_call' (OTel singular).""" + response = _make_response( + output=[_tool_call("search", '{"q": "test"}', "call_1")], + finish_reason="stop", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert msgs[0]["finish_reason"] == "tool_call" + assert _get_finish_reasons(span) == ("tool_call",) + + def 
test_status_completed_mapped_to_stop(self, span): + """Responses API status='completed' must map to 'stop'.""" + response = _make_response( + output=[_text_message("Done")], + status="completed", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert msgs[0]["finish_reason"] == "stop" + assert _get_finish_reasons(span) == ("stop",) + + def test_multiple_text_messages_dedup_finish_reason(self, span): + """Two text outputs with the same finish_reason should dedup to one top-level entry.""" + response = _make_response( + output=[ + _text_message("Part 1"), + _text_message("Part 2"), + ], + finish_reason="stop", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 2 + assert msgs[0]["finish_reason"] == "stop" + assert msgs[1]["finish_reason"] == "stop" + # Deduped: only one "stop" + assert _get_finish_reasons(span) == ("stop",) + + def test_text_and_tool_call_distinct_reasons(self, span): + """Text ('stop') + tool call ('tool_call') → both in top-level tuple.""" + response = _make_response( + output=[ + _text_message("Here you go"), + _tool_call("lookup", '{"id": 1}', "call_2"), + ], + finish_reason="stop", + ) + + _extract(span, response) + + assert _get_finish_reasons(span) == ("stop", "tool_call") + + def test_trace_content_false_still_sets_finish_reasons(self, span): + """When trace_content=False, output messages are omitted but top-level + finish_reasons must still reflect all output item types, including tool calls.""" + response = _make_response( + output=[ + _text_message("secret"), + _tool_call("get_weather", '{"city": "NYC"}', "call_abc"), + ], + finish_reason="stop", + ) + + _extract(span, response, trace_content=False) + + # No output messages (content suppressed) + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in span.attributes + # finish_reasons must reflect both the text (stop) and the tool call (tool_call) + assert _get_finish_reasons(span) == ("stop", "tool_call") + + def test_incomplete_response_preserves_incomplete_finish_reason(self, span): + """Responses API status='incomplete' must preserve 'incomplete', not remap to 'length'. + + 'incomplete' can be caused by a content filter, not just token limits — mapping + to 'length' would misrepresent the reason and lose information. + """ + response = _make_response( + output=[_text_message("Partial...")], + status="incomplete", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert msgs[0]["finish_reason"] == "incomplete" + assert _get_finish_reasons(span) == ("incomplete",) + + def test_cancelled_response_preserves_cancelled_finish_reason(self, span): + """Responses API status='cancelled' must preserve 'cancelled', not remap to 'error'. + + Cancellation is a distinct lifecycle event from an error; conflating the two + prevents consumers from distinguishing user-initiated cancels from failures. 
+ """ + response = _make_response( + output=[_text_message("Partial")], + status="cancelled", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert msgs[0]["finish_reason"] == "cancelled" + assert _get_finish_reasons(span) == ("cancelled",) + + def test_failed_status_mapped_to_error(self, span): + """Responses API status='failed' must map to 'error'.""" + response = _make_response( + output=[_text_message("Oops")], + status="failed", + ) + + _extract(span, response) + + msgs = _get_output_messages(span) + assert msgs[0]["finish_reason"] == "error" + assert _get_finish_reasons(span) == ("error",) + + +class TestFinishReasonsWithoutContent: + """finish_reason granularity must be preserved when trace_content=False. + + gen_ai.response.finish_reasons is Recommended metadata, not opt-in content. + The code must iterate output items for their inherent finish reasons even + when message content is suppressed. + """ + + def test_tool_call_only_output_with_responses_api_completed_status(self, span): + """Responses API status='completed' maps to 'stop' at the response level. + But when the output contains only a function_call item, the top-level + finish_reasons must show 'tool_call', not the response-level 'stop'.""" + response = _make_response( + output=[_tool_call("get_weather", '{"city": "NYC"}', "call_abc")], + status="completed", + ) + + _extract(span, response, trace_content=False) + + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in span.attributes + assert _get_finish_reasons(span) == ("tool_call",) + + def test_tool_call_only_without_content_trace_enabled(self, span): + """Same response with trace_content=True must also yield only 'tool_call'.""" + response = _make_response( + output=[_tool_call("search", '{"q": "test"}', "call_1")], + status="completed", + ) + + _extract(span, response, trace_content=True) + + msgs = _get_output_messages(span) + assert msgs[0]["finish_reason"] == "tool_call" + assert _get_finish_reasons(span) == ("tool_call",) + + def test_no_output_falls_back_to_response_level_finish_reason(self, span): + """When the response has no output items, fall back to the response-level reason.""" + response = _make_response(output=[], finish_reason="stop") + + _extract(span, response, trace_content=False) + + assert _get_finish_reasons(span) == ("stop",) + + +class TestToolCallPartOptionalFields: + """P2: Optional fields on tool_call parts must be omitted (not set to '' or null) + when not present in the source data. + + OTel ToolCallRequestPart schema: id is optional (default null), arguments is optional. + Emitting empty-string id or null arguments causes consumer correlation failures. 
+ """ + + def test_function_call_no_call_id_omits_id_from_part(self, span): + """Responses API function_call with no call_id must omit 'id' from the part.""" + output = SimpleNamespace(type="function_call", name="search", arguments='{"q": "test"}') + # No call_id attribute — getattr fallback was "", which is wrong + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 1 + part = msgs[0]["parts"][0] + assert part["type"] == "tool_call" + assert "id" not in part or part["id"], ( + f"id must be absent or non-empty when call_id not provided, got: {part}" + ) + + def test_function_call_none_arguments_omits_arguments_key(self, span): + """Responses API function_call with no arguments must omit 'arguments' from the part.""" + output = SimpleNamespace(type="function_call", name="noop", call_id="c1") + # No arguments attribute + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 1 + part = msgs[0]["parts"][0] + assert part["type"] == "tool_call" + assert "arguments" not in part, ( + f"arguments must be omitted when None, got: {part}" + ) + + def test_function_call_with_call_id_and_arguments_still_included(self, span): + """Sanity: when call_id and arguments are present, both must be emitted.""" + output = SimpleNamespace( + type="function_call", name="get_weather", + call_id="call_99", arguments='{"city": "NYC"}', + ) + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + part = msgs[0]["parts"][0] + assert part.get("id") == "call_99" + assert isinstance(part.get("arguments"), dict) + assert part["arguments"]["city"] == "NYC" + + +class TestContentItemTypeClassification: + """Content items inside output.content must be dispatched by their 'type' field first. + + The hasattr(.text) fallback must NOT shadow typed items like 'reasoning' or 'refusal' + that happen to also carry a .text attribute. 
+ """ + + def test_reasoning_item_with_text_attribute_not_misclassified_as_text(self, span): + """A content item with type='reasoning' that also has a .text attribute + must produce a 'reasoning' part, not a 'text' part.""" + from types import SimpleNamespace + + reasoning_item = SimpleNamespace( + type="reasoning", + text="This shadows the type if hasattr fires first", + summary=[SimpleNamespace(text="actual chain-of-thought")], + ) + output = SimpleNamespace(type="message", content=[reasoning_item], role="assistant") + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 1 + part = msgs[0]["parts"][0] + assert part["type"] == "reasoning", ( + f"type='reasoning' item with .text was misclassified as '{part['type']}'" + ) + + def test_refusal_item_with_text_attribute_not_misclassified_as_text(self, span): + """A content item with type='refusal' that also has a .text attribute + must produce a 'refusal' part, not a 'text' part.""" + from types import SimpleNamespace + + refusal_item = SimpleNamespace( + type="refusal", + text="I cannot do that", + refusal="I cannot do that", + ) + output = SimpleNamespace(type="message", content=[refusal_item], role="assistant") + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 1 + part = msgs[0]["parts"][0] + assert part["type"] == "refusal", ( + f"type='refusal' item with .text was misclassified as '{part['type']}'" + ) + + def test_output_text_item_still_produces_text_part(self, span): + """Sanity check: type='output_text' must still produce a 'text' part.""" + from types import SimpleNamespace + + text_item = SimpleNamespace(type="output_text", text="Hello!") + output = SimpleNamespace(type="message", content=[text_item], role="assistant") + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + assert msgs[0]["parts"][0]["type"] == "text" + assert msgs[0]["parts"][0]["content"] == "Hello!" 
+ + def test_unknown_typed_item_without_text_still_handled(self, span): + """An item with an unknown type and no .text must fall through to the generic handler.""" + from types import SimpleNamespace + + unknown_item = SimpleNamespace(type="image_file", file_id="file_abc") + output = SimpleNamespace(type="message", content=[unknown_item], role="assistant") + response = _make_response(output=[output], finish_reason="stop") + + _extract(span, response) + + msgs = _get_output_messages(span) + assert len(msgs) == 1 + assert msgs[0]["parts"][0]["type"] == "image_file" diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_openai_agents.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_openai_agents.py index 0c030334eb..7a0fce3ef1 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/tests/test_openai_agents.py +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_openai_agents.py @@ -1,3 +1,4 @@ +import json import pytest from unittest.mock import MagicMock from opentelemetry.instrumentation.openai_agents import ( @@ -49,24 +50,19 @@ def test_dict_content_serialization(exporter): spans = exporter.get_finished_spans() - # Look for any spans with prompt/content attributes + # Look for any spans with message content attributes for span in spans: for attr_name, attr_value in span.attributes.items(): - prompt_content_check = ("prompt" in attr_name and "content" in attr_name) or ( - "gen_ai.prompt" in attr_name and "content" in attr_name + prompt_content_check = ( + attr_name in ("gen_ai.input.messages", "gen_ai.output.messages") ) if prompt_content_check: # All content attributes should be strings, not dicts error_msg = f"Attribute {attr_name} should be a string, got {type(attr_value)}: {attr_value}" assert isinstance(attr_value, str), error_msg - # If it looks like JSON, verify it can be parsed - if attr_value.startswith("{") and attr_value.endswith("}"): - try: - json.loads(attr_value) - except json.JSONDecodeError: - # If it fails to parse, that's still fine - just not JSON - pass + # Message attributes must be valid JSON (arrays of message objects) + json.loads(attr_value) # The test passes if no dict type warnings occurred (all content attributes are strings) @@ -87,52 +83,57 @@ def test_agent_spans(exporter, test_agent): # Test agent span attributes (should NOT contain prompts/completions/usage/llm_params) assert agent_span.name == "testAgent.agent" - assert agent_span.kind == agent_span.kind.CLIENT + assert agent_span.kind == agent_span.kind.INTERNAL assert agent_span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.AGENT.value assert agent_span.attributes[GenAIAttributes.GEN_AI_AGENT_NAME] == "testAgent" - assert agent_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "openai_agents" + assert agent_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" assert agent_span.status.status_code == StatusCode.OK # Agent span should NOT contain LLM parameters - assert SpanAttributes.LLM_REQUEST_TEMPERATURE not in agent_span.attributes - assert SpanAttributes.LLM_REQUEST_MAX_TOKENS not in agent_span.attributes - assert SpanAttributes.LLM_REQUEST_TOP_P not in agent_span.attributes - assert "openai.agent.model.frequency_penalty" not in agent_span.attributes + assert GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE not in agent_span.attributes + assert GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS not in agent_span.attributes + assert GenAIAttributes.GEN_AI_REQUEST_TOP_P not in agent_span.attributes + assert 
GenAIAttributes.GEN_AI_REQUEST_FREQUENCY_PENALTY not in agent_span.attributes # Find the response span (openai.response) - this should contain prompts/completions/usage response_spans = [s for s in spans if s.name == "openai.response"] assert len(response_spans) >= 1, f"Expected at least 1 openai.response span, got {len(response_spans)}" response_span = response_spans[0] - # Test response span attributes (should contain prompts/completions/usage) - # Test proper semantic conventions - assert response_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "response" - assert response_span.attributes["gen_ai.operation.name"] == "response" - assert response_span.attributes["gen_ai.system"] == "openai" + assert response_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat" + assert response_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" - # Test prompts using OpenAI semantic conventions - assert response_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "user" - assert response_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"] == "What is AI?" + # Test input messages (JSON array with parts-based schema) + input_messages = json.loads(response_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["role"] == "user" + assert "parts" in input_messages[0], "Input messages must use parts-based schema" + assert input_messages[0]["parts"][0]["type"] == "text" + assert input_messages[0]["parts"][0]["content"] == "What is AI?" # Test usage tokens assert response_span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] is not None assert response_span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] is not None - assert response_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] is not None + assert response_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] is not None assert response_span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] > 0 assert response_span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] > 0 - assert response_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] > 0 + assert response_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] > 0 - # Test completions using OpenAI semantic conventions - assert response_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"] is not None - assert len(response_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"]) > 0 - assert response_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role"] is not None + # Test output messages (JSON array with parts-based schema) + output_messages = json.loads(response_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) + assert "parts" in output_messages[0], "Output messages must use parts-based schema" + assert output_messages[0]["parts"][0]["type"] == "text" + assert output_messages[0]["parts"][0]["content"] is not None + assert len(output_messages[0]["parts"][0]["content"]) > 0 + assert output_messages[0]["role"] is not None + assert "finish_reason" in output_messages[0], "Output messages must have finish_reason" # Test model settings are in the response span assert response_span.attributes["gen_ai.request.temperature"] == 0.3 assert response_span.attributes["gen_ai.request.max_tokens"] == 1024 assert response_span.attributes["gen_ai.request.top_p"] == 0.2 - assert response_span.attributes["gen_ai.request.model"] is not None + assert response_span.attributes.get("gen_ai.request.model") is not None or \ + response_span.attributes.get("gen_ai.response.model") 
is not None # Test proper duration (should be > 0) duration_ms = (response_span.end_time - response_span.start_time) / 1_000_000 @@ -444,60 +445,97 @@ async def get_city_info(city_name: str) -> str: second_response_span = response_spans[1] # The tool call and result appear in the SECOND response span as part of conversation history - # Find the assistant message with tool call + # Parse the input messages JSON array (parts-based schema) + input_messages = json.loads( + second_response_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] + ) + tool_call_found = False tool_result_found = False - for i in range(20): # Check conversation history - role_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.role" - if role_key not in second_response_span.attributes: - continue - - role = second_response_span.attributes[role_key] + for msg in input_messages: + role = msg.get("role") + parts = msg.get("parts", []) if role == "assistant" and not tool_call_found: - # Check if this assistant message has tool_calls - tool_call_name_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.tool_calls.0.name" - if tool_call_name_key in second_response_span.attributes: - tool_call_found = True - # Verify tool call attributes - assert second_response_span.attributes[tool_call_name_key] == "get_city_info", ( - f"Expected tool name 'get_city_info', got '{second_response_span.attributes[tool_call_name_key]}'" - ) - # Verify tool call ID exists - tool_call_id_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.tool_calls.0.id" - assert tool_call_id_key in second_response_span.attributes, ( - f"Tool call ID not found at {tool_call_id_key}" - ) - tool_call_id = second_response_span.attributes[tool_call_id_key] - assert len(tool_call_id) > 0, "Tool call ID should not be empty" - - # Verify arguments exist and contain city name - tool_call_args_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.tool_calls.0.arguments" - assert tool_call_args_key in second_response_span.attributes, ( - f"Tool call arguments not found at {tool_call_args_key}" - ) - arguments = second_response_span.attributes[tool_call_args_key] - assert "London" in arguments or "london" in arguments.lower(), ( - f"Expected 'London' in arguments, got: {arguments}" - ) + # Look for tool_call parts + for part in parts: + if part.get("type") == "tool_call": + tool_call_found = True + assert part["name"] == "get_city_info", ( + f"Expected tool name 'get_city_info', got '{part['name']}'" + ) + tool_call_id = part.get("id", "") + assert len(tool_call_id) > 0, "Tool call ID should not be empty" + arguments = part.get("arguments", "") + if isinstance(arguments, dict): + arguments = json.dumps(arguments) + assert "London" in arguments or "london" in arguments.lower(), ( + f"Expected 'London' in arguments, got: {arguments}" + ) + break elif role == "tool" and not tool_result_found: - tool_result_found = True - # Verify tool result attributes - content_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.content" - tool_call_id_key = f"{SpanAttributes.LLM_PROMPTS}.{i}.tool_call_id" - - assert content_key in second_response_span.attributes, f"Tool result content not found at {content_key}" - content = second_response_span.attributes[content_key] - assert len(content) > 0, "Tool result content should not be empty" - assert "London" in content or "9000000" in content or "United Kingdom" in content, ( - f"Expected tool result to contain city info, got: {content}" - ) + # Look for tool_call_response parts + for part in parts: + if part.get("type") == "tool_call_response": + tool_result_found = True + response_text = 
part.get("response", "") + assert len(response_text) > 0, "Tool result response should not be empty" + assert ( + "London" in response_text + or "9000000" in response_text + or "United Kingdom" in response_text + ), ( + f"Expected tool result to contain city info, got: {response_text}" + ) + tool_call_id = part.get("id", "") + assert len(tool_call_id) > 0, "Tool call ID should not be empty" + break + + assert tool_call_found, "No assistant message with tool_call parts found in second response span" + assert tool_result_found, "No tool message with tool_call_response parts found in second response span" - assert tool_call_id_key in second_response_span.attributes, f"Tool call ID not found at {tool_call_id_key}" - tool_call_id = second_response_span.attributes[tool_call_id_key] - assert len(tool_call_id) > 0, "Tool call ID should not be empty" - assert tool_call_found, "No assistant message with tool_calls found in second response span" - assert tool_result_found, "No tool message found in second response span" +@pytest.mark.vcr +def test_tool_span_operation_name(exporter, function_tool_agent): + """Test that tool/function spans have gen_ai.operation.name set to 'execute_tool'.""" + query = "What is the weather in London?" + + Runner.run_sync(function_tool_agent, query) + + spans = exporter.get_finished_spans() + tool_spans = [s for s in spans if s.name.endswith(".tool")] + + assert len(tool_spans) >= 1, f"Expected at least 1 tool span, found {len(tool_spans)}" + + for tool_span in tool_spans: + assert GenAIAttributes.GEN_AI_OPERATION_NAME in tool_span.attributes, ( + f"Tool span '{tool_span.name}' missing gen_ai.operation.name attribute" + ) + assert tool_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == "execute_tool", ( + f"Tool span '{tool_span.name}' has incorrect gen_ai.operation.name: " + f"{tool_span.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}, expected 'execute_tool'" + ) + + +@pytest.mark.vcr +def test_handoff_span_operation_name(exporter, handoff_agent): + """Test that handoff spans have gen_ai.operation.name set to 'handoff'.""" + query = "Please handle this task by delegating to another agent." 
+ + Runner.run_sync(handoff_agent, query) + + spans = exporter.get_finished_spans() + handoff_spans = [s for s in spans if ".handoff" in s.name] + + assert len(handoff_spans) >= 1, f"Expected at least 1 handoff span, found {len(handoff_spans)}" + + for handoff_span in handoff_spans: + assert GenAIAttributes.GEN_AI_OPERATION_NAME in handoff_span.attributes, ( + f"Handoff span '{handoff_span.name}' missing gen_ai.operation.name attribute" + ) + assert handoff_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == "handoff", ( + f"Handoff span '{handoff_span.name}' has incorrect gen_ai.operation.name: " + f"{handoff_span.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}, expected 'handoff'" + ) diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime.py index 02a030a72f..2b27104d3a 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime.py +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime.py @@ -11,7 +11,6 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry.trace import StatusCode -from opentelemetry.semconv_ai import SpanAttributes from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) @@ -117,8 +116,7 @@ def test_speech_span_start_creates_otel_span(self, tracer_provider_and_exporter) assert "openai.realtime.speech" in span_names speech_span = next(s for s in spans if s.name == "openai.realtime.speech") - assert speech_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "realtime" - assert speech_span.attributes["gen_ai.system"] == "openai" + assert speech_span.attributes["gen_ai.provider.name"] == "openai" assert speech_span.attributes["gen_ai.operation.name"] == "speech" assert speech_span.status.status_code == StatusCode.OK @@ -213,8 +211,7 @@ def test_transcription_span_start_creates_otel_span(self, tracer_provider_and_ex assert "openai.realtime.transcription" in span_names transcription_span = next(s for s in spans if s.name == "openai.realtime.transcription") - assert transcription_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "realtime" - assert transcription_span.attributes["gen_ai.system"] == "openai" + assert transcription_span.attributes["gen_ai.provider.name"] == "openai" assert transcription_span.attributes["gen_ai.operation.name"] == "transcription" def test_transcription_span_captures_model_and_format(self, tracer_provider_and_exporter): @@ -306,8 +303,7 @@ def test_speech_group_span_creates_otel_span(self, tracer_provider_and_exporter) assert "openai.realtime.speech_group" in span_names speech_group_span = next(s for s in spans if s.name == "openai.realtime.speech_group") - assert speech_group_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "realtime" - assert speech_group_span.attributes["gen_ai.system"] == "openai" + assert speech_group_span.attributes["gen_ai.provider.name"] == "openai" assert speech_group_span.attributes["gen_ai.operation.name"] == "speech_group" assert speech_group_span.status.status_code == StatusCode.OK diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime_session.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime_session.py index 43d80525dc..b04b983e9e 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime_session.py +++ 
b/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime_session.py @@ -1,5 +1,6 @@ """Tests for realtime session instrumentation via wrapper patching.""" +import json import pytest from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter @@ -199,10 +200,14 @@ def test_record_completion_creates_llm_span(self, tracer, tracer_provider): assert len(llm_spans) == 1 llm_span = llm_spans[0] - assert llm_span.attributes.get("gen_ai.prompt.0.role") == "user" - assert llm_span.attributes.get("gen_ai.prompt.0.content") == "What is the weather?" - assert llm_span.attributes.get("gen_ai.completion.0.role") == "assistant" - assert llm_span.attributes.get("gen_ai.completion.0.content") == "The weather is sunny." + input_msgs = json.loads(llm_span.attributes.get("gen_ai.input.messages")) + assert input_msgs[0]["role"] == "user" + assert input_msgs[0]["parts"][0]["type"] == "text" + assert input_msgs[0]["parts"][0]["content"] == "What is the weather?" + output_msgs = json.loads(llm_span.attributes.get("gen_ai.output.messages")) + assert output_msgs[0]["role"] == "assistant" + assert output_msgs[0]["parts"][0]["type"] == "text" + assert output_msgs[0]["parts"][0]["content"] == "The weather is sunny." def test_multiple_llm_spans(self, tracer, tracer_provider): """Test that multiple prompt/completion pairs create multiple LLM spans.""" @@ -229,12 +234,16 @@ def test_multiple_llm_spans(self, tracer, tracer_provider): assert len(llm_spans) == 2 # First span should have "Hello" and "Hi there!" - assert llm_spans[0].attributes.get("gen_ai.prompt.0.content") == "Hello" - assert llm_spans[0].attributes.get("gen_ai.completion.0.content") == "Hi there!" + first_in = json.loads(llm_spans[0].attributes.get("gen_ai.input.messages")) + first_out = json.loads(llm_spans[0].attributes.get("gen_ai.output.messages")) + assert first_in[0]["parts"][0]["content"] == "Hello" + assert first_out[0]["parts"][0]["content"] == "Hi there!" # Second span should have "What is the weather?" and "It's sunny." - assert llm_spans[1].attributes.get("gen_ai.prompt.0.content") == "What is the weather?" - assert llm_spans[1].attributes.get("gen_ai.completion.0.content") == "It's sunny." + second_in = json.loads(llm_spans[1].attributes.get("gen_ai.input.messages")) + second_out = json.loads(llm_spans[1].attributes.get("gen_ai.output.messages")) + assert second_in[0]["parts"][0]["content"] == "What is the weather?" + assert second_out[0]["parts"][0]["content"] == "It's sunny." 
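+        # Note: the flat gen_ai.prompt.N.* / gen_ai.completion.N.* attributes
+        # are replaced by JSON-serialized arrays (gen_ai.input.messages /
+        # gen_ai.output.messages), so tests json.loads() once and index into
+        # the parsed list instead of probing numbered attribute keys.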
def test_cleanup_ends_all_spans(self, tracer, tracer_provider): """Test that cleanup ends all remaining spans.""" @@ -331,6 +340,58 @@ def test_duplicate_completion_ignored(self, tracer, tracer_provider): llm_spans = [s for s in spans if s.name == "openai.realtime"] assert len(llm_spans) == 1 + def test_agent_span_has_invoke_agent_operation_name(self, tracer, tracer_provider): + """Agent spans must set gen_ai.operation.name='invoke_agent' per OTel spec.""" + _, exporter = tracer_provider + state = RealtimeTracingState(tracer) + state.start_workflow_span("Test Agent") + state.start_agent_span("Voice Assistant") + + span = state.agent_spans["Voice Assistant"] + span.end() + + finished = exporter.get_finished_spans() + agent = next(s for s in finished if s.name == "Voice Assistant.agent") + assert agent.attributes.get("gen_ai.operation.name") == "invoke_agent" + + state.cleanup() + + def test_tool_result_structured_output_serialized_as_json(self, tracer, tracer_provider): + """Structured tool output must be JSON, not Python repr (str()).""" + import json as json_mod + + _, exporter = tracer_provider + state = RealtimeTracingState(tracer) + state.start_workflow_span("Agent") + state.start_agent_span("Agent") + state.start_tool_span("my_tool", "Agent") + state.end_tool_span("my_tool", output={"key": "value", "num": 42}) + + spans = exporter.get_finished_spans() + tool_span = next(s for s in spans if s.name == "my_tool.tool") + result = tool_span.attributes.get("gen_ai.tool.call.result") + if result is not None: + assert "'" not in result, f"Python repr detected: {result}" + parsed = json_mod.loads(result) + assert parsed == {"key": "value", "num": 42} + + state.cleanup() + + def test_seen_completions_bounded_at_1000(self, tracer, tracer_provider): + """seen_completions must not grow without bound in long sessions.""" + _, exporter = tracer_provider + state = RealtimeTracingState(tracer) + state.start_workflow_span("Agent") + state.start_agent_span("Agent") + state.record_prompt("user", "hello") + + for i in range(2000): + state.record_completion("assistant", f"unique response {i}") + + assert len(state.seen_completions) <= 1000 + + state.cleanup() + class TestRealtimeSessionWrapping: """Tests for the session wrapping functionality.""" @@ -584,7 +645,8 @@ def __init__(self, role, content): spans = exporter.get_finished_spans() llm_spans = [s for s in spans if s.name == "openai.realtime"] assert len(llm_spans) == 1 - assert llm_spans[0].attributes.get("gen_ai.completion.0.content") == "Hi there!" + out_msgs = json.loads(llm_spans[0].attributes.get("gen_ai.output.messages")) + assert out_msgs[0]["parts"][0]["content"] == "Hi there!" def test_response_done_dict_captures_usage_and_completion(self, tracer, tracer_provider): """Test that response.done with dict data captures usage and completions.""" @@ -646,7 +708,8 @@ def test_response_done_dict_captures_usage_and_completion(self, tracer, tracer_p llm_span = llm_spans[0] assert llm_span.attributes.get("gen_ai.usage.input_tokens") == 42 assert llm_span.attributes.get("gen_ai.usage.output_tokens") == 18 - assert llm_span.attributes.get("gen_ai.completion.0.content") == "It is sunny today." + out_msgs = json.loads(llm_span.attributes.get("gen_ai.output.messages")) + assert out_msgs[0]["parts"][0]["content"] == "It is sunny today." 
def test_response_done_without_usage_still_captures_completion(self, tracer, tracer_provider): """Test that completions are captured even when usage is absent from response.done.""" @@ -694,5 +757,6 @@ def test_response_done_without_usage_still_captures_completion(self, tracer, tra spans = exporter.get_finished_spans() llm_spans = [s for s in spans if s.name == "openai.realtime"] assert len(llm_spans) == 1 - assert llm_spans[0].attributes.get("gen_ai.completion.0.content") == "Why did the chicken cross the road?" + output = json.loads(llm_spans[0].attributes.get("gen_ai.output.messages")) + assert output[0]["parts"][0]["content"] == "Why did the chicken cross the road?" assert llm_spans[0].attributes.get("gen_ai.usage.input_tokens") is None diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_recipe_agents_hierarchy.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_recipe_agents_hierarchy.py index f80a908e36..a1d7f15a4f 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/tests/test_recipe_agents_hierarchy.py +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_recipe_agents_hierarchy.py @@ -272,30 +272,30 @@ async def test_recipe_agents_hierarchy(exporter, recipe_agents): # Verify each response span has prompts, completions, and usage for i, response_span in enumerate(response_spans): - # Check for prompts - has_prompt = any(key.startswith("gen_ai.prompt.") for key in response_span.attributes.keys()) + # Check for input messages (new JSON array format) + has_prompt = "gen_ai.input.messages" in response_span.attributes assert has_prompt, ( f"Response span {i} should have prompt attributes, attributes: {dict(response_span.attributes)}" ) - # Check for completions - has_completion = any(key.startswith("gen_ai.completion.") for key in response_span.attributes.keys()) + # Check for output messages (new JSON array format) + has_completion = "gen_ai.output.messages" in response_span.attributes assert has_completion, ( f"Response span {i} should have completion attributes, attributes: {dict(response_span.attributes)}" ) # Check for usage has_usage = any( - key.startswith("gen_ai.usage.") or key.startswith("llm.usage.") for key in response_span.attributes.keys() + key.startswith("gen_ai.usage.") for key in response_span.attributes.keys() ) assert has_usage, ( f"Response span {i} should have usage attributes, attributes: {dict(response_span.attributes)}" ) # Check specific expected attributes - assert "gen_ai.system" in response_span.attributes, f"Response span {i} should have gen_ai.system" - assert response_span.attributes["gen_ai.system"] == "openai", ( - f"Response span {i} gen_ai.system should be 'openai'" + assert "gen_ai.provider.name" in response_span.attributes, f"Response span {i} should have gen_ai.provider.name" + assert response_span.attributes["gen_ai.provider.name"] == "openai", ( + f"Response span {i} gen_ai.provider.name should be 'openai'" ) pass # Validation passed diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_semconv_compliance.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_semconv_compliance.py new file mode 100644 index 0000000000..35a01e3380 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_semconv_compliance.py @@ -0,0 +1,8 @@ +# ruff: noqa: F401, F403 +""" +Semconv compliance tests re-used from opentelemetry-semantic-conventions-ai. + +Ensures the installed semconv package has the expected constant values. 
+To add more compliance checks, update _testing.py in that package — not here. +""" +from opentelemetry.semconv_ai._testing import * diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_semconv_messages.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_semconv_messages.py new file mode 100644 index 0000000000..35fb7dd5c4 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_semconv_messages.py @@ -0,0 +1,3635 @@ +""" +Comprehensive OTel GenAI semconv compliance tests for openai-agents instrumentation. + +Tests validate that all message formatting, attribute names, and values conform to +the OTel GenAI semantic conventions (parts-based schema, v1.40.0+). + +Reference schemas: semconv-schemas/gen-ai-input-messages.json, gen-ai-output-messages.json +""" + +import json +import pytest +from unittest.mock import MagicMock, patch +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def tracer_and_exporter(): + """Create a tracer provider with in-memory exporter for unit tests.""" + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + return provider.get_tracer("test"), exporter + + +@pytest.fixture +def processor(tracer_and_exporter): + """Create an OpenTelemetryTracingProcessor with a fresh tracer.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + + tracer, _ = tracer_and_exporter + return OpenTelemetryTracingProcessor(tracer) + + +# --------------------------------------------------------------------------- +# Helper: mock span data objects +# --------------------------------------------------------------------------- + +class MockAgentSpan: + def __init__(self, span_data, trace_id="test-trace", error=None): + self.span_data = span_data + self.trace_id = trace_id + self.error = error + + +class MockGenerationSpanData: + """Mock for agents.GenerationSpanData.""" + + def __init__(self, input=None, response=None): + self.input = input or [] + self.response = response + + +class ResponseSpanData: + """Lightweight stub whose __name__ is 'ResponseSpanData' (no MagicMock mutation).""" + + def __init__(self, input=None, response=None): + self.input = input or [] + self.response = response + + +class MockResponseOutput: + """Mock for a response output item with text content.""" + + def __init__(self, role="assistant", content=None, text=None, name=None, + call_id=None, arguments=None, type=None): + self.role = role + self.content = content + self.text = text + self.name = name + self.call_id = call_id + self.arguments = arguments + if type is None and content is not None: + self.type = "message" + elif type is None and call_id is not None: + self.type = "function_call" + else: + self.type = type + + +class MockContentItem: + """Mock for a content item inside ResponseOutputMessage.""" + + def __init__(self, text=None): + self.text = text + + +class MockUsage: + def __init__(self, input_tokens=10, output_tokens=20, total_tokens=30): + self.input_tokens = 
input_tokens + self.output_tokens = output_tokens + self.total_tokens = total_tokens + self.prompt_tokens = None + self.completion_tokens = None + + +class MockResponse: + """Mock for the response object from GenerationSpanData.""" + + def __init__(self, output=None, model=None, temperature=None, + max_output_tokens=None, top_p=None, frequency_penalty=None, + usage=None, finish_reason=None, id=None, tools=None): + self.output = output or [] + self.model = model + self.temperature = temperature + self.max_output_tokens = max_output_tokens + self.top_p = top_p + self.frequency_penalty = frequency_penalty + self.usage = usage + self.finish_reason = finish_reason + self.id = id + self.tools = tools or [] + + +class MockFunction: + """Mock for a tool function definition.""" + + def __init__(self, name="", description="", parameters=None): + self.name = name + self.description = description + self.parameters = parameters + + +class MockTool: + """Mock for a tool definition with function wrapper.""" + + def __init__(self, function=None, type="function"): + self.function = function + self.type = type + + +# --------------------------------------------------------------------------- +# P1-1: gen_ai.provider.name replaces gen_ai.system +# --------------------------------------------------------------------------- + +class TestProviderName: + """Verify gen_ai.provider.name is used instead of deprecated gen_ai.system.""" + + def test_generation_span_uses_provider_name(self, tracer_and_exporter): + """GenerationSpanData spans must use gen_ai.provider.name, not gen_ai.system.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-pn-1" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o", model_config={}) + span = MockAgentSpan(gen_data, trace_id="test-pn-1") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + response_span = next((s for s in spans if s.name == "openai.response"), None) + assert response_span is not None, "Expected openai.response span" + + attrs = dict(response_span.attributes) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs, ( + f"Expected gen_ai.provider.name attribute, got keys: {list(attrs.keys())}" + ) + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + def test_agent_span_uses_provider_name_openai(self, tracer_and_exporter): + """Agent spans must use gen_ai.provider.name = 'openai', NOT 'openai_agents'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import AgentSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-pn-2" + proc.on_trace_start(mock_trace) + + agent_data = AgentSpanData(name="TestAgent", handoffs=[], tools=[], output_type="") + span = MockAgentSpan(agent_data, trace_id="test-pn-2") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + agent_span = next((s for s in spans if s.name == "TestAgent.agent"), None) + assert agent_span is not None, "Expected TestAgent.agent span" + + attrs = dict(agent_span.attributes) + assert 
GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai", ( + f"Agent span provider name should be 'openai', got '{attrs.get(GenAIAttributes.GEN_AI_PROVIDER_NAME)}'" + ) + + def test_workflow_span_uses_provider_name(self, tracer_and_exporter): + """Workflow spans must use gen_ai.provider.name.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-pn-3" + proc.on_trace_start(mock_trace) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + wf_span = next((s for s in spans if s.name == "Agent Workflow"), None) + assert wf_span is not None + + attrs = dict(wf_span.attributes) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + def test_tool_span_uses_provider_name(self, tracer_and_exporter): + """Tool spans must use gen_ai.provider.name.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import FunctionSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-pn-4" + proc.on_trace_start(mock_trace) + + func_data = FunctionSpanData(name="get_weather", input="", output="") + span = MockAgentSpan(func_data, trace_id="test-pn-4") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + tool_span = next((s for s in spans if s.name == "get_weather.tool"), None) + assert tool_span is not None + + attrs = dict(tool_span.attributes) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + def test_handoff_span_uses_provider_name(self, tracer_and_exporter): + """Handoff spans must use gen_ai.provider.name.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import HandoffSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-pn-5" + proc.on_trace_start(mock_trace) + + handoff_data = HandoffSpanData(from_agent="AgentA", to_agent="AgentB") + span = MockAgentSpan(handoff_data, trace_id="test-pn-5") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + handoff_span = next((s for s in spans if "handoff" in s.name), None) + assert handoff_span is not None + + attrs = dict(handoff_span.attributes) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + +# --------------------------------------------------------------------------- +# P1-2 / P1-3: Input & Output messages use parts-based schema +# --------------------------------------------------------------------------- + +class TestInputMessagePartsFormat: + """Verify gen_ai.input.messages uses {role, parts} schema.""" + + def test_text_message_has_parts(self, tracer_and_exporter): + """Simple text message must have parts: [{type: 'text', content: '...'}].""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, exporter = tracer_and_exporter + 
span = tracer.start_span("test") + + input_data = [{"role": "user", "content": "Hello world"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None, "gen_ai.input.messages should be set" + messages = json.loads(raw) + + assert len(messages) == 1 + msg = messages[0] + assert msg["role"] == "user" + assert "parts" in msg, f"Message must have 'parts' key, got keys: {list(msg.keys())}" + assert "content" not in msg, "Top-level 'content' key should NOT be present (use parts instead)" + + parts = msg["parts"] + assert len(parts) == 1 + assert parts[0]["type"] == "text" + assert parts[0]["content"] == "Hello world" + + span.end() + + def test_tool_call_message_has_parts(self, tracer_and_exporter): + """Assistant tool call message must use parts with type 'tool_call'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{ + "id": "call_123", + "function": { + "name": "get_weather", + "arguments": '{"city": "NYC"}' + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + msg = messages[0] + + assert msg["role"] == "assistant" + assert "parts" in msg + assert "tool_calls" not in msg, "Top-level 'tool_calls' must NOT be present (use parts)" + + tool_part = msg["parts"][0] + assert tool_part["type"] == "tool_call" + assert tool_part["id"] == "call_123" + assert tool_part["name"] == "get_weather" + # Arguments must be parsed object, not string + assert isinstance(tool_part["arguments"], dict), ( + f"arguments must be dict (parsed object), got {type(tool_part['arguments'])}" + ) + assert tool_part["arguments"] == {"city": "NYC"} + + span.end() + + def test_tool_result_message_has_parts(self, tracer_and_exporter): + """Tool result message must use parts with type 'tool_call_response'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "tool", + "tool_call_id": "call_123", + "content": "72°F, sunny" + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + msg = messages[0] + + assert msg["role"] == "tool" + assert "parts" in msg + assert "content" not in msg, "Top-level 'content' must NOT be present for tool messages" + assert "tool_call_id" not in msg, "Top-level 'tool_call_id' must NOT be present" + + tool_part = msg["parts"][0] + assert tool_part["type"] == "tool_call_response" + assert tool_part["id"] == "call_123" + assert tool_part["response"] == "72°F, sunny" + + span.end() + + def test_agents_sdk_function_call_format(self, tracer_and_exporter): + """Agents SDK function_call type messages must convert to tool_call parts.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + # Agents SDK format uses dict with 'type' key, no 'role' + input_data = [{ + "type": "function_call", + "id": "fc_1", + "name": "search", + "arguments": '{"q": "test"}', + }] + + _extract_prompt_attributes(span, input_data, 
trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + msg = messages[0] + + assert msg["role"] == "assistant" + assert "parts" in msg + tool_part = msg["parts"][0] + assert tool_part["type"] == "tool_call" + assert tool_part["name"] == "search" + assert isinstance(tool_part["arguments"], dict) + assert tool_part["arguments"]["q"] == "test" + + span.end() + + def test_agents_sdk_function_call_output_format(self, tracer_and_exporter): + """Agents SDK function_call_output type must convert to tool_call_response parts.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + # Agents SDK format uses dict with 'type' key, no 'role' + input_data = [{ + "type": "function_call_output", + "call_id": "fc_1", + "output": "Result data", + }] + + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + msg = messages[0] + + assert msg["role"] == "tool" + assert "parts" in msg + tool_resp_part = msg["parts"][0] + assert tool_resp_part["type"] == "tool_call_response" + assert tool_resp_part["id"] == "fc_1" + assert tool_resp_part["response"] == "Result data" + + span.end() + + def test_list_content_with_tool_calls_preserves_structure(self, tracer_and_exporter): + """List content + tool_calls must preserve structured parts, not stringify.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "content": [ + {"type": "text", "text": "Let me check"}, + {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}}, + ], + "tool_calls": [{ + "id": "call_1", + "function": { + "name": "get_weather", + "arguments": '{"city": "NYC"}' + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + msg = messages[0] + + assert msg["role"] == "assistant" + assert "parts" in msg + + parts = msg["parts"] + # Expect: text part, uri part (image), tool_call part + assert len(parts) == 3, f"Expected 3 parts (text + image + tool_call), got {len(parts)}: {parts}" + + text_part = parts[0] + assert text_part["type"] == "text" + assert text_part["content"] == "Let me check" + + image_part = parts[1] + assert image_part["type"] == "uri", ( + f"image_url must map to 'uri' part, got type '{image_part['type']}'" + ) + assert image_part["modality"] == "image" + assert image_part["uri"] == "https://example.com/img.png" + + tool_part = parts[2] + assert tool_part["type"] == "tool_call" + assert tool_part["name"] == "get_weather" + + span.end() + + def test_string_content_with_tool_calls(self, tracer_and_exporter): + """String content + tool_calls should produce text part + tool_call part.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "content": "Let me look that up", + "tool_calls": [{ + "id": "call_2", + "function": { + "name": "search", + "arguments": '{"q": "test"}' + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = 
span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + msg = messages[0] + + parts = msg["parts"] + assert len(parts) == 2, f"Expected 2 parts (text + tool_call), got {len(parts)}" + assert parts[0]["type"] == "text" + assert parts[0]["content"] == "Let me look that up" + assert parts[1]["type"] == "tool_call" + + span.end() + + def test_none_content_message(self, tracer_and_exporter): + """Messages with None content should still produce valid parts.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"role": "assistant", "content": None}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + if raw: + messages = json.loads(raw) + if messages: + msg = messages[0] + assert "parts" in msg + + span.end() + + def test_empty_input_data(self, tracer_and_exporter): + """Empty input data should not set the attribute.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + _extract_prompt_attributes(span, [], trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is None, "Empty input should not set gen_ai.input.messages" + + span.end() + + +class TestOutputMessagePartsFormat: + """Verify gen_ai.output.messages uses {role, parts, finish_reason} schema.""" + + def test_text_output_has_parts(self, tracer_and_exporter): + """Text output must be wrapped in parts: [{type: 'text', content: '...'}].""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + content_item = MockContentItem(text="Hello!") + output_item = MockResponseOutput(role="assistant", content=[content_item]) + response = MockResponse( + output=[output_item], + model="gpt-4o", + usage=MockUsage(), + finish_reason="stop", + id="resp_123", + ) + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + + msg = messages[0] + assert msg["role"] == "assistant" + assert "parts" in msg, f"Output message must have 'parts', got keys: {list(msg.keys())}" + assert "content" not in msg, "Top-level 'content' must NOT be present" + + parts = msg["parts"] + assert len(parts) >= 1 + assert parts[0]["type"] == "text" + assert parts[0]["content"] == "Hello!" 
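+        # Pinned shape, for reference (illustrative; "stop" comes straight
+        # from this test's MockResponse, not from any default):
+        #
+        #   {"role": "assistant",
+        #    "parts": [{"type": "text", "content": "Hello!"}],
+        #    "finish_reason": "stop"}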
+ + span.end() + + def test_tool_call_output_has_parts(self, tracer_and_exporter): + """Tool call output must use parts with type 'tool_call'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + output_item = MockResponseOutput( + name="get_weather", call_id="call_456", arguments='{"city": "London"}' + ) + response = MockResponse( + output=[output_item], + model="gpt-4o", + usage=MockUsage(), + ) + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + + msg = messages[0] + assert msg["role"] == "assistant" + assert "parts" in msg + assert "tool_calls" not in msg, "Top-level 'tool_calls' must NOT be present" + + tool_part = msg["parts"][0] + assert tool_part["type"] == "tool_call" + assert tool_part["name"] == "get_weather" + assert tool_part["id"] == "call_456" + + span.end() + + def test_output_finish_reason_present(self, tracer_and_exporter): + """Output messages must have finish_reason at message level.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + content_item = MockContentItem(text="Done") + output_item = MockResponseOutput(role="assistant", content=[content_item]) + response = MockResponse( + output=[output_item], + model="gpt-4o", + usage=MockUsage(), + finish_reason="stop", + ) + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + + msg = messages[0] + assert "finish_reason" in msg, "finish_reason is required per schema" + assert msg["finish_reason"] == "stop" + + span.end() + + def test_output_finish_reason_empty_when_unknown(self, tracer_and_exporter): + """finish_reason must be '' (not fabricated 'stop') when unknown.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + content_item = MockContentItem(text="Done") + output_item = MockResponseOutput(role="assistant", content=[content_item]) + response = MockResponse( + output=[output_item], + model="gpt-4o", + usage=MockUsage(), + finish_reason=None, + ) + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + + msg = messages[0] + assert "finish_reason" in msg, "finish_reason must always be present (required by schema)" + # When finish_reason is unknown, it should be empty string, NOT fabricated "stop" + assert msg["finish_reason"] == "", ( + f"finish_reason should be '' when unknown, got '{msg['finish_reason']}'" + ) + + span.end() + + def test_message_with_empty_content_and_name_not_tool_call(self, tracer_and_exporter): + """ResponseOutputMessage with empty content + participant name must not become a tool call. + + Semconv: ToolCallRequestPart.name MUST identify a tool, not a participant. 
+ """ + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + from types import SimpleNamespace + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + output = SimpleNamespace( + type="message", content=[], name="CustomerServiceBot", role="assistant", + ) + response = SimpleNamespace( + temperature=None, max_output_tokens=None, top_p=None, + model=None, id=None, frequency_penalty=None, + finish_reason=None, status="completed", + output=[output], usage=None, + ) + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + if raw: + messages = json.loads(raw) + for msg in messages: + for part in msg.get("parts", []): + assert part.get("type") != "tool_call", ( + "Participant name was misclassified as tool call" + ) + + span.end() + + +# --------------------------------------------------------------------------- +# P1-4: Arguments parsed as objects +# --------------------------------------------------------------------------- + +class TestArgumentsParsing: + """Verify tool call arguments are parsed to objects, not kept as strings.""" + + def test_string_arguments_parsed_to_dict(self, tracer_and_exporter): + """JSON string arguments must be parsed to dict.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{ + "id": "call_1", + "function": { + "name": "search", + "arguments": '{"query": "weather", "limit": 5}' + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + tool_part = messages[0]["parts"][0] + + assert isinstance(tool_part["arguments"], dict), ( + f"Arguments should be parsed to dict, got {type(tool_part['arguments'])}" + ) + assert tool_part["arguments"]["query"] == "weather" + assert tool_part["arguments"]["limit"] == 5 + + span.end() + + def test_dict_arguments_kept_as_dict(self, tracer_and_exporter): + """Dict arguments should stay as dict.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{ + "id": "call_1", + "function": { + "name": "search", + "arguments": {"query": "test"} + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + tool_part = messages[0]["parts"][0] + + assert isinstance(tool_part["arguments"], dict) + assert tool_part["arguments"]["query"] == "test" + + span.end() + + def test_invalid_json_arguments_fallback(self, tracer_and_exporter): + """Invalid JSON string arguments should have best-effort fallback.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{ + "id": "call_1", + "function": { + "name": "search", + "arguments": "not valid json {" + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + tool_part 
= messages[0]["parts"][0]
+
+        # Must not crash; arguments must still be present in some form
+        assert "arguments" in tool_part
+
+        span.end()
+
+
+# ---------------------------------------------------------------------------
+# P1-5 / P1-6: Finish reasons
+# ---------------------------------------------------------------------------
+
+class TestFinishReasons:
+    """Verify finish reason mapping and top-level attribute."""
+
+    def test_finish_reasons_top_level_attribute(self, tracer_and_exporter):
+        """gen_ai.response.finish_reasons must be set as top-level span array."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            _extract_response_attributes,
+        )
+
+        tracer, _ = tracer_and_exporter
+        span = tracer.start_span("test")
+
+        content_item = MockContentItem(text="Done")
+        output_item = MockResponseOutput(role="assistant", content=[content_item])
+        response = MockResponse(
+            output=[output_item],
+            model="gpt-4o",
+            usage=MockUsage(),
+            finish_reason="stop",
+        )
+
+        _extract_response_attributes(span, response, trace_content=True)
+
+        finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS)
+        assert finish_reasons is not None, (
+            "gen_ai.response.finish_reasons must be set as top-level span attribute"
+        )
+        assert isinstance(finish_reasons, (list, tuple))
+        assert "stop" in finish_reasons
+
+        span.end()
+
+    def test_finish_reasons_tool_calls_mapped_to_singular(self, tracer_and_exporter):
+        """OpenAI 'tool_calls' (plural) must map to 'tool_call' (singular)."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            _extract_response_attributes,
+        )
+
+        tracer, _ = tracer_and_exporter
+        span = tracer.start_span("test")
+
+        output_item = MockResponseOutput(name="search", call_id="c1", arguments="{}")
+        response = MockResponse(
+            output=[output_item],
+            model="gpt-4o",
+            usage=MockUsage(),
+            finish_reason="tool_calls",
+        )
+
+        _extract_response_attributes(span, response, trace_content=True)
+
+        finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS)
+        assert finish_reasons is not None
+
+        # Must be singular "tool_call", not plural "tool_calls"
+        assert "tool_call" in finish_reasons, (
+            f"Expected 'tool_call' (singular), got {finish_reasons}"
+        )
+        assert "tool_calls" not in finish_reasons
+
+        span.end()
+
+    def test_finish_reasons_none_omits_attribute(self, tracer_and_exporter):
+        """When finish_reason is None, the top-level attribute should be omitted (not fabricated)."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            _extract_response_attributes,
+        )
+
+        tracer, _ = tracer_and_exporter
+        span = tracer.start_span("test")
+
+        content_item = MockContentItem(text="Done")
+        output_item = MockResponseOutput(role="assistant", content=[content_item])
+        response = MockResponse(
+            output=[output_item],
+            model="gpt-4o",
+            usage=MockUsage(),
+            finish_reason=None,
+        )
+
+        _extract_response_attributes(span, response, trace_content=True)
+
+        finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS)
+        # Ideally omitted entirely; at minimum it must never contain a fabricated "stop"
+        if finish_reasons is not None:
+            assert "stop" not in finish_reasons, "Must NOT fabricate 'stop' when finish_reason is None"
+
+        span.end()
+
+    def test_finish_reasons_set_without_prompts(self, tracer_and_exporter):
+        """finish_reasons must be set even when should_send_prompts() is False."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            _extract_response_attributes,
+        )
+
+        tracer, _ = tracer_and_exporter
+        span
= tracer.start_span("test") + + content_item = MockContentItem(text="Done") + output_item = MockResponseOutput(role="assistant", content=[content_item]) + response = MockResponse( + output=[output_item], + model="gpt-4o", + usage=MockUsage(), + finish_reason="stop", + ) + + # trace_content=False simulates should_send_prompts() returning False + _extract_response_attributes(span, response, trace_content=False) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert finish_reasons is not None, ( + "gen_ai.response.finish_reasons must be set even when content tracing is disabled" + ) + + span.end() + + def test_tool_call_top_level_matches_per_message(self, tracer_and_exporter): + """Top-level finish_reasons must say 'tool_call' when output contains tool calls. + + Semconv: gen_ai.response.finish_reasons corresponds to each generation. + If the model stopped to emit a tool call, both levels must agree. + """ + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + from types import SimpleNamespace + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + tool_output = SimpleNamespace( + type="function_call", + content=None, + name="get_weather", + arguments='{"city": "London"}', + call_id="call_123", + ) + response = SimpleNamespace( + temperature=None, max_output_tokens=None, top_p=None, + model=None, id=None, frequency_penalty=None, + finish_reason=None, status="completed", + output=[tool_output], usage=None, + ) + + _extract_response_attributes(span, response, trace_content=True) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert finish_reasons is not None, "Missing gen_ai.response.finish_reasons" + assert "tool_call" in finish_reasons, ( + f"Expected 'tool_call' in finish_reasons, got {finish_reasons}" + ) + + span.end() + + +# --------------------------------------------------------------------------- +# P1-7: Operation name +# --------------------------------------------------------------------------- + +class TestOperationName: + """Verify gen_ai.operation.name uses well-known OTel values.""" + + def test_generation_span_operation_name_is_chat(self, tracer_and_exporter): + """GenerationSpanData must use operation name 'chat'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-op-1" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o", model_config={}) + span = MockAgentSpan(gen_data, trace_id="test-op-1") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + resp_span = next((s for s in spans if s.name == "openai.response"), None) + assert resp_span is not None + + assert resp_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat", ( + f"Expected 'chat', got '{resp_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]}'" + ) + + def test_response_span_data_operation_name_is_chat(self, tracer_and_exporter): + """ResponseSpanData (Responses API) must use 'chat', same as GenerationSpanData. + + 'generate_content' is the GCP/Gemini well-known value and must not be used for + OpenAI's Responses API, which is a chat completion surface. 
+ """ + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-op-2" + proc.on_trace_start(mock_trace) + + response_data = ResponseSpanData(input=[], response=None) + + span = MockAgentSpan(response_data, trace_id="test-op-2") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + resp_span = next((s for s in spans if s.name == "openai.response"), None) + assert resp_span is not None + + op_name = resp_span.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME) + assert op_name == "chat", ( + f"ResponseSpanData must emit 'chat', got '{op_name}'" + ) + + +# --------------------------------------------------------------------------- +# P2-1 / P2-2: Response model and ID +# --------------------------------------------------------------------------- + +class TestResponseAttributes: + """Verify recommended response attributes are set.""" + + def test_response_model_set(self, tracer_and_exporter): + """gen_ai.response.model should be set from response.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + model="gpt-4o-2024-08-06", + usage=MockUsage(), + ) + + _extract_response_attributes(span, response, trace_content=True) + + assert GenAIAttributes.GEN_AI_RESPONSE_MODEL in span.attributes, ( + "gen_ai.response.model should be set" + ) + assert span.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] == "gpt-4o-2024-08-06" + + span.end() + + def test_response_id_set(self, tracer_and_exporter): + """gen_ai.response.id should be set from response.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + model="gpt-4o", + usage=MockUsage(), + id="resp_abc123", + ) + + _extract_response_attributes(span, response, trace_content=True) + + assert GenAIAttributes.GEN_AI_RESPONSE_ID in span.attributes, ( + "gen_ai.response.id should be set" + ) + assert span.attributes[GenAIAttributes.GEN_AI_RESPONSE_ID] == "resp_abc123" + + span.end() + + def test_frequency_penalty_set_on_span(self, tracer_and_exporter): + """gen_ai.request.frequency_penalty should be set as span attribute.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + model="gpt-4o", + frequency_penalty=0.5, + usage=MockUsage(), + ) + + _extract_response_attributes(span, response, trace_content=True) + + assert GenAIAttributes.GEN_AI_REQUEST_FREQUENCY_PENALTY in span.attributes, ( + "gen_ai.request.frequency_penalty should be set on span" + ) + assert span.attributes[GenAIAttributes.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + + span.end() + + def test_response_model_does_not_overwrite_request_model(self, tracer_and_exporter): + """response.model must only set gen_ai.response.model, not gen_ai.request.model. + + Semconv: gen_ai.request.model (alias, e.g. 'gpt-4o') and + gen_ai.response.model (served, e.g. 'gpt-4o-2024-08-06') are distinct. 
+ """ + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + from types import SimpleNamespace + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + span.set_attribute(GenAIAttributes.GEN_AI_REQUEST_MODEL, "gpt-4o") + + response = SimpleNamespace( + temperature=None, max_output_tokens=None, top_p=None, + model="gpt-4o-2024-08-06", id=None, frequency_penalty=None, + finish_reason=None, status="completed", output=[], usage=None, + ) + + _extract_response_attributes(span, response, trace_content=True) + + assert span.attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL) == "gpt-4o", ( + "response.model must not overwrite gen_ai.request.model" + ) + assert span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_MODEL) == "gpt-4o-2024-08-06" + + span.end() + + +# --------------------------------------------------------------------------- +# P2-7: Tool definitions preserve full format +# --------------------------------------------------------------------------- + +class TestToolDefinitions: + """Verify tool definitions preserve the source system's representation.""" + + def test_tool_definitions_preserve_type_wrapper(self, tracer_and_exporter): + """Tool definitions should preserve the 'type: function' wrapper.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-td-1" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o", model_config={}) + gen_data.input = [] + + # Create response with tools + func = MockFunction(name="search", description="Search for data", parameters={"type": "object"}) + tool = MockTool(function=func, type="function") + gen_data.response = MockResponse( + model="gpt-4o", + tools=[tool], + usage=MockUsage(), + ) + + span = MockAgentSpan(gen_data, trace_id="test-td-1") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + resp_span = next((s for s in spans if s.name == "openai.response"), None) + assert resp_span is not None + + raw_defs = resp_span.attributes.get(GenAIAttributes.GEN_AI_TOOL_DEFINITIONS) + assert raw_defs is not None, "gen_ai.tool.definitions must be set when tools are present" + defs = json.loads(raw_defs) + assert len(defs) >= 1 + tool_def = defs[0] + # Per spec: preserve source system's representation + assert "type" in tool_def, "Tool definition should preserve 'type' field" + assert tool_def["type"] == "function" + assert "function" in tool_def, "Tool definition should preserve 'function' wrapper" + + +# --------------------------------------------------------------------------- +# P2-5: Realtime messages parts format +# --------------------------------------------------------------------------- + +class TestRealtimeMessageFormat: + """Verify realtime LLM span messages use parts-based format.""" + + def test_realtime_llm_span_input_uses_parts(self): + """Realtime input messages must use parts-based format.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + 
state.start_workflow_span("TestAgent") + state.start_agent_span("TestAgent") + + state.record_prompt("user", "What is the weather?") + + with patch( + "opentelemetry.instrumentation.openai_agents._realtime_wrappers.should_send_prompts", + return_value=True, + ): + state.create_llm_span("It's sunny!") + + state.cleanup() + state.end_workflow_span() + + spans = exporter.get_finished_spans() + llm_span = next((s for s in spans if s.name == "openai.realtime"), None) + assert llm_span is not None + + raw_input = llm_span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw_input is not None, "gen_ai.input.messages must be set on realtime LLM span" + messages = json.loads(raw_input) + msg = messages[0] + assert "parts" in msg, f"Realtime input must use parts format, got keys: {list(msg.keys())}" + + def test_realtime_llm_span_output_uses_parts(self): + """Realtime output messages must use parts-based format.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + state.start_workflow_span("TestAgent") + state.start_agent_span("TestAgent") + + state.record_prompt("user", "Hello") + + with patch( + "opentelemetry.instrumentation.openai_agents._realtime_wrappers.should_send_prompts", + return_value=True, + ): + state.create_llm_span("Hi there!") + + state.cleanup() + state.end_workflow_span() + + spans = exporter.get_finished_spans() + llm_span = next((s for s in spans if s.name == "openai.realtime"), None) + assert llm_span is not None + + raw_output = llm_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw_output is not None, "gen_ai.output.messages must be set on realtime LLM span" + messages = json.loads(raw_output) + msg = messages[0] + assert "parts" in msg, f"Realtime output must use parts format, got keys: {list(msg.keys())}" + + def test_realtime_does_not_fabricate_stop(self): + """Realtime must NOT fabricate finish_reason 'stop'.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + state.start_workflow_span("TestAgent") + state.start_agent_span("TestAgent") + + state.record_prompt("user", "Test") + + with patch( + "opentelemetry.instrumentation.openai_agents._realtime_wrappers.should_send_prompts", + return_value=True, + ): + state.create_llm_span("Response") + + state.cleanup() + state.end_workflow_span() + + spans = exporter.get_finished_spans() + llm_span = next((s for s in spans if s.name == "openai.realtime"), None) + assert llm_span is not None + + raw_output = llm_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw_output is not None, "gen_ai.output.messages must be set on realtime LLM span" + messages = json.loads(raw_output) + msg = messages[0] + # finish_reason should be empty string, not fabricated "stop" + fr = msg.get("finish_reason") + assert fr == "", ( + f"Realtime should not fabricate finish_reason, got '{fr}'" + ) + + +# --------------------------------------------------------------------------- +# Realtime operation name +# 
--------------------------------------------------------------------------- + +class TestRealtimeOperationName: + """Verify realtime spans set gen_ai.operation.name.""" + + def test_realtime_llm_span_operation_name(self): + """Realtime LLM span must set gen_ai.operation.name.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + state.start_workflow_span("TestAgent") + state.start_agent_span("TestAgent") + + state.record_prompt("user", "Hello") + + with patch( + "opentelemetry.instrumentation.openai_agents._realtime_wrappers.should_send_prompts", + return_value=True, + ): + state.create_llm_span("Hi there!") + + state.cleanup() + state.end_workflow_span() + + spans = exporter.get_finished_spans() + llm_span = next((s for s in spans if s.name == "openai.realtime"), None) + assert llm_span is not None + + op_name = llm_span.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME) + assert op_name is not None, "gen_ai.operation.name must be set on realtime LLM span" + # "realtime" is a custom extension (no well-known OTel equivalent); + # lock the current value so changes are intentional. + assert op_name == "realtime", ( + f"Expected 'realtime' operation name, got '{op_name}'" + ) + + def test_realtime_audio_span_operation_name(self): + """Realtime audio span must set gen_ai.operation.name.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + state.start_workflow_span("TestAgent") + state.start_agent_span("TestAgent") + + state.start_audio_span("item-1", 0) + state.end_audio_span("item-1", 0) + + state.cleanup() + state.end_workflow_span() + + spans = exporter.get_finished_spans() + audio_span = next( + (s for s in spans if s.name == "openai.realtime" and + s.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "realtime"), + None, + ) + assert audio_span is not None, "Audio span must exist with operation name 'realtime'" + + +# --------------------------------------------------------------------------- +# Realtime provider name +# --------------------------------------------------------------------------- + +class TestRealtimeProviderName: + """Verify realtime spans use gen_ai.provider.name.""" + + def test_realtime_workflow_uses_provider_name(self): + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + state.start_workflow_span("TestAgent") + state.end_workflow_span() + + spans = exporter.get_finished_spans() + wf_span = next((s for s in spans if s.name == "Realtime Session"), None) + assert wf_span is not None + + attrs = dict(wf_span.attributes) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + def test_realtime_agent_span_uses_provider_name(self): + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + 
RealtimeTracingState, + ) + + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + + state = RealtimeTracingState(tracer) + state.start_workflow_span("TestAgent") + state.start_agent_span("TestAgent") + state.cleanup() + state.end_workflow_span() + + spans = exporter.get_finished_spans() + agent_span = next((s for s in spans if s.name == "TestAgent.agent"), None) + assert agent_span is not None + + attrs = dict(agent_span.attributes) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + +# --------------------------------------------------------------------------- +# No deprecated gen_ai.system anywhere +# --------------------------------------------------------------------------- + +class TestNoDeprecatedAttributes: + """Ensure no span uses the deprecated gen_ai.system attribute.""" + + def test_no_gen_ai_system_in_generation_span(self, tracer_and_exporter): + """Spans must not contain the deprecated gen_ai.system attribute.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-dep-1" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o", model_config={}) + span = MockAgentSpan(gen_data, trace_id="test-dep-1") + + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + for s in spans: + attrs = dict(s.attributes) + assert "gen_ai.system" not in attrs, ( + f"Span '{s.name}' uses deprecated 'gen_ai.system' attribute. " + f"Must use 'gen_ai.provider.name' instead." 
+ ) + + +# --------------------------------------------------------------------------- +# P3: Content gating – trace_content=False must suppress content attributes +# --------------------------------------------------------------------------- + +class TestContentGating: + """Verify opt-in content attributes are not emitted when tracing is disabled.""" + + def test_input_messages_suppressed_when_tracing_disabled(self, tracer_and_exporter): + """gen_ai.input.messages must NOT be set when trace_content=False.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"role": "user", "content": "secret prompt"}] + _extract_prompt_attributes(span, input_data, trace_content=False) + + assert GenAIAttributes.GEN_AI_INPUT_MESSAGES not in span.attributes + span.end() + + def test_output_messages_suppressed_when_tracing_disabled(self, tracer_and_exporter): + """gen_ai.output.messages must NOT be set when trace_content=False.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = "stop" + + content_item = MagicMock() + content_item.type = "output_text" + content_item.text = "secret output" + + output_msg = MagicMock() + output_msg.type = "message" + output_msg.content = [content_item] + output_msg.role = "assistant" + output_msg.name = None # Not a tool call + + response.output = [output_msg] + response.usage = None + response.tools = None + + _extract_response_attributes(span, response, trace_content=False) + + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in span.attributes + # finish_reasons should still be set (not content-gated) + assert GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS in span.attributes + span.end() + + def test_tool_definitions_suppressed_when_tracing_disabled( + self, tracer_and_exporter + ): + """gen_ai.tool.definitions must NOT be set when trace_content=False.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-gate-tools" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o", model_config={}) + span_obj = MockAgentSpan(gen_data, trace_id="test-gate-tools") + + func_mock = MagicMock() + func_mock.name = "lookup" + func_mock.description = "Look something up" + func_mock.parameters = {"type": "object"} + + tool_mock = MagicMock() + tool_mock.function = func_mock + tool_mock.type = "function" + + response_mock = MagicMock() + response_mock.tools = [tool_mock] + response_mock.output = [] + response_mock.usage = None + response_mock.temperature = None + response_mock.max_output_tokens = None + response_mock.top_p = None + response_mock.model = "gpt-4o" + response_mock.id = "resp_1" + response_mock.frequency_penalty = None + response_mock.finish_reason = None + gen_data.response = response_mock + + with patch( + "opentelemetry.instrumentation.openai_agents._hooks.should_send_prompts", + return_value=False, + ): + 
proc.on_span_start(span_obj) + proc.on_span_end(span_obj) + + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + response_spans = [s for s in spans if "response" in s.name or "chat" in s.name] + for s in response_spans: + assert GenAIAttributes.GEN_AI_TOOL_DEFINITIONS not in s.attributes, ( + f"Span '{s.name}' should not have tool definitions when tracing disabled" + ) + + +# --------------------------------------------------------------------------- +# P3: Invalid tool arguments fallback – must always be object or null +# --------------------------------------------------------------------------- + +class TestInvalidToolArgumentsFallback: + """Ensure _parse_arguments never returns a raw string.""" + + def test_invalid_json_returns_wrapped_object(self): + """Invalid JSON string must produce {_raw: ...} object.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _parse_arguments, + ) + + result = _parse_arguments("not valid json {{{") + assert isinstance(result, dict), f"Expected dict, got {type(result)}" + assert "_raw" in result + assert result["_raw"] == "not valid json {{{" + + def test_json_array_returns_wrapped_object(self): + """JSON array string must produce {_raw: ...} object (not a list).""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _parse_arguments, + ) + + result = _parse_arguments('[1, 2, 3]') + assert isinstance(result, dict), f"Expected dict, got {type(result)}" + assert "_raw" in result + + def test_empty_string_returns_none(self): + """Empty/whitespace string must return None.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _parse_arguments, + ) + + assert _parse_arguments("") is None + assert _parse_arguments(" ") is None + + def test_none_returns_none(self): + """None input must return None.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _parse_arguments, + ) + + assert _parse_arguments(None) is None + + def test_numeric_arg_returns_wrapped_object(self): + """Non-string non-dict input must produce {_raw: ...} object.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _parse_arguments, + ) + + result = _parse_arguments(42) + assert isinstance(result, dict) + assert "_raw" in result + + def test_valid_json_dict_returns_dict(self): + """Valid JSON dict string must parse normally.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _parse_arguments, + ) + + result = _parse_arguments('{"city": "NYC"}') + assert isinstance(result, dict) + assert result == {"city": "NYC"} + + +# --------------------------------------------------------------------------- +# Spec §1: Multimodal content mapping — lock OTel part types +# Ref: openllmetry-semconv-review.md §1 "Provider-Specific Content Block Mapping" +# OpenAI image_url → OTel UriPart {type: "uri", modality: "image", uri: "..."} +# OpenAI input_audio → OTel BlobPart {type: "blob", modality: "audio", ...} +# --------------------------------------------------------------------------- + +class TestMultimodalInputMapping: + """Lock multimodal content blocks to OTel part types per spec.""" + + def test_image_url_maps_to_uri_part(self, tracer_and_exporter): + """Spec §1: OpenAI image_url MUST map to UriPart, NOT 'image_url' type.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "user", + "content": [ + {"type": "text", "text": 
"What is in this image?"}, + { + "type": "image_url", + "image_url": {"url": "https://example.com/img.png"}, + }, + ], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 2 + assert parts[0] == {"type": "text", "content": "What is in this image?"} + # Spec: UriPart — NOT {"type": "image_url", ...} + assert parts[1]["type"] == "uri", ( + f"image_url must map to UriPart (type='uri'), got type='{parts[1]['type']}'" + ) + assert parts[1]["modality"] == "image" + assert parts[1]["uri"] == "https://example.com/img.png" + + span.end() + + def test_input_audio_maps_to_blob_part(self, tracer_and_exporter): + """Spec §1: OpenAI input_audio MUST map to BlobPart, NOT 'input_audio' type.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "user", + "content": [ + { + "type": "input_audio", + "input_audio": {"data": "base64audiodata==", "format": "wav"}, + }, + ], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 1 + # Spec: BlobPart — NOT {"type": "input_audio", ...} + assert parts[0]["type"] == "blob", ( + f"input_audio must map to BlobPart (type='blob'), got type='{parts[0]['type']}'" + ) + assert parts[0]["modality"] == "audio" + assert parts[0]["content"] == "base64audiodata==" + + span.end() + + def test_mixed_text_blocks_mapped(self, tracer_and_exporter): + """Spec §1: Multiple text blocks → multiple TextPart objects.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "user", + "content": [ + {"type": "text", "text": "First paragraph."}, + {"type": "text", "text": "Second paragraph."}, + ], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 2 + assert parts[0] == {"type": "text", "content": "First paragraph."} + assert parts[1] == {"type": "text", "content": "Second paragraph."} + + span.end() + + def test_plain_string_content_produces_text_part(self, tracer_and_exporter): + """Spec §1: Plain string content → single TextPart.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"role": "user", "content": "Hello"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 1 + assert parts[0] == {"type": "text", "content": "Hello"} + + span.end() + + def test_text_key_is_content_not_text(self, tracer_and_exporter): + """Spec §1: TextPart key is 'content', NOT 'text'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"role": "user", "content": 
"Check key name"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + part = messages[0]["parts"][0] + + assert "content" in part, "TextPart must use 'content' key" + assert "text" not in part, ( + "TextPart must NOT use 'text' key — spec requires 'content'" + ) + + span.end() + + def test_unknown_block_type_preserved_as_generic_part(self, tracer_and_exporter): + """Spec §1: Unknown block types → GenericPart with type preserved.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "user", + "content": [ + {"type": "custom_widget", "widget_id": "w1"}, + ], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + part = messages[0]["parts"][0] + + assert part["type"] == "custom_widget", "Unknown type must be preserved" + + span.end() + + def test_sdk_object_image_url_maps_to_uri_part(self, tracer_and_exporter): + """Spec §1: SDK-object image_url blocks also map to UriPart.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _content_block_to_part, + ) + + url_obj = MagicMock() + url_obj.url = "https://example.com/photo.jpg" + block = MagicMock() + block.type = "image_url" + block.image_url = url_obj + + result = _content_block_to_part(block) + + assert result["type"] == "uri" + assert result["modality"] == "image" + assert result["uri"] == "https://example.com/photo.jpg" + + def test_sdk_object_input_audio_maps_to_blob_part(self, tracer_and_exporter): + """Spec §1: SDK-object input_audio blocks also map to BlobPart.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _content_block_to_part, + ) + + audio_obj = MagicMock() + audio_obj.data = "base64data==" + block = MagicMock() + block.type = "input_audio" + block.input_audio = audio_obj + + result = _content_block_to_part(block) + + assert result["type"] == "blob" + assert result["modality"] == "audio" + assert result["content"] == "base64data==" + + def test_unknown_block_preserves_per_field_structure(self): + """Unknown block types must preserve per-field structure, not json.dumps the whole block.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _dict_block_to_part, + ) + + block = {"type": "file", "file_id": "file_abc123", "filename": "data.csv"} + part = _dict_block_to_part(block) + + assert part["type"] == "file" + assert "file_id" in part, f"Expected 'file_id' in part, got: {part}" + assert part["file_id"] == "file_abc123" + + +# --------------------------------------------------------------------------- +# Spec §1: Assistant text + tool_calls combined +# Ref: "Messages can include both text and tool_call parts" +# --------------------------------------------------------------------------- + +class TestAssistantTextWithToolCalls: + """Lock: assistant messages with both text and tool_calls emit both parts.""" + + def test_text_and_tool_call_both_present(self, tracer_and_exporter): + """Spec §1: text content alongside tool_calls → text + tool_call parts.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "content": "Let me look that 
up.", + "tool_calls": [{ + "id": "call_1", + "function": { + "name": "search", + "arguments": '{"q": "weather"}', + }, + }], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + types = [p["type"] for p in parts] + assert "text" in types, "Missing text part alongside tool_call" + assert "tool_call" in types, "Missing tool_call part" + + text_part = next(p for p in parts if p["type"] == "text") + assert text_part["content"] == "Let me look that up." + + tc_part = next(p for p in parts if p["type"] == "tool_call") + assert tc_part["name"] == "search" + assert isinstance(tc_part["arguments"], dict) + + span.end() + + def test_tool_calls_without_content(self, tracer_and_exporter): + """Spec §1: tool_calls with no text content → only tool_call parts.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "content": None, + "tool_calls": [{ + "id": "call_1", + "function": {"name": "search", "arguments": "{}"}, + }], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 1 + assert parts[0]["type"] == "tool_call" + + span.end() + + +# --------------------------------------------------------------------------- +# Spec §1/§4: Output messages — non-text parts, finish_reason always present +# Ref: "finish_reason in output JSON: required per schema — always set" +# --------------------------------------------------------------------------- + +class TestOutputNonTextParts: + """Lock: output messages handle refusal, reasoning, and finish_reason.""" + + def test_refusal_content_mapped(self, tracer_and_exporter): + """Spec §1: Refusal content → {type: 'text', content: '...'} (standard TextPart).""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = "stop" + + content_item = MagicMock() + content_item.type = "refusal" + content_item.refusal = "I cannot help with that." + content_item.text = None + + output_msg = MagicMock() + output_msg.type = "message" + output_msg.content = [content_item] + output_msg.role = "assistant" + output_msg.name = None + + response.output = [output_msg] + response.usage = None + response.tools = None + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 1 + assert parts[0]["type"] == "refusal" + assert parts[0]["content"] == "I cannot help with that." 
+ + span.end() + + def test_output_finish_reason_always_present_in_json(self, tracer_and_exporter): + """Spec §4: finish_reason key MUST always exist in output JSON (even if unknown).""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = None # Unknown/absent + response.status = None + + content_item = MagicMock() + content_item.type = "output_text" + content_item.text = "Hello" + + output_msg = MagicMock() + output_msg.type = "message" + output_msg.content = [content_item] + output_msg.role = "assistant" + output_msg.name = None + + response.output = [output_msg] + response.usage = None + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + + assert "finish_reason" in messages[0], ( + "finish_reason key must always be present in output JSON per schema" + ) + assert messages[0]["finish_reason"] == "" + + span.end() + + def test_output_finish_reason_mapped_value(self, tracer_and_exporter): + """Spec §4: finish_reason in JSON uses mapped OTel value.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = "tool_calls" # OpenAI plural + + content_item = MagicMock() + content_item.type = "output_text" + content_item.text = "Calling tool" + + output_msg = MagicMock() + output_msg.type = "message" + output_msg.content = [content_item] + output_msg.role = "assistant" + output_msg.name = None + + response.output = [output_msg] + response.usage = None + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + + # Spec §4: tool_calls → tool_call (singular) + assert messages[0]["finish_reason"] == "tool_call" + + span.end() + + def test_reasoning_content_mapped(self, tracer_and_exporter): + """Spec §1: Reasoning content → {type: 'reasoning', content: '...'}.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = "stop" + + content_item = MagicMock() + content_item.type = "reasoning" + content_item.text = None + + summary_item = MagicMock() + summary_item.text = "The user wants weather info" + content_item.summary = [summary_item] + + output_msg = MagicMock() + output_msg.type = "message" + output_msg.content = [content_item] + output_msg.role = "assistant" + output_msg.name = None + + response.output = [output_msg] + response.usage = None + + _extract_response_attributes(span, response, trace_content=True) + + raw = 
span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + parts = messages[0]["parts"] + + assert len(parts) == 1 + assert parts[0]["type"] == "reasoning" + assert "weather" in parts[0]["content"] + + span.end() + + def test_reasoning_summary_dict_items_extract_text(self, tracer_and_exporter): + """Dict-form reasoning summary items must extract 'text' field, not dump repr.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + from types import SimpleNamespace + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + content_item = SimpleNamespace( + type="reasoning", + summary=[{"text": "The model considered options."}], + ) + output = SimpleNamespace( + type="message", content=[content_item], role="assistant", + ) + response = SimpleNamespace( + temperature=None, max_output_tokens=None, top_p=None, + model=None, id=None, frequency_penalty=None, + finish_reason=None, status="completed", + output=[output], usage=None, + ) + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + reasoning_parts = [ + p for msg in messages for p in msg.get("parts", []) + if p.get("type") == "reasoning" + ] + assert len(reasoning_parts) >= 1 + assert "{'text'" not in reasoning_parts[0]["content"], ( + "Dict repr leaked into reasoning content" + ) + assert "The model considered options" in reasoning_parts[0]["content"] + + span.end() + + +# --------------------------------------------------------------------------- +# Spec §2: Roles — only OTel-valid roles emitted +# --------------------------------------------------------------------------- + +class TestRoles: + """Lock: only valid OTel roles (system, user, assistant, tool) emitted.""" + + def test_system_role_preserved(self, tracer_and_exporter): + """Spec §2: system role kept inline in input messages for OpenAI.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "Hi"}, + ] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + + roles = [m["role"] for m in messages] + assert "system" in roles + assert "user" in roles + + span.end() + + def test_developer_role_preserved(self, tracer_and_exporter): + """Spec §2: provider-specific roles like 'developer' are allowed.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"role": "developer", "content": "Be concise."}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + + assert messages[0]["role"] == "developer" + + span.end() + + +# --------------------------------------------------------------------------- +# Spec §4: finish_reasons top-level span attribute — comprehensive +# --------------------------------------------------------------------------- + +class TestFinishReasonTopLevel: + """Lock: gen_ai.response.finish_reasons as top-level span attribute.""" + + def test_finish_reasons_not_gated_by_content(self, 
tracer_and_exporter): + """Spec §4: finish_reasons set even when should_send_prompts()=False.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = "stop" + response.output = [] + response.usage = None + + _extract_response_attributes(span, response, trace_content=False) + + # finish_reasons is metadata, NOT content — must be set + assert GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS in span.attributes + assert span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ( + "stop", + ) + # But output messages must NOT be set + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in span.attributes + + span.end() + + def test_none_finish_reason_omits_attribute(self, tracer_and_exporter): + """Spec §4: None finish_reason → attribute omitted, NOT fabricated.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MagicMock() + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.model = "gpt-4o" + response.id = "resp_1" + response.frequency_penalty = None + response.finish_reason = None + response.output = [] + response.usage = None + + _extract_response_attributes(span, response, trace_content=True) + + assert GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS not in span.attributes + + span.end() + + +# --------------------------------------------------------------------------- +# Spec §1: _msg_to_dict with SDK objects (not just dicts) +# --------------------------------------------------------------------------- + +class TestMsgToDict: + """Lock: _msg_to_dict normalizes SDK objects to plain dicts.""" + + def test_sdk_object_normalized(self): + """Spec §1: SDK objects with attributes are normalized to dicts.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _msg_to_dict, + ) + + obj = MagicMock() + obj.role = "user" + obj.content = "Hello" + # Only set some attrs + del obj.tool_call_id + del obj.tool_calls + + result = _msg_to_dict(obj) + assert isinstance(result, dict) + assert result["role"] == "user" + assert result["content"] == "Hello" + + def test_dict_passed_through(self): + """Spec §1: dict messages are returned as-is.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _msg_to_dict, + ) + + msg = {"role": "user", "content": "Hello"} + result = _msg_to_dict(msg) + assert result is msg # Same reference, not a copy + + +# --------------------------------------------------------------------------- +# Spec §1: Tool call round-trip (request → response) +# --------------------------------------------------------------------------- + +class TestToolCallRoundTrip: + """Lock: tool_call → tool_call_response forms a complete round trip.""" + + def test_full_round_trip(self, tracer_and_exporter): + """Spec §1: tool_call request and response correlate via id.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [ + { + "role": "assistant", + "tool_calls": [{ + "id": "call_abc", + 
"function": { + "name": "get_weather", + "arguments": '{"city": "NYC"}', + }, + }], + }, + { + "role": "tool", + "tool_call_id": "call_abc", + "content": '{"temp": 72}', + }, + ] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + + # Message 1: assistant with tool_call + assert messages[0]["role"] == "assistant" + tc = messages[0]["parts"][0] + assert tc["type"] == "tool_call" + assert tc["id"] == "call_abc" + assert tc["name"] == "get_weather" + assert tc["arguments"] == {"city": "NYC"} + + # Message 2: tool response correlating via same id + assert messages[1]["role"] == "tool" + resp = messages[1]["parts"][0] + assert resp["type"] == "tool_call_response" + assert resp["id"] == "call_abc" + assert resp["response"] == '{"temp": 72}' + + span.end() + + +# --------------------------------------------------------------------------- +# Spec: _convert_agents_sdk_message unknown type returns (None, []) +# --------------------------------------------------------------------------- + +class TestAgentsSdkUnknownType: + """Lock: unknown Agents SDK message types are silently skipped.""" + + def test_unknown_type_skipped(self, tracer_and_exporter): + """Unknown Agents SDK type must not produce a message.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"type": "unknown_sdk_type", "data": "foo"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + # No messages should be set (unknown type skipped) + assert GenAIAttributes.GEN_AI_INPUT_MESSAGES not in span.attributes + + span.end() + + +# --------------------------------------------------------------------------- +# P1-1: input_text / output_text blocks must map to TextPart in input path +# --------------------------------------------------------------------------- + +class TestInputTextOutputTextMapping: + """Verify Responses API input_text/output_text blocks map to TextPart.""" + + def test_dict_input_text_maps_to_text_part(self): + """input_text dict block must produce {type: 'text', content: '...'}.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _dict_block_to_part, + ) + + block = {"type": "input_text", "text": "Hello from user"} + result = _dict_block_to_part(block) + + assert result["type"] == "text", ( + f"input_text should map to type='text', got '{result['type']}'" + ) + assert result["content"] == "Hello from user", ( + f"input_text content should be the text value, got '{result.get('content')}'" + ) + assert "data" not in result, ( + "input_text should NOT fall through to generic path with 'data' key" + ) + + def test_dict_output_text_maps_to_text_part(self): + """output_text dict block must produce {type: 'text', content: '...'}.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _dict_block_to_part, + ) + + block = {"type": "output_text", "text": "Here is my response"} + result = _dict_block_to_part(block) + + assert result["type"] == "text", ( + f"output_text should map to type='text', got '{result['type']}'" + ) + assert result["content"] == "Here is my response" + assert "data" not in result + + def test_object_input_text_maps_to_text_part(self): + """input_text SDK object must produce {type: 'text', content: '...'}.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _object_block_to_part, + ) + 
+ block = MagicMock() + block.type = "input_text" + block.text = "Hello from user" + + result = _object_block_to_part(block) + + assert result["type"] == "text", ( + f"input_text object should map to type='text', got '{result['type']}'" + ) + assert result["content"] == "Hello from user" + assert "data" not in result + + def test_object_output_text_maps_to_text_part(self): + """output_text SDK object must produce {type: 'text', content: '...'}.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _object_block_to_part, + ) + + block = MagicMock() + block.type = "output_text" + block.text = "Here is my response" + + result = _object_block_to_part(block) + + assert result["type"] == "text", ( + f"output_text object should map to type='text', got '{result['type']}'" + ) + assert result["content"] == "Here is my response" + assert "data" not in result + + def test_input_text_in_full_input_message_pipeline(self, tracer_and_exporter): + """input_text blocks in chat messages must produce valid TextPart in gen_ai.input.messages.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [ + {"role": "user", "content": [{"type": "input_text", "text": "Hello, can you help me?"}]}, + {"role": "assistant", "content": [{"type": "output_text", "text": "Of course!"}]}, + ] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + + # User message: input_text → TextPart + user_parts = messages[0]["parts"] + assert user_parts[0]["type"] == "text", ( + f"input_text in pipeline should be type='text', got '{user_parts[0]['type']}'" + ) + assert user_parts[0]["content"] == "Hello, can you help me?" + + # Assistant message: output_text → TextPart + assistant_parts = messages[1]["parts"] + assert assistant_parts[0]["type"] == "text", ( + f"output_text in pipeline should be type='text', got '{assistant_parts[0]['type']}'" + ) + assert assistant_parts[0]["content"] == "Of course!" 
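+        # Both Responses API variants (input_text / output_text) collapse to
+        # the same TextPart shape, so neither raw type name leaks into parts.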
+ + span.end() + + +# --------------------------------------------------------------------------- +# P2 (was P1-2): gen_ai.request.model set at span start from span_data.model +# --------------------------------------------------------------------------- + +class TestRequestModelAtSpanStart: + """Verify gen_ai.request.model is set at span creation from span_data.""" + + def test_request_model_set_from_span_data(self, tracer_and_exporter): + """gen_ai.request.model must be set at span start from span_data.model.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-reqmodel-1" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o-mini", model_config={}) + span = MockAgentSpan(gen_data, trace_id="test-reqmodel-1") + + proc.on_span_start(span) + # Don't call on_span_end — check span attributes right after creation + otel_span = proc._otel_spans.get(span) + assert otel_span is not None, "OTel span should exist after on_span_start" + + attrs = dict(otel_span.attributes) + assert GenAIAttributes.GEN_AI_REQUEST_MODEL in attrs, ( + f"gen_ai.request.model should be set at span start, got keys: {list(attrs.keys())}" + ) + assert attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "gpt-4o-mini" + + # Clean up + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + def test_request_model_fallback_when_response_model_missing(self, tracer_and_exporter): + """gen_ai.request.model must persist even if response.model is None.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import GenerationSpanData + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + mock_trace = MagicMock() + mock_trace.trace_id = "test-reqmodel-2" + proc.on_trace_start(mock_trace) + + gen_data = GenerationSpanData(model="gpt-4o", model_config={}) + # Simulate a response with no model + gen_data.response = MagicMock() + gen_data.response.model = None + gen_data.response.id = None + gen_data.response.temperature = None + gen_data.response.max_output_tokens = None + gen_data.response.top_p = None + gen_data.response.frequency_penalty = None + gen_data.response.finish_reason = None + gen_data.response.output = [] + gen_data.response.usage = None + gen_data.response.tools = [] + + span = MockAgentSpan(gen_data, trace_id="test-reqmodel-2") + proc.on_span_start(span) + proc.on_span_end(span) + proc.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + response_span = next((s for s in spans if s.name == "openai.response"), None) + assert response_span is not None + + attrs = dict(response_span.attributes) + assert GenAIAttributes.GEN_AI_REQUEST_MODEL in attrs, ( + "gen_ai.request.model should be set even when response.model is None" + ) + assert attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "gpt-4o" + + +# --------------------------------------------------------------------------- +# P2-1: Tool-call response parts include response key even when content=None +# --------------------------------------------------------------------------- + +class TestToolResponseNoneContent: + """Verify tool_call_response includes response key when content is None.""" + + def test_tool_response_part_has_response_key_when_none(self): + """tool_call_response must include 
'response' key even when content is None.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _build_tool_response_part, + ) + + part = _build_tool_response_part("call_123", None) + + assert part["type"] == "tool_call_response" + assert part["id"] == "call_123" + assert "response" in part, ( + "tool_call_response must include 'response' key even when content is None" + ) + assert part["response"] == "" + + def test_tool_response_part_has_response_key_when_present(self): + """tool_call_response with content must include 'response' key.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _build_tool_response_part, + ) + + part = _build_tool_response_part("call_456", "72 degrees") + + assert part["type"] == "tool_call_response" + assert part["id"] == "call_456" + assert part["response"] == "72 degrees" + + def test_tool_response_none_content_in_full_pipeline(self, tracer_and_exporter): + """Tool message with content=None must still produce response key in gen_ai.input.messages.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [ + {"role": "tool", "tool_call_id": "call_789", "content": None}, + ] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + + tool_part = messages[0]["parts"][0] + assert tool_part["type"] == "tool_call_response" + assert "response" in tool_part, ( + "tool_call_response must include 'response' key even with None content" + ) + + span.end() + + def test_structured_dict_result_preserved(self): + """Dict tool result should be kept as-is, not stringified.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _build_tool_response_part, + ) + + part = _build_tool_response_part("call_1", {"status": "ok", "count": 5}) + assert isinstance(part["response"], dict) + assert part["response"] == {"status": "ok", "count": 5} + + def test_structured_list_result_preserved(self): + """List tool result should be kept as-is, not stringified.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _build_tool_response_part, + ) + + part = _build_tool_response_part("call_2", [1, 2, 3]) + assert isinstance(part["response"], list) + + +# --------------------------------------------------------------------------- +# P2-2: Realtime LLM spans set response metadata +# --------------------------------------------------------------------------- + +class TestRealtimeResponseMetadata: + """Verify realtime LLM spans set recommended response attributes.""" + + def test_realtime_span_sets_response_model(self, tracer_and_exporter): + """Realtime LLM spans should set gen_ai.response.model.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + tracer, exporter = tracer_and_exporter + state = RealtimeTracingState(tracer) + state.model_name = "gpt-4o-realtime-preview-2024-12-17" + + # Create a parent span for context + parent = tracer.start_span("parent") + state.pending_prompts.append(("user", "Hello")) + state.prompt_start_time = 1000 + + state.create_llm_span("Hi there!") + parent.end() + + finished = exporter.get_finished_spans() + rt_span = next((s for s in finished if s.name == "openai.realtime"), None) + assert rt_span is not None + + attrs = dict(rt_span.attributes) + assert 
GenAIAttributes.GEN_AI_RESPONSE_MODEL in attrs, ( + "Realtime LLM span should set gen_ai.response.model" + ) + + def test_realtime_span_sets_finish_reason_empty(self, tracer_and_exporter): + """Realtime LLM spans should use '' finish_reason, NOT fabricate 'stop'.""" + from opentelemetry.instrumentation.openai_agents._realtime_wrappers import ( + RealtimeTracingState, + ) + + tracer, exporter = tracer_and_exporter + state = RealtimeTracingState(tracer) + state.model_name = "gpt-4o-realtime-preview" + + parent = tracer.start_span("parent") + state.pending_prompts.append(("user", "Hello")) + state.prompt_start_time = 1000 + + with patch( + "opentelemetry.instrumentation.openai_agents._realtime_wrappers.should_send_prompts", + return_value=True, + ): + state.create_llm_span("Hi there!") + + parent.end() + + finished = exporter.get_finished_spans() + rt_span = next((s for s in finished if s.name == "openai.realtime"), None) + assert rt_span is not None + + raw = rt_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + assert messages[0]["finish_reason"] == "", ( + f"Realtime finish_reason should be '' (not fabricated), got '{messages[0].get('finish_reason')}'" + ) + + +# --------------------------------------------------------------------------- +# F1: BlobPart must use "content" key, NOT "data" +# OTel spec: BlobPart.required = ["type", "modality", "content"] +# Upstream refs: opentelemetry-python-contrib Blob dataclass uses "content", +# Bedrock/OpenAI instrumentations use "content" for blob parts. +# --------------------------------------------------------------------------- + +class TestBlobPartContentKey: + """F1: BlobPart must use 'content' key per OTel GenAI semconv.""" + + def test_dict_input_audio_blob_uses_content_key(self, tracer_and_exporter): + """Dict input_audio block must produce BlobPart with 'content', NOT 'data'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _dict_block_to_part, + ) + + block = { + "type": "input_audio", + "input_audio": {"data": "base64audiodata==", "format": "wav"}, + } + result = _dict_block_to_part(block) + + assert result["type"] == "blob" + assert result["modality"] == "audio" + assert "content" in result, ( + "BlobPart must use 'content' key per OTel spec, not 'data'" + ) + assert "data" not in result, ( + "BlobPart must NOT use 'data' key — spec requires 'content'" + ) + assert result["content"] == "base64audiodata==" + + def test_object_input_audio_blob_uses_content_key(self, tracer_and_exporter): + """SDK-object input_audio block must produce BlobPart with 'content', NOT 'data'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _object_block_to_part, + ) + + audio_obj = MagicMock() + audio_obj.data = "base64data==" + block = MagicMock() + block.type = "input_audio" + block.input_audio = audio_obj + + result = _object_block_to_part(block) + + assert result["type"] == "blob" + assert result["modality"] == "audio" + assert "content" in result, ( + "BlobPart must use 'content' key per OTel spec, not 'data'" + ) + assert "data" not in result, ( + "BlobPart must NOT use 'data' key — spec requires 'content'" + ) + assert result["content"] == "base64data==" + + def test_blob_content_key_in_full_pipeline(self, tracer_and_exporter): + """BlobPart 'content' key must survive through the full input message pipeline.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter 
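+        # "YXVkaW9kYXRh" below is base64 for "audiodata"; it must reach the
+        # exported BlobPart unchanged under the "content" key.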
+
+        span = tracer.start_span("test")
+
+        input_data = [{
+            "role": "user",
+            "content": [
+                {
+                    "type": "input_audio",
+                    "input_audio": {"data": "YXVkaW9kYXRh", "format": "mp3"},
+                },
+            ],
+        }]
+        _extract_prompt_attributes(span, input_data, trace_content=True)
+
+        raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES)
+        messages = json.loads(raw)
+        blob_part = messages[0]["parts"][0]
+
+        assert blob_part["type"] == "blob"
+        assert "content" in blob_part, "BlobPart must use 'content' in pipeline output"
+        assert "data" not in blob_part, "BlobPart must NOT use 'data' in pipeline output"
+        assert blob_part["content"] == "YXVkaW9kYXRh"
+
+        span.end()
+
+
+# ---------------------------------------------------------------------------
+# F2: gen_ai.tool.call.arguments/result must use json.dumps(), NOT str()
+# str(dict) produces Python repr with single quotes — not valid JSON.
+# All other structured attributes in this package use json.dumps().
+# ---------------------------------------------------------------------------
+
+class TestToolCallArgumentsSerialization:
+    """F2: Tool call arguments/result must be valid JSON, not Python repr."""
+
+    def test_dict_input_serialized_as_json(self, tracer_and_exporter):
+        """Dict tool input must be serialized with json.dumps(), not str()."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            OpenTelemetryTracingProcessor,
+        )
+        from agents import FunctionSpanData
+
+        tracer, _ = tracer_and_exporter
+        proc = OpenTelemetryTracingProcessor(tracer)
+
+        func_data = FunctionSpanData(
+            name="get_weather",
+            input={"city": "London"},
+            output="72F",
+        )
+        otel_span = proc._start_function_span(func_data, parent_context=None)
+        proc._end_function_span(otel_span, func_data, trace_content=True)
+        otel_span.end()
+
+        from opentelemetry.semconv._incubating.attributes import (
+            gen_ai_attributes as GenAIAttributes,
+        )
+        raw_args = otel_span.attributes[GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS]
+        # Must be valid JSON (double quotes), NOT Python repr (single quotes)
+        assert '"city"' in raw_args, (
+            f"Expected JSON with double quotes, got: {raw_args}"
+        )
+        assert "'" not in raw_args, (
+            f"str() produces single quotes; expected json.dumps(): {raw_args}"
+        )
+        # Must parse as valid JSON
+        parsed = json.loads(raw_args)
+        assert parsed == {"city": "London"}
+
+    def test_dict_output_serialized_as_json(self, tracer_and_exporter):
+        """Dict tool output must be serialized with json.dumps(), not str()."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            OpenTelemetryTracingProcessor,
+        )
+        from agents import FunctionSpanData
+
+        tracer, _ = tracer_and_exporter
+        proc = OpenTelemetryTracingProcessor(tracer)
+
+        func_data = FunctionSpanData(
+            name="get_weather",
+            input="query",
+            output={"temp": 72, "unit": "F"},
+        )
+        otel_span = proc._start_function_span(func_data, parent_context=None)
+        proc._end_function_span(otel_span, func_data, trace_content=True)
+        otel_span.end()
+
+        from opentelemetry.semconv._incubating.attributes import (
+            gen_ai_attributes as GenAIAttributes,
+        )
+        raw_result = otel_span.attributes[GenAIAttributes.GEN_AI_TOOL_CALL_RESULT]
+        parsed = json.loads(raw_result)
+        assert parsed == {"temp": 72, "unit": "F"}
+
+    def test_string_input_kept_as_is(self, tracer_and_exporter):
+        """String tool input must be kept as-is (already a string)."""
+        from opentelemetry.instrumentation.openai_agents._hooks import (
+            OpenTelemetryTracingProcessor,
+        )
+        from agents import FunctionSpanData
+
+        
tracer, _ = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + func_data = FunctionSpanData( + name="echo", + input='{"already": "json"}', + output="done", + ) + otel_span = proc._start_function_span(func_data, parent_context=None) + proc._end_function_span(otel_span, func_data, trace_content=True) + otel_span.end() + + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + raw_args = otel_span.attributes[GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS] + assert raw_args == '{"already": "json"}' + + def test_list_output_serialized_as_json(self, tracer_and_exporter): + """List tool output must be serialized with json.dumps().""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + from agents import FunctionSpanData + + tracer, _ = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + func_data = FunctionSpanData( + name="search", + input="query", + output=["result1", "result2"], + ) + otel_span = proc._start_function_span(func_data, parent_context=None) + proc._end_function_span(otel_span, func_data, trace_content=True) + otel_span.end() + + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + raw_result = otel_span.attributes[GenAIAttributes.GEN_AI_TOOL_CALL_RESULT] + parsed = json.loads(raw_result) + assert parsed == ["result1", "result2"] + + +# --------------------------------------------------------------------------- +# F3: Responses API status → finish_reason mapping +# The Responses API uses "status" ("completed"/"failed"/"cancelled"/"incomplete"), +# NOT "finish_reason". Must map status to OTel finish reasons. +# Upstream ref: opentelemetry-python-contrib _finish_reason_from_status() +# --------------------------------------------------------------------------- + +class TestResponsesApiStatusMapping: + """F3: Map Responses API 'status' to finish_reason when finish_reason absent.""" + + def test_completed_status_maps_to_stop(self, tracer_and_exporter): + """Responses API status='completed' must map to finish_reason='stop'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + output=[], + model="gpt-4o", + usage=MockUsage(), + ) + # Responses API: no finish_reason, but has status + del response.finish_reason + response.status = "completed" + + _extract_response_attributes(span, response, trace_content=True) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert finish_reasons is not None, ( + "status='completed' must produce gen_ai.response.finish_reasons" + ) + assert "stop" in finish_reasons, ( + f"status='completed' must map to 'stop', got {finish_reasons}" + ) + + span.end() + + def test_failed_status_maps_to_error(self, tracer_and_exporter): + """Responses API status='failed' must map to OTel finish_reason='error'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + output=[], + model="gpt-4o", + usage=MockUsage(), + ) + del response.finish_reason + response.status = "failed" + + _extract_response_attributes(span, response, trace_content=True) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert 
finish_reasons is not None + assert "error" in finish_reasons + + span.end() + + def test_cancelled_response_preserves_cancelled_finish_reason(self, tracer_and_exporter): + """Responses API status='cancelled' must preserve 'cancelled', not remap to 'error'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + output=[], + model="gpt-4o", + usage=MockUsage(), + ) + del response.finish_reason + response.status = "cancelled" + + _extract_response_attributes(span, response, trace_content=True) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert finish_reasons is not None + assert "cancelled" in finish_reasons + + span.end() + + def test_incomplete_response_preserves_incomplete_finish_reason(self, tracer_and_exporter): + """Responses API status='incomplete' must preserve 'incomplete', not remap to 'length'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + output=[], + model="gpt-4o", + usage=MockUsage(), + ) + del response.finish_reason + response.status = "incomplete" + + _extract_response_attributes(span, response, trace_content=True) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert finish_reasons is not None + assert "incomplete" in finish_reasons + + span.end() + + def test_status_not_used_when_finish_reason_present(self, tracer_and_exporter): + """When finish_reason is present, status must NOT override it.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + response = MockResponse( + output=[], + model="gpt-4o", + usage=MockUsage(), + finish_reason="stop", + ) + response.status = "completed" # Both present — finish_reason wins + + _extract_response_attributes(span, response, trace_content=True) + + finish_reasons = span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert finish_reasons is not None + assert "stop" in finish_reasons + + span.end() + + def test_completed_status_maps_to_stop_in_output_messages(self, tracer_and_exporter): + """status='completed' → output message finish_reason='stop'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_response_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + content_item = MockContentItem(text="Done") + output_item = MockResponseOutput(role="assistant", content=[content_item]) + response = MockResponse( + output=[output_item], + model="gpt-4o", + usage=MockUsage(), + ) + del response.finish_reason + response.status = "completed" + + _extract_response_attributes(span, response, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + messages = json.loads(raw) + assert messages[0]["finish_reason"] == "stop", ( + f"status='completed' should produce finish_reason='stop' in output, " + f"got '{messages[0]['finish_reason']}'" + ) + + span.end() + + +# --------------------------------------------------------------------------- +# Missing finish_reason mapping tests +# --------------------------------------------------------------------------- + +class TestFinishReasonMappingCompleteness: + """Cover 
finish_reason mappings missing from original test suite.""" + + def test_length_mapping(self): + """'length' must map to 'length'.""" + from opentelemetry.instrumentation.openai_agents._hooks import _map_finish_reason + assert _map_finish_reason("length") == "length" + + def test_content_filter_mapping(self): + """'content_filter' must map to 'content_filter'.""" + from opentelemetry.instrumentation.openai_agents._hooks import _map_finish_reason + assert _map_finish_reason("content_filter") == "content_filter" + + def test_error_mapping(self): + """'error' must map to 'error'.""" + from opentelemetry.instrumentation.openai_agents._hooks import _map_finish_reason + assert _map_finish_reason("error") == "error" + + def test_unknown_finish_reason_passes_through(self): + """Unknown/new finish reason values must pass through unchanged.""" + from opentelemetry.instrumentation.openai_agents._hooks import _map_finish_reason + assert _map_finish_reason("some_new_reason") == "some_new_reason" + + def test_function_call_maps_to_tool_call(self): + """Legacy 'function_call' must map to 'tool_call'.""" + from opentelemetry.instrumentation.openai_agents._hooks import _map_finish_reason + assert _map_finish_reason("function_call") == "tool_call" + + +# --------------------------------------------------------------------------- +# P2-3: Agents SDK function_call id — omit when absent +# --------------------------------------------------------------------------- + +class TestAgentsSdkFunctionCallIdOmitted: + """P2-3: Agents SDK function_call with no 'id' must emit no 'id' key in the part. + + The id field is optional (OTel ToolCallRequestPart.id defaults null). + Emitting empty-string id breaks tool_call / tool_call_response correlation. + """ + + def test_agents_sdk_function_call_no_id_omits_id(self, tracer_and_exporter): + """Agents SDK function_call without an 'id' key must not emit 'id' in the part.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "type": "function_call", + "name": "search", + "arguments": '{"q": "test"}', + # Intentionally no "id" key + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + part = messages[0]["parts"][0] + + assert part["type"] == "tool_call" + assert "id" not in part or part["id"], ( + f"id must be absent or non-empty when no id in source, got: {part}" + ) + span.end() + + def test_agents_sdk_function_call_with_id_still_included(self, tracer_and_exporter): + """Sanity: when 'id' is present, it must appear in the part.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "type": "function_call", + "id": "fc_1", + "name": "search", + "arguments": '{"q": "test"}', + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + messages = json.loads(raw) + part = messages[0]["parts"][0] + + assert part.get("id") == "fc_1" + span.end() + + +# --------------------------------------------------------------------------- +# P3-4 (updated): ResponseSpanData → gen_ai.operation.name = "chat" +# --------------------------------------------------------------------------- + +class 
TestResponseSpanDataOperationName: + """ResponseSpanData (Responses API) must use operation.name='chat'. + + Both GenerationSpanData (Chat Completions) and ResponseSpanData (Responses + API) are chat completion surfaces. Using 'generate_content' (a GCP/Gemini + well-known value) for an OpenAI span is incorrect. + """ + + def test_response_span_data_uses_chat(self, tracer_and_exporter): + """_start_generation_span with ResponseSpanData must emit 'chat'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + span_data = ResponseSpanData(input=[], response=None) + span_data.model = "gpt-4o" + + otel_span = proc._start_generation_span(parent_context=None, span_data=span_data) + attrs = dict(otel_span.attributes) + otel_span.end() + + assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat", ( + f"ResponseSpanData must use 'chat', got '{attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}'" + ) + + def test_generation_span_data_keeps_chat(self, tracer_and_exporter): + """_start_generation_span with GenerationSpanData (or no span_data) must keep 'chat'.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + + tracer, exporter = tracer_and_exporter + proc = OpenTelemetryTracingProcessor(tracer) + + # No span_data → Chat Completions default + otel_span = proc._start_generation_span(parent_context=None, span_data=None) + attrs = dict(otel_span.attributes) + otel_span.end() + + assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat" + + +# --------------------------------------------------------------------------- +# P3-6: tool_call part with empty name — name key must still be emitted +# --------------------------------------------------------------------------- + +class TestToolCallPartEmptyName: + """P3-6: OTel ToolCallRequestPart.name is required. + + The prior `if tc.get("name"):` guard silently dropped the key when name="" + (empty string is falsy). The key must be emitted even for empty strings + so that downstream consumers can observe the malformed call rather than + getting a part with no name at all. + """ + + def test_empty_string_tool_name_emits_name_key(self, tracer_and_exporter): + """Tool call with name='' must produce a part with name key present.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_prompt_attributes, + ) + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{ + "id": "call_x", + "function": { + "name": "", + "arguments": "{}", + } + }] + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + part = messages[0]["parts"][0] + + assert part["type"] == "tool_call" + assert "name" in part, ( + "name key must be present even when tool name is empty string — " + "required field per OTel ToolCallRequestPart schema" + ) + + def test_missing_tool_name_falls_back_to_empty_string(self, tracer_and_exporter): + """Tool call with no name must emit name='' rather than omitting the key. + + OTel ToolCallRequestPart requires 'name'; omitting it produces a schema-invalid part. 
+ """ + from opentelemetry.instrumentation.openai_agents._hooks import _tool_call_to_part + + part = _tool_call_to_part({"type": "tool_call"}) # no name key in source + assert "name" in part, "name must always be present on tool_call parts" + assert part["name"] == "" + + +# --------------------------------------------------------------------------- +# tool_call_response parts must omit 'id' when call_id is absent +# --------------------------------------------------------------------------- + +class TestToolResponsePartIdOmitted: + """tool_call_response parts must omit 'id' when call_id is absent or None. + + OTel ToolCallResponsePart: id is optional (absent vs. null are distinct in + JSON Schema). Emitting "id": null causes schema violations and can break + consumer correlation logic that checks for key presence. + """ + + def test_build_tool_response_part_omits_id_when_call_id_is_none(self): + """_build_tool_response_part(None, ...) must not include an 'id' key.""" + from opentelemetry.instrumentation.openai_agents._hooks import _build_tool_response_part + + part = _build_tool_response_part(None, "tool result") + + assert "id" not in part, ( + f"'id' must be absent when call_id is None, got {part!r}" + ) + assert part["type"] == "tool_call_response" + assert part["response"] == "tool result" + + def test_build_tool_response_part_includes_id_when_call_id_present(self): + """_build_tool_response_part with a real call_id must include 'id'.""" + from opentelemetry.instrumentation.openai_agents._hooks import _build_tool_response_part + + part = _build_tool_response_part("call_abc", "output") + + assert part["id"] == "call_abc" + + def test_agents_sdk_function_call_output_without_call_id_omits_id(self, tracer_and_exporter): + """Agents SDK function_call_output message with no call_id must omit id from the part.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"type": "function_call_output", "output": "the result"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + msgs = json.loads(raw) + assert len(msgs) == 1 + part = msgs[0]["parts"][0] + assert part["type"] == "tool_call_response" + assert "id" not in part, ( + f"'id' must be absent (not null) when call_id is missing, got {part!r}" + ) + span.end() + + def test_agents_sdk_function_call_output_with_call_id_includes_id(self, tracer_and_exporter): + """Agents SDK function_call_output with a call_id must include 'id' in the part.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"type": "function_call_output", "call_id": "call_99", "output": "done"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + msgs = json.loads(raw) + part = msgs[0]["parts"][0] + assert part["id"] == "call_99" + span.end() + + +# --------------------------------------------------------------------------- +# Input messages with empty parts must be excluded +# --------------------------------------------------------------------------- + +class TestEmptyPartsExcluded: + """Messages that produce no parts must not appear in gen_ai.input.messages. 
+ + A message with a role but no content and no tool_calls yields parts=[]. + Emitting {"role": "assistant", "parts": []} adds noise and may confuse + consumers that assume each message carries at least one part. + """ + + def test_message_with_no_content_and_no_tool_calls_excluded(self, tracer_and_exporter): + """A role-only message (no content, no tool_calls) must be excluded.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{"role": "assistant"}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + if raw is not None: + msgs = json.loads(raw) + for msg in msgs: + assert msg.get("parts"), ( + f"Message with empty parts must be excluded, got {msg!r}" + ) + span.end() + + def test_empty_message_excluded_valid_message_kept(self, tracer_and_exporter): + """Only the valid message is emitted when mixed with an empty-parts message.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [ + {"role": "assistant"}, # no content → parts=[] + {"role": "user", "content": "Hello"}, # valid + ] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + msgs = json.loads(raw) + assert len(msgs) == 1, f"Expected 1 message (empty-parts excluded), got {len(msgs)}: {msgs}" + assert msgs[0]["role"] == "user" + span.end() + + def test_content_none_assistant_with_tool_calls_still_emitted(self, tracer_and_exporter): + """An assistant message with tool_calls but no text content must still be emitted.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "content": None, + "tool_calls": [{"id": "c1", "function": {"name": "search", "arguments": "{}"}}], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + msgs = json.loads(raw) + assert len(msgs) == 1 + assert msgs[0]["parts"][0]["type"] == "tool_call" + + +# --------------------------------------------------------------------------- +# tool_call part must always carry a name (required by OTel ToolCallRequestPart) +# --------------------------------------------------------------------------- + +class TestToolCallNameAlwaysPresent: + """Every tool_call part in gen_ai.input.messages must include a 'name' field. + + OTel ToolCallRequestPart requires 'name'. When the upstream SDK object or + dict does not provide one, the instrumentation must fall back to an empty + string rather than silently omitting the key. 
+ """ + + def test_tool_call_with_no_name_in_function_wrapper_gets_empty_name(self, tracer_and_exporter): + """A tool_call whose function wrapper has no name must produce name='' not a missing key.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{"id": "c1", "function": {"arguments": '{"x": 1}'}}], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + msgs = json.loads(raw) + part = msgs[0]["parts"][0] + assert part["type"] == "tool_call" + assert "name" in part, f"'name' must always be present on tool_call parts, got: {part}" + span.end() + + def test_tool_call_with_none_name_attribute_gets_empty_name(self, tracer_and_exporter): + """A tool_call object whose name attribute resolves to None must produce name=''.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + from types import SimpleNamespace + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + tool_call = SimpleNamespace(id="c2", name=None, arguments="{}") + input_data = [{"role": "assistant", "tool_calls": [tool_call]}] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + msgs = json.loads(raw) + part = msgs[0]["parts"][0] + assert part["type"] == "tool_call" + assert "name" in part, f"'name' must always be present on tool_call parts, got: {part}" + span.end() + + def test_tool_call_with_valid_name_preserves_name(self, tracer_and_exporter): + """Sanity: a tool_call with a proper name must still emit that name.""" + from opentelemetry.instrumentation.openai_agents._hooks import _extract_prompt_attributes + + tracer, _ = tracer_and_exporter + span = tracer.start_span("test") + + input_data = [{ + "role": "assistant", + "tool_calls": [{"id": "c3", "function": {"name": "get_weather", "arguments": '{"city": "NYC"}'}}], + }] + _extract_prompt_attributes(span, input_data, trace_content=True) + + raw = span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + msgs = json.loads(raw) + part = msgs[0]["parts"][0] + assert part["name"] == "get_weather" + span.end() + span.end() diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_tracing_processor.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_tracing_processor.py new file mode 100644 index 0000000000..442a98d595 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai-agents/tests/test_tracing_processor.py @@ -0,0 +1,1060 @@ +""" +TDD tests for on_span_start / on_span_end refactoring. + +Tests target the extracted helper methods: + on_span_start handlers: + _start_agent_span, _start_handoff_span, _start_function_span, + _start_generation_span, _start_realtime_span + on_span_end helpers: + _extract_tool_definitions (pure function) + _end_generation_span (method) + _set_realtime_io_attributes (method) + +These tests are written BEFORE the implementation (TDD). 
+""" + +import json +import pytest +from unittest.mock import MagicMock +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.trace import SpanKind +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, +) +from opentelemetry.semconv_ai import SpanAttributes, TraceloopSpanKindValues + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def tracer_and_exporter(): + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + return provider.get_tracer("test-refactor"), exporter + + +@pytest.fixture +def processor(tracer_and_exporter): + from opentelemetry.instrumentation.openai_agents._hooks import ( + OpenTelemetryTracingProcessor, + ) + tracer, _ = tracer_and_exporter + return OpenTelemetryTracingProcessor(tracer) + + +# --------------------------------------------------------------------------- +# Helpers: mock SDK span_data objects +# --------------------------------------------------------------------------- + +class MockAgentSpan: + """Minimal mock of an Agents SDK span object.""" + def __init__(self, span_data, trace_id="test-trace", error=None): + self.span_data = span_data + self.trace_id = trace_id + self.error = error + + +# --------------------------------------------------------------------------- +# Tests: _start_agent_span +# --------------------------------------------------------------------------- + +class TestStartAgentSpan: + """Unit tests for the extracted _start_agent_span handler.""" + + def test_returns_span_with_agent_attributes(self, tracer_and_exporter, processor): + """Must return a span named '{name}.agent' with correct attributes.""" + from agents import AgentSpanData + + tracer, exporter = tracer_and_exporter + agent_data = AgentSpanData(name="MyAgent", handoffs=[], tools=[], output_type="") + + otel_span = processor._start_agent_span(agent_data, parent_context=None, trace_id="t1") + + assert otel_span is not None + assert otel_span.name == "MyAgent.agent" + attrs = dict(otel_span.attributes) + assert attrs[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.AGENT.value + assert attrs[GenAIAttributes.GEN_AI_AGENT_NAME] == "MyAgent" + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + otel_span.end() + + def test_unknown_agent_name_defaults(self, tracer_and_exporter, processor): + """Agent with no name → 'unknown_agent'.""" + from agents import AgentSpanData + + agent_data = AgentSpanData(name=None, handoffs=[], tools=[], output_type="") + + otel_span = processor._start_agent_span(agent_data, parent_context=None, trace_id="t2") + + assert otel_span.name == "unknown_agent.agent" + assert otel_span.attributes[GenAIAttributes.GEN_AI_AGENT_NAME] == "unknown_agent" + + otel_span.end() + + def test_handoff_parent_attribute_set(self, tracer_and_exporter, processor): + """When a reverse handoff exists, handoff_parent must be set.""" + from agents import AgentSpanData + + # Pre-seed the reverse handoff dict + processor._reverse_handoffs_dict["TargetAgent:t3"] = "SourceAgent" + + agent_data = AgentSpanData(name="TargetAgent", handoffs=[], tools=[], output_type="") + otel_span = processor._start_agent_span(agent_data, 
parent_context=None, trace_id="t3") + + attrs = dict(otel_span.attributes) + assert attrs.get("gen_ai.agent.handoff_parent") == "SourceAgent" + # Consumed from the dict + assert "TargetAgent:t3" not in processor._reverse_handoffs_dict + + otel_span.end() + + def test_handoffs_list_serialized(self, tracer_and_exporter, processor): + """Handoff targets should be serialized as JSON attributes.""" + from agents import AgentSpanData + + mock_handoff_agent = MagicMock() + mock_handoff_agent.name = "AgentB" + mock_handoff_agent.instructions = "Help the user" + + agent_data = AgentSpanData(name="AgentA", handoffs=[mock_handoff_agent], tools=[], output_type="") + otel_span = processor._start_agent_span(agent_data, parent_context=None, trace_id="t4") + + attrs = dict(otel_span.attributes) + handoffs = json.loads(attrs["openai.agent.handoffs"]) + assert isinstance(handoffs, list) + assert handoffs[0]["name"] == "AgentB" + assert handoffs[0]["instructions"] == "Help the user" + + otel_span.end() + + def test_span_kind_is_internal(self, tracer_and_exporter, processor): + """Agent spans must be INTERNAL kind (in-process orchestration, not a remote call).""" + from agents import AgentSpanData + + agent_data = AgentSpanData(name="Agent", handoffs=[], tools=[], output_type="") + otel_span = processor._start_agent_span(agent_data, parent_context=None, trace_id="t5") + + assert otel_span.kind == SpanKind.INTERNAL + + otel_span.end() + + def test_agent_span_has_invoke_agent_operation_name(self, tracer_and_exporter, processor): + """Agent spans must set gen_ai.operation.name='invoke_agent' per OTel Agent Spans spec.""" + from agents import AgentSpanData + + agent_data = AgentSpanData(name="Agent", handoffs=[], tools=[], output_type="") + otel_span = processor._start_agent_span(agent_data, parent_context=None, trace_id="t6") + + attrs = dict(otel_span.attributes) + assert attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "invoke_agent" + + otel_span.end() + + def test_handoffs_collapsed_to_single_json_array(self, tracer_and_exporter, processor): + """Handoffs must be a single 'openai.agent.handoffs' JSON array, not indexed attributes.""" + from agents import AgentSpanData + + mock_a = MagicMock() + mock_a.name = "AgentA" + mock_a.instructions = "Does A" + mock_b = MagicMock() + mock_b.name = "AgentB" + mock_b.instructions = "Does B" + + agent_data = AgentSpanData(name="Router", handoffs=[mock_a, mock_b], tools=[], output_type="") + otel_span = processor._start_agent_span(agent_data, parent_context=None, trace_id="t7") + + attrs = dict(otel_span.attributes) + assert "openai.agent.handoff0" not in attrs + assert "openai.agent.handoff1" not in attrs + + handoffs = json.loads(attrs["openai.agent.handoffs"]) + assert isinstance(handoffs, list) and len(handoffs) == 2 + assert handoffs[0]["name"] == "AgentA" + assert handoffs[1]["name"] == "AgentB" + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Tests: _start_handoff_span +# --------------------------------------------------------------------------- + +class TestStartHandoffSpan: + """Unit tests for the extracted _start_handoff_span handler.""" + + def test_returns_span_with_handoff_attributes(self, tracer_and_exporter, processor): + """Must create a span named '{from} → {to}.handoff'.""" + from agents import HandoffSpanData + + handoff_data = HandoffSpanData(from_agent="AgentA", to_agent="AgentB") + + otel_span = processor._start_handoff_span( + handoff_data, parent_context=None, trace_id="t1", + ) + + 
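+        # Expected result, per the assertions just below: a span named
+        # "AgentA → AgentB.handoff". A sketch of the assumed name
+        # construction (the 'unknown' fallback is pinned by
+        # test_unknown_agents_fallback):
+        #   f"{from_agent or 'unknown'} → {to_agent or 'unknown'}.handoff"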
assert otel_span is not None + assert otel_span.name == "AgentA → AgentB.handoff" + attrs = dict(otel_span.attributes) + assert attrs[SpanAttributes.TRACELOOP_SPAN_KIND] == "handoff" + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + otel_span.end() + + def test_from_and_to_agent_attributes(self, tracer_and_exporter, processor): + """from_agent and to_agent must be set as attributes.""" + from agents import HandoffSpanData + from opentelemetry.instrumentation.openai_agents.utils import ( + GEN_AI_HANDOFF_FROM_AGENT, + GEN_AI_HANDOFF_TO_AGENT, + ) + + handoff_data = HandoffSpanData(from_agent="AgentA", to_agent="AgentB") + + otel_span = processor._start_handoff_span( + handoff_data, parent_context=None, trace_id="t2", + ) + + attrs = dict(otel_span.attributes) + assert attrs[GEN_AI_HANDOFF_FROM_AGENT] == "AgentA" + assert attrs[GEN_AI_HANDOFF_TO_AGENT] == "AgentB" + + otel_span.end() + + def test_registers_reverse_handoff(self, tracer_and_exporter, processor): + """Must register reverse handoff for the target agent.""" + from agents import HandoffSpanData + + handoff_data = HandoffSpanData(from_agent="AgentA", to_agent="AgentB") + + processor._start_handoff_span( + handoff_data, parent_context=None, trace_id="trace-123", + ) + + assert processor._reverse_handoffs_dict.get("AgentB:trace-123") == "AgentA" + + def test_unknown_agents_fallback(self, tracer_and_exporter, processor): + """None agent names → 'unknown' in span name.""" + from agents import HandoffSpanData + + handoff_data = HandoffSpanData(from_agent=None, to_agent=None) + + otel_span = processor._start_handoff_span( + handoff_data, parent_context=None, trace_id="t3", + ) + + assert "unknown" in otel_span.name + + otel_span.end() + + def test_span_kind_is_internal(self, tracer_and_exporter, processor): + """Handoff spans must be INTERNAL kind.""" + from agents import HandoffSpanData + + handoff_data = HandoffSpanData(from_agent="A", to_agent="B") + otel_span = processor._start_handoff_span( + handoff_data, parent_context=None, trace_id="t4", + ) + + assert otel_span.kind == SpanKind.INTERNAL + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Tests: _start_function_span +# --------------------------------------------------------------------------- + +class TestStartFunctionSpan: + """Unit tests for the extracted _start_function_span handler.""" + + def test_returns_span_with_tool_attributes(self, tracer_and_exporter, processor): + """Must return a span named '{tool}.tool' with tool attributes.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name="get_weather", input="", output="") + + otel_span = processor._start_function_span(func_data, parent_context=None) + + assert otel_span is not None + assert otel_span.name == "get_weather.tool" + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_TOOL_NAME] == "get_weather" + assert attrs[GenAIAttributes.GEN_AI_TOOL_TYPE] == "function" + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + assert attrs[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.TOOL.value + + otel_span.end() + + def test_unknown_tool_name_defaults(self, tracer_and_exporter, processor): + """Tool with no name → 'unknown_tool'.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name=None, input="", output="") + + otel_span = processor._start_function_span(func_data, parent_context=None) + + assert otel_span.name == "unknown_tool.tool" + + otel_span.end() 
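+
+    # A minimal sketch of the contract this class pins down (illustrative
+    # only, not the shipped implementation; `self._tracer` is an assumed
+    # attribute name). The name fallback, the "Represents a Function Span"
+    # filter, and the attributes below all come from the assertions in
+    # these tests:
+    #
+    #     def _start_function_span(self, span_data, parent_context):
+    #         name = getattr(span_data, "name", None) or "unknown_tool"
+    #         span = self._tracer.start_span(
+    #             f"{name}.tool", context=parent_context, kind=SpanKind.INTERNAL
+    #         )
+    #         span.set_attribute(GenAIAttributes.GEN_AI_TOOL_NAME, name)
+    #         span.set_attribute(GenAIAttributes.GEN_AI_TOOL_TYPE, "function")
+    #         span.set_attribute(GenAIAttributes.GEN_AI_PROVIDER_NAME, "openai")
+    #         desc = getattr(span_data, "description", None)
+    #         if desc and not desc.startswith("Represents a Function Span"):
+    #             span.set_attribute(GenAIAttributes.GEN_AI_TOOL_DESCRIPTION, desc)
+    #         return span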
+ + def test_description_attribute_set(self, tracer_and_exporter, processor): + """Non-generic descriptions must appear as GEN_AI_TOOL_DESCRIPTION.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name="search", input="", output="") + func_data.description = "Search the web for information" + + otel_span = processor._start_function_span(func_data, parent_context=None) + + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_TOOL_DESCRIPTION] == "Search the web for information" + + otel_span.end() + + def test_generic_description_filtered_out(self, tracer_and_exporter, processor): + """Descriptions starting with 'Represents a Function Span' must be ignored.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name="search", input="", output="") + func_data.description = "Represents a Function Span for search" + + otel_span = processor._start_function_span(func_data, parent_context=None) + + attrs = dict(otel_span.attributes) + assert GenAIAttributes.GEN_AI_TOOL_DESCRIPTION not in attrs + + otel_span.end() + + def test_span_kind_is_internal(self, tracer_and_exporter, processor): + """Function/tool spans must be INTERNAL kind.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name="tool", input="", output="") + otel_span = processor._start_function_span(func_data, parent_context=None) + + assert otel_span.kind == SpanKind.INTERNAL + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Tests: _end_function_span +# --------------------------------------------------------------------------- + +class TestEndFunctionSpan: + """Unit tests for _end_function_span — sets tool call arguments/result.""" + + def test_sets_tool_call_arguments_and_result(self, tracer_and_exporter, processor): + """Must set gen_ai.tool.call.arguments and gen_ai.tool.call.result.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData( + name="get_weather", input='{"city": "NYC"}', output='{"temp": 72}' + ) + otel_span = processor._start_function_span(func_data, parent_context=None) + processor._end_function_span(otel_span, func_data, trace_content=True) + otel_span.end() + + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS] == '{"city": "NYC"}' + assert attrs[GenAIAttributes.GEN_AI_TOOL_CALL_RESULT] == '{"temp": 72}' + + def test_content_gated_when_false(self, tracer_and_exporter, processor): + """Must NOT set arguments/result when trace_content is False.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData( + name="get_weather", input='{"city": "NYC"}', output='{"temp": 72}' + ) + otel_span = processor._start_function_span(func_data, parent_context=None) + processor._end_function_span(otel_span, func_data, trace_content=False) + otel_span.end() + + attrs = dict(otel_span.attributes) + assert GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS not in attrs + assert GenAIAttributes.GEN_AI_TOOL_CALL_RESULT not in attrs + + def test_none_input_output_omitted(self, tracer_and_exporter, processor): + """None input/output must not produce attributes.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name="noop", input=None, output=None) + otel_span = processor._start_function_span(func_data, parent_context=None) + processor._end_function_span(otel_span, func_data, trace_content=True) + otel_span.end() + + attrs = dict(otel_span.attributes) + assert GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS not 
in attrs + assert GenAIAttributes.GEN_AI_TOOL_CALL_RESULT not in attrs + + def test_non_string_output_coerced(self, tracer_and_exporter, processor): + """Non-string output must be str()-converted.""" + from agents import FunctionSpanData + + func_data = FunctionSpanData(name="calc", input="2+2", output=4) + otel_span = processor._start_function_span(func_data, parent_context=None) + processor._end_function_span(otel_span, func_data, trace_content=True) + otel_span.end() + + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_TOOL_CALL_ARGUMENTS] == "2+2" + assert attrs[GenAIAttributes.GEN_AI_TOOL_CALL_RESULT] == "4" + + +# --------------------------------------------------------------------------- +# Tests: _start_generation_span +# --------------------------------------------------------------------------- + +class TestStartGenerationSpan: + """Unit tests for the extracted _start_generation_span handler.""" + + def test_returns_span_with_chat_attributes(self, tracer_and_exporter, processor): + """GenerationSpanData (no span_data) → operation_name=chat.""" + otel_span = processor._start_generation_span(parent_context=None) + + assert otel_span is not None + assert otel_span.name == "openai.response" + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat" + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + + otel_span.end() + + def test_response_span_data_uses_chat_operation(self, tracer_and_exporter, processor): + """ResponseSpanData (OpenAI Responses API) must emit 'chat' as operation name. + + 'generate_content' is the GCP/Gemini well-known value; it must not be used for + an OpenAI Responses API span, which is a chat completion surface. + """ + class ResponseSpanData: + model = "gpt-4o" + + otel_span = processor._start_generation_span( + parent_context=None, span_data=ResponseSpanData() + ) + + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat", ( + f"ResponseSpanData (OpenAI Responses API) must emit 'chat', " + f"got '{attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}'" + ) + + otel_span.end() + + def test_span_kind_is_client(self, tracer_and_exporter, processor): + """Generation/response spans must be CLIENT kind.""" + otel_span = processor._start_generation_span(parent_context=None) + + assert otel_span.kind == SpanKind.CLIENT + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Tests: _start_realtime_span +# --------------------------------------------------------------------------- + +class TestStartRealtimeSpan: + """Unit tests for the extracted _start_realtime_span handler.""" + + def test_speech_span_attributes(self, tracer_and_exporter, processor): + """Speech span must have correct name and operation.""" + span_data = MagicMock() + span_data.model = "gpt-4o-realtime-preview" + + otel_span = processor._start_realtime_span( + span_data, parent_context=None, + span_name="openai.realtime.speech", operation="speech", + ) + + assert otel_span is not None + assert otel_span.name == "openai.realtime.speech" + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "speech" + assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai" + assert attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "gpt-4o-realtime-preview" + + otel_span.end() + + def test_transcription_span_attributes(self, tracer_and_exporter, processor): + """Transcription span must have correct name and 
operation.""" + span_data = MagicMock() + span_data.model = "whisper-1" + + otel_span = processor._start_realtime_span( + span_data, parent_context=None, + span_name="openai.realtime.transcription", operation="transcription", + ) + + assert otel_span.name == "openai.realtime.transcription" + attrs = dict(otel_span.attributes) + assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "transcription" + assert attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "whisper-1" + + otel_span.end() + + def test_speech_group_span_no_model(self, tracer_and_exporter, processor): + """Speech group span with no model → model attribute omitted.""" + span_data = MagicMock(spec=[]) # no attributes at all + + otel_span = processor._start_realtime_span( + span_data, parent_context=None, + span_name="openai.realtime.speech_group", operation="speech_group", + ) + + assert otel_span.name == "openai.realtime.speech_group" + attrs = dict(otel_span.attributes) + assert GenAIAttributes.GEN_AI_REQUEST_MODEL not in attrs + + otel_span.end() + + def test_span_kind_is_client(self, tracer_and_exporter, processor): + """All realtime spans must be CLIENT kind.""" + span_data = MagicMock(spec=[]) + + otel_span = processor._start_realtime_span( + span_data, parent_context=None, + span_name="openai.realtime.speech", operation="speech", + ) + + assert otel_span.kind == SpanKind.CLIENT + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Tests: _extract_tool_definitions (pure function) +# --------------------------------------------------------------------------- + +class TestExtractToolDefinitions: + """Unit tests for the extracted _extract_tool_definitions helper.""" + + def test_function_wrapped_tool(self): + """Tool with .function wrapper → {type, function: {name, description, parameters}}.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_tool_definitions, + ) + + func = MagicMock() + func.name = "get_weather" + func.description = "Get weather data" + func.parameters = {"type": "object", "properties": {"city": {"type": "string"}}} + + tool = MagicMock() + tool.function = func + tool.type = "function" + + result = _extract_tool_definitions([tool]) + + assert len(result) == 1 + assert result[0]["type"] == "function" + assert result[0]["function"]["name"] == "get_weather" + assert result[0]["function"]["description"] == "Get weather data" + assert "properties" in result[0]["function"]["parameters"] + + def test_direct_function_tool(self): + """Tool with direct .name (no .function wrapper) → wrapped {type, function} shape.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_tool_definitions, + ) + + tool = MagicMock(spec=["name", "description", "parameters"]) + tool.name = "search" + tool.description = "Search the web" + tool.parameters = {"type": "object"} + + result = _extract_tool_definitions([tool]) + + assert len(result) == 1 + assert result[0]["type"] == "function" + assert result[0]["function"]["name"] == "search" + assert result[0]["function"]["description"] == "Search the web" + + def test_empty_tools_list(self): + """Empty tools list → empty result.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_tool_definitions, + ) + + result = _extract_tool_definitions([]) + assert result == [] + + def test_none_tools(self): + """None tools → empty result.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_tool_definitions, + ) + + result = 
_extract_tool_definitions(None) + assert result == [] + + def test_mixed_tool_formats(self): + """Mix of function-wrapped and direct tools.""" + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_tool_definitions, + ) + + func = MagicMock() + func.name = "tool_a" + func.description = "Tool A" + func.parameters = {} + + wrapped = MagicMock() + wrapped.function = func + wrapped.type = "function" + + direct = MagicMock(spec=["name", "description"]) + direct.name = "tool_b" + direct.description = "Tool B" + + result = _extract_tool_definitions([wrapped, direct]) + assert len(result) == 2 + for d in result: + assert d["type"] == "function" + assert "function" in d + names = {r["function"]["name"] for r in result} + assert names == {"tool_a", "tool_b"} + + def test_both_branches_produce_consistent_wrapped_shape(self): + """Both function-wrapped and direct-function tools must produce the same shape. + + Semconv note [14]: gen_ai.tool.definitions should use source system's + representation. For OpenAI that's always {type:'function', function:{...}}. + """ + from opentelemetry.instrumentation.openai_agents._hooks import ( + _extract_tool_definitions, + ) + from types import SimpleNamespace + + wrapped = SimpleNamespace( + type="function", + function=SimpleNamespace( + name="search", description="Search things", parameters={"type": "object"} + ), + ) + direct = SimpleNamespace( + name="lookup", description="Look up things", parameters={"type": "object"} + ) + + defs = _extract_tool_definitions([wrapped, direct]) + + assert len(defs) == 2 + for d in defs: + assert d["type"] == "function", f"Missing type wrapper: {d}" + assert "function" in d, f"Missing function wrapper: {d}" + assert "name" in d["function"] + + +# --------------------------------------------------------------------------- +# Tests: _end_generation_span +# --------------------------------------------------------------------------- + +class TestEndGenerationSpan: + """Unit tests for the extracted _end_generation_span method.""" + + def test_extracts_prompt_attributes(self, tracer_and_exporter, processor): + """Must call _extract_prompt_attributes with input data.""" + tracer, exporter = tracer_and_exporter + otel_span = tracer.start_span("test-gen") + + span_data = MagicMock() + span_data.input = [{"role": "user", "content": "Hello"}] + span_data.response = None + + processor._end_generation_span(otel_span, span_data, trace_content=True) + + raw = otel_span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + messages = json.loads(raw) + assert messages[0]["role"] == "user" + + otel_span.end() + + def test_extracts_tool_definitions(self, tracer_and_exporter, processor): + """Must extract and set tool definitions from response.tools.""" + tracer, exporter = tracer_and_exporter + otel_span = tracer.start_span("test-gen") + + func = MagicMock() + func.name = "search" + func.description = "Search" + func.parameters = {} + tool = MagicMock() + tool.function = func + tool.type = "function" + + response = MagicMock() + response.tools = [tool] + response.output = [] + response.model = "gpt-4o" + response.id = "resp-1" + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.frequency_penalty = None + response.finish_reason = "stop" + response.usage = None + + span_data = MagicMock() + span_data.input = [] + span_data.response = response + span_data.tools = None + + processor._end_generation_span(otel_span, span_data, trace_content=True) + + raw = 
otel_span.attributes.get(GenAIAttributes.GEN_AI_TOOL_DEFINITIONS) + assert raw is not None + defs = json.loads(raw) + assert len(defs) == 1 + assert defs[0]["function"]["name"] == "search" + + otel_span.end() + + def test_no_tool_definitions_when_content_gated(self, tracer_and_exporter, processor): + """Tool definitions must NOT be set when trace_content=False.""" + tracer, exporter = tracer_and_exporter + otel_span = tracer.start_span("test-gen") + + func = MagicMock() + func.name = "search" + func.description = "Search" + func.parameters = {} + tool = MagicMock() + tool.function = func + tool.type = "function" + + response = MagicMock() + response.tools = [tool] + response.output = [] + response.model = "gpt-4o" + response.id = "resp-1" + response.temperature = None + response.max_output_tokens = None + response.top_p = None + response.frequency_penalty = None + response.finish_reason = "stop" + response.usage = None + + span_data = MagicMock() + span_data.input = [] + span_data.response = response + + processor._end_generation_span(otel_span, span_data, trace_content=False) + + assert GenAIAttributes.GEN_AI_TOOL_DEFINITIONS not in otel_span.attributes + + otel_span.end() + + def test_extracts_response_attributes(self, tracer_and_exporter, processor): + """Must extract response model, id, etc.""" + tracer, exporter = tracer_and_exporter + otel_span = tracer.start_span("test-gen") + + content_item = MagicMock() + content_item.type = "output_text" + content_item.text = "Hello!" + + output_msg = MagicMock() + output_msg.type = "message" + output_msg.content = [content_item] + output_msg.role = "assistant" + output_msg.name = None + + response = MagicMock() + response.tools = [] + response.output = [output_msg] + response.model = "gpt-4o-mini" + response.id = "resp-abc" + response.temperature = 0.7 + response.max_output_tokens = 100 + response.top_p = 1.0 + response.frequency_penalty = None + response.finish_reason = "stop" + response.usage = None + + span_data = MagicMock() + span_data.input = [] + span_data.response = response + + processor._end_generation_span(otel_span, span_data, trace_content=True) + + assert otel_span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_MODEL) == "gpt-4o-mini" + assert otel_span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_ID) == "resp-abc" + + otel_span.end() + + def test_no_response_no_crash(self, tracer_and_exporter, processor): + """span_data.response=None must not raise.""" + tracer, exporter = tracer_and_exporter + otel_span = tracer.start_span("test-gen") + + span_data = MagicMock() + span_data.input = [] + span_data.response = None + + # Should not raise + processor._end_generation_span(otel_span, span_data, trace_content=True) + + otel_span.end() + + def test_tools_sourced_from_span_data_over_response(self, tracer_and_exporter, processor): + """Tool definitions should come from span_data (request), not response. + + Tools are request metadata; the response may not always echo them. 
+ """ + from types import SimpleNamespace + + tracer, exporter = tracer_and_exporter + otel_span = tracer.start_span("test-gen") + + request_tool = SimpleNamespace( + type="function", + function=SimpleNamespace(name="from_request", description="Request", parameters={}), + ) + response_tool = SimpleNamespace( + type="function", + function=SimpleNamespace(name="from_response", description="Response", parameters={}), + ) + + span_data = SimpleNamespace( + input=[], + response=SimpleNamespace( + temperature=None, max_output_tokens=None, top_p=None, + model="gpt-4o", id="resp_1", frequency_penalty=None, + finish_reason=None, status="completed", output=[], usage=None, + tools=[response_tool], + ), + tools=[request_tool], + model="gpt-4o", + ) + + processor._end_generation_span(otel_span, span_data, trace_content=True) + + raw = otel_span.attributes.get(GenAIAttributes.GEN_AI_TOOL_DEFINITIONS) + assert raw is not None + defs = json.loads(raw) + tool_names = [d["function"]["name"] for d in defs] + assert "from_request" in tool_names, f"Expected request tool, got: {tool_names}" + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Tests: _set_realtime_io_attributes +# --------------------------------------------------------------------------- + +class TestSetRealtimeIOAttributes: + """Unit tests for the extracted _set_realtime_io_attributes method.""" + + def test_speech_span_input_and_output(self, tracer_and_exporter, processor): + """SpeechSpanData with input text and output text → both messages set.""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = "What is the weather?" + span_data.output = "It's sunny." + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=True) + + raw_in = otel_span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw_in is not None + in_msgs = json.loads(raw_in) + assert in_msgs[0]["role"] == "user" + assert in_msgs[0]["parts"][0]["content"] == "What is the weather?" + + raw_out = otel_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw_out is not None + out_msgs = json.loads(raw_out) + assert out_msgs[0]["role"] == "assistant" + assert out_msgs[0]["parts"][0]["content"] == "It's sunny." 
+ + otel_span.end() + + def test_transcription_span_input_and_output(self, tracer_and_exporter, processor): + """TranscriptionSpanData with audio input (non-binary) and text output.""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = "audio-description-text" + span_data.output = "Transcribed text here" + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=True) + + raw_in = otel_span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw_in is not None + + raw_out = otel_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + assert raw_out is not None + + otel_span.end() + + def test_binary_input_skipped(self, tracer_and_exporter, processor): + """Binary input (bytes/bytearray) must NOT be set as input message.""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = b"\x00\x01\x02" + span_data.output = "Transcribed" + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=True) + + assert GenAIAttributes.GEN_AI_INPUT_MESSAGES not in otel_span.attributes + + otel_span.end() + + def test_binary_output_skipped(self, tracer_and_exporter, processor): + """Binary output (bytes/bytearray) must NOT be set as output message.""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = "Hello" + span_data.output = b"\x00\x01\x02" + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=True) + + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in otel_span.attributes + + otel_span.end() + + def test_speech_group_no_output(self, tracer_and_exporter, processor): + """SpeechGroupSpanData with has_output=False → only input set.""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = "Group input" + span_data.output = None + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=False) + + raw_in = otel_span.attributes.get(GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw_in is not None + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in otel_span.attributes + + otel_span.end() + + def test_none_input_skipped(self, tracer_and_exporter, processor): + """None input → no input message attribute.""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = None + span_data.output = "Output text" + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=True) + + assert GenAIAttributes.GEN_AI_INPUT_MESSAGES not in otel_span.attributes + + otel_span.end() + + def test_output_has_finish_reason_empty(self, tracer_and_exporter, processor): + """Realtime output messages must include finish_reason: '' (empty string).""" + tracer, _ = tracer_and_exporter + otel_span = tracer.start_span("test-rt") + + span_data = MagicMock() + span_data.input = None + span_data.output = "Some output" + + processor._set_realtime_io_attributes(otel_span, span_data, has_output=True) + + raw_out = otel_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + out_msgs = json.loads(raw_out) + assert out_msgs[0]["finish_reason"] == "" + + otel_span.end() + + +# --------------------------------------------------------------------------- +# Integration: on_span_start/on_span_end still work end-to-end +# (These confirm refactoring doesn't break the public API) +# 
--------------------------------------------------------------------------- + +class TestOnSpanStartEndToEnd: + """Verify on_span_start dispatches correctly after refactoring.""" + + def _run_span(self, processor, exporter, span_data, trace_id="e2e-trace"): + mock_trace = MagicMock() + mock_trace.trace_id = trace_id + processor.on_trace_start(mock_trace) + + span = MockAgentSpan(span_data, trace_id=trace_id) + processor.on_span_start(span) + processor.on_span_end(span) + processor.on_trace_end(mock_trace) + + return exporter.get_finished_spans() + + def test_agent_span_created(self, tracer_and_exporter, processor): + from agents import AgentSpanData + _, exporter = tracer_and_exporter + + spans = self._run_span( + processor, exporter, + AgentSpanData(name="TestAgent", handoffs=[], tools=[], output_type=""), + ) + names = [s.name for s in spans] + assert "TestAgent.agent" in names + + def test_handoff_span_created(self, tracer_and_exporter, processor): + from agents import HandoffSpanData + _, exporter = tracer_and_exporter + + spans = self._run_span( + processor, exporter, + HandoffSpanData(from_agent="A", to_agent="B"), + ) + names = [s.name for s in spans] + assert any("handoff" in n for n in names) + + def test_function_span_created(self, tracer_and_exporter, processor): + from agents import FunctionSpanData + _, exporter = tracer_and_exporter + + spans = self._run_span( + processor, exporter, + FunctionSpanData(name="my_tool", input="", output=""), + ) + names = [s.name for s in spans] + assert "my_tool.tool" in names + + def test_generation_span_created(self, tracer_and_exporter, processor): + from agents import GenerationSpanData + _, exporter = tracer_and_exporter + + spans = self._run_span( + processor, exporter, + GenerationSpanData(model="gpt-4o", model_config={}), + ) + names = [s.name for s in spans] + assert "openai.response" in names + + def test_error_status_propagated(self, tracer_and_exporter, processor): + from agents import FunctionSpanData + _, exporter = tracer_and_exporter + + mock_trace = MagicMock() + mock_trace.trace_id = "err-trace" + processor.on_trace_start(mock_trace) + + span_data = FunctionSpanData(name="fail_tool", input="", output="") + span = MockAgentSpan(span_data, trace_id="err-trace", error=RuntimeError("boom")) + processor.on_span_start(span) + processor.on_span_end(span) + processor.on_trace_end(mock_trace) + + spans = exporter.get_finished_spans() + tool_span = next(s for s in spans if s.name == "fail_tool.tool") + assert tool_span.status.status_code.name == "ERROR" diff --git a/packages/opentelemetry-instrumentation-openai-agents/uv.lock b/packages/opentelemetry-instrumentation-openai-agents/uv.lock index 0714339f61..a91d806b71 100644 --- a/packages/opentelemetry-instrumentation-openai-agents/uv.lock +++ b/packages/opentelemetry-instrumentation-openai-agents/uv.lock @@ -698,15 +698,12 @@ wheels = [ ] [[package]] -name = "griffe" -version = "1.15.0" +name = "griffelib" +version = "2.0.2" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0d/0c/3a471b6e31951dce2360477420d0a8d1e00dea6cf33b70f3e8c3ab6e28e1/griffe-1.15.0.tar.gz", hash = "sha256:7726e3afd6f298fbc3696e67958803e7ac843c1cfe59734b6251a40cdbfb5eea", size = 424112, upload-time = "2025-11-10T15:03:15.52Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/82/74f4a3310cdabfbb10da554c3a672847f1ed33c6f61dd472681ce7f1fe67/griffelib-2.0.2.tar.gz", hash = 
"sha256:3cf20b3bc470e83763ffbf236e0076b1211bac1bc67de13daf494640f2de707e", size = 166461, upload-time = "2026-03-27T11:34:51.091Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9c/83/3b1d03d36f224edded98e9affd0467630fc09d766c0e56fb1498cbb04a9b/griffe-1.15.0-py3-none-any.whl", hash = "sha256:6f6762661949411031f5fcda9593f586e6ce8340f0ba88921a0f2ef7a81eb9a3", size = 150705, upload-time = "2025-11-10T15:03:13.549Z" }, + { url = "https://files.pythonhosted.org/packages/11/8c/c9138d881c79aa0ea9ed83cbd58d5ca75624378b38cee225dcf5c42cc91f/griffelib-2.0.2-py3-none-any.whl", hash = "sha256:925c857658fb1ba40c0772c37acbc2ab650bd794d9c1b9726922e36ea4117ea1", size = 142357, upload-time = "2026-03-27T11:34:46.275Z" }, ] [[package]] @@ -1244,7 +1241,7 @@ wheels = [ [[package]] name = "openai" -version = "2.20.0" +version = "2.32.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1256,27 +1253,28 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6e/5a/f495777c02625bfa18212b6e3b73f1893094f2bf660976eb4bc6f43a1ca2/openai-2.20.0.tar.gz", hash = "sha256:2654a689208cd0bf1098bb9462e8d722af5cbe961e6bba54e6f19fb843d88db1", size = 642355, upload-time = "2026-02-10T19:02:54.145Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/59/bdcc6b759b8c42dd73afaf5bf8f902c04b37987a5514dbc1c64dba390fef/openai-2.32.0.tar.gz", hash = "sha256:c54b27a9e4cb8d51f0dd94972ffd1a04437efeb259a9e60d8922b8bd26fe55e0", size = 693286, upload-time = "2026-04-15T22:28:19.434Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/a0/cf4297aa51bbc21e83ef0ac018947fa06aea8f2364aad7c96cbf148590e6/openai-2.20.0-py3-none-any.whl", hash = "sha256:38d989c4b1075cd1f76abc68364059d822327cf1a932531d429795f4fc18be99", size = 1098479, upload-time = "2026-02-10T19:02:52.157Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c1/d6e64ccd0536bf616556f0cad2b6d94a8125f508d25cfd814b1d2db4e2f1/openai-2.32.0-py3-none-any.whl", hash = "sha256:4dcc9badeb4bf54ad0d187453742f290226d30150890b7890711bda4f32f192f", size = 1162570, upload-time = "2026-04-15T22:28:17.714Z" }, ] [[package]] name = "openai-agents" -version = "0.8.4" +version = "0.14.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "griffe" }, + { name = "griffelib" }, { name = "mcp" }, { name = "openai" }, { name = "pydantic" }, { name = "requests" }, { name = "types-requests" }, { name = "typing-extensions" }, + { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ed/e0/9fa9eac9baf2816bc63cee28967d35a7ed9dc2f25e9fd2004f48ed6c8820/openai_agents-0.8.4.tar.gz", hash = "sha256:5d4c4861aedd56a82b15c6ddf6c53031a39859a222f08bbd5645d5967efa05e8", size = 2389744, upload-time = "2026-02-11T19:14:30.75Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e2/2c/74fea5c1b20f107d19742af400045542e17eb449d044f950758a6f68f32f/openai_agents-0.14.2.tar.gz", hash = "sha256:fd4e1ae84e00aef4339ad39913468a0da34cba8ff5382925d38028ac3b435ef2", size = 5288888, upload-time = "2026-04-18T00:17:39.709Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/55/dc/10df015aebb0797a8367aab65200ac4f5221df20bbae76930f5b6ac8e001/openai_agents-0.8.4-py3-none-any.whl", hash = "sha256:2383c6e8e59ed4146b89d1b6f53e34e55caf94bc14ae3fd704e7aad5021f4ff1", size = 380662, upload-time = "2026-02-11T19:14:28.864Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/02/bb3ddff9bca543cbcca0d362a645b03a0708b6c2cd6eb620d5f3de810bb3/openai_agents-0.14.2-py3-none-any.whl", hash = "sha256:acd75132f2ec70413a4e95680b629ccbc6f4f5d0d63414980153de09d409e695", size = 807364, upload-time = "2026-04-18T00:17:37.505Z" }, ] [[package]] @@ -1309,7 +1307,7 @@ wheels = [ [[package]] name = "opentelemetry-instrumentation-openai-agents" -version = "0.53.3" +version = "0.59.2" source = { editable = "." } dependencies = [ { name = "opentelemetry-api" }, @@ -1360,7 +1358,7 @@ dev = [ ] test = [ { name = "litellm", specifier = ">=1.71.2,<2" }, - { name = "openai-agents", specifier = ">=0.6.9" }, + { name = "openai-agents", specifier = ">=0.14.2" }, { name = "opentelemetry-sdk", specifier = ">=1.38.0,<2" }, { name = "pytest", specifier = ">=8.2.2,<9" }, { name = "pytest-asyncio", specifier = ">=1.0.0,<2" }, @@ -2491,6 +2489,65 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/d7/f79b05a5d728f8786876a7d75dfb0c5cae27e428081b2d60152fb52f155f/vcrpy-8.1.1-py3-none-any.whl", hash = "sha256:2d16f31ad56493efb6165182dd99767207031b0da3f68b18f975545ede8ac4b9", size = 42445, upload-time = "2026-01-04T19:22:02.532Z" }, ] +[[package]] +name = "websockets" +version = "15.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016, upload-time = "2025-03-05T20:03:41.606Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/da/6462a9f510c0c49837bbc9345aca92d767a56c1fb2939e1579df1e1cdcf7/websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b", size = 175423, upload-time = "2025-03-05T20:01:35.363Z" }, + { url = "https://files.pythonhosted.org/packages/1c/9f/9d11c1a4eb046a9e106483b9ff69bce7ac880443f00e5ce64261b47b07e7/websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205", size = 173080, upload-time = "2025-03-05T20:01:37.304Z" }, + { url = "https://files.pythonhosted.org/packages/d5/4f/b462242432d93ea45f297b6179c7333dd0402b855a912a04e7fc61c0d71f/websockets-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a", size = 173329, upload-time = "2025-03-05T20:01:39.668Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0c/6afa1f4644d7ed50284ac59cc70ef8abd44ccf7d45850d989ea7310538d0/websockets-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e", size = 182312, upload-time = "2025-03-05T20:01:41.815Z" }, + { url = "https://files.pythonhosted.org/packages/dd/d4/ffc8bd1350b229ca7a4db2a3e1c482cf87cea1baccd0ef3e72bc720caeec/websockets-15.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf", size = 181319, upload-time = "2025-03-05T20:01:43.967Z" }, + { url = "https://files.pythonhosted.org/packages/97/3a/5323a6bb94917af13bbb34009fac01e55c51dfde354f63692bf2533ffbc2/websockets-15.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb", size = 181631, upload-time = "2025-03-05T20:01:46.104Z" }, + { url = "https://files.pythonhosted.org/packages/a6/cc/1aeb0f7cee59ef065724041bb7ed667b6ab1eeffe5141696cccec2687b66/websockets-15.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d", size = 182016, upload-time = "2025-03-05T20:01:47.603Z" }, + { url = "https://files.pythonhosted.org/packages/79/f9/c86f8f7af208e4161a7f7e02774e9d0a81c632ae76db2ff22549e1718a51/websockets-15.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9", size = 181426, upload-time = "2025-03-05T20:01:48.949Z" }, + { url = "https://files.pythonhosted.org/packages/c7/b9/828b0bc6753db905b91df6ae477c0b14a141090df64fb17f8a9d7e3516cf/websockets-15.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c", size = 181360, upload-time = "2025-03-05T20:01:50.938Z" }, + { url = "https://files.pythonhosted.org/packages/89/fb/250f5533ec468ba6327055b7d98b9df056fb1ce623b8b6aaafb30b55d02e/websockets-15.0.1-cp310-cp310-win32.whl", hash = "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256", size = 176388, upload-time = "2025-03-05T20:01:52.213Z" }, + { url = "https://files.pythonhosted.org/packages/1c/46/aca7082012768bb98e5608f01658ff3ac8437e563eca41cf068bd5849a5e/websockets-15.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41", size = 176830, upload-time = "2025-03-05T20:01:53.922Z" }, + { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423, upload-time = "2025-03-05T20:01:56.276Z" }, + { url = "https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082, upload-time = "2025-03-05T20:01:57.563Z" }, + { url = "https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330, upload-time = "2025-03-05T20:01:59.063Z" }, + { url = "https://files.pythonhosted.org/packages/a5/90/1c37ae8b8a113d3daf1065222b6af61cc44102da95388ac0018fcb7d93d9/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562", size = 182878, upload-time = "2025-03-05T20:02:00.305Z" }, + { url = "https://files.pythonhosted.org/packages/8e/8d/96e8e288b2a41dffafb78e8904ea7367ee4f891dafc2ab8d87e2124cb3d3/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792", size = 181883, upload-time = "2025-03-05T20:02:03.148Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/1f/5d6dbf551766308f6f50f8baf8e9860be6182911e8106da7a7f73785f4c4/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413", size = 182252, upload-time = "2025-03-05T20:02:05.29Z" }, + { url = "https://files.pythonhosted.org/packages/d4/78/2d4fed9123e6620cbf1706c0de8a1632e1a28e7774d94346d7de1bba2ca3/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8", size = 182521, upload-time = "2025-03-05T20:02:07.458Z" }, + { url = "https://files.pythonhosted.org/packages/e7/3b/66d4c1b444dd1a9823c4a81f50231b921bab54eee2f69e70319b4e21f1ca/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3", size = 181958, upload-time = "2025-03-05T20:02:09.842Z" }, + { url = "https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918, upload-time = "2025-03-05T20:02:11.968Z" }, + { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388, upload-time = "2025-03-05T20:02:13.32Z" }, + { url = "https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828, upload-time = "2025-03-05T20:02:14.585Z" }, + { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437, upload-time = "2025-03-05T20:02:16.706Z" }, + { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096, upload-time = "2025-03-05T20:02:18.832Z" }, + { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332, upload-time = "2025-03-05T20:02:20.187Z" }, + { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152, upload-time = "2025-03-05T20:02:22.286Z" }, + { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096, upload-time = 
"2025-03-05T20:02:24.368Z" }, + { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523, upload-time = "2025-03-05T20:02:25.669Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790, upload-time = "2025-03-05T20:02:26.99Z" }, + { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165, upload-time = "2025-03-05T20:02:30.291Z" }, + { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160, upload-time = "2025-03-05T20:02:31.634Z" }, + { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395, upload-time = "2025-03-05T20:02:33.017Z" }, + { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841, upload-time = "2025-03-05T20:02:34.498Z" }, + { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440, upload-time = "2025-03-05T20:02:36.695Z" }, + { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098, upload-time = "2025-03-05T20:02:37.985Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329, upload-time = "2025-03-05T20:02:39.298Z" }, + { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111, upload-time = "2025-03-05T20:02:40.595Z" }, + { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054, upload-time = "2025-03-05T20:02:41.926Z" }, + { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496, upload-time = "2025-03-05T20:02:43.304Z" }, + { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829, upload-time = "2025-03-05T20:02:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217, upload-time = "2025-03-05T20:02:50.14Z" }, + { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195, upload-time = "2025-03-05T20:02:51.561Z" }, + { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393, upload-time = "2025-03-05T20:02:53.814Z" }, + { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837, upload-time = "2025-03-05T20:02:55.237Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/d40f779fa16f74d3468357197af8d6ad07e7c5a27ea1ca74ceb38986f77a/websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3", size = 173109, upload-time = "2025-03-05T20:03:17.769Z" }, + { url = "https://files.pythonhosted.org/packages/bc/cd/5b887b8585a593073fd92f7c23ecd3985cd2c3175025a91b0d69b0551372/websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1", size = 173343, upload-time = "2025-03-05T20:03:19.094Z" }, + { url = "https://files.pythonhosted.org/packages/fe/ae/d34f7556890341e900a95acf4886833646306269f899d58ad62f588bf410/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475", size = 174599, upload-time = "2025-03-05T20:03:21.1Z" }, + { url = "https://files.pythonhosted.org/packages/71/e6/5fd43993a87db364ec60fc1d608273a1a465c0caba69176dd160e197ce42/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9", size = 174207, upload-time = "2025-03-05T20:03:23.221Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/fb/c492d6daa5ec067c2988ac80c61359ace5c4c674c532985ac5a123436cec/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04", size = 174155, upload-time = "2025-03-05T20:03:25.321Z" }, + { url = "https://files.pythonhosted.org/packages/68/a1/dcb68430b1d00b698ae7a7e0194433bce4f07ded185f0ee5fb21e2a2e91e/websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122", size = 176884, upload-time = "2025-03-05T20:03:27.934Z" }, + { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" }, +] + [[package]] name = "wrapt" version = "1.17.3" diff --git a/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/_testing.py b/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/_testing.py index 453e34118d..4650a19e1e 100644 --- a/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/_testing.py +++ b/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/_testing.py @@ -377,6 +377,45 @@ def test_core_metrics_unchanged(self): assert Meters.LLM_OPERATION_DURATION == "gen_ai.client.operation.duration" +# --------------------------------------------------------------------------- +# Upstream OTel GenAI constants — message & tool attributes +# --------------------------------------------------------------------------- + + +class TestUpstreamGenAIMessageAttributes: + """Verify upstream OTel constants for messages/tools are importable and correct.""" + + def test_gen_ai_input_messages(self): + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + assert GenAIAttributes.GEN_AI_INPUT_MESSAGES == "gen_ai.input.messages" + + def test_gen_ai_output_messages(self): + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES == "gen_ai.output.messages" + + def test_gen_ai_tool_definitions(self): + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + assert GenAIAttributes.GEN_AI_TOOL_DEFINITIONS == "gen_ai.tool.definitions" + + def test_gen_ai_provider_name(self): + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + assert GenAIAttributes.GEN_AI_PROVIDER_NAME == "gen_ai.provider.name" + + def test_gen_ai_operation_name(self): + from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + ) + assert GenAIAttributes.GEN_AI_OPERATION_NAME == "gen_ai.operation.name" + + class TestMetersVendorNamespacesKept: """ Vendor-qualified metric names (llm.openai.*, llm.anthropic.*, llm.watsonx.*)