diff --git a/sdk/ai/azure-ai-projects/CHANGELOG.md b/sdk/ai/azure-ai-projects/CHANGELOG.md index 08840fce6a97..034b155b7702 100644 --- a/sdk/ai/azure-ai-projects/CHANGELOG.md +++ b/sdk/ai/azure-ai-projects/CHANGELOG.md @@ -21,6 +21,7 @@ * Rename class `MicrosoftFabricAgentTool` to `MicrosoftFabricPreviewTool`. * Rename class `SharepointAgentTool` to `SharepointPreviewTool`. * Rename class `ItemParam` to `InputItem`. +* Tracing: workflow actions in conversation item listings are now emitted as "gen_ai.conversation.item" events (with role="workflow") instead of "gen_ai.workflow.action" events in the list_conversation_items span. ## 2.0.0b3 (2026-01-06) diff --git a/sdk/ai/azure-ai-projects/assets.json b/sdk/ai/azure-ai-projects/assets.json index 490af2ec735e..cf8a8fe386d2 100644 --- a/sdk/ai/azure-ai-projects/assets.json +++ b/sdk/ai/azure-ai-projects/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "python", "TagPrefix": "python/ai/azure-ai-projects", - "Tag": "python/ai/azure-ai-projects_6f9985fe6d" + "Tag": "python/ai/azure-ai-projects_7cddb7d06f" } diff --git a/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_responses_instrumentor.py b/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_responses_instrumentor.py index de2939f9a30f..904f714fd7ae 100644 --- a/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_responses_instrumentor.py +++ b/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_responses_instrumentor.py @@ -3998,9 +3998,7 @@ def _add_conversation_item_event( # pylint: disable=too-many-branches,too-many- # Wrap in parts array for semantic convention compliance parts: List[Dict[str, Any]] = [{"type": "workflow_action", "content": workflow_details}] event_body = [{"role": role, "parts": parts}] - - # Use generic event name for workflow actions - event_name = GEN_AI_WORKFLOW_ACTION_EVENT + event_name = GEN_AI_CONVERSATION_ITEM_EVENT elif item_type == "message": # Regular message - use content format for consistency diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor.py index 3a7fbdff92ac..66fb4fb0972a 100644 --- a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor.py +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor.py @@ -3989,30 +3989,38 @@ def test_workflow_agent_non_streaming_with_content_recording(self, **kwargs): assert "content" in part assert "status" in part["content"] - # Verify conversation items listing span also has workflow actions + # Verify conversation items listing span list_spans = self.exporter.get_spans_by_name("list_conversation_items") assert len(list_spans) >= 1 list_span = list_spans[0] - # Check for workflow action events in list items span - list_workflow_events = [e for e in list_span.events if e.name == "gen_ai.workflow.action"] - assert len(list_workflow_events) > 0 + # Check for conversation item events in list items span + list_item_events = [e for e in list_span.events if e.name == "gen_ai.conversation.item"] + assert len(list_item_events) > 0 - # Verify workflow event content structure in list items - for event in list_workflow_events: + # Verify conversation item event content structure - check for workflow items + found_workflow_item = False + for event in list_item_events: content_str = event.attributes.get("gen_ai.event.content", "[]") content = json.loads(content_str) assert isinstance(content, list) - assert 
len(content) == 1 - assert content[0]["role"] == "workflow" - assert "parts" in content[0] - assert len(content[0]["parts"]) == 1 - part = content[0]["parts"][0] - assert part["type"] == "workflow_action" - assert "content" in part - assert "status" in part["content"] - # With content recording ON, action_id should be present - assert "action_id" in part["content"] + for item in content: + if item.get("role") == "workflow": + found_workflow_item = True + assert "parts" in item + assert len(item["parts"]) >= 1 + part = item["parts"][0] + assert part["type"] == "workflow_action" + assert "content" in part + assert "status" in part["content"] + # With content recording ON, action_id and previous_action_id should be present + assert ( + "action_id" in part["content"] + ), "action_id should be present when content recording is enabled" + assert ( + "previous_action_id" in part["content"] + ), "previous_action_id should be present when content recording is enabled" + assert found_workflow_item, "Should have found workflow items in conversation items" @pytest.mark.usefixtures("instrument_without_content") @servicePreparer() @@ -4095,31 +4103,38 @@ def test_workflow_agent_non_streaming_without_content_recording(self, **kwargs): assert "action_id" not in part["content"] assert "previous_action_id" not in part["content"] - # Verify conversation items listing span also has workflow actions + # Verify conversation items listing span list_spans = self.exporter.get_spans_by_name("list_conversation_items") assert len(list_spans) >= 1 list_span = list_spans[0] - # Check for workflow action events in list items span - list_workflow_events = [e for e in list_span.events if e.name == "gen_ai.workflow.action"] - assert len(list_workflow_events) > 0 + # Check for conversation item events in list items span + list_item_events = [e for e in list_span.events if e.name == "gen_ai.conversation.item"] + assert len(list_item_events) > 0 - # Verify workflow event content structure in list items (content recording OFF) - for event in list_workflow_events: + # Verify conversation item event content structure (content recording OFF) + found_workflow_item = False + for event in list_item_events: content_str = event.attributes.get("gen_ai.event.content", "[]") content = json.loads(content_str) assert isinstance(content, list) - assert len(content) == 1 - assert content[0]["role"] == "workflow" - assert "parts" in content[0] - assert len(content[0]["parts"]) == 1 - part = content[0]["parts"][0] - assert part["type"] == "workflow_action" - assert "content" in part - assert "status" in part["content"] - # action_id and previous_action_id should NOT be present when content recording is off - assert "action_id" not in part["content"] - assert "previous_action_id" not in part["content"] + for item in content: + if item.get("role") == "workflow": + found_workflow_item = True + assert "parts" in item + assert len(item["parts"]) >= 1 + part = item["parts"][0] + assert part["type"] == "workflow_action" + assert "content" in part + assert "status" in part["content"] + # action_id and previous_action_id should NOT be present when content recording is off + assert ( + "action_id" not in part["content"] + ), "action_id should not be present when content recording is disabled" + assert ( + "previous_action_id" not in part["content"] + ), "previous_action_id should not be present when content recording is disabled" + assert found_workflow_item, "Should have found workflow items in conversation items" 
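# --- Illustrative sketch (not part of the diff): shape of one "gen_ai.conversation.item"
# --- event body for a workflow action, reconstructed from the CHANGELOG entry and the
# --- assertions above. Field values below are made up for illustration only.
import json

example_event_content = json.dumps(
    [
        {
            "role": "workflow",                      # workflow actions now use the conversation-item event
            "parts": [
                {
                    "type": "workflow_action",
                    "content": {
                        "status": "completed",                # always present (value here is illustrative)
                        "action_id": "action_123",            # only emitted when content recording is ON
                        "previous_action_id": "action_122",   # only emitted when content recording is ON
                    },
                }
            ],
        }
    ]
)
# With content recording OFF, the tests assert that "action_id" and "previous_action_id"
# are absent from part["content"], while "status" remains.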
@pytest.mark.usefixtures("instrument_with_content") @servicePreparer() @@ -4294,30 +4309,38 @@ def test_workflow_agent_streaming_with_content_recording(self, **kwargs): assert "content" in part assert "status" in part["content"] - # Verify conversation items listing span also has workflow actions + # Verify conversation items listing span list_spans = self.exporter.get_spans_by_name("list_conversation_items") assert len(list_spans) >= 1 list_span = list_spans[0] - # Check for workflow action events in list items span - list_workflow_events = [e for e in list_span.events if e.name == "gen_ai.workflow.action"] - assert len(list_workflow_events) > 0 + # Check for conversation item events in list items span + list_item_events = [e for e in list_span.events if e.name == "gen_ai.conversation.item"] + assert len(list_item_events) > 0 - # Verify workflow event content structure in list items - for event in list_workflow_events: + # Verify conversation item event content structure - check for workflow items + found_workflow_item = False + for event in list_item_events: content_str = event.attributes.get("gen_ai.event.content", "[]") content = json.loads(content_str) assert isinstance(content, list) - assert len(content) == 1 - assert content[0]["role"] == "workflow" - assert "parts" in content[0] - assert len(content[0]["parts"]) == 1 - part = content[0]["parts"][0] - assert part["type"] == "workflow_action" - assert "content" in part - assert "status" in part["content"] - # With content recording ON, action_id should be present - assert "action_id" in part["content"] + for item in content: + if item.get("role") == "workflow": + found_workflow_item = True + assert "parts" in item + assert len(item["parts"]) >= 1 + part = item["parts"][0] + assert part["type"] == "workflow_action" + assert "content" in part + assert "status" in part["content"] + # With content recording ON, action_id and previous_action_id should be present + assert ( + "action_id" in part["content"] + ), "action_id should be present when content recording is enabled" + assert ( + "previous_action_id" in part["content"] + ), "previous_action_id should be present when content recording is enabled" + assert found_workflow_item, "Should have found workflow items in conversation items" @pytest.mark.usefixtures("instrument_without_content") @servicePreparer() @@ -4405,31 +4428,38 @@ def test_workflow_agent_streaming_without_content_recording(self, **kwargs): assert "action_id" not in part["content"] assert "previous_action_id" not in part["content"] - # Verify conversation items listing span also has workflow actions + # Verify conversation items listing span list_spans = self.exporter.get_spans_by_name("list_conversation_items") assert len(list_spans) >= 1 list_span = list_spans[0] - # Check for workflow action events in list items span - list_workflow_events = [e for e in list_span.events if e.name == "gen_ai.workflow.action"] - assert len(list_workflow_events) > 0 + # Check for conversation item events in list items span + list_item_events = [e for e in list_span.events if e.name == "gen_ai.conversation.item"] + assert len(list_item_events) > 0 - # Verify workflow event content structure in list items (content recording OFF) - for event in list_workflow_events: + # Verify conversation item event content structure (content recording OFF) + found_workflow_item = False + for event in list_item_events: content_str = event.attributes.get("gen_ai.event.content", "[]") content = json.loads(content_str) assert isinstance(content, list) - 
assert len(content) == 1 - assert content[0]["role"] == "workflow" - assert "parts" in content[0] - assert len(content[0]["parts"]) == 1 - part = content[0]["parts"][0] - assert part["type"] == "workflow_action" - assert "content" in part - assert "status" in part["content"] - # action_id and previous_action_id should NOT be present when content recording is off - assert "action_id" not in part["content"] - assert "previous_action_id" not in part["content"] + for item in content: + if item.get("role") == "workflow": + found_workflow_item = True + assert "parts" in item + assert len(item["parts"]) >= 1 + part = item["parts"][0] + assert part["type"] == "workflow_action" + assert "content" in part + assert "status" in part["content"] + # action_id and previous_action_id should NOT be present when content recording is off + assert ( + "action_id" not in part["content"] + ), "action_id should not be present when content recording is disabled" + assert ( + "previous_action_id" not in part["content"] + ), "previous_action_id should not be present when content recording is disabled" + assert found_workflow_item, "Should have found workflow items in conversation items" @pytest.mark.usefixtures("instrument_with_content") @servicePreparer() diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_async.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_async.py index 31c65c8ea902..461abae0ce90 100644 --- a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_async.py +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_async.py @@ -2899,30 +2899,38 @@ async def test_async_workflow_agent_non_streaming_with_content_recording(self, * assert "content" in part assert "status" in part["content"] - # Verify conversation items listing span also has workflow actions + # Verify conversation items listing span list_spans = self.exporter.get_spans_by_name("list_conversation_items") assert len(list_spans) >= 1 list_span = list_spans[0] - # Check for workflow action events in list items span - list_workflow_events = [e for e in list_span.events if e.name == "gen_ai.workflow.action"] - assert len(list_workflow_events) > 0 + # Check for conversation item events in list items span + list_item_events = [e for e in list_span.events if e.name == "gen_ai.conversation.item"] + assert len(list_item_events) > 0 - # Verify workflow event content structure in list items - for event in list_workflow_events: + # Verify conversation item event content structure - check for workflow items + found_workflow_item = False + for event in list_item_events: content_str = event.attributes.get("gen_ai.event.content", "[]") content = json.loads(content_str) assert isinstance(content, list) - assert len(content) == 1 - assert content[0]["role"] == "workflow" - assert "parts" in content[0] - assert len(content[0]["parts"]) == 1 - part = content[0]["parts"][0] - assert part["type"] == "workflow_action" - assert "content" in part - assert "status" in part["content"] - # With content recording ON, action_id should be present - assert "action_id" in part["content"] + for item in content: + if item.get("role") == "workflow": + found_workflow_item = True + assert "parts" in item + assert len(item["parts"]) >= 1 + part = item["parts"][0] + assert part["type"] == "workflow_action" + assert "content" in part + assert "status" in part["content"] + # With content recording ON, action_id and previous_action_id should be present + assert ( + 
"action_id" in part["content"] + ), "action_id should be present when content recording is enabled" + assert ( + "previous_action_id" in part["content"] + ), "previous_action_id should be present when content recording is enabled" + assert found_workflow_item, "Should have found workflow items in conversation items" @pytest.mark.usefixtures("instrument_without_content") @servicePreparer() @@ -3007,34 +3015,45 @@ async def test_async_workflow_agent_non_streaming_without_content_recording(self assert "content" in part assert "status" in part["content"] # action_id and previous_action_id should NOT be present when content recording is off - assert "action_id" not in part["content"] - assert "previous_action_id" not in part["content"] - - # Verify conversation items listing span also has workflow actions + assert ( + "action_id" not in part["content"] + ), "action_id should not be present when content recording is disabled" + assert ( + "previous_action_id" not in part["content"] + ), "previous_action_id should not be present when content recording is disabled" + + # Verify conversation items listing span list_spans = self.exporter.get_spans_by_name("list_conversation_items") assert len(list_spans) >= 1 list_span = list_spans[0] - # Check for workflow action events in list items span - list_workflow_events = [e for e in list_span.events if e.name == "gen_ai.workflow.action"] - assert len(list_workflow_events) > 0 + # Check for conversation item events in list items span + list_item_events = [e for e in list_span.events if e.name == "gen_ai.conversation.item"] + assert len(list_item_events) > 0 - # Verify workflow event content structure in list items (content recording OFF) - for event in list_workflow_events: + # Verify conversation item event content structure (content recording OFF) + found_workflow_item = False + for event in list_item_events: content_str = event.attributes.get("gen_ai.event.content", "[]") content = json.loads(content_str) assert isinstance(content, list) - assert len(content) == 1 - assert content[0]["role"] == "workflow" - assert "parts" in content[0] - assert len(content[0]["parts"]) == 1 - part = content[0]["parts"][0] - assert part["type"] == "workflow_action" - assert "content" in part - assert "status" in part["content"] - # action_id and previous_action_id should NOT be present when content recording is off - assert "action_id" not in part["content"] - assert "previous_action_id" not in part["content"] + for item in content: + if item.get("role") == "workflow": + found_workflow_item = True + assert "parts" in item + assert len(item["parts"]) >= 1 + part = item["parts"][0] + assert part["type"] == "workflow_action" + assert "content" in part + assert "status" in part["content"] + # action_id and previous_action_id should NOT be present when content recording is off + assert ( + "action_id" not in part["content"] + ), "action_id should not be present when content recording is disabled" + assert ( + "previous_action_id" not in part["content"] + ), "previous_action_id should not be present when content recording is disabled" + assert found_workflow_item, "Should have found workflow items in conversation items" @pytest.mark.usefixtures("instrument_with_content") @servicePreparer() @@ -3124,30 +3143,38 @@ async def test_async_workflow_agent_streaming_with_content_recording(self, **kwa assert "content" in part assert "status" in part["content"] - # Verify conversation items listing span also has workflow actions + # Verify conversation items listing span list_spans = 
self.exporter.get_spans_by_name("list_conversation_items") assert len(list_spans) >= 1 list_span = list_spans[0] - # Check for workflow action events in list items span - list_workflow_events = [e for e in list_span.events if e.name == "gen_ai.workflow.action"] - assert len(list_workflow_events) > 0 + # Check for conversation item events in list items span + list_item_events = [e for e in list_span.events if e.name == "gen_ai.conversation.item"] + assert len(list_item_events) > 0 - # Verify workflow event content structure in list items - for event in list_workflow_events: + # Verify conversation item event content structure - check for workflow items + found_workflow_item = False + for event in list_item_events: content_str = event.attributes.get("gen_ai.event.content", "[]") content = json.loads(content_str) assert isinstance(content, list) - assert len(content) == 1 - assert content[0]["role"] == "workflow" - assert "parts" in content[0] - assert len(content[0]["parts"]) == 1 - part = content[0]["parts"][0] - assert part["type"] == "workflow_action" - assert "content" in part - assert "status" in part["content"] - # With content recording ON, action_id should be present - assert "action_id" in part["content"] + for item in content: + if item.get("role") == "workflow": + found_workflow_item = True + assert "parts" in item + assert len(item["parts"]) >= 1 + part = item["parts"][0] + assert part["type"] == "workflow_action" + assert "content" in part + assert "status" in part["content"] + # With content recording ON, action_id and previous_action_id should be present + assert ( + "action_id" in part["content"] + ), "action_id should be present when content recording is enabled" + assert ( + "previous_action_id" in part["content"] + ), "previous_action_id should be present when content recording is enabled" + assert found_workflow_item, "Should have found workflow items in conversation items" @pytest.mark.usefixtures("instrument_without_content") @servicePreparer() @@ -3237,34 +3264,45 @@ async def test_async_workflow_agent_streaming_without_content_recording(self, ** assert "content" in part assert "status" in part["content"] # action_id and previous_action_id should NOT be present when content recording is off - assert "action_id" not in part["content"] - assert "previous_action_id" not in part["content"] - - # Verify conversation items listing span also has workflow actions + assert ( + "action_id" not in part["content"] + ), "action_id should not be present when content recording is disabled" + assert ( + "previous_action_id" not in part["content"] + ), "previous_action_id should not be present when content recording is disabled" + + # Verify conversation items listing span list_spans = self.exporter.get_spans_by_name("list_conversation_items") assert len(list_spans) >= 1 list_span = list_spans[0] - # Check for workflow action events in list items span - list_workflow_events = [e for e in list_span.events if e.name == "gen_ai.workflow.action"] - assert len(list_workflow_events) > 0 + # Check for conversation item events in list items span + list_item_events = [e for e in list_span.events if e.name == "gen_ai.conversation.item"] + assert len(list_item_events) > 0 - # Verify workflow event content structure in list items (content recording OFF) - for event in list_workflow_events: + # Verify conversation item event content structure (content recording OFF) + found_workflow_item = False + for event in list_item_events: content_str = event.attributes.get("gen_ai.event.content", "[]") 
content = json.loads(content_str) assert isinstance(content, list) - assert len(content) == 1 - assert content[0]["role"] == "workflow" - assert "parts" in content[0] - assert len(content[0]["parts"]) == 1 - part = content[0]["parts"][0] - assert part["type"] == "workflow_action" - assert "content" in part - assert "status" in part["content"] - # action_id and previous_action_id should NOT be present when content recording is off - assert "action_id" not in part["content"] - assert "previous_action_id" not in part["content"] + for item in content: + if item.get("role") == "workflow": + found_workflow_item = True + assert "parts" in item + assert len(item["parts"]) >= 1 + part = item["parts"][0] + assert part["type"] == "workflow_action" + assert "content" in part + assert "status" in part["content"] + # action_id and previous_action_id should NOT be present when content recording is off + assert ( + "action_id" not in part["content"] + ), "action_id should not be present when content recording is disabled" + assert ( + "previous_action_id" not in part["content"] + ), "previous_action_id should not be present when content recording is disabled" + assert found_workflow_item, "Should have found workflow items in conversation items" @pytest.mark.usefixtures("instrument_with_content") @servicePreparer() diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_browser_automation.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_browser_automation.py new file mode 100644 index 000000000000..b17e6d197a0c --- /dev/null +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_browser_automation.py @@ -0,0 +1,544 @@ +# pylint: disable=too-many-lines,line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +Tests for ResponsesInstrumentor with browser automation agents. 
+""" +import os +import pytest +from azure.ai.projects.telemetry import AIProjectInstrumentor, _utils +from azure.core.settings import settings +from gen_ai_trace_verifier import GenAiTraceVerifier +from devtools_testutils import recorded_by_proxy, RecordedTransport +from azure.ai.projects.models import PromptAgentDefinition + +from test_base import servicePreparer +from test_ai_instrumentor_base import ( + TestAiAgentsInstrumentorBase, + CONTENT_TRACING_ENV_VARIABLE, +) + +settings.tracing_implementation = "OpenTelemetry" +_utils._span_impl_type = settings.tracing_implementation() + + +class TestResponsesInstrumentorBrowserAutomation(TestAiAgentsInstrumentorBase): + """Tests for ResponsesInstrumentor with browser automation agents.""" + + # ======================================== + # Sync Browser Automation Tests - Non-Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_browser_automation_non_streaming_with_content_recording(self, **kwargs): + """Test synchronous browser automation agent with non-streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + browser_automation_connection_id = kwargs.get("browser_automation_project_connection_id") + assert deployment_name is not None + if browser_automation_connection_id is None: + pytest.skip("browser_automation_project_connection_id not configured") + + with project_client: + openai_client = project_client.get_openai_client() + + from azure.ai.projects.models import ( + BrowserAutomationPreviewTool, + BrowserAutomationToolParameters, + BrowserAutomationToolConnectionParameters, + ) + + tool = BrowserAutomationPreviewTool( + browser_automation_preview=BrowserAutomationToolParameters( + connection=BrowserAutomationToolConnectionParameters( + project_connection_id=browser_automation_connection_id, + ) + ) + ) + + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are an Agent helping with browser automation tasks.""", + tools=[tool], + ), + ) + + try: + conversation = openai_client.conversations.create() + response = openai_client.responses.create( + conversation=conversation.id, + tool_choice="required", + input=""" + Your task is to get the latests news story from Microsoft website. + Go to the website https://news.microsoft.com and click the "What's new today" link at the top of the page to open the latest + news stories and provide a summary of the most recent one. 
+ """, + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + assert response.output is not None + assert len(response.output) > 0 + + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1 + span = spans[0] + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + tool_call_events = [e for e in span.events if e.name == "gen_ai.output.messages"] + assert len(tool_call_events) > 0 + found_browser_tool = False + for event in tool_call_events: + if event.attributes and "gen_ai.event.content" in event.attributes: + content = event.attributes["gen_ai.event.content"] + if isinstance(content, str) and "browser_automation_preview_call" in content: + found_browser_tool = True + assert "arguments" in content or "query" in content + assert found_browser_tool + + # Comprehensive event content validation - verify content IS present + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + # Check that content fields ARE present with content recording ON + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + and isinstance(part["content"], str) + and part["content"].strip() != "" + ), "Text content should be present when content recording is enabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + + openai_client.conversations.delete(conversation_id=conversation.id) + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_browser_automation_non_streaming_without_content_recording(self, **kwargs): + """Test synchronous browser automation agent with non-streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + browser_automation_connection_id = 
kwargs.get("browser_automation_project_connection_id") + assert deployment_name is not None + if browser_automation_connection_id is None: + pytest.skip("browser_automation_project_connection_id not configured") + + with project_client: + openai_client = project_client.get_openai_client() + + from azure.ai.projects.models import ( + BrowserAutomationPreviewTool, + BrowserAutomationToolParameters, + BrowserAutomationToolConnectionParameters, + ) + + tool = BrowserAutomationPreviewTool( + browser_automation_preview=BrowserAutomationToolParameters( + connection=BrowserAutomationToolConnectionParameters( + project_connection_id=browser_automation_connection_id, + ) + ) + ) + + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are an Agent helping with browser automation tasks.""", + tools=[tool], + ), + ) + + try: + conversation = openai_client.conversations.create() + response = openai_client.responses.create( + conversation=conversation.id, + tool_choice="required", + input=""" + Your task is to get the latests news story from Microsoft website. + Go to the website https://news.microsoft.com and click the "What's new today" link at the top of the page to open the latest + news stories and provide a summary of the most recent one. + """, + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1 + span = spans[0] + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + tool_call_events = [e for e in span.events if e.name == "gen_ai.output.messages"] + for event in tool_call_events: + if event.attributes and "gen_ai.event.content" in event.attributes: + content = event.attributes["gen_ai.event.content"] + if isinstance(content, str) and "browser_automation_preview_call" in content: + assert '"id"' in content + + # Comprehensive event content validation - verify content is NOT present + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + # Check that content fields are NOT present with content recording OFF + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + 
import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + + openai_client.conversations.delete(conversation_id=conversation.id) + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # ======================================== + # Sync Browser Automation Tests - Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_browser_automation_streaming_with_content_recording(self, **kwargs): + """Test synchronous browser automation agent with streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + {CONTENT_TRACING_ENV_VARIABLE: "True", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"} + ) + self.setup_telemetry() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + browser_automation_connection_id = kwargs.get("browser_automation_project_connection_id") + assert deployment_name is not None + if browser_automation_connection_id is None: + pytest.skip("browser_automation_project_connection_id not configured") + + with project_client: + openai_client = project_client.get_openai_client() + + from azure.ai.projects.models import ( + BrowserAutomationPreviewTool, + BrowserAutomationToolParameters, + BrowserAutomationToolConnectionParameters, + ) + + tool = BrowserAutomationPreviewTool( + browser_automation_preview=BrowserAutomationToolParameters( + connection=BrowserAutomationToolConnectionParameters( + project_connection_id=browser_automation_connection_id + ) + ) + ) + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, instructions="""Browser automation helper.""", tools=[tool] + ), + ) + + try: + conversation = openai_client.conversations.create() + stream = openai_client.responses.create( + conversation=conversation.id, + tool_choice="required", + input=""" + Your task is to get the latests news story from Microsoft website. + Go to the website https://news.microsoft.com and click the "What's new today" link at the top of the page to open the latest + news stories and provide a summary of the most recent one. 
+ """, + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + for _ in stream: + pass + + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1 + span = spans[0] + + # Get response ID from span + assert span.attributes is not None, "Span should have attributes" + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None, "Response ID should be present in span" + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + tool_call_events = [e for e in span.events if e.name == "gen_ai.output.messages"] + assert len(tool_call_events) > 0 + + openai_client.conversations.delete(conversation_id=conversation.id) + # Strict event content checks for response generation span (after span assignment) + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("user", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + # Validate content fields ARE present when content recording is enabled + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + and isinstance(part["content"], str) + and part["content"].strip() != "" + ), "Text content should be present when content recording is enabled" + if event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_browser_automation_streaming_without_content_recording(self, **kwargs): + """Test synchronous browser automation agent with streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + {CONTENT_TRACING_ENV_VARIABLE: "False", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"} + ) + self.setup_telemetry() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + browser_automation_connection_id = kwargs.get("browser_automation_project_connection_id") + assert 
deployment_name is not None + if browser_automation_connection_id is None: + pytest.skip("browser_automation_project_connection_id not configured") + + with project_client: + openai_client = project_client.get_openai_client() + + from azure.ai.projects.models import ( + BrowserAutomationPreviewTool, + BrowserAutomationToolParameters, + BrowserAutomationToolConnectionParameters, + ) + + tool = BrowserAutomationPreviewTool( + browser_automation_preview=BrowserAutomationToolParameters( + connection=BrowserAutomationToolConnectionParameters( + project_connection_id=browser_automation_connection_id + ) + ) + ) + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition(model=deployment_name, instructions="Browser helper.", tools=[tool]), + ) + + try: + conversation = openai_client.conversations.create() + stream = openai_client.responses.create( + conversation=conversation.id, + tool_choice="required", + input=""" + Your task is to get the latests news story from Microsoft website. + Go to the website https://news.microsoft.com and click the "What's new today" link at the top of the page to open the latest + news stories and provide a summary of the most recent one. + """, + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + for _ in stream: + pass + + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1 + span = spans[0] + + # Get response ID from span + assert span.attributes is not None, "Span should have attributes" + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None, "Response ID should be present in span" + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Strict event content checks for response generation span (after span assignment) + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("user", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + # Validate content fields are NOT present when content recording is disabled + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + if event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + 
assert first.get("role") in ("assistant", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + + openai_client.conversations.delete(conversation_id=conversation.id) + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_browser_automation_async.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_browser_automation_async.py new file mode 100644 index 000000000000..ec677693213d --- /dev/null +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_browser_automation_async.py @@ -0,0 +1,532 @@ +# pylint: disable=too-many-lines,line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +Async tests for ResponsesInstrumentor with browser automation agents. +""" +import os +import pytest +from azure.ai.projects.telemetry import AIProjectInstrumentor, _utils +from azure.core.settings import settings +from gen_ai_trace_verifier import GenAiTraceVerifier +from devtools_testutils.aio import recorded_by_proxy_async +from devtools_testutils import RecordedTransport +from azure.ai.projects.models import PromptAgentDefinition + +from test_base import servicePreparer +from test_ai_instrumentor_base import ( + TestAiAgentsInstrumentorBase, + CONTENT_TRACING_ENV_VARIABLE, +) + +import json + +settings.tracing_implementation = "OpenTelemetry" +_utils._span_impl_type = settings.tracing_implementation() + + +class TestResponsesInstrumentorBrowserAutomationAsync(TestAiAgentsInstrumentorBase): + """Async tests for ResponsesInstrumentor with browser automation agents.""" + + # ======================================== + # Async Browser Automation Tests - Non-Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_browser_automation_non_streaming_with_content_recording(self, **kwargs): + """Test asynchronous browser automation agent with non-streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + browser_automation_connection_id = kwargs.get("browser_automation_project_connection_id") + assert deployment_name is not None + if browser_automation_connection_id is None: + pytest.skip("browser_automation_project_connection_id not configured") + + async with project_client: + openai_client = project_client.get_openai_client() + + from azure.ai.projects.models import ( + BrowserAutomationPreviewTool, + BrowserAutomationToolParameters, + BrowserAutomationToolConnectionParameters, + ) + + tool = BrowserAutomationPreviewTool( + browser_automation_preview=BrowserAutomationToolParameters( + connection=BrowserAutomationToolConnectionParameters( + project_connection_id=browser_automation_connection_id, + ) + ) + ) + + agent = await project_client.agents.create_version( + 
agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are an Agent helping with browser automation tasks.""", + tools=[tool], + ), + ) + + try: + conversation = await openai_client.conversations.create() + response = await openai_client.responses.create( + conversation=conversation.id, + tool_choice="required", + input=""" + Your task is to get the latests news story from Microsoft website. + Go to the website https://news.microsoft.com and click the "What's new today" link at the top of the page to open the latest + news stories and provide a summary of the most recent one. + """, + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + assert response.output is not None + assert len(response.output) > 0 + + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1 + span = spans[0] + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + tool_call_events = [e for e in span.events if e.name == "gen_ai.output.messages"] + assert len(tool_call_events) > 0 + found_browser_tool = False + for event in tool_call_events: + if event.attributes and "gen_ai.event.content" in event.attributes: + content = event.attributes["gen_ai.event.content"] + if isinstance(content, str) and "browser_automation_preview_call" in content: + found_browser_tool = True + assert "arguments" in content or "query" in content + assert found_browser_tool + + # Comprehensive event content validation - verify content IS present + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + # Check that content fields ARE present with content recording ON + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + and isinstance(part["content"], str) + and part["content"].strip() != "" + ), "Text content should be present when content recording is enabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + + await openai_client.conversations.delete(conversation_id=conversation.id) + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def 
test_async_browser_automation_non_streaming_without_content_recording(self, **kwargs): + """Test asynchronous browser automation agent with non-streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + browser_automation_connection_id = kwargs.get("browser_automation_project_connection_id") + assert deployment_name is not None + if browser_automation_connection_id is None: + pytest.skip("browser_automation_project_connection_id not configured") + + async with project_client: + openai_client = project_client.get_openai_client() + + from azure.ai.projects.models import ( + BrowserAutomationPreviewTool, + BrowserAutomationToolParameters, + BrowserAutomationToolConnectionParameters, + ) + + tool = BrowserAutomationPreviewTool( + browser_automation_preview=BrowserAutomationToolParameters( + connection=BrowserAutomationToolConnectionParameters( + project_connection_id=browser_automation_connection_id, + ) + ) + ) + + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are an Agent helping with browser automation tasks.""", + tools=[tool], + ), + ) + + try: + conversation = await openai_client.conversations.create() + response = await openai_client.responses.create( + conversation=conversation.id, + tool_choice="required", + input=""" + Your task is to get the latests news story from Microsoft website. + Go to the website https://news.microsoft.com and click the "What's new today" link at the top of the page to open the latest + news stories and provide a summary of the most recent one. 
+ """, + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1 + span = spans[0] + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + tool_call_events = [e for e in span.events if e.name == "gen_ai.output.messages"] + for event in tool_call_events: + if event.attributes and "gen_ai.event.content" in event.attributes: + content = event.attributes["gen_ai.event.content"] + if isinstance(content, str) and "browser_automation_preview_call" in content: + assert '"id"' in content + + # Comprehensive event content validation - verify content is NOT present + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + # Check that content fields are NOT present with content recording OFF + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + + await openai_client.conversations.delete(conversation_id=conversation.id) + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # ======================================== + # Async Browser Automation Tests - Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_browser_automation_streaming_with_content_recording(self, **kwargs): + """Test asynchronous browser automation agent with streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + {CONTENT_TRACING_ENV_VARIABLE: "True", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"} + ) + self.setup_telemetry() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + browser_automation_connection_id = kwargs.get("browser_automation_project_connection_id") + assert deployment_name is not None + if browser_automation_connection_id is None: + pytest.skip("browser_automation_project_connection_id not configured") + + async with project_client: + openai_client = 
project_client.get_openai_client() + + from azure.ai.projects.models import ( + BrowserAutomationPreviewTool, + BrowserAutomationToolParameters, + BrowserAutomationToolConnectionParameters, + ) + + tool = BrowserAutomationPreviewTool( + browser_automation_preview=BrowserAutomationToolParameters( + connection=BrowserAutomationToolConnectionParameters( + project_connection_id=browser_automation_connection_id + ) + ) + ) + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, instructions="""Browser automation helper.""", tools=[tool] + ), + ) + + try: + conversation = await openai_client.conversations.create() + stream = await openai_client.responses.create( + conversation=conversation.id, + tool_choice="required", + input=""" + Your task is to get the latests news story from Microsoft website. + Go to the website https://news.microsoft.com and click the "What's new today" link at the top of the page to open the latest + news stories and provide a summary of the most recent one. + """, + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + async for _ in stream: + pass + + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1 + span = spans[0] + + # Get response ID from span + assert span.attributes is not None, "Span should have attributes" + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None, "Response ID should be present in span" + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + tool_call_events = [e for e in span.events if e.name == "gen_ai.output.messages"] + assert len(tool_call_events) > 0 + + # Strict event content checks for response generation span + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("user", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + # Validate content fields ARE present when content recording is enabled + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + and isinstance(part["content"], str) + and part["content"].strip() != "" + ), "Text content should be present when content recording is enabled" + if event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert 
first.get("role") in ("assistant", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + + await openai_client.conversations.delete(conversation_id=conversation.id) + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_browser_automation_streaming_without_content_recording(self, **kwargs): + """Test asynchronous browser automation agent with streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + {CONTENT_TRACING_ENV_VARIABLE: "False", "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True"} + ) + self.setup_telemetry() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + browser_automation_connection_id = kwargs.get("browser_automation_project_connection_id") + assert deployment_name is not None + if browser_automation_connection_id is None: + pytest.skip("browser_automation_project_connection_id not configured") + + async with project_client: + openai_client = project_client.get_openai_client() + + from azure.ai.projects.models import ( + BrowserAutomationPreviewTool, + BrowserAutomationToolParameters, + BrowserAutomationToolConnectionParameters, + ) + + tool = BrowserAutomationPreviewTool( + browser_automation_preview=BrowserAutomationToolParameters( + connection=BrowserAutomationToolConnectionParameters( + project_connection_id=browser_automation_connection_id + ) + ) + ) + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition(model=deployment_name, instructions="Browser helper.", tools=[tool]), + ) + + try: + conversation = await openai_client.conversations.create() + stream = await openai_client.responses.create( + conversation=conversation.id, + tool_choice="required", + input=""" + Your task is to get the latest news story from the Microsoft website. + Go to the website https://news.microsoft.com and click the "What's new today" link at the top of the page to open the latest + news stories and provide a summary of the most recent one.
+ """, + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + async for _ in stream: + pass + + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1 + span = spans[0] + + # Get response ID from span + assert span.attributes is not None, "Span should have attributes" + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None, "Response ID should be present in span" + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Strict event content checks for response generation span - verify content recording is OFF + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("user", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + # Validate content fields are NOT present when content recording is disabled + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + if event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + + await openai_client.conversations.delete(conversation_id=conversation.id) + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_code_interpreter.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_code_interpreter.py new file mode 100644 index 000000000000..e51478a6f554 --- /dev/null +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_code_interpreter.py @@ -0,0 +1,798 @@ +# pylint: disable=too-many-lines,line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +Tests for ResponsesInstrumentor with Code Interpreter tool. 
+""" +import os +import pytest +from io import BytesIO +from azure.ai.projects.telemetry import AIProjectInstrumentor, _utils +from azure.core.settings import settings +from gen_ai_trace_verifier import GenAiTraceVerifier +from devtools_testutils import recorded_by_proxy, RecordedTransport +from azure.ai.projects.models import ( + PromptAgentDefinition, + CodeInterpreterTool, + CodeInterpreterContainerAuto, +) + +from test_base import servicePreparer +from test_ai_instrumentor_base import ( + TestAiAgentsInstrumentorBase, + CONTENT_TRACING_ENV_VARIABLE, +) + +settings.tracing_implementation = "OpenTelemetry" +_utils._span_impl_type = settings.tracing_implementation() + + +class TestResponsesInstrumentorCodeInterpreter(TestAiAgentsInstrumentorBase): + """ + Test suite for Code Interpreter agent telemetry instrumentation. + + This class tests OpenTelemetry trace generation when using Code Interpreter tool + with both content recording enabled and disabled, in both streaming and non-streaming modes. + """ + + # ======================================== + # Sync Code Interpreter Agent Tests - Non-Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_code_interpreter_non_streaming_with_content_recording(self, **kwargs): + """Test synchronous Code Interpreter agent with content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create CSV data document + csv_data = """sector,name,operating_profit +TRANSPORTATION,Contoso shipping,850000 +TRANSPORTATION,Contoso rail,920000 +TRANSPORTATION,Contoso air,1100000 +""" + + # Creating a vector store is not needed for the code interpreter, but the file still needs to be uploaded + csv_file = BytesIO(csv_data.encode("utf-8")) + csv_file.name = "transportation_data.csv" + + # Upload file for code interpreter + file = openai_client.files.create(purpose="assistants", file=csv_file) + assert file.id is not None + + # Create agent with Code Interpreter tool + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can execute Python code to analyze data.", + tools=[CodeInterpreterTool(container=CodeInterpreterContainerAuto(file_ids=[file.id]))], + ), + ) + + try: + conversation = openai_client.conversations.create() + + # Ask question that triggers code interpreter + response = openai_client.responses.create( + conversation=conversation.id, + input="Calculate the average operating profit from the transportation data", + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Explicitly call and iterate through conversation items + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") +
assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + assert span.attributes is not None + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content IS present + from collections.abc import Mapping + import json + + found_code_interpreter_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert "content" in part and isinstance( + part["content"], str + ), "Text content should be present when content recording is enabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_call = True + assert "id" in tool_content, "code_interpreter_call should have id" + # With content recording, code should be present + assert ( + "code" in tool_content + ), "code should be present when content recording is enabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" in part + ), "text content should be present when content recording is enabled" + + assert found_code_interpreter_call, "Should have found code_interpreter_call in output" + assert found_text_response, "Should have found text response in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_code_interpreter_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert "content" in part, "text content should be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_in_items 
= True + assert ( + "id" in tool_content + ), "code_interpreter_call should have id in conversation items" + code_interpreter = tool_content.get("code_interpreter") + if code_interpreter: + assert ( + "code" in code_interpreter + ), "code should be present when content recording is enabled" + assert ( + "status" in code_interpreter + ), "status should be present in code_interpreter" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_code_interpreter_in_items, "Should have found code_interpreter_call in conversation items" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + openai_client.files.delete(file.id) + + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_code_interpreter_non_streaming_without_content_recording(self, **kwargs): + """Test synchronous Code Interpreter agent with content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create CSV data document + csv_data = """sector,name,operating_profit +TRANSPORTATION,Contoso shipping,850000 +TRANSPORTATION,Contoso rail,920000 +TRANSPORTATION,Contoso air,1100000 +""" + + csv_file = BytesIO(csv_data.encode("utf-8")) + csv_file.name = "transportation_data.csv" + + # Upload file for code interpreter + file = openai_client.files.create(purpose="assistants", file=csv_file) + assert file.id is not None + + # Create agent with Code Interpreter tool + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can execute Python code to analyze data.", + tools=[CodeInterpreterTool(container=CodeInterpreterContainerAuto(file_ids=[file.id]))], + ), + ) + + try: + conversation = openai_client.conversations.create() + + # Ask question that triggers code interpreter + response = openai_client.responses.create( + conversation=conversation.id, + input="Calculate the average operating profit from the transportation data", + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Explicitly call and iterate through conversation items + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + assert span.attributes is not None + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + 
("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content is NOT present + from collections.abc import Mapping + import json + + found_code_interpreter_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_call = True + assert "id" in tool_content, "code_interpreter_call should have id" + # Without content recording, code should NOT be present + assert ( + "code" not in tool_content + ), "code should NOT be present when content recording is disabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" not in part + ), "text content should NOT be present when content recording is disabled" + + assert found_code_interpreter_call, "Should have found code_interpreter_call in output" + assert found_text_response, "Should have found text response type in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_code_interpreter_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_in_items = True + assert ( + "id" in tool_content + ), "code_interpreter_call should have id in conversation items" + # Without content recording, code should NOT be present + code_interpreter = tool_content.get("code_interpreter") + if code_interpreter: + assert ( + "code" not in code_interpreter + ), "code should NOT be present when content recording is disabled" + else: + assert False, f"Unexpected 
event name in list_conversation_items span: {event.name}" + + assert found_code_interpreter_in_items, "Should have found code_interpreter_call in conversation items" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + openai_client.files.delete(file.id) + + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # ======================================== + # Sync Code Interpreter Agent Tests - Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_code_interpreter_streaming_with_content_recording(self, **kwargs): + """Test synchronous Code Interpreter agent with streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create CSV data document + csv_data = """sector,name,operating_profit +TRANSPORTATION,Contoso shipping,850000 +TRANSPORTATION,Contoso rail,920000 +TRANSPORTATION,Contoso air,1100000 +""" + + csv_file = BytesIO(csv_data.encode("utf-8")) + csv_file.name = "transportation_data.csv" + + # Upload file for code interpreter + file = openai_client.files.create(purpose="assistants", file=csv_file) + assert file.id is not None + + # Create agent with Code Interpreter tool + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can execute Python code to analyze data.", + tools=[CodeInterpreterTool(container=CodeInterpreterContainerAuto(file_ids=[file.id]))], + ), + ) + + try: + conversation = openai_client.conversations.create() + + # Ask question that triggers code interpreter with streaming + stream = openai_client.responses.create( + conversation=conversation.id, + input="Calculate the average operating profit from the transportation data", + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Consume the stream + for event in stream: + pass + + # Explicitly call and iterate through conversation items + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + assert span.attributes is not None + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", 
"+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content IS present + from collections.abc import Mapping + import json + + found_code_interpreter_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert "content" in part and isinstance( + part["content"], str + ), "Text content should be present when content recording is enabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_call = True + assert "id" in tool_content, "code_interpreter_call should have id" + assert ( + "code" in tool_content + ), "code should be present when content recording is enabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" in part + ), "text content should be present when content recording is enabled" + + assert found_code_interpreter_call, "Should have found code_interpreter_call in output" + assert found_text_response, "Should have found text response in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_code_interpreter_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert "content" in part, "text content should be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_in_items = True + assert ( + "id" in tool_content + ), "code_interpreter_call should have id in conversation items" + code_interpreter = tool_content.get("code_interpreter") + if code_interpreter: + assert ( + "code" in code_interpreter + ), "code should be present when content recording is enabled" + assert ( + "status" in code_interpreter + ), "status should be present in code_interpreter" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_code_interpreter_in_items, "Should have found code_interpreter_call in conversation items" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) 
+ openai_client.files.delete(file.id) + + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_code_interpreter_streaming_without_content_recording(self, **kwargs): + """Test synchronous Code Interpreter agent with streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create CSV data document + csv_data = """sector,name,operating_profit +TRANSPORTATION,Contoso shipping,850000 +TRANSPORTATION,Contoso rail,920000 +TRANSPORTATION,Contoso air,1100000 +""" + + csv_file = BytesIO(csv_data.encode("utf-8")) + csv_file.name = "transportation_data.csv" + + # Upload file for code interpreter + file = openai_client.files.create(purpose="assistants", file=csv_file) + assert file.id is not None + + # Create agent with Code Interpreter tool + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can execute Python code to analyze data.", + tools=[CodeInterpreterTool(container=CodeInterpreterContainerAuto(file_ids=[file.id]))], + ), + ) + + try: + conversation = openai_client.conversations.create() + + # Ask question that triggers code interpreter with streaming + stream = openai_client.responses.create( + conversation=conversation.id, + input="Calculate the average operating profit from the transportation data", + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Consume the stream + for event in stream: + pass + + # Explicitly call and iterate through conversation items + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + assert span.attributes is not None + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content is NOT present + from collections.abc import Mapping + import json + + found_code_interpreter_call = False + found_text_response = False + + for event in span.events: + if event.name == 
"gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_call = True + assert "id" in tool_content, "code_interpreter_call should have id" + assert ( + "code" not in tool_content + ), "code should NOT be present when content recording is disabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" not in part + ), "text content should NOT be present when content recording is disabled" + + assert found_code_interpreter_call, "Should have found code_interpreter_call in output" + assert found_text_response, "Should have found text response type in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_code_interpreter_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_in_items = True + assert ( + "id" in tool_content + ), "code_interpreter_call should have id in conversation items" + code_interpreter = tool_content.get("code_interpreter") + if code_interpreter: + assert ( + "code" not in code_interpreter + ), "code should NOT be present when content recording is disabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_code_interpreter_in_items, "Should have found code_interpreter_call in conversation items" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + openai_client.files.delete(file.id) + + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_code_interpreter_async.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_code_interpreter_async.py new file mode 100644 index 000000000000..7b73aeeabec0 --- /dev/null +++ 
b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_code_interpreter_async.py @@ -0,0 +1,798 @@ +# pylint: disable=too-many-lines,line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +Tests for ResponsesInstrumentor with Code Interpreter tool (async). +""" +import os +import pytest +from io import BytesIO +from azure.ai.projects.telemetry import AIProjectInstrumentor, _utils +from azure.core.settings import settings +from gen_ai_trace_verifier import GenAiTraceVerifier +from devtools_testutils.aio import recorded_by_proxy_async +from devtools_testutils import RecordedTransport +from azure.ai.projects.models import ( + PromptAgentDefinition, + CodeInterpreterTool, + CodeInterpreterContainerAuto, +) + +from test_base import servicePreparer +from test_ai_instrumentor_base import ( + TestAiAgentsInstrumentorBase, + CONTENT_TRACING_ENV_VARIABLE, +) + +settings.tracing_implementation = "OpenTelemetry" +_utils._span_impl_type = settings.tracing_implementation() + + +class TestResponsesInstrumentorCodeInterpreterAsync(TestAiAgentsInstrumentorBase): + """ + Test suite for Code Interpreter agent telemetry instrumentation (async). + + This class tests OpenTelemetry trace generation when using Code Interpreter tool + with both content recording enabled and disabled, in both streaming and non-streaming modes. + """ + + # ======================================== + # Async Code Interpreter Agent Tests - Non-Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_code_interpreter_non_streaming_with_content_recording(self, **kwargs): + """Test asynchronous Code Interpreter agent with content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create CSV data document + csv_data = """sector,name,operating_profit +TRANSPORTATION,Contoso shipping,850000 +TRANSPORTATION,Contoso rail,920000 +TRANSPORTATION,Contoso air,1100000 +""" + + csv_file = BytesIO(csv_data.encode("utf-8")) + csv_file.name = "transportation_data.csv" + + # Upload file for code interpreter + file = await openai_client.files.create(purpose="assistants", file=csv_file) + assert file.id is not None + + # Create agent with Code Interpreter tool + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can execute Python code to analyze data.", + tools=[CodeInterpreterTool(container=CodeInterpreterContainerAuto(file_ids=[file.id]))], + ), + ) + + try: + conversation = await openai_client.conversations.create() + + # Ask question that triggers code interpreter + response = await openai_client.responses.create( + conversation=conversation.id, + 
input="Calculate the average operating profit from the transportation data", + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Explicitly call and iterate through conversation items + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + assert span.attributes is not None + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content IS present + from collections.abc import Mapping + import json + + found_code_interpreter_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert "content" in part and isinstance( + part["content"], str + ), "Text content should be present when content recording is enabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_call = True + assert "id" in tool_content, "code_interpreter_call should have id" + # With content recording, code should be present + assert ( + "code" in tool_content + ), "code should be present when content recording is enabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" in part + ), "text content should be present when content recording is enabled" + + assert found_code_interpreter_call, "Should have found code_interpreter_call in output" + assert found_text_response, "Should have found text response in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_code_interpreter_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert 
isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert "content" in part, "text content should be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_in_items = True + assert ( + "id" in tool_content + ), "code_interpreter_call should have id in conversation items" + code_interpreter = tool_content.get("code_interpreter") + if code_interpreter: + assert ( + "code" in code_interpreter + ), "code should be present when content recording is enabled" + assert ( + "status" in code_interpreter + ), "status should be present in code_interpreter" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_code_interpreter_in_items, "Should have found code_interpreter_call in conversation items" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + await openai_client.files.delete(file.id) + + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_code_interpreter_non_streaming_without_content_recording(self, **kwargs): + """Test asynchronous Code Interpreter agent with content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create CSV data document + csv_data = """sector,name,operating_profit +TRANSPORTATION,Contoso shipping,850000 +TRANSPORTATION,Contoso rail,920000 +TRANSPORTATION,Contoso air,1100000 +""" + + csv_file = BytesIO(csv_data.encode("utf-8")) + csv_file.name = "transportation_data.csv" + + # Upload file for code interpreter + file = await openai_client.files.create(purpose="assistants", file=csv_file) + assert file.id is not None + + # Create agent with Code Interpreter tool + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can execute Python code to analyze data.", + tools=[CodeInterpreterTool(container=CodeInterpreterContainerAuto(file_ids=[file.id]))], + ), + ) + + try: + conversation = await openai_client.conversations.create() + + # Ask question that triggers code interpreter + response = await openai_client.responses.create( + conversation=conversation.id, + input="Calculate the average operating profit from the transportation data", + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Explicitly call and iterate through conversation items + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass 
+ + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + assert span.attributes is not None + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content is NOT present + from collections.abc import Mapping + import json + + found_code_interpreter_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_call = True + assert "id" in tool_content, "code_interpreter_call should have id" + # Without content recording, code should NOT be present + assert ( + "code" not in tool_content + ), "code should NOT be present when content recording is disabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" not in part + ), "text content should NOT be present when content recording is disabled" + + assert found_code_interpreter_call, "Should have found code_interpreter_call in output" + assert found_text_response, "Should have found text response type in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_code_interpreter_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items" + elif part.get("type") == "tool_call": + 
tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_in_items = True + assert ( + "id" in tool_content + ), "code_interpreter_call should have id in conversation items" + # Without content recording, code should NOT be present + code_interpreter = tool_content.get("code_interpreter") + if code_interpreter: + assert ( + "code" not in code_interpreter + ), "code should NOT be present when content recording is disabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_code_interpreter_in_items, "Should have found code_interpreter_call in conversation items" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + await openai_client.files.delete(file.id) + + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # ======================================== + # Async Code Interpreter Agent Tests - Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_code_interpreter_streaming_with_content_recording(self, **kwargs): + """Test asynchronous Code Interpreter agent with streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create CSV data document + csv_data = """sector,name,operating_profit +TRANSPORTATION,Contoso shipping,850000 +TRANSPORTATION,Contoso rail,920000 +TRANSPORTATION,Contoso air,1100000 +""" + + csv_file = BytesIO(csv_data.encode("utf-8")) + csv_file.name = "transportation_data.csv" + + # Upload file for code interpreter + file = await openai_client.files.create(purpose="assistants", file=csv_file) + assert file.id is not None + + # Create agent with Code Interpreter tool + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can execute Python code to analyze data.", + tools=[CodeInterpreterTool(container=CodeInterpreterContainerAuto(file_ids=[file.id]))], + ), + ) + + try: + conversation = await openai_client.conversations.create() + + # Ask question that triggers code interpreter with streaming + stream = await openai_client.responses.create( + conversation=conversation.id, + input="Calculate the average operating profit from the transportation data", + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Consume the stream + async for event in stream: + pass + + # Explicitly call and iterate through conversation items + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + 
assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + assert span.attributes is not None + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content IS present + from collections.abc import Mapping + import json + + found_code_interpreter_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert "content" in part and isinstance( + part["content"], str + ), "Text content should be present when content recording is enabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_call = True + assert "id" in tool_content, "code_interpreter_call should have id" + assert ( + "code" in tool_content + ), "code should be present when content recording is enabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" in part + ), "text content should be present when content recording is enabled" + + assert found_code_interpreter_call, "Should have found code_interpreter_call in output" + assert found_text_response, "Should have found text response in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_code_interpreter_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert "content" in part, "text content should be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_in_items = True + assert ( + "id" in tool_content + ), 
"code_interpreter_call should have id in conversation items" + code_interpreter = tool_content.get("code_interpreter") + if code_interpreter: + assert ( + "code" in code_interpreter + ), "code should be present when content recording is enabled" + assert ( + "status" in code_interpreter + ), "status should be present in code_interpreter" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_code_interpreter_in_items, "Should have found code_interpreter_call in conversation items" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + await openai_client.files.delete(file.id) + + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_code_interpreter_streaming_without_content_recording(self, **kwargs): + """Test asynchronous Code Interpreter agent with streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create CSV data document + csv_data = """sector,name,operating_profit +TRANSPORTATION,Contoso shipping,850000 +TRANSPORTATION,Contoso rail,920000 +TRANSPORTATION,Contoso air,1100000 +""" + + csv_file = BytesIO(csv_data.encode("utf-8")) + csv_file.name = "transportation_data.csv" + + # Upload file for code interpreter + file = await openai_client.files.create(purpose="assistants", file=csv_file) + assert file.id is not None + + # Create agent with Code Interpreter tool + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can execute Python code to analyze data.", + tools=[CodeInterpreterTool(container=CodeInterpreterContainerAuto(file_ids=[file.id]))], + ), + ) + + try: + conversation = await openai_client.conversations.create() + + # Ask question that triggers code interpreter with streaming + stream = await openai_client.responses.create( + conversation=conversation.id, + input="Calculate the average operating profit from the transportation data", + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Consume the stream + async for event in stream: + pass + + # Explicitly call and iterate through conversation items + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + assert span.attributes is not None + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None + + expected_attributes = [ + ("az.namespace", 
"Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content is NOT present + from collections.abc import Mapping + import json + + found_code_interpreter_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_call = True + assert "id" in tool_content, "code_interpreter_call should have id" + assert ( + "code" not in tool_content + ), "code should NOT be present when content recording is disabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" not in part + ), "text content should NOT be present when content recording is disabled" + + assert found_code_interpreter_call, "Should have found code_interpreter_call in output" + assert found_text_response, "Should have found text response type in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_code_interpreter_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "code_interpreter_call": + found_code_interpreter_in_items = True + assert ( + "id" in tool_content + ), "code_interpreter_call should have id in conversation items" + code_interpreter = tool_content.get("code_interpreter") + if code_interpreter: + assert ( + "code" not in code_interpreter + ), "code should NOT be present when content recording is disabled" + else: + 
assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_code_interpreter_in_items, "Should have found code_interpreter_call in conversation items" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + await openai_client.files.delete(file.id) + + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_file_search.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_file_search.py new file mode 100644 index 000000000000..5b01b733586c --- /dev/null +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_file_search.py @@ -0,0 +1,853 @@ +# pylint: disable=too-many-lines,line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +Tests for ResponsesInstrumentor with File Search tool. +""" +import os +import pytest +from io import BytesIO +from azure.ai.projects.telemetry import AIProjectInstrumentor, _utils +from azure.core.settings import settings +from gen_ai_trace_verifier import GenAiTraceVerifier +from devtools_testutils import recorded_by_proxy, RecordedTransport +from azure.ai.projects.models import PromptAgentDefinition, FileSearchTool + +from test_base import servicePreparer +from test_ai_instrumentor_base import ( + TestAiAgentsInstrumentorBase, + CONTENT_TRACING_ENV_VARIABLE, +) + +settings.tracing_implementation = "OpenTelemetry" +_utils._span_impl_type = settings.tracing_implementation() + + +class TestResponsesInstrumentorFileSearch(TestAiAgentsInstrumentorBase): + """Tests for ResponsesInstrumentor with File Search tool.""" + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_file_search_non_streaming_with_content_recording(self, **kwargs): + """Test synchronous File Search agent with non-streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create product information document + product_info = """Contoso Galaxy Innovations SmartView Glasses + +Product Category: Smart Eyewear + +Key Features: +- Augmented Reality interface +- Voice-controlled AI agent +- HD video recording with 3D audio +- UV protection and blue light filtering +- Wireless charging with extended battery life + +Warranty: Two-year limited warranty on electronic components +Return Policy: 30-day return policy with no questions asked +""" + + # Create vector store and upload document + vector_store = openai_client.vector_stores.create(name="ProductInfoStore") + + product_file = BytesIO(product_info.encode("utf-8")) + product_file.name = "product_info.txt" + + file = openai_client.vector_stores.files.upload_and_poll( + vector_store_id=vector_store.id, + 
file=product_file, + ) + + assert file.status == "completed", f"File upload failed with status: {file.status}" + + # Create agent with File Search tool + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can search through uploaded documents to answer questions.", + tools=[FileSearchTool(vector_store_ids=[vector_store.id])], + ), + ) + + try: + conversation = openai_client.conversations.create() + + # Ask question that triggers file search + response = openai_client.responses.create( + conversation=conversation.id, + input="Tell me about Contoso products", + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + assert response.output_text is not None + + # Explicitly call and iterate through conversation items + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content IS present + from collections.abc import Mapping + import json + + found_file_search_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + # Validate input text content IS present + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert "content" in part and isinstance( + part["content"], str + ), "Text content should be present when content recording is enabled" + assert "Contoso" in part["content"], "Should contain the user query" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_call = True + assert "id" in tool_content, "file_search_call should have id" + # With content recording, queries should be present + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" in file_search + ), "queries should be present in file_search when content recording is enabled" + queries = file_search["queries"] + assert ( + isinstance(queries, list) and 
len(queries) > 0 + ), "queries should be a non-empty list" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" in part + ), "text content should be present when content recording is enabled" + assert ( + isinstance(part["content"], str) and len(part["content"]) > 0 + ), "text content should be non-empty" + + assert found_file_search_call, "Should have found file_search_call in output" + assert found_text_response, "Should have found text response in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_file_search_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert "content" in part, "text content should be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_in_items = True + assert ( + "id" in tool_content + ), "file_search_call should have id in conversation items" + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" in file_search + ), "queries should be present when content recording is enabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_file_search_in_items, "Should have found file_search_call in conversation items" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + openai_client.vector_stores.delete(vector_store.id) + + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_file_search_non_streaming_without_content_recording(self, **kwargs): + """Test synchronous File Search agent with non-streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create product information document + product_info = """Contoso Galaxy Innovations SmartView Glasses + +Product Category: Smart Eyewear + +Key Features: +- Augmented Reality interface +- Voice-controlled AI agent +- HD video recording with 3D audio +- UV protection and blue light filtering +- Wireless charging with extended battery life + +Warranty: Two-year limited warranty on electronic components +Return Policy: 30-day return policy with no questions asked +""" + + # Create vector store and upload document + 
vector_store = openai_client.vector_stores.create(name="ProductInfoStore") + + product_file = BytesIO(product_info.encode("utf-8")) + product_file.name = "product_info.txt" + + file = openai_client.vector_stores.files.upload_and_poll( + vector_store_id=vector_store.id, + file=product_file, + ) + + assert file.status == "completed", f"File upload failed with status: {file.status}" + + # Create agent with File Search tool + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can search through uploaded documents to answer questions.", + tools=[FileSearchTool(vector_store_ids=[vector_store.id])], + ), + ) + + try: + conversation = openai_client.conversations.create() + + # Ask question that triggers file search + response = openai_client.responses.create( + conversation=conversation.id, + input="Tell me about Contoso products", + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + assert response.output_text is not None + + # Explicitly call and iterate through conversation items + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content is NOT present + from collections.abc import Mapping + import json + + found_file_search_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + # Validate input text content is NOT present + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_call = True + assert "id" in tool_content, "file_search_call should have id" + # Without content recording, queries should NOT be present + file_search = tool_content.get("file_search") + if file_search: + assert 
( + "queries" not in file_search + ), "queries should NOT be present when content recording is disabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" not in part + ), "text content should NOT be present when content recording is disabled" + + assert found_file_search_call, "Should have found file_search_call in output" + assert found_text_response, "Should have found text response type in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_file_search_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_in_items = True + assert ( + "id" in tool_content + ), "file_search_call should have id in conversation items" + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" not in file_search + ), "queries should NOT be present when content recording is disabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_file_search_in_items, "Should have found file_search_call in conversation items" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + openai_client.vector_stores.delete(vector_store.id) + + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # ======================================== + # Sync File Search Agent Tests - Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_file_search_streaming_with_content_recording(self, **kwargs): + """Test synchronous File Search agent with streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create product information document + product_info = """Contoso Galaxy Innovations SmartView Glasses + +Product Category: Smart Eyewear + +Key Features: +- Augmented Reality interface +- Voice-controlled AI agent +- HD video recording with 3D audio +- UV protection and blue light filtering +- Wireless charging with extended battery life + +Warranty: Two-year limited warranty on electronic components +Return Policy: 30-day return 
policy with no questions asked +""" + + # Create vector store and upload document + vector_store = openai_client.vector_stores.create(name="ProductInfoStore") + + product_file = BytesIO(product_info.encode("utf-8")) + product_file.name = "product_info.txt" + + file = openai_client.vector_stores.files.upload_and_poll( + vector_store_id=vector_store.id, + file=product_file, + ) + + assert file.status == "completed", f"File upload failed with status: {file.status}" + + # Create agent with File Search tool + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can search through uploaded documents to answer questions.", + tools=[FileSearchTool(vector_store_ids=[vector_store.id])], + ), + ) + + try: + conversation = openai_client.conversations.create() + + # Ask question that triggers file search with streaming + stream = openai_client.responses.create( + conversation=conversation.id, + input="Tell me about Contoso products", + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Consume the stream + for event in stream: + pass + + # Explicitly call and iterate through conversation items + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + assert span.attributes is not None + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content IS present + from collections.abc import Mapping + import json + + found_file_search_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert "content" in part and isinstance( + part["content"], str + ), "Text content should be present when content recording is enabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_call = True + assert "id" in 
tool_content, "file_search_call should have id" + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" in file_search + ), "queries should be present when content recording is enabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" in part + ), "text content should be present when content recording is enabled" + + assert found_file_search_call, "Should have found file_search_call in output" + assert found_text_response, "Should have found text response in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_file_search_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert "content" in part, "text content should be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_in_items = True + assert ( + "id" in tool_content + ), "file_search_call should have id in conversation items" + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" in file_search + ), "queries should be present when content recording is enabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_file_search_in_items, "Should have found file_search_call in conversation items" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + openai_client.vector_stores.delete(vector_store.id) + + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_file_search_streaming_without_content_recording(self, **kwargs): + """Test synchronous File Search agent with streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create product information document + product_info = """Contoso Galaxy Innovations SmartView Glasses + +Product Category: Smart Eyewear + +Key Features: +- Augmented Reality interface +- Voice-controlled AI agent +- HD video recording with 3D audio +- UV protection and blue light filtering +- Wireless charging with extended battery life + +Warranty: Two-year limited warranty on electronic components +Return Policy: 30-day return policy with no questions asked +""" + + # Create 
vector store and upload document + vector_store = openai_client.vector_stores.create(name="ProductInfoStore") + + product_file = BytesIO(product_info.encode("utf-8")) + product_file.name = "product_info.txt" + + file = openai_client.vector_stores.files.upload_and_poll( + vector_store_id=vector_store.id, + file=product_file, + ) + + assert file.status == "completed", f"File upload failed with status: {file.status}" + + # Create agent with File Search tool + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can search through uploaded documents to answer questions.", + tools=[FileSearchTool(vector_store_ids=[vector_store.id])], + ), + ) + + try: + conversation = openai_client.conversations.create() + + # Ask question that triggers file search with streaming + stream = openai_client.responses.create( + conversation=conversation.id, + input="Tell me about Contoso products", + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Consume the stream + for event in stream: + pass + + # Explicitly call and iterate through conversation items + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + assert span.attributes is not None + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content is NOT present + from collections.abc import Mapping + import json + + found_file_search_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_call = True + assert "id" in tool_content, "file_search_call should have id" + file_search = 
tool_content.get("file_search") + if file_search: + assert ( + "queries" not in file_search + ), "queries should NOT be present when content recording is disabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" not in part + ), "text content should NOT be present when content recording is disabled" + + assert found_file_search_call, "Should have found file_search_call in output" + assert found_text_response, "Should have found text response type in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_file_search_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_in_items = True + assert ( + "id" in tool_content + ), "file_search_call should have id in conversation items" + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" not in file_search + ), "queries should NOT be present when content recording is disabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_file_search_in_items, "Should have found file_search_call in conversation items" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + openai_client.vector_stores.delete(vector_store.id) + + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_file_search_async.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_file_search_async.py new file mode 100644 index 000000000000..ca5a4abd872b --- /dev/null +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_file_search_async.py @@ -0,0 +1,854 @@ +# pylint: disable=too-many-lines,line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +Async tests for ResponsesInstrumentor with File Search tool. 
+""" +import os +import pytest +from io import BytesIO +from azure.ai.projects.telemetry import AIProjectInstrumentor, _utils +from azure.core.settings import settings +from gen_ai_trace_verifier import GenAiTraceVerifier +from devtools_testutils.aio import recorded_by_proxy_async +from devtools_testutils import RecordedTransport +from azure.ai.projects.models import PromptAgentDefinition, FileSearchTool + +from test_base import servicePreparer +from test_ai_instrumentor_base import ( + TestAiAgentsInstrumentorBase, + CONTENT_TRACING_ENV_VARIABLE, +) + +settings.tracing_implementation = "OpenTelemetry" +_utils._span_impl_type = settings.tracing_implementation() + + +class TestResponsesInstrumentorFileSearchAsync(TestAiAgentsInstrumentorBase): + """Async tests for ResponsesInstrumentor with File Search tool.""" + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_file_search_non_streaming_with_content_recording(self, **kwargs): + """Test asynchronous File Search agent with non-streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create product information document + product_info = """Contoso Galaxy Innovations SmartView Glasses + +Product Category: Smart Eyewear + +Key Features: +- Augmented Reality interface +- Voice-controlled AI agent +- HD video recording with 3D audio +- UV protection and blue light filtering +- Wireless charging with extended battery life + +Warranty: Two-year limited warranty on electronic components +Return Policy: 30-day return policy with no questions asked +""" + + # Create vector store and upload document + vector_store = await openai_client.vector_stores.create(name="ProductInfoStore") + + product_file = BytesIO(product_info.encode("utf-8")) + product_file.name = "product_info.txt" + + file = await openai_client.vector_stores.files.upload_and_poll( + vector_store_id=vector_store.id, + file=product_file, + ) + + assert file.status == "completed", f"File upload failed with status: {file.status}" + + # Create agent with File Search tool + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can search through uploaded documents to answer questions.", + tools=[FileSearchTool(vector_store_ids=[vector_store.id])], + ), + ) + + try: + conversation = await openai_client.conversations.create() + + # Ask question that triggers file search + response = await openai_client.responses.create( + conversation=conversation.id, + input="Tell me about Contoso products", + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + assert response.output_text is not None + + # Explicitly call and iterate through conversation items + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + 
pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content IS present + from collections.abc import Mapping + import json + + found_file_search_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + # Validate input text content IS present + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert "content" in part and isinstance( + part["content"], str + ), "Text content should be present when content recording is enabled" + assert "Contoso" in part["content"], "Should contain the user query" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_call = True + assert "id" in tool_content, "file_search_call should have id" + # With content recording, queries should be present + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" in file_search + ), "queries should be present in file_search when content recording is enabled" + queries = file_search["queries"] + assert ( + isinstance(queries, list) and len(queries) > 0 + ), "queries should be a non-empty list" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" in part + ), "text content should be present when content recording is enabled" + assert ( + isinstance(part["content"], str) and len(part["content"]) > 0 + ), "text content should be non-empty" + + assert found_file_search_call, "Should have found file_search_call in output" + assert found_text_response, "Should have found text response in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_file_search_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, 
str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert "content" in part, "text content should be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_in_items = True + assert ( + "id" in tool_content + ), "file_search_call should have id in conversation items" + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" in file_search + ), "queries should be present when content recording is enabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_file_search_in_items, "Should have found file_search_call in conversation items" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + await openai_client.vector_stores.delete(vector_store.id) + + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_file_search_non_streaming_without_content_recording(self, **kwargs): + """Test asynchronous File Search agent with non-streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create product information document + product_info = """Contoso Galaxy Innovations SmartView Glasses + +Product Category: Smart Eyewear + +Key Features: +- Augmented Reality interface +- Voice-controlled AI agent +- HD video recording with 3D audio +- UV protection and blue light filtering +- Wireless charging with extended battery life + +Warranty: Two-year limited warranty on electronic components +Return Policy: 30-day return policy with no questions asked +""" + + # Create vector store and upload document + vector_store = await openai_client.vector_stores.create(name="ProductInfoStore") + + product_file = BytesIO(product_info.encode("utf-8")) + product_file.name = "product_info.txt" + + file = await openai_client.vector_stores.files.upload_and_poll( + vector_store_id=vector_store.id, + file=product_file, + ) + + assert file.status == "completed", f"File upload failed with status: {file.status}" + + # Create agent with File Search tool + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can search through uploaded documents to answer questions.", + tools=[FileSearchTool(vector_store_ids=[vector_store.id])], + ), + ) + + try: + conversation = await openai_client.conversations.create() + + # Ask question that triggers file search + response = await openai_client.responses.create( + conversation=conversation.id, + 
input="Tell me about Contoso products", + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + assert response.output_text is not None + + # Explicitly call and iterate through conversation items + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content is NOT present + from collections.abc import Mapping + import json + + found_file_search_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + # Validate input text content is NOT present + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_call = True + assert "id" in tool_content, "file_search_call should have id" + # Without content recording, queries should NOT be present + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" not in file_search + ), "queries should NOT be present when content recording is disabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" not in part + ), "text content should NOT be present when content recording is disabled" + + assert found_file_search_call, "Should have found file_search_call in output" + assert found_text_response, "Should have found text response type in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_file_search_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") 
+ assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_in_items = True + assert ( + "id" in tool_content + ), "file_search_call should have id in conversation items" + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" not in file_search + ), "queries should NOT be present when content recording is disabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_file_search_in_items, "Should have found file_search_call in conversation items" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + await openai_client.vector_stores.delete(vector_store.id) + + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # ======================================== + # Async File Search Agent Tests - Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_file_search_streaming_with_content_recording(self, **kwargs): + """Test asynchronous File Search agent with streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create product information document + product_info = """Contoso Galaxy Innovations SmartView Glasses + +Product Category: Smart Eyewear + +Key Features: +- Augmented Reality interface +- Voice-controlled AI agent +- HD video recording with 3D audio +- UV protection and blue light filtering +- Wireless charging with extended battery life + +Warranty: Two-year limited warranty on electronic components +Return Policy: 30-day return policy with no questions asked +""" + + # Create vector store and upload document + vector_store = await openai_client.vector_stores.create(name="ProductInfoStore") + + product_file = BytesIO(product_info.encode("utf-8")) + product_file.name = "product_info.txt" + + file = await openai_client.vector_stores.files.upload_and_poll( + vector_store_id=vector_store.id, + file=product_file, + ) + + assert file.status == "completed", f"File upload failed with status: {file.status}" + + # Create agent with File Search tool + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can search through uploaded documents to answer questions.", + tools=[FileSearchTool(vector_store_ids=[vector_store.id])], + ), + ) + + try: + conversation = 
await openai_client.conversations.create() + + # Ask question that triggers file search with streaming + stream = await openai_client.responses.create( + conversation=conversation.id, + input="Tell me about Contoso products", + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Consume the stream + async for event in stream: + pass + + # Explicitly call and iterate through conversation items + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + assert span.attributes is not None + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content IS present + from collections.abc import Mapping + import json + + found_file_search_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert "content" in part and isinstance( + part["content"], str + ), "Text content should be present when content recording is enabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_call = True + assert "id" in tool_content, "file_search_call should have id" + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" in file_search + ), "queries should be present when content recording is enabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" in part + ), "text content should be present when content recording is enabled" + + assert found_file_search_call, "Should have found file_search_call in output" + assert found_text_response, "Should have found text response in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + found_file_search_in_items = False + for event in 
list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert "content" in part, "text content should be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_in_items = True + assert ( + "id" in tool_content + ), "file_search_call should have id in conversation items" + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" in file_search + ), "queries should be present when content recording is enabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_file_search_in_items, "Should have found file_search_call in conversation items" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + await openai_client.vector_stores.delete(vector_store.id) + + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_file_search_streaming_without_content_recording(self, **kwargs): + """Test asynchronous File Search agent with streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create product information document + product_info = """Contoso Galaxy Innovations SmartView Glasses + +Product Category: Smart Eyewear + +Key Features: +- Augmented Reality interface +- Voice-controlled AI agent +- HD video recording with 3D audio +- UV protection and blue light filtering +- Wireless charging with extended battery life + +Warranty: Two-year limited warranty on electronic components +Return Policy: 30-day return policy with no questions asked +""" + + # Create vector store and upload document + vector_store = await openai_client.vector_stores.create(name="ProductInfoStore") + + product_file = BytesIO(product_info.encode("utf-8")) + product_file.name = "product_info.txt" + + file = await openai_client.vector_stores.files.upload_and_poll( + vector_store_id=vector_store.id, + file=product_file, + ) + + assert file.status == "completed", f"File upload failed with status: {file.status}" + + # Create agent with File Search tool + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful assistant that can search through uploaded documents to answer questions.", + 
tools=[FileSearchTool(vector_store_ids=[vector_store.id])], + ), + ) + + try: + conversation = await openai_client.conversations.create() + + # Ask question that triggers file search with streaming + stream = await openai_client.responses.create( + conversation=conversation.id, + input="Tell me about Contoso products", + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Consume the stream + async for event in stream: + pass + + # Explicitly call and iterate through conversation items + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 1, "Should have one response span" + + # Validate response span + span = spans[0] + assert span.attributes is not None + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None + + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + + # Comprehensive event validation - verify content is NOT present + from collections.abc import Mapping + import json + + found_file_search_call = False + found_text_response = False + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_call = True + assert "id" in tool_content, "file_search_call should have id" + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" not in file_search + ), "queries should NOT be present when content recording is disabled" + elif part.get("type") == "text": + found_text_response = True + assert ( + "content" not in part + ), "text content should NOT be present when content recording is disabled" + + assert found_file_search_call, "Should have found file_search_call in output" + assert found_text_response, "Should have found text response type in output" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one 
list_conversation_items span" + list_span = list_spans[0] + + found_file_search_in_items = False + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items" + elif part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "file_search_call": + found_file_search_in_items = True + assert ( + "id" in tool_content + ), "file_search_call should have id in conversation items" + file_search = tool_content.get("file_search") + if file_search: + assert ( + "queries" not in file_search + ), "queries should NOT be present when content recording is disabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + assert found_file_search_in_items, "Should have found file_search_call in conversation items" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + await openai_client.vector_stores.delete(vector_store.id) + + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_mcp.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_mcp.py new file mode 100644 index 000000000000..9c47dfab727f --- /dev/null +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_mcp.py @@ -0,0 +1,1075 @@ +# pylint: disable=too-many-lines,line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +Tests for ResponsesInstrumentor with MCP agents. 
+""" +import os +import pytest +from azure.ai.projects.telemetry import AIProjectInstrumentor, _utils +from azure.core.settings import settings +from gen_ai_trace_verifier import GenAiTraceVerifier +from devtools_testutils import recorded_by_proxy, RecordedTransport +from azure.ai.projects.models import PromptAgentDefinition, MCPTool +from openai.types.responses.response_input_param import McpApprovalResponse + +from test_base import servicePreparer +from test_ai_instrumentor_base import ( + TestAiAgentsInstrumentorBase, + CONTENT_TRACING_ENV_VARIABLE, +) + +settings.tracing_implementation = "OpenTelemetry" +_utils._span_impl_type = settings.tracing_implementation() + + +class TestResponsesInstrumentorMCP(TestAiAgentsInstrumentorBase): + """Tests for ResponsesInstrumentor with MCP agents.""" + + # ======================================== + # Sync MCP Agent Tests - Non-Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_mcp_non_streaming_with_content_recording(self, **kwargs): + """Test synchronous MCP agent with non-streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create MCP tool + mcp_tool = MCPTool( + server_label="api-specs", + server_url="https://gitmcp.io/Azure/azure-rest-api-specs", + require_approval="always", + ) + + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful agent that can use MCP tools to assist users.", + tools=[mcp_tool], + ), + ) + + try: + conversation = openai_client.conversations.create() + + # First request - triggers MCP tool + response = openai_client.responses.create( + conversation=conversation.id, + input="Please summarize the Azure REST API specifications Readme", + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Collect approval requests + input_list = [] + for item in response.output: + if item.type == "mcp_approval_request": + if item.server_label == "api-specs" and item.id: + input_list.append( + McpApprovalResponse( + type="mcp_approval_response", + approve=True, + approval_request_id=item.id, + ) + ) + + # Send approval response + response2 = openai_client.responses.create( + conversation=conversation.id, + input=input_list, + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + assert response2.output_text is not None + + # Explicitly call and iterate through conversation items + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 2, "Should have two response spans (initial + approval)" + + # Validate first response span 
(MCP tool trigger) + span1 = spans[0] + expected_attributes_1 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1) + + # Comprehensive event validation for first span - verify content IS present + from collections.abc import Mapping + + for event in span1.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + # Validate content fields ARE present + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + and isinstance(part["content"], str) + and part["content"].strip() != "" + ), "Text content should be present when content recording is enabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + parts = first.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + # Check for MCP-specific content + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + assert isinstance(tool_content, dict) + tool_type = tool_content.get("type") + if tool_type in ("mcp_list_tools", "mcp_approval_request"): + assert "id" in tool_content + if tool_type == "mcp_list_tools": + assert ( + "server_label" in tool_content + ), "server_label should be present for mcp_list_tools when content recording is enabled" + elif tool_type == "mcp_approval_request": + assert ( + "name" in tool_content + ), "name should be present for mcp_approval_request when content recording is enabled" + assert ( + "server_label" in tool_content + ), "server_label should be present for mcp_approval_request when content recording is enabled" + assert ( + "arguments" in tool_content + ), "arguments should be present for mcp_approval_request when content recording is enabled" + + # Validate second response span (approval response) + span2 = spans[1] + expected_attributes_2 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response2.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2) + + # Validate MCP approval response and call in second span + for event in span2.events: + if event.name 
== "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + # Check for MCP approval response content + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "mcp": + mcp_content = part.get("content") + assert isinstance(mcp_content, dict) + if mcp_content.get("type") == "mcp_approval_response": + assert "id" in mcp_content + assert ( + "approval_request_id" in mcp_content + ), "approval_request_id should be present when content recording is enabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + # Check for MCP call content + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "mcp_call": + assert "id" in tool_content + assert ( + "name" in tool_content + ), "name should be present for mcp_call when content recording is enabled" + assert ( + "arguments" in tool_content + ), "arguments should be present for mcp_call when content recording is enabled" + assert ( + "server_label" in tool_content + ), "server_label should be present for mcp_call when content recording is enabled" + elif part.get("type") == "text": + assert ( + "content" in part + ), "text content should be present when content recording is enabled" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + # Validate MCP content in conversation items + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + ), "text content should be present in conversation items when content recording is enabled" + elif part.get("type") == "mcp": + mcp_content = part.get("content") + if mcp_content and mcp_content.get("type") == "mcp_approval_response": + assert ( + "approval_request_id" in mcp_content + ), "approval_request_id should be present when content recording is enabled" + elif entry.get("role") == "assistant": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + ), "text content should be present in conversation items when content recording is enabled" + elif part.get("type") == "mcp": + mcp_content = part.get("content") + if mcp_content: + mcp_type = mcp_content.get("type") + if mcp_type in ("mcp_list_tools", "mcp_call", "mcp_approval_request"): + assert "id" in mcp_content + if mcp_type == "mcp_call": + assert ( + "name" in mcp_content + ), "name should be present for mcp_call in conversation items" + assert ( + 
"server_label" in mcp_content + ), "server_label should be present for mcp_call in conversation items" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_mcp_non_streaming_without_content_recording(self, **kwargs): + """Test synchronous MCP agent with non-streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create MCP tool + mcp_tool = MCPTool( + server_label="api-specs", + server_url="https://gitmcp.io/Azure/azure-rest-api-specs", + require_approval="always", + ) + + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful agent that can use MCP tools to assist users.", + tools=[mcp_tool], + ), + ) + + try: + conversation = openai_client.conversations.create() + + # First request - triggers MCP tool + response = openai_client.responses.create( + conversation=conversation.id, + input="Please summarize the Azure REST API specifications Readme", + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Collect approval requests + input_list = [] + for item in response.output: + if item.type == "mcp_approval_request": + if item.server_label == "api-specs" and item.id: + input_list.append( + McpApprovalResponse( + type="mcp_approval_response", + approve=True, + approval_request_id=item.id, + ) + ) + + # Send approval response + response2 = openai_client.responses.create( + conversation=conversation.id, + input=input_list, + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + assert response2.output_text is not None + + # Explicitly call and iterate through conversation items + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 2, "Should have two response spans (initial + approval)" + + # Validate first response span (MCP tool trigger) + span1 = spans[0] + expected_attributes_1 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span1, 
expected_attributes_1) + + # Comprehensive event validation for first span - verify content is NOT present + from collections.abc import Mapping + + for event in span1.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + # Validate content fields are NOT present + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + parts = first.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + # Check for MCP-specific content - should have type and id but not detailed fields + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + assert isinstance(tool_content, dict) + tool_type = tool_content.get("type") + if tool_type in ("mcp_list_tools", "mcp_approval_request"): + assert "id" in tool_content + if tool_type == "mcp_list_tools": + # server_label might be present but other details should not + pass + elif tool_type == "mcp_approval_request": + # Should not have name, arguments when content recording is disabled + assert ( + "name" not in tool_content + ), "name should NOT be present for mcp_approval_request when content recording is disabled" + assert ( + "arguments" not in tool_content + ), "arguments should NOT be present for mcp_approval_request when content recording is disabled" + + # Validate second response span (approval response) + span2 = spans[1] + expected_attributes_2 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response2.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2) + + # Validate MCP approval response and call in second span - content should be minimal + for event in span2.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + # Check for MCP approval response content - should be minimal + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "mcp": + mcp_content = part.get("content") + assert isinstance(mcp_content, dict) + if mcp_content.get("type") == "mcp_approval_response": + assert "id" in mcp_content + # 
approval_request_id might not be present when content recording is disabled + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + # Check for MCP call content - should be minimal + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "mcp_call": + assert "id" in tool_content + assert ( + "name" not in tool_content + ), "name should NOT be present for mcp_call when content recording is disabled" + assert ( + "arguments" not in tool_content + ), "arguments should NOT be present for mcp_call when content recording is disabled" + elif part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present when content recording is disabled" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + # Validate MCP content in conversation items - should be minimal + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items when content recording is disabled" + elif part.get("type") == "mcp": + mcp_content = part.get("content") + if mcp_content and mcp_content.get("type") == "mcp_approval_response": + # Should have id but might not have other details + assert "id" in mcp_content + elif entry.get("role") == "assistant": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items when content recording is disabled" + elif part.get("type") == "mcp": + mcp_content = part.get("content") + if mcp_content: + mcp_type = mcp_content.get("type") + if mcp_type == "mcp_call": + assert "id" in mcp_content + # Should not have name, server_label, arguments when content recording is disabled + assert ( + "name" not in mcp_content + ), "name should NOT be present for mcp_call in conversation items when content recording is disabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # ======================================== + # Sync MCP Agent Tests - Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_mcp_streaming_with_content_recording(self, **kwargs): + """Test synchronous MCP agent with streaming and content recording enabled.""" + 
self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create MCP tool + mcp_tool = MCPTool( + server_label="api-specs", + server_url="https://gitmcp.io/Azure/azure-rest-api-specs", + require_approval="always", + ) + + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful agent that can use MCP tools to assist users.", + tools=[mcp_tool], + ), + ) + + try: + conversation = openai_client.conversations.create() + + # First streaming request - triggers MCP tool + stream = openai_client.responses.create( + conversation=conversation.id, + input="Please summarize the Azure REST API specifications Readme", + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Collect approval requests from stream + input_list = [] + for event in stream: + if hasattr(event, "type") and event.type == "response.output_item.done": + if hasattr(event, "item") and hasattr(event.item, "type"): + if event.item.type == "mcp_approval_request": + if hasattr(event.item, "server_label") and event.item.server_label == "api-specs": + if hasattr(event.item, "id") and event.item.id: + input_list.append( + McpApprovalResponse( + type="mcp_approval_response", + approve=True, + approval_request_id=event.item.id, + ) + ) + + # Send approval response as streaming + stream2 = openai_client.responses.create( + conversation=conversation.id, + input=input_list, + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Consume second stream + for event in stream2: + pass + + # Explicitly call and iterate through conversation items + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 2, "Should have two response spans (initial + approval)" + + # Validate first response span + span1 = spans[0] + assert span1.attributes is not None + response_id_1 = span1.attributes.get("gen_ai.response.id") + assert response_id_1 is not None + + expected_attributes_1 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id_1), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1) + + # Comprehensive event validation - verify content IS present + from collections.abc import Mapping + + for event in span1.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert 
isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + and isinstance(part["content"], str) + and part["content"].strip() != "" + ), "Text content should be present when content recording is enabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content: + tool_type = tool_content.get("type") + if tool_type == "mcp_approval_request": + assert ( + "name" in tool_content + ), "name should be present for mcp_approval_request when content recording is enabled" + assert ( + "arguments" in tool_content + ), "arguments should be present for mcp_approval_request when content recording is enabled" + + # Validate second response span + span2 = spans[1] + assert span2.attributes is not None + response_id_2 = span2.attributes.get("gen_ai.response.id") + assert response_id_2 is not None + + expected_attributes_2 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id_2), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2) + + # Validate second span events + for event in span2.events: + if event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "mcp_call": + assert ( + "name" in tool_content + ), "name should be present for mcp_call when content recording is enabled" + assert ( + "arguments" in tool_content + ), "arguments should be present for mcp_call when content recording is enabled" + elif part.get("type") == "text": + assert ( + "content" in part + ), "text content should be present when content recording is enabled" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1 + list_span = list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + ), "text content should be present in 
conversation items when content recording is enabled" + elif part.get("type") == "mcp": + mcp_content = part.get("content") + if mcp_content and mcp_content.get("type") == "mcp_call": + assert ( + "name" in mcp_content + ), "name should be present for mcp_call in conversation items when content recording is enabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_mcp_streaming_without_content_recording(self, **kwargs): + """Test synchronous MCP agent with streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create MCP tool + mcp_tool = MCPTool( + server_label="api-specs", + server_url="https://gitmcp.io/Azure/azure-rest-api-specs", + require_approval="always", + ) + + agent = project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful agent that can use MCP tools to assist users.", + tools=[mcp_tool], + ), + ) + + try: + conversation = openai_client.conversations.create() + + # First streaming request - triggers MCP tool + stream = openai_client.responses.create( + conversation=conversation.id, + input="Please summarize the Azure REST API specifications Readme", + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Collect approval requests from stream + input_list = [] + for event in stream: + if hasattr(event, "type") and event.type == "response.output_item.done": + if hasattr(event, "item") and hasattr(event.item, "type"): + if event.item.type == "mcp_approval_request": + if hasattr(event.item, "server_label") and event.item.server_label == "api-specs": + if hasattr(event.item, "id") and event.item.id: + input_list.append( + McpApprovalResponse( + type="mcp_approval_response", + approve=True, + approval_request_id=event.item.id, + ) + ) + + # Send approval response as streaming + stream2 = openai_client.responses.create( + conversation=conversation.id, + input=input_list, + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Consume second stream + for event in stream2: + pass + + # Explicitly call and iterate through conversation items + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 2, "Should have two response spans (initial + approval)" + + # Validate first response span + span1 = spans[0] + assert span1.attributes is not None + response_id_1 = 
span1.attributes.get("gen_ai.response.id") + assert response_id_1 is not None + + expected_attributes_1 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id_1), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1) + + # Comprehensive event validation - verify content is NOT present + from collections.abc import Mapping + + for event in span1.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content: + tool_type = tool_content.get("type") + if tool_type == "mcp_approval_request": + assert ( + "name" not in tool_content + ), "name should NOT be present for mcp_approval_request when content recording is disabled" + assert ( + "arguments" not in tool_content + ), "arguments should NOT be present for mcp_approval_request when content recording is disabled" + + # Validate second response span + span2 = spans[1] + assert span2.attributes is not None + response_id_2 = span2.attributes.get("gen_ai.response.id") + assert response_id_2 is not None + + expected_attributes_2 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id_2), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2) + + # Validate second span events - content should be minimal + for event in span2.events: + if event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "mcp_call": + assert ( + "name" not in tool_content + ), "name should NOT be present for mcp_call when content recording is disabled" + assert ( + "arguments" not in 
tool_content + ), "arguments should NOT be present for mcp_call when content recording is disabled" + elif part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present when content recording is disabled" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1 + list_span = list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items when content recording is disabled" + elif part.get("type") == "mcp": + mcp_content = part.get("content") + if mcp_content and mcp_content.get("type") == "mcp_call": + assert ( + "name" not in mcp_content + ), "name should NOT be present for mcp_call in conversation items when content recording is disabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_mcp_async.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_mcp_async.py new file mode 100644 index 000000000000..a5161456ed70 --- /dev/null +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_mcp_async.py @@ -0,0 +1,1076 @@ +# pylint: disable=too-many-lines,line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +Async tests for ResponsesInstrumentor with MCP agents. 
+""" +import os +import pytest +from azure.ai.projects.telemetry import AIProjectInstrumentor, _utils +from azure.core.settings import settings +from gen_ai_trace_verifier import GenAiTraceVerifier +from devtools_testutils.aio import recorded_by_proxy_async +from devtools_testutils import RecordedTransport +from azure.ai.projects.models import PromptAgentDefinition, MCPTool +from openai.types.responses.response_input_param import McpApprovalResponse + +from test_base import servicePreparer +from test_ai_instrumentor_base import ( + TestAiAgentsInstrumentorBase, + CONTENT_TRACING_ENV_VARIABLE, +) + +settings.tracing_implementation = "OpenTelemetry" +_utils._span_impl_type = settings.tracing_implementation() + + +class TestResponsesInstrumentorMCPAsync(TestAiAgentsInstrumentorBase): + """Async tests for ResponsesInstrumentor with MCP agents.""" + + # ======================================== + # Async MCP Agent Tests - Non-Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_mcp_non_streaming_with_content_recording(self, **kwargs): + """Test asynchronous MCP agent with non-streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create MCP tool + mcp_tool = MCPTool( + server_label="api-specs", + server_url="https://gitmcp.io/Azure/azure-rest-api-specs", + require_approval="always", + ) + + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful agent that can use MCP tools to assist users.", + tools=[mcp_tool], + ), + ) + + try: + conversation = await openai_client.conversations.create() + + # First request - triggers MCP tool + response = await openai_client.responses.create( + conversation=conversation.id, + input="Please summarize the Azure REST API specifications Readme", + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Collect approval requests + input_list = [] + for item in response.output: + if item.type == "mcp_approval_request": + if item.server_label == "api-specs" and item.id: + input_list.append( + McpApprovalResponse( + type="mcp_approval_response", + approve=True, + approval_request_id=item.id, + ) + ) + + # Send approval response + response2 = await openai_client.responses.create( + conversation=conversation.id, + input=input_list, + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + assert response2.output_text is not None + + # Explicitly call and iterate through conversation items + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + 
assert len(spans) == 2, "Should have two response spans (initial + approval)" + + # Validate first response span (MCP tool trigger) + span1 = spans[0] + expected_attributes_1 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1) + + # Comprehensive event validation for first span - verify content IS present + from collections.abc import Mapping + + for event in span1.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + # Validate content fields ARE present + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + and isinstance(part["content"], str) + and part["content"].strip() != "" + ), "Text content should be present when content recording is enabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + parts = first.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + # Check for MCP-specific content + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + assert isinstance(tool_content, dict) + tool_type = tool_content.get("type") + if tool_type in ("mcp_list_tools", "mcp_approval_request"): + assert "id" in tool_content + if tool_type == "mcp_list_tools": + assert ( + "server_label" in tool_content + ), "server_label should be present for mcp_list_tools when content recording is enabled" + elif tool_type == "mcp_approval_request": + assert ( + "name" in tool_content + ), "name should be present for mcp_approval_request when content recording is enabled" + assert ( + "server_label" in tool_content + ), "server_label should be present for mcp_approval_request when content recording is enabled" + assert ( + "arguments" in tool_content + ), "arguments should be present for mcp_approval_request when content recording is enabled" + + # Validate second response span (approval response) + span2 = spans[1] + expected_attributes_2 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response2.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span2, 
expected_attributes_2) + + # Validate MCP approval response and call in second span + for event in span2.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + # Check for MCP approval response content + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "mcp": + mcp_content = part.get("content") + assert isinstance(mcp_content, dict) + if mcp_content.get("type") == "mcp_approval_response": + assert "id" in mcp_content + assert ( + "approval_request_id" in mcp_content + ), "approval_request_id should be present when content recording is enabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + # Check for MCP call content + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "mcp_call": + assert "id" in tool_content + assert ( + "name" in tool_content + ), "name should be present for mcp_call when content recording is enabled" + assert ( + "arguments" in tool_content + ), "arguments should be present for mcp_call when content recording is enabled" + assert ( + "server_label" in tool_content + ), "server_label should be present for mcp_call when content recording is enabled" + elif part.get("type") == "text": + assert ( + "content" in part + ), "text content should be present when content recording is enabled" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + # Validate MCP content in conversation items + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + ), "text content should be present in conversation items when content recording is enabled" + elif part.get("type") == "mcp": + mcp_content = part.get("content") + if mcp_content and mcp_content.get("type") == "mcp_approval_response": + assert ( + "approval_request_id" in mcp_content + ), "approval_request_id should be present when content recording is enabled" + elif entry.get("role") == "assistant": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + ), "text content should be present in conversation items when content recording is enabled" + elif part.get("type") == "mcp": + mcp_content = part.get("content") + if mcp_content: + mcp_type = mcp_content.get("type") + if mcp_type in ("mcp_list_tools", "mcp_call", "mcp_approval_request"): + assert "id" in mcp_content + if mcp_type == 
"mcp_call": + assert ( + "name" in mcp_content + ), "name should be present for mcp_call in conversation items" + assert ( + "server_label" in mcp_content + ), "server_label should be present for mcp_call in conversation items" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_mcp_non_streaming_without_content_recording(self, **kwargs): + """Test asynchronous MCP agent with non-streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create MCP tool + mcp_tool = MCPTool( + server_label="api-specs", + server_url="https://gitmcp.io/Azure/azure-rest-api-specs", + require_approval="always", + ) + + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful agent that can use MCP tools to assist users.", + tools=[mcp_tool], + ), + ) + + try: + conversation = await openai_client.conversations.create() + + # First request - triggers MCP tool + response = await openai_client.responses.create( + conversation=conversation.id, + input="Please summarize the Azure REST API specifications Readme", + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Collect approval requests + input_list = [] + for item in response.output: + if item.type == "mcp_approval_request": + if item.server_label == "api-specs" and item.id: + input_list.append( + McpApprovalResponse( + type="mcp_approval_response", + approve=True, + approval_request_id=item.id, + ) + ) + + # Send approval response + response2 = await openai_client.responses.create( + conversation=conversation.id, + input=input_list, + stream=False, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + assert response2.output_text is not None + + # Explicitly call and iterate through conversation items + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 2, "Should have two response spans (initial + approval)" + + # Validate first response span (MCP tool trigger) + span1 = spans[0] + expected_attributes_1 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", 
deployment_name), + ("gen_ai.response.id", response.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1) + + # Comprehensive event validation for first span - verify content is NOT present + from collections.abc import Mapping + + for event in span1.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + # Validate content fields are NOT present + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + parts = first.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + # Check for MCP-specific content - should have type and id but not detailed fields + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + assert isinstance(tool_content, dict) + tool_type = tool_content.get("type") + if tool_type in ("mcp_list_tools", "mcp_approval_request"): + assert "id" in tool_content + if tool_type == "mcp_list_tools": + # server_label might be present but other details should not + pass + elif tool_type == "mcp_approval_request": + # Should not have name, arguments when content recording is disabled + assert ( + "name" not in tool_content + ), "name should NOT be present for mcp_approval_request when content recording is disabled" + assert ( + "arguments" not in tool_content + ), "arguments should NOT be present for mcp_approval_request when content recording is disabled" + + # Validate second response span (approval response) + span2 = spans[1] + expected_attributes_2 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response2.id), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2) + + # Validate MCP approval response and call in second span - content should be minimal + for event in span2.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + # Check for MCP approval response content - should be minimal + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if 
part.get("type") == "mcp": + mcp_content = part.get("content") + assert isinstance(mcp_content, dict) + if mcp_content.get("type") == "mcp_approval_response": + assert "id" in mcp_content + # approval_request_id might not be present when content recording is disabled + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + # Check for MCP call content - should be minimal + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "mcp_call": + assert "id" in tool_content + assert ( + "name" not in tool_content + ), "name should NOT be present for mcp_call when content recording is disabled" + assert ( + "arguments" not in tool_content + ), "arguments should NOT be present for mcp_call when content recording is disabled" + elif part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present when content recording is disabled" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + # Validate MCP content in conversation items - should be minimal + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items when content recording is disabled" + elif part.get("type") == "mcp": + mcp_content = part.get("content") + if mcp_content and mcp_content.get("type") == "mcp_approval_response": + # Should have id but might not have other details + assert "id" in mcp_content + elif entry.get("role") == "assistant": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items when content recording is disabled" + elif part.get("type") == "mcp": + mcp_content = part.get("content") + if mcp_content: + mcp_type = mcp_content.get("type") + if mcp_type == "mcp_call": + assert "id" in mcp_content + # Should not have name, server_label, arguments when content recording is disabled + assert ( + "name" not in mcp_content + ), "name should NOT be present for mcp_call in conversation items when content recording is disabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + # ======================================== + # Async MCP Agent Tests - Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + 
@recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_mcp_streaming_with_content_recording(self, **kwargs): + """Test asynchronous MCP agent with streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create MCP tool + mcp_tool = MCPTool( + server_label="api-specs", + server_url="https://gitmcp.io/Azure/azure-rest-api-specs", + require_approval="always", + ) + + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful agent that can use MCP tools to assist users.", + tools=[mcp_tool], + ), + ) + + try: + conversation = await openai_client.conversations.create() + + # First streaming request - triggers MCP tool + stream = await openai_client.responses.create( + conversation=conversation.id, + input="Please summarize the Azure REST API specifications Readme", + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Collect approval requests from stream + input_list = [] + async for event in stream: + if hasattr(event, "type") and event.type == "response.output_item.done": + if hasattr(event, "item") and hasattr(event.item, "type"): + if event.item.type == "mcp_approval_request": + if hasattr(event.item, "server_label") and event.item.server_label == "api-specs": + if hasattr(event.item, "id") and event.item.id: + input_list.append( + McpApprovalResponse( + type="mcp_approval_response", + approve=True, + approval_request_id=event.item.id, + ) + ) + + # Send approval response as streaming + stream2 = await openai_client.responses.create( + conversation=conversation.id, + input=input_list, + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Consume second stream + async for event in stream2: + pass + + # Explicitly call and iterate through conversation items + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 2, "Should have two response spans (initial + approval)" + + # Validate first response span + span1 = spans[0] + assert span1.attributes is not None + response_id_1 = span1.attributes.get("gen_ai.response.id") + assert response_id_1 is not None + + expected_attributes_1 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id_1), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1) + + # Comprehensive event 
validation - verify content IS present + from collections.abc import Mapping + + for event in span1.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + and isinstance(part["content"], str) + and part["content"].strip() != "" + ), "Text content should be present when content recording is enabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content: + tool_type = tool_content.get("type") + if tool_type == "mcp_approval_request": + assert ( + "name" in tool_content + ), "name should be present for mcp_approval_request when content recording is enabled" + assert ( + "arguments" in tool_content + ), "arguments should be present for mcp_approval_request when content recording is enabled" + + # Validate second response span + span2 = spans[1] + assert span2.attributes is not None + response_id_2 = span2.attributes.get("gen_ai.response.id") + assert response_id_2 is not None + + expected_attributes_2 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id_2), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2) + + # Validate second span events + for event in span2.events: + if event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "mcp_call": + assert ( + "name" in tool_content + ), "name should be present for mcp_call when content recording is enabled" + assert ( + "arguments" in tool_content + ), "arguments should be present for mcp_call when content recording is enabled" + elif part.get("type") == "text": + assert ( + "content" in part + ), "text content should be present when content recording is enabled" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1 + list_span = list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert 
isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert ( + "content" in part + ), "text content should be present in conversation items when content recording is enabled" + elif part.get("type") == "mcp": + mcp_content = part.get("content") + if mcp_content and mcp_content.get("type") == "mcp_call": + assert ( + "name" in mcp_content + ), "name should be present for mcp_call in conversation items when content recording is enabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_mcp_streaming_without_content_recording(self, **kwargs): + """Test asynchronous MCP agent with streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create MCP tool + mcp_tool = MCPTool( + server_label="api-specs", + server_url="https://gitmcp.io/Azure/azure-rest-api-specs", + require_approval="always", + ) + + agent = await project_client.agents.create_version( + agent_name="MyAgent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="You are a helpful agent that can use MCP tools to assist users.", + tools=[mcp_tool], + ), + ) + + try: + conversation = await openai_client.conversations.create() + + # First streaming request - triggers MCP tool + stream = await openai_client.responses.create( + conversation=conversation.id, + input="Please summarize the Azure REST API specifications Readme", + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Collect approval requests from stream + input_list = [] + async for event in stream: + if hasattr(event, "type") and event.type == "response.output_item.done": + if hasattr(event, "item") and hasattr(event.item, "type"): + if event.item.type == "mcp_approval_request": + if hasattr(event.item, "server_label") and event.item.server_label == "api-specs": + if hasattr(event.item, "id") and event.item.id: + input_list.append( + McpApprovalResponse( + type="mcp_approval_response", + approve=True, + approval_request_id=event.item.id, + ) + ) + + # Send approval response as streaming + stream2 = await openai_client.responses.create( + conversation=conversation.id, + input=input_list, + stream=True, + extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, + ) + + # Consume second stream + async for event in stream2: + pass + + # Explicitly call and iterate through conversation items + items = await 
openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {agent.name}") + assert len(spans) == 2, "Should have two response spans (initial + approval)" + + # Validate first response span + span1 = spans[0] + assert span1.attributes is not None + response_id_1 = span1.attributes.get("gen_ai.response.id") + assert response_id_1 is not None + + expected_attributes_1 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id_1), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span1, expected_attributes_1) + + # Comprehensive event validation - verify content is NOT present + from collections.abc import Mapping + + for event in span1.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + if entry.get("role") == "user": + parts = entry.get("parts") + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "Text content should NOT be present when content recording is disabled" + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content: + tool_type = tool_content.get("type") + if tool_type == "mcp_approval_request": + assert ( + "name" not in tool_content + ), "name should NOT be present for mcp_approval_request when content recording is disabled" + assert ( + "arguments" not in tool_content + ), "arguments should NOT be present for mcp_approval_request when content recording is disabled" + + # Validate second response span + span2 = spans[1] + assert span2.attributes is not None + response_id_2 = span2.attributes.get("gen_ai.response.id") + assert response_id_2 is not None + + expected_attributes_2 = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", agent.name), + ("gen_ai.response.model", deployment_name), + ("gen_ai.response.id", response_id_2), + ("gen_ai.usage.input_tokens", "+"), + ("gen_ai.usage.output_tokens", "+"), + ] + assert GenAiTraceVerifier().check_span_attributes(span2, expected_attributes_2) + + # Validate second span events - content should be minimal + for event in span2.events: + if event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + 
import json + + data = json.loads(content) + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "tool_call": + tool_content = part.get("content") + if tool_content and tool_content.get("type") == "mcp_call": + assert ( + "name" not in tool_content + ), "name should NOT be present for mcp_call when content recording is disabled" + assert ( + "arguments" not in tool_content + ), "arguments should NOT be present for mcp_call when content recording is disabled" + elif part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present when content recording is disabled" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1 + list_span = list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + for entry in data: + parts = entry.get("parts") + if parts: + for part in parts: + if part.get("type") == "text": + assert ( + "content" not in part + ), "text content should NOT be present in conversation items when content recording is disabled" + elif part.get("type") == "mcp": + mcp_content = part.get("content") + if mcp_content and mcp_content.get("type") == "mcp_call": + assert ( + "name" not in mcp_content + ), "name should NOT be present for mcp_call in conversation items when content recording is disabled" + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + await project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version) diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_workflow.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_workflow.py new file mode 100644 index 000000000000..e4b5e0c40a94 --- /dev/null +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_workflow.py @@ -0,0 +1,847 @@ +# pylint: disable=too-many-lines,line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +Tests for ResponsesInstrumentor with workflow agents. 
+""" +import os +import pytest +from azure.ai.projects.telemetry import AIProjectInstrumentor, _utils +from azure.core.settings import settings +from gen_ai_trace_verifier import GenAiTraceVerifier +from devtools_testutils import recorded_by_proxy, RecordedTransport +from azure.ai.projects.models import ( + AgentReference, + PromptAgentDefinition, + WorkflowAgentDefinition, +) + +from test_base import servicePreparer +from test_ai_instrumentor_base import ( + TestAiAgentsInstrumentorBase, + CONTENT_TRACING_ENV_VARIABLE, +) + +settings.tracing_implementation = "OpenTelemetry" +_utils._span_impl_type = settings.tracing_implementation() + + +def checkWorkflowEventContents(content, content_recording_enabled): + """Validate workflow event content structure and required fields.""" + import json + + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + for entry in data: + assert entry.get("role") == "workflow" + parts = entry.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + found_workflow_action = False + for part in parts: + if part.get("type") == "workflow_action": + found_workflow_action = True + workflow_content = part.get("content") + assert isinstance(workflow_content, dict) + # status is always present + assert ( + "status" in workflow_content + and isinstance(workflow_content["status"], str) + and workflow_content["status"] + ) + if content_recording_enabled: + # action_id and previous_action_id should be present and non-empty + assert ( + "action_id" in workflow_content + and isinstance(workflow_content["action_id"], str) + and workflow_content["action_id"] + ) + assert ( + "previous_action_id" in workflow_content + and isinstance(workflow_content["previous_action_id"], str) + and workflow_content["previous_action_id"] + ) + else: + # action_id and previous_action_id should NOT be present when content recording is disabled + assert ( + "action_id" not in workflow_content + ), "action_id should not be present when content recording is disabled" + assert ( + "previous_action_id" not in workflow_content + ), "previous_action_id should not be present when content recording is disabled" + assert found_workflow_action, "No workflow_action part found in workflow event" + + +def checkInputMessageEventContents(content, content_recording_enabled): + """Validate input message event content structure and required fields.""" + import json + + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + for entry in data: + assert entry.get("role") == "user" + parts = entry.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + found_text = False + for part in parts: + if part.get("type") == "text": + found_text = True + if content_recording_enabled: + assert "content" in part and isinstance(part["content"], str) and part["content"].strip() != "" + else: + # content field should NOT be present in text parts when content recording is disabled + assert ( + "content" not in part + ), "Text content should not be present when content recording is disabled" + assert found_text, "No text part found in input message event" + + +class TestResponsesInstrumentorWorkflow(TestAiAgentsInstrumentorBase): + """Tests for ResponsesInstrumentor with workflow agents.""" + + def _create_student_teacher_workflow(self, project_client, student_agent, teacher_agent): + """Create a multi-agent workflow with student and teacher 
agents.""" + workflow_yaml = f""" +kind: workflow +trigger: + kind: OnConversationStart + id: my_workflow + actions: + - kind: SetVariable + id: set_variable_input_task + variable: Local.LatestMessage + value: "=UserMessage(System.LastMessageText)" + + - kind: CreateConversation + id: create_student_conversation + conversationId: Local.StudentConversationId + + - kind: CreateConversation + id: create_teacher_conversation + conversationId: Local.TeacherConversationId + + - kind: InvokeAzureAgent + id: student_agent + description: The student node + conversationId: "=Local.StudentConversationId" + agent: + name: {student_agent.name} + input: + messages: "=Local.LatestMessage" + output: + messages: Local.LatestMessage + + - kind: InvokeAzureAgent + id: teacher_agent + description: The teacher node + conversationId: "=Local.TeacherConversationId" + agent: + name: {teacher_agent.name} + input: + messages: "=Local.LatestMessage" + output: + messages: Local.LatestMessage + + - kind: SetVariable + id: set_variable_turncount + variable: Local.TurnCount + value: "=Local.TurnCount + 1" + + - kind: ConditionGroup + id: completion_check + conditions: + - condition: '=!IsBlank(Find("[COMPLETE]", Upper(Last(Local.LatestMessage).Text)))' + id: check_done + actions: + - kind: EndConversation + id: end_workflow + + - condition: "=Local.TurnCount >= 4" + id: check_turn_count_exceeded + actions: + - kind: SendActivity + id: send_activity_tired + activity: "Let's try again later...I am tired." + + elseActions: + - kind: GotoAction + id: goto_student_agent + actionId: student_agent +""" + + workflow = project_client.agents.create_version( + agent_name="student-teacher-workflow", + definition=WorkflowAgentDefinition(workflow=workflow_yaml), + ) + return workflow + + # ======================================== + # Sync Workflow Agent Tests - Non-Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_workflow_non_streaming_with_content_recording(self, **kwargs): + """Test synchronous workflow agent with non-streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create Teacher Agent + teacher_agent = project_client.agents.create_version( + agent_name="teacher-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a teacher that creates pre-school math questions for students and checks answers. + If the answer is correct, you stop the conversation by saying [COMPLETE]. + If the answer is wrong, you ask student to fix it.""", + ), + ) + + # Create Student Agent + student_agent = project_client.agents.create_version( + agent_name="student-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a student who answers questions from the teacher. 
+                    When the teacher gives you a question, you answer it.""",
+                ),
+            )
+
+            # Create workflow
+            workflow = self._create_student_teacher_workflow(project_client, student_agent, teacher_agent)
+
+            try:
+                # Create conversation
+                conversation = openai_client.conversations.create()
+
+                # Non-streaming request
+                response = openai_client.responses.create(
+                    conversation=conversation.id,
+                    extra_body={"agent": AgentReference(name=workflow.name).as_dict()},
+                    input="1 + 1 = ?",
+                    stream=False,
+                    metadata={"x-ms-debug-mode-enabled": "1"},
+                )
+
+                # Verify response has output
+                assert response.output is not None
+                assert len(response.output) > 0
+
+                # Explicitly call and iterate through conversation items to generate the list_conversation_items span
+                items = openai_client.conversations.items.list(conversation_id=conversation.id)
+                for item in items:
+                    pass  # Just iterate to consume items
+
+                # Check spans
+                self.exporter.force_flush()
+                spans = self.exporter.get_spans_by_name(f"responses {workflow.name}")
+                assert len(spans) == 1
+                span = spans[0]
+
+                # Check span attributes
+                expected_attributes = [
+                    ("az.namespace", "Microsoft.CognitiveServices"),
+                    ("gen_ai.operation.name", "responses"),
+                    ("gen_ai.provider.name", "azure.openai"),
+                    ("server.address", ""),
+                    ("gen_ai.conversation.id", conversation.id),
+                    ("gen_ai.agent.name", workflow.name),
+                    ("gen_ai.response.id", response.id),
+                ]
+                attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes)
+                assert attributes_match
+
+                # Check for workflow action events
+                workflow_events = [e for e in span.events if e.name == "gen_ai.workflow.action"]
+                assert len(workflow_events) > 0, "Should have workflow action events"
+
+                # Strict event content checks for response generation and conversation item listing
+                from collections.abc import Mapping
+
+                for event in span.events:
+                    if event.name == "gen_ai.input.messages":
+                        attrs = event.attributes
+                        assert attrs is not None and isinstance(attrs, Mapping)
+                        content = attrs.get("gen_ai.event.content")
+                        checkInputMessageEventContents(content, True)
+                    elif event.name == "gen_ai.output.messages":
+                        attrs = event.attributes
+                        assert attrs is not None and isinstance(attrs, Mapping)
+                        content = attrs.get("gen_ai.event.content")
+                        assert isinstance(content, str) and content.strip() != ""
+                        import json
+
+                        data = json.loads(content)
+                        assert isinstance(data, list) and len(data) > 0
+                        first = data[0]
+                        assert first.get("role") in ("assistant", "tool")
+                        assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0
+                    elif event.attributes:
+                        # Check workflow events in response generation span
+                        event_content = event.attributes.get("gen_ai.event.content")
+                        if not isinstance(event_content, str) or not event_content.strip():
+                            continue
+                        import json
+
+                        try:
+                            data = json.loads(event_content)
+                        except Exception:
+                            continue
+                        if isinstance(data, list) and any(entry.get("role") == "workflow" for entry in data):
+                            checkWorkflowEventContents(event_content, True)
+                    else:
+                        assert False, f"Unexpected event name in responses span: {event.name}"
+
+                list_spans = self.exporter.get_spans_by_name("list_conversation_items")
+                assert len(list_spans) == 1, "Should have one list_conversation_items span"
+                list_span = list_spans[0]
+
+                for event in list_span.events:
+                    if event.name == "gen_ai.conversation.item":
+                        attrs = event.attributes
+                        assert attrs is not None and isinstance(attrs, Mapping)
+                        content =
attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) + for item in data: + if item.get("role") == "workflow": + checkWorkflowEventContents(json.dumps([item]), True) + elif item.get("role") == "user": + checkInputMessageEventContents(json.dumps([item]), True) + else: + pass + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + project_client.agents.delete_version(agent_name=workflow.name, agent_version=workflow.version) + project_client.agents.delete_version(agent_name=student_agent.name, agent_version=student_agent.version) + project_client.agents.delete_version(agent_name=teacher_agent.name, agent_version=teacher_agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_workflow_non_streaming_without_content_recording(self, **kwargs): + """Test synchronous workflow agent with non-streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create Teacher Agent + teacher_agent = project_client.agents.create_version( + agent_name="teacher-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a teacher that creates pre-school math questions for students and checks answers. + If the answer is correct, you stop the conversation by saying [COMPLETE]. + If the answer is wrong, you ask student to fix it.""", + ), + ) + + # Create Student Agent + student_agent = project_client.agents.create_version( + agent_name="student-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a student who answers questions from the teacher. 
+ When the teacher gives you a question, you answer it.""", + ), + ) + + # Create workflow + workflow = self._create_student_teacher_workflow(project_client, student_agent, teacher_agent) + + try: + # Create conversation + conversation = openai_client.conversations.create() + + # Non-streaming request + response = openai_client.responses.create( + conversation=conversation.id, + extra_body={"agent": AgentReference(name=workflow.name).as_dict()}, + input="1 + 1 = ?", + stream=False, + metadata={"x-ms-debug-mode-enabled": "1"}, + ) + + # Verify response has output + assert response.output is not None + assert len(response.output) > 0 + + # Explicitly call and iterate through conversation items to generate the list_conversation_items span + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {workflow.name}") + assert len(spans) == 1 + span = spans[0] + + # Check span attributes + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", workflow.name), + ("gen_ai.response.id", response.id), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match + + # Check for workflow action events (should exist even without content recording) + workflow_events = [e for e in span.events if e.name == "gen_ai.workflow.action"] + assert len(workflow_events) > 0, "Should have workflow action events" + + # Strict event content checks for response generation span - verify content recording is OFF + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + checkInputMessageEventContents(content, False) + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + elif event.attributes: + # Check workflow events in response generation span + event_content = event.attributes.get("gen_ai.event.content") + if not isinstance(event_content, str) or not event_content.strip(): + continue + import json + + try: + data = json.loads(event_content) + except Exception: + continue + if isinstance(data, list) and any(entry.get("role") == "workflow" for entry in data): + checkWorkflowEventContents(event_content, False) + else: + assert False, f"Unexpected event name in responses span: {event.name}" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = 
attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) + for item in data: + if item.get("role") == "workflow": + checkWorkflowEventContents(json.dumps([item]), False) + elif item.get("role") == "user": + checkInputMessageEventContents(json.dumps([item]), False) + else: + pass + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + project_client.agents.delete_version(agent_name=workflow.name, agent_version=workflow.version) + project_client.agents.delete_version(agent_name=student_agent.name, agent_version=student_agent.version) + project_client.agents.delete_version(agent_name=teacher_agent.name, agent_version=teacher_agent.version) + + # ======================================== + # Sync Workflow Agent Tests - Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_workflow_streaming_with_content_recording(self, **kwargs): + """Test synchronous workflow agent with streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create Teacher Agent + teacher_agent = project_client.agents.create_version( + agent_name="teacher-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a teacher that creates pre-school math questions for students and checks answers. + If the answer is correct, you stop the conversation by saying [COMPLETE]. + If the answer is wrong, you ask student to fix it.""", + ), + ) + + # Create Student Agent + student_agent = project_client.agents.create_version( + agent_name="student-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a student who answers questions from the teacher. 
+ When the teacher gives you a question, you answer it.""", + ), + ) + + # Create workflow + workflow = self._create_student_teacher_workflow(project_client, student_agent, teacher_agent) + + try: + # Create conversation + conversation = openai_client.conversations.create() + + # Streaming request + stream = openai_client.responses.create( + conversation=conversation.id, + extra_body={"agent": AgentReference(name=workflow.name).as_dict()}, + input="1 + 1 = ?", + stream=True, + metadata={"x-ms-debug-mode-enabled": "1"}, + ) + + # Consume stream + for event in stream: + pass # Just consume events + + # Explicitly call and iterate through conversation items to generate the list_conversation_items span + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {workflow.name}") + assert len(spans) == 1 + span = spans[0] + + # Get response ID from span + assert span.attributes is not None, "Span should have attributes" + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None, "Response ID should be present in span" + + # Check span attributes + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", workflow.name), + ("gen_ai.response.id", response_id), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match + + # Check for workflow action events + workflow_events = [e for e in span.events if e.name == "gen_ai.workflow.action"] + assert len(workflow_events) > 0, "Should have workflow action events" + + # Strict event content checks for response generation span + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + checkInputMessageEventContents(content, True) + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + elif event.attributes: + # Check workflow events in response generation span + event_content = event.attributes.get("gen_ai.event.content") + if not isinstance(event_content, str) or not event_content.strip(): + continue + import json + + try: + data = json.loads(event_content) + except Exception: + continue + if isinstance(data, list) and any(entry.get("role") == "workflow" for entry in data): + checkWorkflowEventContents(event_content, True) + else: + assert False, f"Unexpected event name in responses span: {event.name}" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + for event in list_span.events: + if event.name == 
"gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) + for item in data: + if item.get("role") == "workflow": + checkWorkflowEventContents(json.dumps([item]), True) + elif item.get("role") == "user": + checkInputMessageEventContents(json.dumps([item]), True) + else: + pass + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + project_client.agents.delete_version(agent_name=workflow.name, agent_version=workflow.version) + project_client.agents.delete_version(agent_name=student_agent.name, agent_version=student_agent.version) + project_client.agents.delete_version(agent_name=teacher_agent.name, agent_version=teacher_agent.version) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_sync_workflow_streaming_without_content_recording(self, **kwargs): + """Test synchronous workflow agent with streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + with project_client: + openai_client = project_client.get_openai_client() + + # Create Teacher Agent + teacher_agent = project_client.agents.create_version( + agent_name="teacher-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a teacher that creates pre-school math questions for students and checks answers. + If the answer is correct, you stop the conversation by saying [COMPLETE]. + If the answer is wrong, you ask student to fix it.""", + ), + ) + + # Create Student Agent + student_agent = project_client.agents.create_version( + agent_name="student-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a student who answers questions from the teacher. 
+ When the teacher gives you a question, you answer it.""", + ), + ) + + # Create workflow + workflow = self._create_student_teacher_workflow(project_client, student_agent, teacher_agent) + + try: + # Create conversation + conversation = openai_client.conversations.create() + + # Streaming request + stream = openai_client.responses.create( + conversation=conversation.id, + extra_body={"agent": AgentReference(name=workflow.name).as_dict()}, + input="1 + 1 = ?", + stream=True, + metadata={"x-ms-debug-mode-enabled": "1"}, + ) + + # Consume stream + for event in stream: + pass # Just consume events + + # Explicitly call and iterate through conversation items to generate the list_conversation_items span + items = openai_client.conversations.items.list(conversation_id=conversation.id) + for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {workflow.name}") + assert len(spans) == 1 + span = spans[0] + + # Get response ID from span + assert span.attributes is not None, "Span should have attributes" + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None, "Response ID should be present in span" + + # Check span attributes + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", workflow.name), + ("gen_ai.response.id", response_id), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match + + # Check for workflow action events (should exist even without content recording) + workflow_events = [e for e in span.events if e.name == "gen_ai.workflow.action"] + assert len(workflow_events) > 0, "Should have workflow action events" + + # Strict event content checks for response generation span - verify content recording is OFF + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + checkInputMessageEventContents(content, False) + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + elif event.attributes: + # Check workflow events in response generation span + event_content = event.attributes.get("gen_ai.event.content") + if not isinstance(event_content, str) or not event_content.strip(): + continue + import json + + try: + data = json.loads(event_content) + except Exception: + continue + if isinstance(data, list) and any(entry.get("role") == "workflow" for entry in data): + checkWorkflowEventContents(event_content, False) + else: + assert False, f"Unexpected event name in responses span: {event.name}" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = 
list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + import json + + data = json.loads(content) + assert isinstance(data, list) + for item in data: + if item.get("role") == "workflow": + checkWorkflowEventContents(json.dumps([item]), False) + elif item.get("role") == "user": + checkInputMessageEventContents(json.dumps([item]), False) + else: + pass + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + project_client.agents.delete_version(agent_name=workflow.name, agent_version=workflow.version) + project_client.agents.delete_version(agent_name=student_agent.name, agent_version=student_agent.version) + project_client.agents.delete_version(agent_name=teacher_agent.name, agent_version=teacher_agent.version) diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_workflow_async.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_workflow_async.py new file mode 100644 index 000000000000..22d5580bdd09 --- /dev/null +++ b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_workflow_async.py @@ -0,0 +1,836 @@ +# pylint: disable=too-many-lines,line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +Async tests for ResponsesInstrumentor with workflow agents. +""" +import os +import pytest +from azure.ai.projects.telemetry import AIProjectInstrumentor, _utils +from azure.core.settings import settings +from gen_ai_trace_verifier import GenAiTraceVerifier +from devtools_testutils.aio import recorded_by_proxy_async +from devtools_testutils import RecordedTransport +from azure.ai.projects.models import ( + AgentReference, + PromptAgentDefinition, + WorkflowAgentDefinition, +) + +from test_base import servicePreparer +from test_ai_instrumentor_base import ( + TestAiAgentsInstrumentorBase, + CONTENT_TRACING_ENV_VARIABLE, +) + +import json + +settings.tracing_implementation = "OpenTelemetry" +_utils._span_impl_type = settings.tracing_implementation() + + +def checkWorkflowEventContents(content, content_recording_enabled): + """Validate workflow event content structure and required fields.""" + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + for entry in data: + assert entry.get("role") == "workflow" + parts = entry.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + found_workflow_action = False + for part in parts: + if part.get("type") == "workflow_action": + found_workflow_action = True + workflow_content = part.get("content") + assert isinstance(workflow_content, dict) + # status is always present + assert ( + "status" in workflow_content + and isinstance(workflow_content["status"], str) + and workflow_content["status"] + ) + if content_recording_enabled: + # action_id and previous_action_id should be present and non-empty + assert ( + "action_id" in workflow_content + and isinstance(workflow_content["action_id"], str) + and workflow_content["action_id"] + ) + assert ( + "previous_action_id" in 
workflow_content + and isinstance(workflow_content["previous_action_id"], str) + and workflow_content["previous_action_id"] + ) + else: + # action_id and previous_action_id should NOT be present when content recording is disabled + assert ( + "action_id" not in workflow_content + ), "action_id should not be present when content recording is disabled" + assert ( + "previous_action_id" not in workflow_content + ), "previous_action_id should not be present when content recording is disabled" + assert found_workflow_action, "No workflow_action part found in workflow event" + + +def checkInputMessageEventContents(content, content_recording_enabled): + """Validate input message event content structure and required fields.""" + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + for entry in data: + assert entry.get("role") == "user" + parts = entry.get("parts") + assert isinstance(parts, list) and len(parts) > 0 + found_text = False + for part in parts: + if part.get("type") == "text": + found_text = True + if content_recording_enabled: + assert "content" in part and isinstance(part["content"], str) and part["content"].strip() != "" + else: + # content field should NOT be present in text parts when content recording is disabled + assert ( + "content" not in part + ), "Text content should not be present when content recording is disabled" + assert found_text, "No text part found in input message event" + + +class TestResponsesInstrumentorWorkflowAsync(TestAiAgentsInstrumentorBase): + """Async tests for ResponsesInstrumentor with workflow agents.""" + + async def _create_student_teacher_workflow(self, project_client, student_agent, teacher_agent): + """Create a multi-agent workflow with student and teacher agents.""" + workflow_yaml = f""" +kind: workflow +trigger: + kind: OnConversationStart + id: my_workflow + actions: + - kind: SetVariable + id: set_variable_input_task + variable: Local.LatestMessage + value: "=UserMessage(System.LastMessageText)" + + - kind: CreateConversation + id: create_student_conversation + conversationId: Local.StudentConversationId + + - kind: CreateConversation + id: create_teacher_conversation + conversationId: Local.TeacherConversationId + + - kind: InvokeAzureAgent + id: student_agent + description: The student node + conversationId: "=Local.StudentConversationId" + agent: + name: {student_agent.name} + input: + messages: "=Local.LatestMessage" + output: + messages: Local.LatestMessage + + - kind: InvokeAzureAgent + id: teacher_agent + description: The teacher node + conversationId: "=Local.TeacherConversationId" + agent: + name: {teacher_agent.name} + input: + messages: "=Local.LatestMessage" + output: + messages: Local.LatestMessage + + - kind: SetVariable + id: set_variable_turncount + variable: Local.TurnCount + value: "=Local.TurnCount + 1" + + - kind: ConditionGroup + id: completion_check + conditions: + - condition: '=!IsBlank(Find("[COMPLETE]", Upper(Last(Local.LatestMessage).Text)))' + id: check_done + actions: + - kind: EndConversation + id: end_workflow + + - condition: "=Local.TurnCount >= 4" + id: check_turn_count_exceeded + actions: + - kind: SendActivity + id: send_activity_tired + activity: "Let's try again later...I am tired." 
+ + elseActions: + - kind: GotoAction + id: goto_student_agent + actionId: student_agent +""" + + workflow = await project_client.agents.create_version( + agent_name="student-teacher-workflow", + definition=WorkflowAgentDefinition(workflow=workflow_yaml), + ) + return workflow + + # ======================================== + # Async Workflow Agent Tests - Non-Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_workflow_non_streaming_with_content_recording(self, **kwargs): + """Test asynchronous workflow agent with non-streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create Teacher Agent + teacher_agent = await project_client.agents.create_version( + agent_name="teacher-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a teacher that creates pre-school math questions for students and checks answers. + If the answer is correct, you stop the conversation by saying [COMPLETE]. + If the answer is wrong, you ask student to fix it.""", + ), + ) + + # Create Student Agent + student_agent = await project_client.agents.create_version( + agent_name="student-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a student who answers questions from the teacher. 
+ When the teacher gives you a question, you answer it.""", + ), + ) + + # Create workflow using helper method + workflow = await self._create_student_teacher_workflow(project_client, student_agent, teacher_agent) + + try: + # Create conversation + conversation = await openai_client.conversations.create() + + # Non-streaming request + response = await openai_client.responses.create( + conversation=conversation.id, + extra_body={"agent": AgentReference(name=workflow.name).as_dict()}, + input="1 + 1 = ?", + stream=False, + metadata={"x-ms-debug-mode-enabled": "1"}, + ) + + # Verify response has output + assert response.output is not None + assert len(response.output) > 0 + + # Explicitly call and iterate through conversation items to generate the list_conversation_items span + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {workflow.name}") + assert len(spans) == 1 + span = spans[0] + + # Check span attributes + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", workflow.name), + ("gen_ai.response.id", response.id), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match + + # Check for workflow action events + workflow_events = [e for e in span.events if e.name == "gen_ai.workflow.action"] + assert len(workflow_events) > 0, "Should have workflow action events" + + # Strict event content checks for response generation span + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + checkInputMessageEventContents(content, True) + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + elif event.attributes: + # Check workflow events in response generation span + event_content = event.attributes.get("gen_ai.event.content") + if not isinstance(event_content, str) or not event_content.strip(): + continue + try: + data = json.loads(event_content) + except Exception: + continue + if isinstance(data, list) and any(entry.get("role") == "workflow" for entry in data): + checkWorkflowEventContents(event_content, True) + else: + assert False, f"Unexpected event name in responses span: {event.name}" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, 
str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) + for item in data: + if item.get("role") == "workflow": + checkWorkflowEventContents(json.dumps([item]), True) + elif item.get("role") == "user": + checkInputMessageEventContents(json.dumps([item]), True) + else: + pass + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + await project_client.agents.delete_version(agent_name=workflow.name, agent_version=workflow.version) + await project_client.agents.delete_version( + agent_name=student_agent.name, agent_version=student_agent.version + ) + await project_client.agents.delete_version( + agent_name=teacher_agent.name, agent_version=teacher_agent.version + ) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_workflow_non_streaming_without_content_recording(self, **kwargs): + """Test asynchronous workflow agent with non-streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create Teacher Agent + teacher_agent = await project_client.agents.create_version( + agent_name="teacher-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a teacher that creates pre-school math questions for students and checks answers. + If the answer is correct, you stop the conversation by saying [COMPLETE]. + If the answer is wrong, you ask student to fix it.""", + ), + ) + + # Create Student Agent + student_agent = await project_client.agents.create_version( + agent_name="student-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a student who answers questions from the teacher. 
+ When the teacher gives you a question, you answer it.""", + ), + ) + + # Create workflow using helper method + workflow = await self._create_student_teacher_workflow(project_client, student_agent, teacher_agent) + + try: + # Create conversation + conversation = await openai_client.conversations.create() + + # Non-streaming request + response = await openai_client.responses.create( + conversation=conversation.id, + extra_body={"agent": AgentReference(name=workflow.name).as_dict()}, + input="1 + 1 = ?", + stream=False, + metadata={"x-ms-debug-mode-enabled": "1"}, + ) + + # Verify response has output + assert response.output is not None + assert len(response.output) > 0 + + # Explicitly call and iterate through conversation items to generate the list_conversation_items span + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {workflow.name}") + assert len(spans) == 1 + span = spans[0] + + # Check span attributes + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", workflow.name), + ("gen_ai.response.id", response.id), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match + + # Check for workflow action events (should exist even without content recording) + workflow_events = [e for e in span.events if e.name == "gen_ai.workflow.action"] + assert len(workflow_events) > 0, "Should have workflow action events" + + # Strict event content checks for response generation span - verify content recording is OFF + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + checkInputMessageEventContents(content, False) + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + elif event.attributes: + # Check workflow events in response generation span + event_content = event.attributes.get("gen_ai.event.content") + if not isinstance(event_content, str) or not event_content.strip(): + continue + try: + data = json.loads(event_content) + except Exception: + continue + if isinstance(data, list) and any(entry.get("role") == "workflow" for entry in data): + checkWorkflowEventContents(event_content, False) + else: + assert False, f"Unexpected event name in responses span: {event.name}" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, 
Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) + for item in data: + if item.get("role") == "workflow": + checkWorkflowEventContents(json.dumps([item]), False) + elif item.get("role") == "user": + checkInputMessageEventContents(json.dumps([item]), False) + else: + pass + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + await project_client.agents.delete_version(agent_name=workflow.name, agent_version=workflow.version) + await project_client.agents.delete_version( + agent_name=student_agent.name, agent_version=student_agent.version + ) + await project_client.agents.delete_version( + agent_name=teacher_agent.name, agent_version=teacher_agent.version + ) + + # ======================================== + # Async Workflow Agent Tests - Streaming + # ======================================== + + @pytest.mark.usefixtures("instrument_with_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_workflow_streaming_with_content_recording(self, **kwargs): + """Test asynchronous workflow agent with streaming and content recording enabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "True", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create Teacher Agent + teacher_agent = await project_client.agents.create_version( + agent_name="teacher-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a teacher that creates pre-school math questions for students and checks answers. + If the answer is correct, you stop the conversation by saying [COMPLETE]. + If the answer is wrong, you ask student to fix it.""", + ), + ) + + # Create Student Agent + student_agent = await project_client.agents.create_version( + agent_name="student-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a student who answers questions from the teacher. 
+ When the teacher gives you a question, you answer it.""", + ), + ) + + # Create workflow using helper method + workflow = await self._create_student_teacher_workflow(project_client, student_agent, teacher_agent) + + try: + # Create conversation + conversation = await openai_client.conversations.create() + + # Streaming request + stream = await openai_client.responses.create( + conversation=conversation.id, + extra_body={"agent": AgentReference(name=workflow.name).as_dict()}, + input="1 + 1 = ?", + stream=True, + metadata={"x-ms-debug-mode-enabled": "1"}, + ) + + # Consume stream + async for event in stream: + pass # Just consume events + + # Explicitly call and iterate through conversation items to generate the list_conversation_items span + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {workflow.name}") + assert len(spans) == 1 + span = spans[0] + + # Get response ID from span + assert span.attributes is not None, "Span should have attributes" + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None, "Response ID should be present in span" + + # Check span attributes + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", workflow.name), + ("gen_ai.response.id", response_id), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match + + # Check for workflow action events + workflow_events = [e for e in span.events if e.name == "gen_ai.workflow.action"] + assert len(workflow_events) > 0, "Should have workflow action events" + + # Strict event content checks for response generation span + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + checkInputMessageEventContents(content, True) + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + elif event.attributes: + # Check workflow events in response generation span + event_content = event.attributes.get("gen_ai.event.content") + if not isinstance(event_content, str) or not event_content.strip(): + continue + try: + data = json.loads(event_content) + except Exception: + continue + if isinstance(data, list) and any(entry.get("role") == "workflow" for entry in data): + checkWorkflowEventContents(event_content, True) + else: + assert False, f"Unexpected event name in responses span: {event.name}" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one list_conversation_items span" + list_span = list_spans[0] + + for event in list_span.events: + if 
event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) + for item in data: + if item.get("role") == "workflow": + checkWorkflowEventContents(json.dumps([item]), True) + elif item.get("role") == "user": + checkInputMessageEventContents(json.dumps([item]), True) + else: + pass + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + await project_client.agents.delete_version(agent_name=workflow.name, agent_version=workflow.version) + await project_client.agents.delete_version( + agent_name=student_agent.name, agent_version=student_agent.version + ) + await project_client.agents.delete_version( + agent_name=teacher_agent.name, agent_version=teacher_agent.version + ) + + @pytest.mark.usefixtures("instrument_without_content") + @servicePreparer() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_async_workflow_streaming_without_content_recording(self, **kwargs): + """Test asynchronous workflow agent with streaming and content recording disabled.""" + self.cleanup() + os.environ.update( + { + CONTENT_TRACING_ENV_VARIABLE: "False", + "AZURE_TRACING_GEN_AI_INSTRUMENT_RESPONSES_API": "True", + } + ) + self.setup_telemetry() + assert not AIProjectInstrumentor().is_content_recording_enabled() + assert AIProjectInstrumentor().is_instrumented() + + project_client = self.create_async_client(operation_group="tracing", **kwargs) + deployment_name = kwargs.get("azure_ai_model_deployment_name") + assert deployment_name is not None + + async with project_client: + openai_client = project_client.get_openai_client() + + # Create Teacher Agent + teacher_agent = await project_client.agents.create_version( + agent_name="teacher-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a teacher that creates pre-school math questions for students and checks answers. + If the answer is correct, you stop the conversation by saying [COMPLETE]. + If the answer is wrong, you ask student to fix it.""", + ), + ) + + # Create Student Agent + student_agent = await project_client.agents.create_version( + agent_name="student-agent", + definition=PromptAgentDefinition( + model=deployment_name, + instructions="""You are a student who answers questions from the teacher. 
+ When the teacher gives you a question, you answer it.""", + ), + ) + + # Create workflow using helper method + workflow = await self._create_student_teacher_workflow(project_client, student_agent, teacher_agent) + + try: + # Create conversation + conversation = await openai_client.conversations.create() + + # Streaming request + stream = await openai_client.responses.create( + conversation=conversation.id, + extra_body={"agent": AgentReference(name=workflow.name).as_dict()}, + input="1 + 1 = ?", + stream=True, + metadata={"x-ms-debug-mode-enabled": "1"}, + ) + + # Consume stream + async for event in stream: + pass # Just consume events + + # Explicitly call and iterate through conversation items to generate the list_conversation_items span + items = await openai_client.conversations.items.list(conversation_id=conversation.id) + async for item in items: + pass # Just iterate to consume items + + # Check spans + self.exporter.force_flush() + spans = self.exporter.get_spans_by_name(f"responses {workflow.name}") + assert len(spans) == 1 + span = spans[0] + + # Get response ID from span + assert span.attributes is not None, "Span should have attributes" + response_id = span.attributes.get("gen_ai.response.id") + assert response_id is not None, "Response ID should be present in span" + + # Check span attributes + expected_attributes = [ + ("az.namespace", "Microsoft.CognitiveServices"), + ("gen_ai.operation.name", "responses"), + ("gen_ai.provider.name", "azure.openai"), + ("server.address", ""), + ("gen_ai.conversation.id", conversation.id), + ("gen_ai.agent.name", workflow.name), + ("gen_ai.response.id", response_id), + ] + attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes) + assert attributes_match + + # Check for workflow action events (should exist even without content recording) + workflow_events = [e for e in span.events if e.name == "gen_ai.workflow.action"] + assert len(workflow_events) > 0, "Should have workflow action events" + + # Strict event content checks for response generation span - verify content recording is OFF + from collections.abc import Mapping + + for event in span.events: + if event.name == "gen_ai.input.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + checkInputMessageEventContents(content, False) + elif event.name == "gen_ai.output.messages": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) and len(data) > 0 + first = data[0] + assert first.get("role") in ("assistant", "tool") + assert isinstance(first.get("parts"), list) and len(first["parts"]) > 0 + elif event.attributes: + # Check workflow events in response generation span + event_content = event.attributes.get("gen_ai.event.content") + if not isinstance(event_content, str) or not event_content.strip(): + continue + try: + data = json.loads(event_content) + except Exception: + continue + if isinstance(data, list) and any(entry.get("role") == "workflow" for entry in data): + checkWorkflowEventContents(event_content, False) + else: + assert False, f"Unexpected event name in responses span: {event.name}" + + # Check list_conversation_items span + list_spans = self.exporter.get_spans_by_name("list_conversation_items") + assert len(list_spans) == 1, "Should have one 
list_conversation_items span" + list_span = list_spans[0] + + for event in list_span.events: + if event.name == "gen_ai.conversation.item": + attrs = event.attributes + assert attrs is not None and isinstance(attrs, Mapping) + content = attrs.get("gen_ai.event.content") + assert isinstance(content, str) and content.strip() != "" + data = json.loads(content) + assert isinstance(data, list) + for item in data: + if item.get("role") == "workflow": + checkWorkflowEventContents(json.dumps([item]), False) + elif item.get("role") == "user": + checkInputMessageEventContents(json.dumps([item]), False) + else: + pass + else: + assert False, f"Unexpected event name in list_conversation_items span: {event.name}" + + # Cleanup + await openai_client.conversations.delete(conversation_id=conversation.id) + + finally: + await project_client.agents.delete_version(agent_name=workflow.name, agent_version=workflow.version) + await project_client.agents.delete_version( + agent_name=student_agent.name, agent_version=student_agent.version + ) + await project_client.agents.delete_version( + agent_name=teacher_agent.name, agent_version=teacher_agent.version + ) diff --git a/sdk/ai/azure-ai-projects/tests/test_base.py b/sdk/ai/azure-ai-projects/tests/test_base.py index 45945bc99c74..15cc8f131aae 100644 --- a/sdk/ai/azure-ai-projects/tests/test_base.py +++ b/sdk/ai/azure-ai-projects/tests/test_base.py @@ -53,6 +53,7 @@ ai_search_project_connection_id="/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/sanitized-resource-group/providers/Microsoft.CognitiveServices/accounts/sanitized-account/projects/sanitized-project/connections/sanitized-ai-search-connection", ai_search_index_name="sanitized-index-name", mcp_project_connection_id="/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/sanitized-resource-group/providers/Microsoft.CognitiveServices/accounts/sanitized-account/projects/sanitized-project/connections/sanitized-mcp-connection", + browser_automation_project_connection_id="/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/sanitized-resource-group/providers/Microsoft.CognitiveServices/accounts/sanitized-account/projects/sanitized-project/connections/sanitized-browser-automation-connection", sharepoint_project_connection_id="/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/sanitized-resource-group/providers/Microsoft.CognitiveServices/accounts/sanitized-account/projects/sanitized-project/connections/sanitized-sharepoint-connection", completed_oai_model_sft_fine_tuning_job_id="sanitized-ftjob-id", completed_oai_model_rft_fine_tuning_job_id="sanitized-ftjob-id",