traceloop · galkleinman · Jul 17, 2025 · Jul 16, 2025 · Jul 16, 2025 · Jul 16, 2025
diff --git a/package-lock.json b/package-lock.json
diff --git a/...try-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/__init__.py b/...try-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/__init__.py
diff --git a/...cassettes/test_openai_agents/test_recipe_workflow_agent_handoffs_with_function_tools.yaml b/...cassettes/test_openai_agents/test_recipe_workflow_agent_handoffs_with_function_tools.yaml
diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/conftest.py b/packages/opentelemetry-instrumentation-openai-agents/tests/conftest.py
@@ -18,6 +18,7 @@
 
 from agents import Agent, function_tool, ModelSettings, WebSearchTool
 from pydantic import BaseModel
+from typing import List, Dict, Union
 
 pytest_plugins = []
 
@@ -45,6 +46,10 @@ def environment():
 @pytest.fixture(autouse=True)
 def clear_exporter(exporter):
     exporter.clear()
+    # Also empty the instrumentation module's private global containers so no state leaks between tests
+    from opentelemetry.instrumentation.openai_agents import _root_span_storage, _instrumented_tools
+    _root_span_storage.clear()
+    _instrumented_tools.clear()
 
 
 @pytest.fixture(scope="session")
@@ -136,6 +141,114 @@ class HandoffExample(BaseModel):
     return triage_agent
 
 
+@pytest.fixture(scope="session")
+def recipe_workflow_agents():
+    """Return (main_chat_agent, recipe_editor_agent); the main agent can hand off to the tool-equipped editor."""
+
+    # Pydantic model of one recipe; reused as a tool argument and inside both tool responses
+    class Recipe(BaseModel):
+        id: str
+        name: str
+        ingredients: List[str]
+        instructions: List[str]
+        prep_time: str
+        cook_time: str
+        servings: int
+
+    class SearchResponse(BaseModel):
+        status: str
+        message: str
+        recipes: Union[Dict[str, Recipe], None] = None
+        recipe_count: Union[int, None] = None
+        query: Union[str, None] = None
+
+    class EditResponse(BaseModel):
+        status: str
+        message: str
+        modified_recipe: Union[Recipe, None] = None
+        changes_made: Union[List[str], None] = None
+        original_recipe: Union[Recipe, None] = None
+
+    # In-memory stand-in for a recipe database, keyed by recipe id (one entry)
+    MOCK_RECIPES = {
+        "spaghetti_carbonara": {
+            "id": "spaghetti_carbonara",
+            "name": "Spaghetti Carbonara",
+            "ingredients": ["400g spaghetti", "200g pancetta", "4 large eggs", "100g Pecorino Romano cheese"],
+            "instructions": ["Cook spaghetti", "Dice pancetta", "Whisk eggs with cheese"],
+            "prep_time": "10 minutes",
+            "cook_time": "15 minutes",
+            "servings": 4
+        }
+    }
+
+    @function_tool
+    async def search_recipes(query: str = "") -> SearchResponse:
+        """Search and browse recipes in the database."""
+        if "carbonara" in query.lower():  # case-insensitive substring match
+            recipe_data = MOCK_RECIPES["spaghetti_carbonara"]
+            recipes_dict = {"spaghetti_carbonara": Recipe(**recipe_data)}
+            return SearchResponse(
+                status='success',
+                message=f'Found 1 recipes matching "{query}"',
+                recipes=recipes_dict,
+                recipe_count=1,
+                query=query
+            )
+        return SearchResponse(
+            status='success',
+            message='No recipes found',
+            recipes={},
+            recipe_count=0,
+            query=query
+        )
+
+    @function_tool
+    async def plan_and_apply_recipe_modifications(recipe: Recipe, modification_request: str) -> EditResponse:
+        """Plan modifications to a recipe based on user request and apply them."""
+        # Canned edit: only a "vegetarian" request on a recipe named like carbonara succeeds
+        if "vegetarian" in modification_request.lower() and "carbonara" in recipe.name.lower():
+            modified_recipe = Recipe(
+                id=recipe.id,
+                name="Vegetarian Carbonara",
+                ingredients=["400g spaghetti", "200g mushrooms", "4 large eggs", "100g Pecorino Romano cheese"],
+                instructions=["Cook spaghetti", "Sauté mushrooms", "Whisk eggs with cheese"],
+                prep_time=recipe.prep_time,
+                cook_time=recipe.cook_time,
+                servings=recipe.servings
+            )
+            return EditResponse(
+                status='success',
+                message='Successfully modified Spaghetti Carbonara to be vegetarian',
+                modified_recipe=modified_recipe,
+                changes_made=["Replaced pancetta with mushrooms"],
+                original_recipe=recipe
+            )
+
+        return EditResponse(
+            status='error',
+            message='Could not modify recipe'
+        )
+
+    # Recipe Editor Agent: owns both function tools defined above
+    recipe_editor_agent = Agent(
+        name="Recipe Editor Agent",
+        instructions="You are a recipe editor specialist. Help users search and modify recipes using your tools.",
+        model="gpt-4o",
+        tools=[search_recipes, plan_and_apply_recipe_modifications]
+    )
+
+    # Main Chat Agent: gets no tools of its own, only a handoff to the recipe editor
+    main_chat_agent = Agent(
+        name="Main Chat Agent",
+        instructions="You handle general conversation and route recipe tasks to the recipe editor agent.",
+        model="gpt-4o",
+        handoffs=[recipe_editor_agent]
+    )
+
+    return main_chat_agent, recipe_editor_agent
+
+
 @pytest.fixture(scope="module")
 def vcr_config():
     return {"filter_headers": ["authorization", "api-key"]}
diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_openai_agents.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_openai_agents.py
@@ -98,7 +98,7 @@ def test_agent_with_function_tool_spans(exporter, function_tool_agent):
     )
     spans = exporter.get_finished_spans()
 
-    assert len(spans) == 4
+    assert len(spans) == 3
 
     agent_span = next(s for s in spans if s.name == "WeatherAgent.agent")
     tool_span = next(s for s in spans if s.name == "get_weather.tool")
@@ -270,3 +270,109 @@ def test_generate_metrics(metrics_test_context, test_agent):
 
         assert found_token_metric is True
         assert found_duration_metric is True
+
+
+@pytest.mark.vcr
+@pytest.mark.asyncio
+async def test_recipe_workflow_agent_handoffs_with_function_tools(exporter, recipe_workflow_agents):
+    """Check span names, attributes, parents, status and a single shared trace id for a handoff + function-tool run."""
+
+    main_chat_agent, recipe_editor_agent = recipe_workflow_agents
+
+    query = "Can you edit the carbonara recipe to be vegetarian?"
+
+    messages = [{"role": "user", "content": query}]
+    main_runner = Runner().run_streamed(starting_agent=main_chat_agent, input=messages)
+    # Drain the stream, remembering the raw item of the last "handoff_occurred" event (if any)
+    handoff_info = None
+    async for event in main_runner.stream_events():
+        if event.type == "run_item_stream_event" and event.name == "handoff_occurred":
+            handoff_info = event.item.raw_item
+    # Only when a recipe-related handoff was seen: run the recipe editor directly with the same query
+    if handoff_info and "recipe" in str(handoff_info).lower():
+        recipe_messages = [{"role": "user", "content": query}]
+        recipe_runner = Runner().run_streamed(
+            starting_agent=recipe_editor_agent, input=recipe_messages
+        )
+        async for event in recipe_runner.stream_events():
+            pass  # drain the stream; events are not inspected
+    # Drop spans whose name ends with "v1/responses" before inspecting span names
+    spans = exporter.get_finished_spans()
+    non_rest_spans = [span for span in spans if not span.name.endswith("v1/responses")]
+    span_names = [span.name for span in non_rest_spans]
+
+    assert span_names.count("Main Chat Agent.agent") == 1
+    assert span_names.count("Recipe Editor Agent.agent") == 3  # 3 turns is correct behavior
+    assert span_names.count("search_recipes.tool") == 1
+    assert span_names.count("plan_and_apply_recipe_modifications.tool") == 1
+    # NOTE: the four membership checks below are already implied by the exact-count assertions above
+    assert "Main Chat Agent.agent" in span_names
+    assert "Recipe Editor Agent.agent" in span_names
+
+    assert "search_recipes.tool" in span_names
+    assert "plan_and_apply_recipe_modifications.tool" in span_names
+
+    main_chat_span = next(s for s in non_rest_spans if s.name == "Main Chat Agent.agent")
+    recipe_editor_spans = [s for s in non_rest_spans if s.name == "Recipe Editor Agent.agent"]
+    search_tool_span = next(s for s in non_rest_spans if s.name == "search_recipes.tool")
+    modify_tool_span = next(s for s in non_rest_spans if s.name == "plan_and_apply_recipe_modifications.tool")
+    # Main chat agent span: system, agent name, span kind, captured input/output
+    assert main_chat_span.attributes[SpanAttributes.LLM_SYSTEM] == "openai"
+    assert main_chat_span.attributes["gen_ai.agent.name"] == "Main Chat Agent"
+    assert main_chat_span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.AGENT.value
+
+    assert "traceloop.entity.input" in main_chat_span.attributes
+    assert "traceloop.entity.output" in main_chat_span.attributes
+    # The handoff attribute holds JSON; note this rebinds handoff_info from the streaming loop above
+    assert "openai.agent.handoff0" in main_chat_span.attributes
+    handoff_info = json.loads(main_chat_span.attributes["openai.agent.handoff0"])
+    assert handoff_info["name"] == "Recipe Editor Agent"
+    # Only the first Recipe Editor span gets the detailed attribute checks
+    recipe_editor_span = recipe_editor_spans[0]
+    assert recipe_editor_span.attributes[SpanAttributes.LLM_SYSTEM] == "openai"
+    assert recipe_editor_span.attributes["gen_ai.agent.name"] == "Recipe Editor Agent"
+    assert recipe_editor_span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.AGENT.value
+
+    assert "traceloop.entity.input" in recipe_editor_span.attributes
+    assert "traceloop.entity.output" in recipe_editor_span.attributes
+    # Tool spans: span kind, tool name/type, captured input/output
+    assert search_tool_span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.TOOL.value
+    assert search_tool_span.attributes[f"{GEN_AI_COMPLETION}.tool.name"] == "search_recipes"
+    assert search_tool_span.attributes[f"{GEN_AI_COMPLETION}.tool.type"] == "FunctionTool"
+
+    assert "traceloop.entity.input" in search_tool_span.attributes
+    assert "traceloop.entity.output" in search_tool_span.attributes
+
+    assert modify_tool_span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.TOOL.value
+    assert modify_tool_span.attributes[f"{GEN_AI_COMPLETION}.tool.name"] == "plan_and_apply_recipe_modifications"
+    assert modify_tool_span.attributes[f"{GEN_AI_COMPLETION}.tool.type"] == "FunctionTool"
+
+    assert "traceloop.entity.input" in modify_tool_span.attributes
+    assert "traceloop.entity.output" in modify_tool_span.attributes
+    # Hierarchy: the main agent span has no parent; both tool spans have one
+    assert main_chat_span.parent is None
+
+    assert search_tool_span.parent is not None
+    assert modify_tool_span.parent is not None
+    # Every inspected span must have finished with an OK status
+    assert main_chat_span.status.status_code == StatusCode.OK
+    for span in recipe_editor_spans:
+        assert span.status.status_code == StatusCode.OK
+    assert search_tool_span.status.status_code == StatusCode.OK
+    assert modify_tool_span.status.status_code == StatusCode.OK
+    # All inspected spans must carry the main agent span's trace id
+    main_trace_id = main_chat_span.get_span_context().trace_id
+    all_trace_ids = {main_trace_id}
+
+    for span in recipe_editor_spans:
+        span_trace_id = span.get_span_context().trace_id
+        assert span_trace_id == main_trace_id
+        all_trace_ids.add(span_trace_id)
+
+    assert search_tool_span.get_span_context().trace_id == main_trace_id
+    all_trace_ids.add(search_tool_span.get_span_context().trace_id)
+
+    assert modify_tool_span.get_span_context().trace_id == main_trace_id
+    all_trace_ids.add(modify_tool_span.get_span_context().trace_id)
+    # Follows from the equality assertions above; kept as a final single-trace check
+    assert len(all_trace_ids) == 1