Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
581 changes: 324 additions & 257 deletions package-lock.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

113 changes: 113 additions & 0 deletions packages/opentelemetry-instrumentation-openai-agents/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from agents import Agent, function_tool, ModelSettings, WebSearchTool
from pydantic import BaseModel
from typing import List, Dict, Union

pytest_plugins = []

Expand Down Expand Up @@ -45,6 +46,10 @@ def environment():
@pytest.fixture(autouse=True)
def clear_exporter(exporter):
    """Empty the span exporter and the instrumentation's shared containers.

    Declared with ``autouse=True``, so it is applied to every test without
    being requested explicitly.
    """
    exporter.clear()

    # Imported at call time rather than at module import, as in the original.
    from opentelemetry.instrumentation.openai_agents import (
        _root_span_storage,
        _instrumented_tools,
    )

    # Clear the global span storage between tests
    for shared_state in (_root_span_storage, _instrumented_tools):
        shared_state.clear()


@pytest.fixture(scope="session")
Expand Down Expand Up @@ -136,6 +141,114 @@ class HandoffExample(BaseModel):
return triage_agent


@pytest.fixture(scope="session")
def recipe_workflow_agents():
    """Build the two-agent recipe workflow used by the handoff test.

    Returns:
        A ``(main_chat_agent, recipe_editor_agent)`` tuple. The main chat
        agent lists the recipe editor as its only handoff; the recipe editor
        carries the ``search_recipes`` and
        ``plan_and_apply_recipe_modifications`` function tools.
    """

    # NOTE(review): the model class names, field names/order, tool names,
    # tool parameters and tool docstrings are presumably what `function_tool`
    # exposes to the model, so they are kept exactly as-is -- confirm before
    # editing any of them.

    # Response/record shapes for the mock recipe tools.
    class Recipe(BaseModel):
        id: str
        name: str
        ingredients: List[str]
        instructions: List[str]
        prep_time: str
        cook_time: str
        servings: int

    class SearchResponse(BaseModel):
        status: str
        message: str
        recipes: Union[Dict[str, Recipe], None] = None
        recipe_count: Union[int, None] = None
        query: Union[str, None] = None

    class EditResponse(BaseModel):
        status: str
        message: str
        modified_recipe: Union[Recipe, None] = None
        changes_made: Union[List[str], None] = None
        original_recipe: Union[Recipe, None] = None

    # In-memory stand-in for a recipe database, keyed by recipe id.
    recipe_db = {
        "spaghetti_carbonara": {
            "id": "spaghetti_carbonara",
            "name": "Spaghetti Carbonara",
            "ingredients": ["400g spaghetti", "200g pancetta", "4 large eggs", "100g Pecorino Romano cheese"],
            "instructions": ["Cook spaghetti", "Dice pancetta", "Whisk eggs with cheese"],
            "prep_time": "10 minutes",
            "cook_time": "15 minutes",
            "servings": 4
        }
    }

    @function_tool
    async def search_recipes(query: str = "") -> SearchResponse:
        """Search and browse recipes in the database."""
        # Anything that does not mention carbonara yields an empty result set.
        if "carbonara" not in query.lower():
            return SearchResponse(
                status='success',
                message='No recipes found',
                recipes={},
                recipe_count=0,
                query=query
            )

        carbonara = Recipe(**recipe_db["spaghetti_carbonara"])
        return SearchResponse(
            status='success',
            message=f'Found 1 recipes matching "{query}"',
            recipes={"spaghetti_carbonara": carbonara},
            recipe_count=1,
            query=query
        )

    @function_tool
    async def plan_and_apply_recipe_modifications(recipe: Recipe, modification_request: str) -> EditResponse:
        """Plan modifications to a recipe based on user request and apply them."""
        # Only the "vegetarian carbonara" request is mocked; everything else is an error.
        if "vegetarian" not in modification_request.lower() or "carbonara" not in recipe.name.lower():
            return EditResponse(
                status='error',
                message='Could not modify recipe'
            )

        vegetarian_version = Recipe(
            id=recipe.id,
            name="Vegetarian Carbonara",
            ingredients=["400g spaghetti", "200g mushrooms", "4 large eggs", "100g Pecorino Romano cheese"],
            instructions=["Cook spaghetti", "Sauté mushrooms", "Whisk eggs with cheese"],
            prep_time=recipe.prep_time,
            cook_time=recipe.cook_time,
            servings=recipe.servings
        )
        return EditResponse(
            status='success',
            message='Successfully modified Spaghetti Carbonara to be vegetarian',
            modified_recipe=vegetarian_version,
            changes_made=["Replaced pancetta with mushrooms"],
            original_recipe=recipe
        )

    # The editor agent owns both function tools.
    editor_tools = [search_recipes, plan_and_apply_recipe_modifications]
    recipe_editor_agent = Agent(
        name="Recipe Editor Agent",
        instructions="You are a recipe editor specialist. Help users search and modify recipes using your tools.",
        model="gpt-4o",
        tools=editor_tools
    )

    # The main chat agent has no tools of its own; it can only hand off to the editor.
    main_chat_agent = Agent(
        name="Main Chat Agent",
        instructions="You handle general conversation and route recipe tasks to the recipe editor agent.",
        model="gpt-4o",
        handoffs=[recipe_editor_agent]
    )

    return main_chat_agent, recipe_editor_agent


@pytest.fixture(scope="module")
def vcr_config():
    """Return the VCR configuration dict for this test module.

    The only setting is ``filter_headers``, naming the ``authorization`` and
    ``api-key`` request headers.
    """
    credential_headers = ["authorization", "api-key"]
    return {"filter_headers": credential_headers}
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_agent_with_function_tool_spans(exporter, function_tool_agent):
)
spans = exporter.get_finished_spans()

assert len(spans) == 4
assert len(spans) == 3

agent_span = next(s for s in spans if s.name == "WeatherAgent.agent")
tool_span = next(s for s in spans if s.name == "get_weather.tool")
Expand Down Expand Up @@ -270,3 +270,109 @@ def test_generate_metrics(metrics_test_context, test_agent):

assert found_token_metric is True
assert found_duration_metric is True


@pytest.mark.vcr
@pytest.mark.asyncio
async def test_recipe_workflow_agent_handoffs_with_function_tools(exporter, recipe_workflow_agents):
    """Check spans emitted when the main agent hands off to the recipe editor and its tools run.

    Streams a run of the main chat agent, then (if a recipe handoff was seen)
    a run of the recipe editor agent, and asserts span counts, attributes,
    parenting, status codes and a shared trace id on the exported spans.
    """

    def _assert_agent_span(span, agent_name):
        # Attributes every agent span is expected to carry.
        assert span.attributes[SpanAttributes.LLM_SYSTEM] == "openai"
        assert span.attributes["gen_ai.agent.name"] == agent_name
        assert span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.AGENT.value

        assert "traceloop.entity.input" in span.attributes
        assert "traceloop.entity.output" in span.attributes

    def _assert_tool_span(span, tool_name):
        # Attributes every function-tool span is expected to carry.
        assert span.attributes[SpanAttributes.TRACELOOP_SPAN_KIND] == TraceloopSpanKindValues.TOOL.value
        assert span.attributes[f"{GEN_AI_COMPLETION}.tool.name"] == tool_name
        assert span.attributes[f"{GEN_AI_COMPLETION}.tool.type"] == "FunctionTool"

        assert "traceloop.entity.input" in span.attributes
        assert "traceloop.entity.output" in span.attributes

    main_chat_agent, recipe_editor_agent = recipe_workflow_agents
    query = "Can you edit the carbonara recipe to be vegetarian?"

    # Phase 1: stream the main agent and remember the raw item of the last handoff event.
    main_runner = Runner().run_streamed(
        starting_agent=main_chat_agent,
        input=[{"role": "user", "content": query}],
    )
    handoff_info = None
    async for event in main_runner.stream_events():
        if event.type != "run_item_stream_event":
            continue
        if event.name == "handoff_occurred":
            handoff_info = event.item.raw_item

    # Phase 2: when the handoff mentions "recipe", drive the editor agent with the same query.
    if handoff_info and "recipe" in str(handoff_info).lower():
        recipe_runner = Runner().run_streamed(
            starting_agent=recipe_editor_agent,
            input=[{"role": "user", "content": query}],
        )
        async for _ in recipe_runner.stream_events():
            pass

    # Spans whose name ends with "v1/responses" are excluded from every check below.
    non_rest_spans = [
        finished for finished in exporter.get_finished_spans()
        if not finished.name.endswith("v1/responses")
    ]
    span_names = [finished.name for finished in non_rest_spans]

    expected_counts = {
        "Main Chat Agent.agent": 1,
        "Recipe Editor Agent.agent": 3,  # 3 turns is correct behavior
        "search_recipes.tool": 1,
        "plan_and_apply_recipe_modifications.tool": 1,
    }
    for name, expected in expected_counts.items():
        assert span_names.count(name) == expected
    for name in expected_counts:
        assert name in span_names

    def _first_named(name):
        return next(s for s in non_rest_spans if s.name == name)

    main_chat_span = _first_named("Main Chat Agent.agent")
    recipe_editor_spans = [s for s in non_rest_spans if s.name == "Recipe Editor Agent.agent"]
    search_tool_span = _first_named("search_recipes.tool")
    modify_tool_span = _first_named("plan_and_apply_recipe_modifications.tool")

    _assert_agent_span(main_chat_span, "Main Chat Agent")

    # The main agent span records its handoff target as a JSON string attribute.
    assert "openai.agent.handoff0" in main_chat_span.attributes
    handoff_payload = json.loads(main_chat_span.attributes["openai.agent.handoff0"])
    assert handoff_payload["name"] == "Recipe Editor Agent"

    _assert_agent_span(recipe_editor_spans[0], "Recipe Editor Agent")

    _assert_tool_span(search_tool_span, "search_recipes")
    _assert_tool_span(modify_tool_span, "plan_and_apply_recipe_modifications")

    # Parenting: the main agent span is a root; both tool spans have a parent.
    assert main_chat_span.parent is None

    assert search_tool_span.parent is not None
    assert modify_tool_span.parent is not None

    for span in (main_chat_span, *recipe_editor_spans, search_tool_span, modify_tool_span):
        assert span.status.status_code == StatusCode.OK

    # Every agent and tool span must belong to the main agent's trace.
    main_trace_id = main_chat_span.get_span_context().trace_id
    all_trace_ids = {main_trace_id}
    for span in (*recipe_editor_spans, search_tool_span, modify_tool_span):
        span_trace_id = span.get_span_context().trace_id
        assert span_trace_id == main_trace_id
        all_trace_ids.add(span_trace_id)

    assert len(all_trace_ids) == 1
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Loading
Loading