Skip to content
Merged
11 changes: 3 additions & 8 deletions python/packages/core/agent_framework/_skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,9 +651,7 @@ def _validate_compatibility(compatibility: str | None) -> None:
ValueError: If the value exceeds the maximum allowed length.
"""
if compatibility is not None and len(compatibility) > MAX_COMPATIBILITY_LENGTH:
raise ValueError(
f"Skill compatibility must be {MAX_COMPATIBILITY_LENGTH} characters or fewer."
)
raise ValueError(f"Skill compatibility must be {MAX_COMPATIBILITY_LENGTH} characters or fewer.")


def _build_skill_content(
Expand Down Expand Up @@ -733,6 +731,7 @@ class InlineSkill(Skill):
instructions="Use this skill for DB tasks.",
)


@skill.resource
def get_schema() -> str:
return "CREATE TABLE ..."
Expand Down Expand Up @@ -2613,11 +2612,7 @@ def _validate_and_normalize_directory_names(

# Reject absolute paths (check both POSIX and Windows-style roots
# so validation is consistent regardless of the host OS)
if (
os.path.isabs(directory)
or normalized.startswith("/")
or re.match(r"^[A-Za-z]:[/\\]", directory)
):
if os.path.isabs(directory) or normalized.startswith("/") or re.match(r"^[A-Za-z]:[/\\]", directory):
logger.warning(
"Skipping directory '%s': absolute paths are not allowed.",
directory,
Expand Down
14 changes: 14 additions & 0 deletions python/packages/core/agent_framework/observability.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@
INNER_RESPONSE_ID_CAPTURED_FIELD: Final[str] = "response_id"
INNER_USAGE_CAPTURED_FIELD: Final[str] = "usage"

# Response header set by Azure OpenAI naming the model that actually served the
# request (which can differ from the deployment alias the caller sent). Chat
# clients may surface this on ``ChatResponse.additional_properties`` so the
# telemetry layer can promote it to ``gen_ai.response.model``.
AZURE_OPENAI_SERVED_MODEL_HEADER: Final[str] = "x-ms-served-model"

# Tracks accumulated token usage from all inner chat completion spans within an agent invoke.
INNER_ACCUMULATED_USAGE: Final[contextvars.ContextVar[UsageDetails | None]] = contextvars.ContextVar(
"inner_accumulated_usage", default=None
Expand Down Expand Up @@ -2125,6 +2131,14 @@ def _get_response_attributes(
attributes[OtelAttr.FINISH_REASONS] = json.dumps([finish_reason])
if model := getattr(response, "model", None):
Comment thread
eavanvalkenburg marked this conversation as resolved.
attributes[OtelAttr.RESPONSE_MODEL] = model
# If the underlying provider reports the actually served model via the
# ``x-ms-served-model`` response header (Azure OpenAI), prefer it over the
# model reported on the response body for the response model attribute.
additional_properties = getattr(response, "additional_properties", None)
if isinstance(additional_properties, Mapping):
candidate = cast("Mapping[str, Any]", additional_properties).get(AZURE_OPENAI_SERVED_MODEL_HEADER)
if isinstance(candidate, str) and candidate:
attributes[OtelAttr.RESPONSE_MODEL] = candidate
if capture_usage and (usage := response.usage_details):
input_tokens = usage.get("input_token_count")
if input_tokens:
Expand Down
9 changes: 7 additions & 2 deletions python/packages/core/tests/core/test_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -2567,10 +2567,15 @@ def search_hotels(city: str) -> str:
responses_second.incomplete = None
responses_second.output = [responses_text_item]

def _as_raw(resp: MagicMock) -> MagicMock:
    """Let ``resp`` stand in for a raw-response wrapper.

    After this call ``resp.parse()`` returns ``resp`` itself and
    ``resp.headers`` is an empty dict. The same object is returned so the
    helper can be used inline when building ``side_effect`` lists.
    """
    parse_stub = MagicMock(return_value=resp)
    resp.parse = parse_stub
    resp.headers = {}
    return resp

with patch.object(
responses_client.client.responses,
responses_client.client.responses.with_raw_response,
"create",
side_effect=[responses_first, responses_second],
side_effect=[_as_raw(responses_first), _as_raw(responses_second)],
) as mock_responses_create:
responses_result = await responses_agent.run("Find me a hotel in Paris", session=session)

Expand Down
4 changes: 1 addition & 3 deletions python/packages/core/tests/core/test_mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -4227,9 +4227,7 @@ async def connect(self):
self.session.call_tool = AsyncMock(
return_value=types.CallToolResult(content=[types.TextContent(type="text", text="result")])
)
self.session.list_prompts = AsyncMock(
return_value=types.ListPromptsResult(prompts=[])
)
self.session.list_prompts = AsyncMock(return_value=types.ListPromptsResult(prompts=[]))

def get_mcp_client(self) -> _AsyncGeneratorContextManager[Any, None]:
return None
Expand Down
94 changes: 94 additions & 0 deletions python/packages/core/tests/core/test_observability.py
Original file line number Diff line number Diff line change
Expand Up @@ -1739,6 +1739,78 @@ def test_get_response_attributes_capture_response_id_false():
assert OtelAttr.RESPONSE_ID not in result


def test_get_response_attributes_served_model_overrides_response_model():
    """The Azure ``x-ms-served-model`` value should replace the body-reported model in RESPONSE_MODEL."""
    from unittest.mock import Mock

    from agent_framework import observability as obs

    deployment_alias = "my-deployment-alias"
    served_model = "gpt-4o-2024-08-06"

    # Configure every attribute the helper reads up front so the mock does not
    # hand back auto-created child mocks for them.
    response = Mock(
        response_id=None,
        finish_reason=None,
        raw_representation=None,
        usage_details=None,
        model="gpt-4",
        additional_properties={obs.AZURE_OPENAI_SERVED_MODEL_HEADER: served_model},
    )

    result = obs._get_response_attributes({obs.OtelAttr.REQUEST_MODEL: deployment_alias}, response)

    # The request model stays as the caller supplied it; only the response
    # model is taken from the served-model header.
    assert result[obs.OtelAttr.REQUEST_MODEL] == deployment_alias
    assert result[obs.OtelAttr.RESPONSE_MODEL] == served_model


def test_get_response_attributes_no_served_model_keeps_response_model():
    """With no served-model entry present, RESPONSE_MODEL should be the model reported on the response."""
    from unittest.mock import Mock

    from agent_framework import observability as obs

    deployment_alias = "my-deployment-alias"
    body_model = "gpt-4"

    # Empty additional_properties: there is no served-model header to promote.
    response = Mock(
        response_id=None,
        finish_reason=None,
        raw_representation=None,
        usage_details=None,
        model=body_model,
        additional_properties={},
    )

    result = obs._get_response_attributes({obs.OtelAttr.REQUEST_MODEL: deployment_alias}, response)

    assert result[obs.OtelAttr.REQUEST_MODEL] == deployment_alias
    assert result[obs.OtelAttr.RESPONSE_MODEL] == body_model


def test_get_response_attributes_ignores_non_string_served_model():
    """Empty or non-string served-model values must not override RESPONSE_MODEL.

    The override is only applied when the ``x-ms-served-model`` entry is a
    non-empty ``str``. Every rejected value below exercises one way of failing
    that check: the empty string, ``None``, an int, bytes and a list.
    """
    from unittest.mock import Mock

    from agent_framework.observability import (
        AZURE_OPENAI_SERVED_MODEL_HEADER,
        OtelAttr,
        _get_response_attributes,
    )

    rejected_values = ("", None, 42, b"gpt-4o-2024-08-06", ["gpt-4o-2024-08-06"])

    for rejected in rejected_values:
        response = Mock()
        response.response_id = None
        response.finish_reason = None
        response.raw_representation = None
        response.usage_details = None
        response.model = "gpt-4"
        response.additional_properties = {AZURE_OPENAI_SERVED_MODEL_HEADER: rejected}

        # Build a fresh attribute dict per case so one iteration cannot leak
        # state into the next.
        attrs = {OtelAttr.REQUEST_MODEL: "my-deployment-alias"}
        result = _get_response_attributes(attrs, response)

        assert result[OtelAttr.REQUEST_MODEL] == "my-deployment-alias"
        assert result[OtelAttr.RESPONSE_MODEL] == "gpt-4", (
            f"served-model value {rejected!r} must not override RESPONSE_MODEL"
        )


# region Test _get_exporters_from_env


Expand Down Expand Up @@ -2481,6 +2553,28 @@ def test_capture_response(span_exporter: InMemorySpanExporter):
assert spans[0].attributes.get(OtelAttr.OUTPUT_TOKENS) == 50


def test_capture_response_does_not_update_span_name_with_request_model(span_exporter: InMemorySpanExporter):
    """The span name given at creation must survive ``_capture_response``, even with REQUEST_MODEL present."""
    from agent_framework.observability import OtelAttr, _capture_response, get_tracer

    original_span_name = "chat my-deployment-alias"
    served_model = "gpt-4o-2024-08-06"

    # Drop spans recorded by earlier tests so the count below is exact.
    span_exporter.clear()
    tracer = get_tracer()

    with tracer.start_as_current_span(original_span_name) as span:
        _capture_response(
            span=span,
            attributes={
                OtelAttr.OPERATION: "chat",
                OtelAttr.REQUEST_MODEL: "my-deployment-alias",
                OtelAttr.RESPONSE_MODEL: served_model,
            },
        )

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1
    recorded = finished[0]
    assert recorded.name == original_span_name
    assert recorded.attributes.get(OtelAttr.RESPONSE_MODEL) == served_model


async def test_layer_ordering_span_sequence_with_function_calling(span_exporter: InMemorySpanExporter):
"""Test that with correct layer ordering, spans appear in the expected sequence.

Expand Down
26 changes: 21 additions & 5 deletions python/packages/foundry/tests/foundry/test_foundry_chat_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,28 @@ async def wrapper(*args: Any, **kwargs: Any) -> Any:
return decorator


def _as_raw(mock_response: MagicMock) -> MagicMock:
    """Dress ``mock_response`` up as an OpenAI ``with_raw_response`` wrapper.

    The chat client calls ``responses.with_raw_response.{create,parse}``,
    unwraps the payload with ``.parse()`` on what comes back, and reads
    ``.headers`` to surface the Azure ``x-ms-served-model`` header. This helper
    gives the mock both: ``parse()`` hands back the mock itself and ``headers``
    is an empty dict.

    Returns:
        The same ``mock_response`` object, mutated in place.
    """
    # No served-model header by default; tests that need one can set it after.
    mock_response.headers = {}
    unwrap = MagicMock(return_value=mock_response)
    mock_response.parse = unwrap
    return mock_response


def _make_mock_openai_client() -> MagicMock:
client = MagicMock()
client.default_headers = {}
client.responses = MagicMock()
client.responses.create = AsyncMock()
client.responses.parse = AsyncMock()
client.responses.with_raw_response = MagicMock()
client.responses.with_raw_response.create = AsyncMock()
client.responses.with_raw_response.parse = AsyncMock()
client.responses.with_raw_response.retrieve = AsyncMock()
client.files = MagicMock()
client.files.create = AsyncMock()
client.files.delete = AsyncMock()
Expand Down Expand Up @@ -470,7 +486,7 @@ async def test_content_filter_exception() -> None:
body={"error": {"code": "content_filter", "message": "Content filter error"}},
)
mock_error.code = "content_filter"
client.client.responses.create.side_effect = mock_error
client.client.responses.with_raw_response.create.side_effect = mock_error

with pytest.raises(OpenAIContentFilterException) as exc_info:
await client.get_response(messages=[Message(role="user", contents=["Test message"])])
Expand All @@ -494,7 +510,7 @@ async def test_response_format_parse_path() -> None:
mock_parsed_response.usage = None
mock_parsed_response.finish_reason = None
mock_parsed_response.conversation = None
client.client.responses.parse = AsyncMock(return_value=mock_parsed_response)
client.client.responses.with_raw_response.parse = AsyncMock(return_value=_as_raw(mock_parsed_response))

response = await client.get_response(
messages=[Message(role="user", contents=["Test message"])],
Expand Down Expand Up @@ -522,7 +538,7 @@ async def test_response_format_parse_path_with_conversation_id() -> None:
mock_parsed_response.finish_reason = None
mock_parsed_response.conversation = MagicMock()
mock_parsed_response.conversation.id = "conversation_456"
client.client.responses.parse = AsyncMock(return_value=mock_parsed_response)
client.client.responses.with_raw_response.parse = AsyncMock(return_value=_as_raw(mock_parsed_response))

response = await client.get_response(
messages=[Message(role="user", contents=["Test message"])],
Expand Down Expand Up @@ -562,7 +578,7 @@ async def test_response_format_dict_parse_path() -> None:
mock_message_item.type = "message"
mock_message_item.content = [mock_message_content]
mock_response.output = [mock_message_item]
client.client.responses.create = AsyncMock(return_value=mock_response)
client.client.responses.with_raw_response.create = AsyncMock(return_value=_as_raw(mock_response))

response = await client.get_response(
messages=[Message(role="user", contents=["Test message"])],
Expand All @@ -587,7 +603,7 @@ async def test_bad_request_error_non_content_filter() -> None:
body={"error": {"code": "invalid_request", "message": "Invalid request"}},
)
mock_error.code = "invalid_request"
client.client.responses.parse = AsyncMock(side_effect=mock_error)
client.client.responses.with_raw_response.parse = AsyncMock(side_effect=mock_error)

with pytest.raises(ChatClientException) as exc_info:
await client.get_response(
Expand Down
Loading
Loading