diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py index 217df4cff0..38df9731a7 100644 --- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py @@ -285,6 +285,14 @@ async def _handle_request(span, kwargs, instance): if Config.enable_trace_context_propagation: propagate_trace_context(span, kwargs) + # Reasoning request attributes + reasoning_effort = kwargs.get("reasoning_effort") + _set_span_attribute( + span, + SpanAttributes.LLM_REQUEST_REASONING_EFFORT, + reasoning_effort or () + ) + @dont_throw def _handle_response( @@ -316,6 +324,28 @@ def _handle_response( # span attributes _set_response_attributes(span, response_dict) + # Reasoning usage attributes + usage = response_dict.get("usage") + reasoning_tokens = None + if usage: + # Support both dict-style and object-style `usage` + tokens_details = ( + usage.get("completion_tokens_details") if isinstance(usage, dict) + else getattr(usage, "completion_tokens_details", None) + ) + + if tokens_details: + reasoning_tokens = ( + tokens_details.get("reasoning_tokens", None) if isinstance(tokens_details, dict) + else getattr(tokens_details, "reasoning_tokens", None) + ) + + _set_span_attribute( + span, + SpanAttributes.LLM_USAGE_REASONING_TOKENS, + reasoning_tokens or 0, + ) + if should_emit_events(): if response.choices is not None: for choice in response.choices: diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py index ed3029f005..54ce157c97 100644 --- 
def _openai_version_at_least(minimum):
    """Return True when `_OPENAI_VERSION` is greater than or equal to *minimum*.

    Both strings are parsed with `packaging.version.parse` so that the
    comparison is done on version objects rather than on raw strings.
    """
    recorded = pkg_version.parse(_OPENAI_VERSION)
    required = pkg_version.parse(minimum)
    return recorded >= required


def is_openai_v1():
    """Tell whether the recorded OpenAI library version is 1.0.0 or newer."""
    return _openai_version_at_least("1.0.0")


def is_reasoning_supported():
    """Tell whether the recorded OpenAI library version is 1.58.0 or newer.

    Reasoning was introduced in the OpenAI API on Dec 17, 2024
    (https://platform.openai.com/docs/changelog); the corresponding
    OpenAI library release is 1.58.0 (https://pypi.org/project/openai/).
    """
    return _openai_version_at_least("1.58.0")
SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS, usage.input_tokens_details.cached_tokens, ) - # TODO: add reasoning tokens in output token details + + # Usage - count of reasoning tokens + reasoning_tokens = None + # Support both dict-style and object-style `usage` + tokens_details = ( + usage.get("output_tokens_details") if isinstance(usage, dict) + else getattr(usage, "output_tokens_details", None) + ) + + if tokens_details: + reasoning_tokens = ( + tokens_details.get("reasoning_tokens", None) if isinstance(tokens_details, dict) + else getattr(tokens_details, "reasoning_tokens", None) + ) + + _set_span_attribute( + span, + SpanAttributes.LLM_USAGE_REASONING_TOKENS, + reasoning_tokens or 0, + ) + + # Reasoning attributes + # Request - reasoning summary + _set_span_attribute( + span, + f"{SpanAttributes.LLM_REQUEST_REASONING_SUMMARY}", + traced_response.request_reasoning_summary or (), + ) + # Request - reasoning effort + _set_span_attribute( + span, + f"{SpanAttributes.LLM_REQUEST_REASONING_EFFORT}", + traced_response.request_reasoning_effort or (), + ) + # Response - reasoning effort + _set_span_attribute( + span, + f"{SpanAttributes.LLM_RESPONSE_REASONING_EFFORT}", + traced_response.response_reasoning_effort or (), + ) if should_send_prompts(): prompt_index = 0 @@ -416,6 +460,18 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa "model", existing_data.get("request_model", "") ), response_model=existing_data.get("response_model", ""), + # Reasoning attributes + request_reasoning_summary=( + kwargs.get("reasoning", {}).get( + "summary", existing_data.get("request_reasoning_summary") + ) + ), + request_reasoning_effort=( + kwargs.get("reasoning", {}).get( + "effort", existing_data.get("request_reasoning_effort") + ) + ), + response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"), ) except Exception: traced_data = None @@ -467,6 +523,18 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa 
output_text=existing_data.get("output_text", parsed_response_output_text), request_model=existing_data.get("request_model", kwargs.get("model")), response_model=existing_data.get("response_model", parsed_response.model), + # Reasoning attributes + request_reasoning_summary=( + kwargs.get("reasoning", {}).get( + "summary", existing_data.get("request_reasoning_summary") + ) + ), + request_reasoning_effort=( + kwargs.get("reasoning", {}).get( + "effort", existing_data.get("request_reasoning_effort") + ) + ), + response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"), ) responses[parsed_response.id] = traced_data except Exception: @@ -518,6 +586,18 @@ async def async_responses_get_or_create_wrapper( output_text=kwargs.get("output_text", existing_data.get("output_text")), request_model=kwargs.get("model", existing_data.get("request_model")), response_model=existing_data.get("response_model"), + # Reasoning attributes + request_reasoning_summary=( + kwargs.get("reasoning", {}).get( + "summary", existing_data.get("request_reasoning_summary") + ) + ), + request_reasoning_effort=( + kwargs.get("reasoning", {}).get( + "effort", existing_data.get("request_reasoning_effort") + ) + ), + response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"), ) except Exception: traced_data = None @@ -570,6 +650,18 @@ async def async_responses_get_or_create_wrapper( output_text=existing_data.get("output_text", parsed_response_output_text), request_model=existing_data.get("request_model", kwargs.get("model")), response_model=existing_data.get("response_model", parsed_response.model), + # Reasoning attributes + request_reasoning_summary=( + kwargs.get("reasoning", {}).get( + "summary", existing_data.get("request_reasoning_summary") + ) + ), + request_reasoning_effort=( + kwargs.get("reasoning", {}).get( + "effort", existing_data.get("request_reasoning_effort") + ) + ), + response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"), ) responses[parsed_response.id] = 
traced_data except Exception: diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_azure/test_chat_reasoning.yaml b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_azure/test_chat_reasoning.yaml new file mode 100644 index 0000000000..870ccce410 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_azure/test_chat_reasoning.yaml @@ -0,0 +1,81 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Count r''s in strawberry"}], + "model": "gpt-5-nano", "reasoning_effort": "low"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '120' + content-type: + - application/json + host: + - traceloop-stg.openai.azure.com + user-agent: + - AzureOpenAI/Python 1.99.7 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.99.7 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.5 + method: POST + uri: https://traceloop-stg.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-02-01 + response: + body: + string: 
'{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"3","refusal":null,"role":"assistant"}}],"created":1755601034,"id":"chatcmpl-C6EJeKZdEaC0VeeKH3lWwJBjCTcpd","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":203,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":192,"rejected_prediction_tokens":0},"prompt_tokens":11,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":214}} + + ' + headers: + Content-Length: + - '1204' + Content-Type: + - application/json + Date: + - Tue, 19 Aug 2025 10:57:14 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + apim-request-id: + - aebd8320-f701-4e7d-801f-2955f84e3811 + azureml-model-session: + - d004-20250815200304 + x-accel-buffering: + - 'no' + x-content-type-options: + - nosniff + x-ms-deployment-name: + - gpt-5-nano + x-ms-rai-invoked: + - 'true' + x-ms-region: + - East US 2 + x-ratelimit-limit-requests: + - '100' + x-ratelimit-limit-tokens: + - '100000' + x-ratelimit-remaining-requests: + - '99' + x-ratelimit-remaining-tokens: + - '99994' + x-request-id: + - 7acf5821-70fa-4fab-b202-ba3700578d08 + status: + code: 200 + message: OK +version: 1 diff --git 
a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_reasoning.yaml b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_reasoning.yaml new file mode 100644 index 0000000000..cb1035e2d0 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_reasoning.yaml @@ -0,0 +1,106 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Count r''s in strawberry"}], + "model": "gpt-5-nano", "reasoning_effort": "low"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '120' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.99.7 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.99.7 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA3SSQY/bIBCF7/4VIy7bSvbKceTGzrHtsadVe2pWFguTQIMBwbhtusp/ryBp7FV3 + Lxz45j3mzfBcADAt2RaYUJzE6E316cPnb2P9havuT3t8iA9K0bHfONXiqf7IyqRwTz9Q0D/VvXCj + N0ja2QsWATlhcl1t2rbtN11XZzA6iSbJDp6qtrLcuqqpm7aqu6reXMXKaYGRbeF7AQDwnM/UppX4 + m20hW+WbEWPkB2TbWxEAC86kG8Zj1JG4JVbOUDhLaHPn653d2a8KAwIPCKQCItyFOzBIhCGCtrBj + kQL/9YQhnHYM3nkXdYoZYV1CVwK3Evr398sXAu6nyFNIOxmzANxaRzyrU7bHKznf0uy11VENAXl0 + NnUYyXmW6bkAeMzTmV4EZj640dNA7ojZdrW62LF5JTNsmu5KyRE3C7Duy1f8BonEtYmL+TLBhUI5 + S+dl8ElqtwDFIt3/7bzmfUmu7WGRp2/efGAGQqAnlIMPKLV4GXouC5g+7VtltznnllnE8FMLHEhj + SLuQuOeTufwlFk+RcBz22h4w+KDzh0rrLs7FXwAAAP//AwBrW0kCUgMAAA== + headers: + CF-RAY: + - 9718d3ff7b437f99-MAA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 19 
Aug 2025 10:04:43 GMT + Server: + - cloudflare + Set-Cookie: + - REDACTED + - REDACTED + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - REDACTED + openai-processing-ms: + - '3082' + openai-project: + - REDACTED + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '3130' + x-ratelimit-limit-requests: + - '500' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '499' + x-ratelimit-remaining-tokens: + - '199992' + x-ratelimit-reset-requests: + - 120ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_fb455d524b7f4775956fba99734cc8d9 + status: + code: 200 + message: OK +version: 1 diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_responses/test_responses_reasoning.yaml b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_responses/test_responses_reasoning.yaml new file mode 100644 index 0000000000..08a7a63384 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_responses/test_responses_reasoning.yaml @@ -0,0 +1,107 @@ +interactions: +- request: + body: '{"input": "Count r''s in strawberry", "model": "gpt-5-nano", "reasoning": + {"effort": "low", "summary": null}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '108' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.99.7 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.99.7 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + 
x-stainless-runtime-version: + - 3.13.5 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA4RU246bMBB9z1cgP28qLiEm+ZXVCg0wzrprPMgebzda5d8rDCHQpuoLgnPm5nMG + f++SROhOnBPh0A/1sYLDMZdwlF1aZSfZHCp5PEKep1V1OChMO8hANipPs/xYFEq8jAWo+Ykt34uQ + 9TjhrUNg7GoYuUyW5TEtSllFzjNw8GNOS/1gkLGbkhpoPy6Ogh2nUmA8RhidIyfOiQ3GREDbe2Ld + IYM2fst6dqFlTXaD9/BVU+AhcM30gX+TTGTqFsy2XE8dmnHYy8D7cm/B0j5P83KfVvtUzirEsuKc + vO6SJEm+4/Mhr1/EVVnTjuICYFFJeZIql1JB/lTcWIKvA07ygier7eVB+dD34K5j27eI3V6ete/9 + 5d6/7fJyMrdUqjyo/JSfmkaq4v/9e/QeLrjq/g8XI9mSZbQPRdZjbcreDcEvXrJjAFhLDHcTX982 + pKHL4Kh5wsRC50QUYkFv89sSKByZ2By8157B8hQ8BsYgMYADY9BsV4JdmBZycPipKfj6vvN1VHpZ + mcFRP3DdQvuO9Qde19zDx/OsiEClyMWpDf2aVViZO6buZnOFB4V8rXWHlrXSuPkvPLpP3WLNEy46 + VBDMJKzwTA7Xp2DsB3TAIcLZj3RGo4DzaIpcD4/vlXExbr114hNdQ17zdVqXTodeLHNPQr6Tbifl + A5NYiIePgmmoV+6mCzisZ3TBtnE34im1h8bc75AQt3Q5gLab/z3LXv7GV5fIcszoXfdITDdH/fMa + KXL5jHlWePF/nb2tzsRgVnRRLTIGv3W8R4YOGMYOt93tNwAAAP//AwAoeBci1AUAAA== + headers: + CF-RAY: + - 97195f15cb219379-MAA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 19 Aug 2025 11:39:41 GMT + Server: + - cloudflare + Set-Cookie: + - REDACTED + - REDACTED + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - REDACTED + openai-processing-ms: + - '2646' + openai-project: + - REDACTED + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '2648' + x-ratelimit-limit-requests: + - '500' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '499' + x-ratelimit-remaining-tokens: + - '199772' + x-ratelimit-reset-requests: + - 120ms + x-ratelimit-reset-tokens: + - 68ms + x-request-id: + - req_f663a9d641c5ed48cd037da360350648 + status: + code: 200 + message: OK +version: 1 
@pytest.mark.vcr
@pytest.mark.skipif(
    not is_reasoning_supported(),
    reason="Reasoning is not supported in older OpenAI library versions",
)
def test_chat_reasoning(
    instrument_legacy, span_exporter, log_exporter, azure_openai_client
):
    """Check reasoning span attributes for an Azure chat completion.

    Sends one chat completion with ``reasoning_effort="low"`` and verifies
    that the last finished span carries the requested effort and a positive
    reasoning-token count. The test is skipped when
    ``is_reasoning_supported()`` is false.
    """
    # NOTE(review): `log_exporter` is requested as a fixture but never read
    # in the body; it is kept so the test's fixture set stays unchanged.
    user_message = {
        "role": "user",
        "content": "Count r's in strawberry",
    }
    azure_openai_client.chat.completions.create(
        model="gpt-5-nano",
        messages=[user_message],
        reasoning_effort="low",
    )

    finished = span_exporter.get_finished_spans()
    assert len(finished) >= 1

    # Only the most recently finished span is inspected.
    attributes = finished[-1].attributes
    assert attributes["gen_ai.request.reasoning_effort"] == "low"
    assert attributes["gen_ai.usage.reasoning_tokens"] > 0
# --- tests/traces/test_chat.py ---


@pytest.mark.vcr
@pytest.mark.skipif(
    not is_reasoning_supported(),
    reason="Reasoning is not supported in older OpenAI library versions",
)
def test_chat_reasoning(
    instrument_legacy, span_exporter, log_exporter, openai_client
):
    """Check reasoning span attributes for a chat completion.

    Sends one chat completion with ``reasoning_effort="low"`` and verifies
    that the last finished span carries the requested effort and a positive
    reasoning-token count. The test is skipped when
    ``is_reasoning_supported()`` is false.
    """
    # NOTE(review): `log_exporter` is requested as a fixture but never read
    # in the body; it is kept so the test's fixture set stays unchanged.
    user_message = {
        "role": "user",
        "content": "Count r's in strawberry",
    }
    openai_client.chat.completions.create(
        model="gpt-5-nano",
        messages=[user_message],
        reasoning_effort="low",
    )

    finished = span_exporter.get_finished_spans()
    assert len(finished) >= 1

    # Only the most recently finished span is inspected.
    attributes = finished[-1].attributes
    assert attributes["gen_ai.request.reasoning_effort"] == "low"
    assert attributes["gen_ai.usage.reasoning_tokens"] > 0


# --- tests/traces/test_responses.py ---


@pytest.mark.vcr
@pytest.mark.skipif(
    not is_reasoning_supported(),
    reason="Reasoning is not supported in older OpenAI library versions",
)
def test_responses_reasoning(
    instrument_legacy,
    span_exporter: InMemorySpanExporter,
    openai_client: OpenAI,
):
    """Check reasoning span attributes for a Responses API call.

    Sends one ``responses.create`` request with effort ``"low"`` and a
    ``None`` summary, then verifies the single finished span. The test is
    skipped when ``is_reasoning_supported()`` is false.
    """
    reasoning_options = {"effort": "low", "summary": None}
    openai_client.responses.create(
        model="gpt-5-nano",
        input="Count r's in strawberry",
        reasoning=reasoning_options,
    )

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1
    attributes = finished[0].attributes

    # Request side: the effort is echoed back; the summary was sent as None
    # and is expected to be recorded as an empty tuple.
    assert attributes["gen_ai.request.reasoning_effort"] == "low"
    assert attributes["gen_ai.request.reasoning_summary"] == ()

    # Response side: effort plus the completion's reasoning summary.
    assert attributes["gen_ai.response.reasoning_effort"] == "low"
    assert attributes["gen_ai.completion.0.reasoning"] == ()  # reasoning summary

    assert attributes["gen_ai.usage.reasoning_tokens"] > 0