From 9a1912ca96b8a90c2a598c6065ee4352f258fdb0 Mon Sep 17 00:00:00 2001 From: prane-eth Date: Wed, 20 Aug 2025 15:30:38 +0530 Subject: [PATCH 1/2] Added OpenAI reasoning attributes --- .../openai/shared/chat_wrappers.py | 31 +++++ .../instrumentation/openai/utils.py | 10 +- .../openai/v1/responses_wrappers.py | 71 ++++++++++- .../test_azure/test_chat_reasoning.yaml | 81 +++++++++++++ .../test_chat/test_chat_reasoning.yaml | 109 +++++++++++++++++ .../test_responses_reasoning.yaml | 110 ++++++++++++++++++ .../tests/traces/test_azure.py | 25 ++++ .../tests/traces/test_chat.py | 25 ++++ .../tests/traces/test_responses.py | 27 +++++ .../opentelemetry/semconv_ai/__init__.py | 4 + .../opentelemetry/semconv_ai/version.py | 2 +- .../pyproject.toml | 2 +- 12 files changed, 493 insertions(+), 4 deletions(-) create mode 100644 packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_azure/test_chat_reasoning.yaml create mode 100644 packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_reasoning.yaml create mode 100644 packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_responses/test_responses_reasoning.yaml diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py index 217df4cff0..22c0ec7232 100644 --- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py @@ -285,6 +285,14 @@ async def _handle_request(span, kwargs, instance): if Config.enable_trace_context_propagation: propagate_trace_context(span, kwargs) + # Reasoning request attributes + reasoning_effort = kwargs.get("reasoning_effort") + _set_span_attribute( + span, + SpanAttributes.LLM_REQUEST_REASONING_EFFORT, + reasoning_effort or () + ) + @dont_throw def _handle_response( @@ -316,6 +324,29 @@ def _handle_response( # span attributes _set_response_attributes(span, response_dict) + # Reasoning usage attributes + usage = response_dict.get("usage") + reasoning_tokens = None + if usage: + # Try dict-style access first (common when response_dict is a dict) + try: + tokens_details = usage.get("completion_tokens_details") + except Exception: + # Fallback to attribute access for object-like usage + tokens_details = getattr(usage, "completion_tokens_details", None) + + if tokens_details: + if isinstance(tokens_details, dict): + reasoning_tokens = tokens_details.get("reasoning_tokens") + else: + reasoning_tokens = getattr(tokens_details, "reasoning_tokens", None) + + _set_span_attribute( + span, + SpanAttributes.LLM_USAGE_REASONING_TOKENS, + reasoning_tokens or 0, + ) + if should_emit_events(): if response.choices is not None: for choice in response.choices: diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py index ed3029f005..a9b83f35c2 100644 --- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py +++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py @@ -5,6 +5,7 @@ import traceback from contextlib import asynccontextmanager from importlib.metadata import version +from packaging import version as pkg_version from opentelemetry import context as context_api from opentelemetry._events import EventLogger @@ -18,7 +19,14 @@ def is_openai_v1(): - return _OPENAI_VERSION >= "1.0.0" + return pkg_version.parse(_OPENAI_VERSION) >= pkg_version.parse("1.0.0") + +def is_reasoning_supported(): + # Reasoning has been introduced in OpenAI API on Dec 17, 2024 + # as per https://platform.openai.com/docs/changelog. + # The updated OpenAI library version is 1.58.0 + # as per https://pypi.org/project/openai/. + return pkg_version.parse(_OPENAI_VERSION) >= pkg_version.parse("1.58.0") def is_azure_openai(instance): diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py index 7e0a0c4cd3..a0ca5cdf74 100644 --- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py @@ -132,6 +132,11 @@ class TracedData(pydantic.BaseModel): request_model: Optional[str] = pydantic.Field(default=None) response_model: Optional[str] = pydantic.Field(default=None) + # Reasoning attributes + request_reasoning_summary: Optional[str] = pydantic.Field(default=None) + request_reasoning_effort: Optional[str] = pydantic.Field(default=None) + response_reasoning_effort: Optional[str] = pydantic.Field(default=None) + responses: dict[str, TracedData] = {} @@ -197,7 +202,47 @@ def set_data_attributes(traced_response: TracedData, span: Span): SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS, usage.input_tokens_details.cached_tokens, ) - # TODO: add reasoning tokens in output token details + + # Usage - count of reasoning tokens + reasoning_tokens = None + # Try dict-style access first (common when response_dict is a dict) + try: + tokens_details = usage.get("output_tokens_details") + except Exception as e: + # Fallback to attribute access for object-like usage + tokens_details = getattr(usage, "output_tokens_details", None) + + if tokens_details: + if isinstance(tokens_details, dict): + reasoning_tokens = tokens_details.get("reasoning_tokens", None) + else: + reasoning_tokens = getattr(tokens_details, "reasoning_tokens", None) + + _set_span_attribute( + span, + SpanAttributes.LLM_USAGE_REASONING_TOKENS, + reasoning_tokens or 0, + ) + + # Reasoning attributes + # Request - reasoning summary + _set_span_attribute( + span, + f"{SpanAttributes.LLM_REQUEST_REASONING_SUMMARY}", + traced_response.request_reasoning_summary or (), + ) + # Request - reasoning effort + _set_span_attribute( + span, + f"{SpanAttributes.LLM_REQUEST_REASONING_EFFORT}", + traced_response.request_reasoning_effort or (), + ) + # Response - reasoning effort + _set_span_attribute( + span, + f"{SpanAttributes.LLM_RESPONSE_REASONING_EFFORT}", + traced_response.response_reasoning_effort or (), + ) if should_send_prompts(): prompt_index = 0 @@ -416,6 +461,12 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa "model", existing_data.get("request_model", "") ), response_model=existing_data.get("response_model", ""), + # Reasoning attributes + request_reasoning_summary=kwargs.get("reasoning", {})\ + .get("summary", existing_data.get("request_reasoning_summary")), + request_reasoning_effort=kwargs.get("reasoning", {})\ + .get("effort", existing_data.get("request_reasoning_effort")), + response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"), ) except Exception: traced_data = None @@ -467,6 +518,12 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa output_text=existing_data.get("output_text", parsed_response_output_text), request_model=existing_data.get("request_model", kwargs.get("model")), response_model=existing_data.get("response_model", parsed_response.model), + # Reasoning attributes + request_reasoning_summary=kwargs.get("reasoning", {})\ + .get("summary", existing_data.get("request_reasoning_summary")), + request_reasoning_effort=kwargs.get("reasoning", {})\ + .get("effort", existing_data.get("request_reasoning_effort")), + response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"), ) responses[parsed_response.id] = traced_data except Exception: @@ -518,6 +575,12 @@ async def async_responses_get_or_create_wrapper( output_text=kwargs.get("output_text", existing_data.get("output_text")), request_model=kwargs.get("model", existing_data.get("request_model")), response_model=existing_data.get("response_model"), + # Reasoning attributes + request_reasoning_summary=kwargs.get("reasoning", {})\ + .get("summary", existing_data.get("request_reasoning_summary")), + request_reasoning_effort=kwargs.get("reasoning", {})\ + .get("effort", existing_data.get("request_reasoning_effort")), + response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"), ) except Exception: traced_data = None @@ -570,6 +633,12 @@ async def async_responses_get_or_create_wrapper( output_text=existing_data.get("output_text", parsed_response_output_text), request_model=existing_data.get("request_model", kwargs.get("model")), response_model=existing_data.get("response_model", parsed_response.model), + # Reasoning attributes + request_reasoning_summary=kwargs.get("reasoning", {})\ + .get("summary", existing_data.get("request_reasoning_summary")), + request_reasoning_effort=kwargs.get("reasoning", {})\ + .get("effort", existing_data.get("request_reasoning_effort")), + response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"), ) responses[parsed_response.id] = traced_data except Exception: diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_azure/test_chat_reasoning.yaml b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_azure/test_chat_reasoning.yaml new file mode 100644 index 0000000000..870ccce410 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_azure/test_chat_reasoning.yaml @@ -0,0 +1,81 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Count r''s in strawberry"}], + "model": "gpt-5-nano", "reasoning_effort": "low"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '120' + content-type: + - application/json + host: + - traceloop-stg.openai.azure.com + user-agent: + - AzureOpenAI/Python 1.99.7 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.99.7 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.5 + method: POST + uri: https://traceloop-stg.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-02-01 + response: + body: + string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"3","refusal":null,"role":"assistant"}}],"created":1755601034,"id":"chatcmpl-C6EJeKZdEaC0VeeKH3lWwJBjCTcpd","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":203,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":192,"rejected_prediction_tokens":0},"prompt_tokens":11,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":214}} + + ' + headers: + Content-Length: + - '1204' + Content-Type: + - application/json + Date: + - Tue, 19 Aug 2025 10:57:14 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + apim-request-id: + - aebd8320-f701-4e7d-801f-2955f84e3811 + azureml-model-session: + - d004-20250815200304 + x-accel-buffering: + - 'no' + x-content-type-options: + - nosniff + x-ms-deployment-name: + - gpt-5-nano + x-ms-rai-invoked: + - 'true' + x-ms-region: + - East US 2 + x-ratelimit-limit-requests: + - '100' + x-ratelimit-limit-tokens: + - '100000' + x-ratelimit-remaining-requests: + - '99' + x-ratelimit-remaining-tokens: + - '99994' + x-request-id: + - 7acf5821-70fa-4fab-b202-ba3700578d08 + status: + code: 200 + message: OK +version: 1 diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_reasoning.yaml b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_reasoning.yaml new file mode 100644 index 0000000000..1095882205 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_reasoning.yaml @@ -0,0 +1,109 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Count r''s in strawberry"}], + "model": "gpt-5-nano", "reasoning_effort": "low"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '120' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.99.7 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.99.7 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA3SSQY/bIBCF7/4VIy7bSvbKceTGzrHtsadVe2pWFguTQIMBwbhtusp/ryBp7FV3 + Lxz45j3mzfBcADAt2RaYUJzE6E316cPnb2P9havuT3t8iA9K0bHfONXiqf7IyqRwTz9Q0D/VvXCj + N0ja2QsWATlhcl1t2rbtN11XZzA6iSbJDp6qtrLcuqqpm7aqu6reXMXKaYGRbeF7AQDwnM/UppX4 + m20hW+WbEWPkB2TbWxEAC86kG8Zj1JG4JVbOUDhLaHPn653d2a8KAwIPCKQCItyFOzBIhCGCtrBj + kQL/9YQhnHYM3nkXdYoZYV1CVwK3Evr398sXAu6nyFNIOxmzANxaRzyrU7bHKznf0uy11VENAXl0 + NnUYyXmW6bkAeMzTmV4EZj640dNA7ojZdrW62LF5JTNsmu5KyRE3C7Duy1f8BonEtYmL+TLBhUI5 + S+dl8ElqtwDFIt3/7bzmfUmu7WGRp2/efGAGQqAnlIMPKLV4GXouC5g+7VtltznnllnE8FMLHEhj + SLuQuOeTufwlFk+RcBz22h4w+KDzh0rrLs7FXwAAAP//AwBrW0kCUgMAAA== + headers: + CF-RAY: + - 9718d3ff7b437f99-MAA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 19 Aug 2025 10:04:43 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=jX.KUMPkmrNujZSfeuVtMejn4n2PXQaJ3UfUay23P24-1755597883-1.0.1.1-9j.IZbYn18B5_zSzmKk4UiJjJCLcrFYA6ZLrF0PNLj7dxXj04b.6C_PI8gqNZTf9tUIjFc2GT4POtucEG1zuFwrpazBNp8NcBNvFuohWo.U; + path=/; expires=Tue, 19-Aug-25 10:34:43 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=AU4qexISo77wiSZaphwvH1qVqT4f4U0O.KzH8aCZHjA-1755597883835-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - user-mktczbuqo14ok5zq3zvvus0l + openai-processing-ms: + - '3082' + openai-project: + - proj_HqO8HnKp7rJsjrDN6n3Y0TPc + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '3130' + x-ratelimit-limit-requests: + - '500' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '499' + x-ratelimit-remaining-tokens: + - '199992' + x-ratelimit-reset-requests: + - 120ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_fb455d524b7f4775956fba99734cc8d9 + status: + code: 200 + message: OK +version: 1 diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_responses/test_responses_reasoning.yaml b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_responses/test_responses_reasoning.yaml new file mode 100644 index 0000000000..5ae52bacf3 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_responses/test_responses_reasoning.yaml @@ -0,0 +1,110 @@ +interactions: +- request: + body: '{"input": "Count r''s in strawberry", "model": "gpt-5-nano", "reasoning": + {"effort": "low", "summary": null}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '108' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.99.7 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.99.7 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.5 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA4RU246bMBB9z1cgP28qLiEm+ZXVCg0wzrprPMgebzda5d8rDCHQpuoLgnPm5nMG + f++SROhOnBPh0A/1sYLDMZdwlF1aZSfZHCp5PEKep1V1OChMO8hANipPs/xYFEq8jAWo+Ykt34uQ + 9TjhrUNg7GoYuUyW5TEtSllFzjNw8GNOS/1gkLGbkhpoPy6Ogh2nUmA8RhidIyfOiQ3GREDbe2Ld + IYM2fst6dqFlTXaD9/BVU+AhcM30gX+TTGTqFsy2XE8dmnHYy8D7cm/B0j5P83KfVvtUzirEsuKc + vO6SJEm+4/Mhr1/EVVnTjuICYFFJeZIql1JB/lTcWIKvA07ygier7eVB+dD34K5j27eI3V6ete/9 + 5d6/7fJyMrdUqjyo/JSfmkaq4v/9e/QeLrjq/g8XI9mSZbQPRdZjbcreDcEvXrJjAFhLDHcTX982 + pKHL4Kh5wsRC50QUYkFv89sSKByZ2By8157B8hQ8BsYgMYADY9BsV4JdmBZycPipKfj6vvN1VHpZ + mcFRP3DdQvuO9Qde19zDx/OsiEClyMWpDf2aVViZO6buZnOFB4V8rXWHlrXSuPkvPLpP3WLNEy46 + VBDMJKzwTA7Xp2DsB3TAIcLZj3RGo4DzaIpcD4/vlXExbr114hNdQ17zdVqXTodeLHNPQr6Tbifl + A5NYiIePgmmoV+6mCzisZ3TBtnE34im1h8bc75AQt3Q5gLab/z3LXv7GV5fIcszoXfdITDdH/fMa + KXL5jHlWePF/nb2tzsRgVnRRLTIGv3W8R4YOGMYOt93tNwAAAP//AwAoeBci1AUAAA== + headers: + CF-RAY: + - 97195f15cb219379-MAA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 19 Aug 2025 11:39:41 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=FO78f1ufF1EPp.whHPQCmvOgkOm9TdL.jyNRfcB6MDU-1755603581-1.0.1.1-ZpyFa2.gOYg_vbQ57kZR1_DapBCvV91MUGUIQytOk3F5oC6asW5NdB8gOEhtIdu.R8MmJ3nDaPKckOm3CxjO90Uo6B_5eB0Lck9zTKbWumk; + path=/; expires=Tue, 19-Aug-25 12:09:41 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=avG9i6J4upsjvaHvdMI1ayE3_sGGFBLR2j0SRzjQAqo-1755603581185-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - user-mktczbuqo14ok5zq3zvvus0l + openai-processing-ms: + - '2646' + openai-project: + - proj_HqO8HnKp7rJsjrDN6n3Y0TPc + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '2648' + x-ratelimit-limit-requests: + - '500' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '499' + x-ratelimit-remaining-tokens: + - '199772' + x-ratelimit-reset-requests: + - 120ms + x-ratelimit-reset-tokens: + - 68ms + x-request-id: + - req_f663a9d641c5ed48cd037da360350648 + status: + code: 200 + message: OK +version: 1 diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/test_azure.py b/packages/opentelemetry-instrumentation-openai/tests/traces/test_azure.py index 307bfb9c49..83cef88acd 100644 --- a/packages/opentelemetry-instrumentation-openai/tests/traces/test_azure.py +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/test_azure.py @@ -1,6 +1,7 @@ import json import pytest +from opentelemetry.instrumentation.openai.utils import is_reasoning_supported from opentelemetry.sdk._logs import LogData from opentelemetry.semconv._incubating.attributes import ( event_attributes as EventAttributes, @@ -763,6 +764,30 @@ async def test_chat_async_streaming_with_events_with_no_content( assert_message_in_logs(logs[1], "gen_ai.choice", choice_event) +@pytest.mark.vcr +@pytest.mark.skipif(not is_reasoning_supported(), + reason="Reasoning is not supported in older OpenAI library versions") +def test_chat_reasoning(instrument_legacy, span_exporter, + log_exporter, azure_openai_client): + azure_openai_client.chat.completions.create( + model="gpt-5-nano", + messages=[ + { + "role": "user", + "content": "Count r's in strawberry" + } + ], + reasoning_effort="low", + ) + + spans = span_exporter.get_finished_spans() + assert len(spans) >= 1 + span = spans[-1] + + assert span.attributes["gen_ai.request.reasoning_effort"] == "low" + assert span.attributes["gen_ai.usage.reasoning_tokens"] > 0 + + def assert_message_in_logs(log: LogData, event_name: str, expected_content: dict): assert log.log_record.attributes.get(EventAttributes.EVENT_NAME) == event_name assert ( diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/test_chat.py b/packages/opentelemetry-instrumentation-openai/tests/traces/test_chat.py index 5204143a21..381847ff34 100644 --- a/packages/opentelemetry-instrumentation-openai/tests/traces/test_chat.py +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/test_chat.py @@ -15,6 +15,7 @@ ) from opentelemetry.semconv_ai import SpanAttributes from opentelemetry.trace import StatusCode +from opentelemetry.instrumentation.openai.utils import is_reasoning_supported from .utils import assert_request_contains_tracecontext, spy_decorator @@ -1494,6 +1495,30 @@ def test_chat_history_message_pydantic(span_exporter, openai_client): assert second_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.2.role"] == "user" +@pytest.mark.vcr +@pytest.mark.skipif(not is_reasoning_supported(), + reason="Reasoning is not supported in older OpenAI library versions") +def test_chat_reasoning(instrument_legacy, span_exporter, + log_exporter, openai_client): + openai_client.chat.completions.create( + model="gpt-5-nano", + messages=[ + { + "role": "user", + "content": "Count r's in strawberry" + } + ], + reasoning_effort="low", + ) + + spans = span_exporter.get_finished_spans() + assert len(spans) >= 1 + span = spans[-1] + + assert span.attributes["gen_ai.request.reasoning_effort"] == "low" + assert span.attributes["gen_ai.usage.reasoning_tokens"] > 0 + + def test_chat_exception(instrument_legacy, span_exporter, openai_client): openai_client.api_key = "invalid" with pytest.raises(Exception): diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/test_responses.py b/packages/opentelemetry-instrumentation-openai/tests/traces/test_responses.py index 83f82fe0eb..a55a9b534f 100644 --- a/packages/opentelemetry-instrumentation-openai/tests/traces/test_responses.py +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/test_responses.py @@ -3,6 +3,7 @@ import pytest from openai import OpenAI +from opentelemetry.instrumentation.openai.utils import is_reasoning_supported from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter @@ -149,3 +150,29 @@ def test_responses_tool_calls(instrument_legacy, span_exporter: InMemorySpanExpo span.attributes["gen_ai.response.id"] == "resp_685ff8928dc4819aac45e085ba66838101c537ddeff5c2a2" ) + + +@pytest.mark.vcr +@pytest.mark.skipif(not is_reasoning_supported(), + reason="Reasoning is not supported in older OpenAI library versions") +def test_responses_reasoning(instrument_legacy, span_exporter: InMemorySpanExporter, + openai_client: OpenAI): + openai_client.responses.create( + model="gpt-5-nano", + input="Count r's in strawberry", + reasoning={ + "effort": "low", "summary": None + }, + ) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + span = spans[0] + + assert span.attributes["gen_ai.request.reasoning_effort"] == "low" + assert span.attributes["gen_ai.request.reasoning_summary"] == () + + assert span.attributes["gen_ai.response.reasoning_effort"] == "low" + assert span.attributes["gen_ai.completion.0.reasoning"] == () # reasoning summary + + assert span.attributes["gen_ai.usage.reasoning_tokens"] > 0 diff --git a/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/__init__.py b/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/__init__.py index 8ec1011638..a080ef2d90 100644 --- a/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/__init__.py +++ b/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/__init__.py @@ -76,11 +76,15 @@ class SpanAttributes: LLM_COMPLETIONS = "gen_ai.completion" LLM_RESPONSE_MODEL = "gen_ai.response.model" LLM_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens" + LLM_USAGE_REASONING_TOKENS = "gen_ai.usage.reasoning_tokens" LLM_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens" LLM_USAGE_CACHE_CREATION_INPUT_TOKENS = "gen_ai.usage.cache_creation_input_tokens" LLM_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read_input_tokens" LLM_TOKEN_TYPE = "gen_ai.token.type" LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA = "gen_ai.request.structured_output_schema" + LLM_REQUEST_REASONING_EFFORT = "gen_ai.request.reasoning_effort" + LLM_REQUEST_REASONING_SUMMARY = "gen_ai.request.reasoning_summary" + LLM_RESPONSE_REASONING_EFFORT = "gen_ai.response.reasoning_effort" # LLM LLM_REQUEST_TYPE = "llm.request.type" diff --git a/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/version.py b/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/version.py index 9b084a6099..4b2ce7df3d 100644 --- a/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/version.py +++ b/packages/opentelemetry-semantic-conventions-ai/opentelemetry/semconv_ai/version.py @@ -1 +1 @@ -__version__ = "0.4.12" +__version__ = "0.4.13" diff --git a/packages/opentelemetry-semantic-conventions-ai/pyproject.toml b/packages/opentelemetry-semantic-conventions-ai/pyproject.toml index e82f1ca69f..eb4b7546eb 100644 --- a/packages/opentelemetry-semantic-conventions-ai/pyproject.toml +++ b/packages/opentelemetry-semantic-conventions-ai/pyproject.toml @@ -8,7 +8,7 @@ show_missing = true [tool.poetry] name = "opentelemetry-semantic-conventions-ai" -version = "0.4.12" +version = "0.4.13" description = "OpenTelemetry Semantic Conventions Extension for Large Language Models" authors = [ "Gal Kleinman ", From 5a119a34cb7a2be37c4b2f419ad8c1b08832133f Mon Sep 17 00:00:00 2001 From: prane-eth Date: Fri, 22 Aug 2025 20:29:53 +0530 Subject: [PATCH 2/2] Resolved AI comments and lint issues --- .../openai/shared/chat_wrappers.py | 19 +++-- .../instrumentation/openai/utils.py | 1 + .../openai/v1/responses_wrappers.py | 75 ++++++++++++------- .../test_chat/test_chat_reasoning.yaml | 11 +-- .../test_responses_reasoning.yaml | 11 +-- 5 files changed, 67 insertions(+), 50 deletions(-) diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py index 22c0ec7232..38df9731a7 100644 --- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py @@ -328,18 +328,17 @@ def _handle_response( usage = response_dict.get("usage") reasoning_tokens = None if usage: - # Try dict-style access first (common when response_dict is a dict) - try: - tokens_details = usage.get("completion_tokens_details") - except Exception: - # Fallback to attribute access for object-like usage - tokens_details = getattr(usage, "completion_tokens_details", None) + # Support both dict-style and object-style `usage` + tokens_details = ( + usage.get("completion_tokens_details") if isinstance(usage, dict) + else getattr(usage, "completion_tokens_details", None) + ) if tokens_details: - if isinstance(tokens_details, dict): - reasoning_tokens = tokens_details.get("reasoning_tokens") - else: - reasoning_tokens = getattr(tokens_details, "reasoning_tokens", None) + reasoning_tokens = ( + tokens_details.get("reasoning_tokens", None) if isinstance(tokens_details, dict) + else getattr(tokens_details, "reasoning_tokens", None) + ) _set_span_attribute( span, diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py index a9b83f35c2..54ce157c97 100644 --- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py +++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py @@ -21,6 +21,7 @@ def is_openai_v1(): return pkg_version.parse(_OPENAI_VERSION) >= pkg_version.parse("1.0.0") + def is_reasoning_supported(): # Reasoning has been introduced in OpenAI API on Dec 17, 2024 # as per https://platform.openai.com/docs/changelog. diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py index a0ca5cdf74..e4bcd18140 100644 --- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py @@ -205,18 +205,17 @@ def set_data_attributes(traced_response: TracedData, span: Span): # Usage - count of reasoning tokens reasoning_tokens = None - # Try dict-style access first (common when response_dict is a dict) - try: - tokens_details = usage.get("output_tokens_details") - except Exception as e: - # Fallback to attribute access for object-like usage - tokens_details = getattr(usage, "output_tokens_details", None) + # Support both dict-style and object-style `usage` + tokens_details = ( + usage.get("output_tokens_details") if isinstance(usage, dict) + else getattr(usage, "output_tokens_details", None) + ) if tokens_details: - if isinstance(tokens_details, dict): - reasoning_tokens = tokens_details.get("reasoning_tokens", None) - else: - reasoning_tokens = getattr(tokens_details, "reasoning_tokens", None) + reasoning_tokens = ( + tokens_details.get("reasoning_tokens", None) if isinstance(tokens_details, dict) + else getattr(tokens_details, "reasoning_tokens", None) + ) _set_span_attribute( span, @@ -462,10 +461,16 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa ), response_model=existing_data.get("response_model", ""), # Reasoning attributes - request_reasoning_summary=kwargs.get("reasoning", {})\ - .get("summary", existing_data.get("request_reasoning_summary")), - request_reasoning_effort=kwargs.get("reasoning", {})\ - .get("effort", existing_data.get("request_reasoning_effort")), + request_reasoning_summary=( + kwargs.get("reasoning", {}).get( + "summary", existing_data.get("request_reasoning_summary") + ) + ), + request_reasoning_effort=( + kwargs.get("reasoning", {}).get( + "effort", existing_data.get("request_reasoning_effort") + ) + ), response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"), ) except Exception: @@ -519,10 +524,16 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa request_model=existing_data.get("request_model", kwargs.get("model")), response_model=existing_data.get("response_model", parsed_response.model), # Reasoning attributes - request_reasoning_summary=kwargs.get("reasoning", {})\ - .get("summary", existing_data.get("request_reasoning_summary")), - request_reasoning_effort=kwargs.get("reasoning", {})\ - .get("effort", existing_data.get("request_reasoning_effort")), + request_reasoning_summary=( + kwargs.get("reasoning", {}).get( + "summary", existing_data.get("request_reasoning_summary") + ) + ), + request_reasoning_effort=( + kwargs.get("reasoning", {}).get( + "effort", existing_data.get("request_reasoning_effort") + ) + ), response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"), ) responses[parsed_response.id] = traced_data @@ -576,10 +587,16 @@ async def async_responses_get_or_create_wrapper( request_model=kwargs.get("model", existing_data.get("request_model")), response_model=existing_data.get("response_model"), # Reasoning attributes - request_reasoning_summary=kwargs.get("reasoning", {})\ - .get("summary", existing_data.get("request_reasoning_summary")), - request_reasoning_effort=kwargs.get("reasoning", {})\ - .get("effort", existing_data.get("request_reasoning_effort")), + request_reasoning_summary=( + kwargs.get("reasoning", {}).get( + "summary", existing_data.get("request_reasoning_summary") + ) + ), + request_reasoning_effort=( + kwargs.get("reasoning", {}).get( + "effort", existing_data.get("request_reasoning_effort") + ) + ), response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"), ) except Exception: @@ -634,10 +651,16 @@ async def async_responses_get_or_create_wrapper( request_model=existing_data.get("request_model", kwargs.get("model")), response_model=existing_data.get("response_model", parsed_response.model), # Reasoning attributes - request_reasoning_summary=kwargs.get("reasoning", {})\ - .get("summary", existing_data.get("request_reasoning_summary")), - request_reasoning_effort=kwargs.get("reasoning", {})\ - .get("effort", existing_data.get("request_reasoning_effort")), + request_reasoning_summary=( + kwargs.get("reasoning", {}).get( + "summary", existing_data.get("request_reasoning_summary") + ) + ), + request_reasoning_effort=( + kwargs.get("reasoning", {}).get( + "effort", existing_data.get("request_reasoning_effort") + ) + ), response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"), ) responses[parsed_response.id] = traced_data diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_reasoning.yaml b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_reasoning.yaml index 1095882205..cb1035e2d0 100644 --- a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_reasoning.yaml +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_chat/test_chat_reasoning.yaml @@ -62,11 +62,8 @@ interactions: Server: - cloudflare Set-Cookie: - - __cf_bm=jX.KUMPkmrNujZSfeuVtMejn4n2PXQaJ3UfUay23P24-1755597883-1.0.1.1-9j.IZbYn18B5_zSzmKk4UiJjJCLcrFYA6ZLrF0PNLj7dxXj04b.6C_PI8gqNZTf9tUIjFc2GT4POtucEG1zuFwrpazBNp8NcBNvFuohWo.U; - path=/; expires=Tue, 19-Aug-25 10:34:43 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=AU4qexISo77wiSZaphwvH1qVqT4f4U0O.KzH8aCZHjA-1755597883835-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + - REDACTED + - REDACTED Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Transfer-Encoding: @@ -80,11 +77,11 @@ interactions: cf-cache-status: - DYNAMIC openai-organization: - - user-mktczbuqo14ok5zq3zvvus0l + - REDACTED openai-processing-ms: - '3082' openai-project: - - proj_HqO8HnKp7rJsjrDN6n3Y0TPc + - REDACTED openai-version: - '2020-10-01' x-envoy-upstream-service-time: diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_responses/test_responses_reasoning.yaml b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_responses/test_responses_reasoning.yaml index 5ae52bacf3..08a7a63384 100644 --- a/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_responses/test_responses_reasoning.yaml +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/cassettes/test_responses/test_responses_reasoning.yaml @@ -65,11 +65,8 @@ interactions: Server: - cloudflare Set-Cookie: - - __cf_bm=FO78f1ufF1EPp.whHPQCmvOgkOm9TdL.jyNRfcB6MDU-1755603581-1.0.1.1-ZpyFa2.gOYg_vbQ57kZR1_DapBCvV91MUGUIQytOk3F5oC6asW5NdB8gOEhtIdu.R8MmJ3nDaPKckOm3CxjO90Uo6B_5eB0Lck9zTKbWumk; - path=/; expires=Tue, 19-Aug-25 12:09:41 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=avG9i6J4upsjvaHvdMI1ayE3_sGGFBLR2j0SRzjQAqo-1755603581185-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + - REDACTED + - REDACTED Transfer-Encoding: - chunked X-Content-Type-Options: @@ -79,11 +76,11 @@ interactions: cf-cache-status: - DYNAMIC openai-organization: - - user-mktczbuqo14ok5zq3zvvus0l + - REDACTED openai-processing-ms: - '2646' openai-project: - - proj_HqO8HnKp7rJsjrDN6n3Y0TPc + - REDACTED openai-version: - '2020-10-01' strict-transport-security: