@@ -285,6 +285,14 @@ async def _handle_request(span, kwargs, instance):
    if Config.enable_trace_context_propagation:
        propagate_trace_context(span, kwargs)

    # Reasoning request attributes
    reasoning_effort = kwargs.get("reasoning_effort")
    _set_span_attribute(
        span,
        SpanAttributes.LLM_REQUEST_REASONING_EFFORT,
        reasoning_effort or ()
    )
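
A quick illustration of where `reasoning_effort` comes from: it is the optional Chat Completions parameter for reasoning models, so the attribute is only meaningful when the caller sets it. A minimal sketch (not part of the diff; assumes a recent `openai` client and an `OPENAI_API_KEY` in the environment):

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
response = client.chat.completions.create(
    model="gpt-5-nano",
    messages=[{"role": "user", "content": "Count r's in strawberry"}],
    reasoning_effort="low",  # surfaces as kwargs["reasoning_effort"] in _handle_request
)
```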

@dont_throw
def _handle_response(
@@ -316,6 +324,28 @@ def _handle_response(
    # span attributes
    _set_response_attributes(span, response_dict)

    # Reasoning usage attributes
    usage = response_dict.get("usage")
    reasoning_tokens = None
    if usage:
        # Support both dict-style and object-style `usage`
        tokens_details = (
            usage.get("completion_tokens_details") if isinstance(usage, dict)
            else getattr(usage, "completion_tokens_details", None)
        )

        if tokens_details:
            reasoning_tokens = (
                tokens_details.get("reasoning_tokens", None) if isinstance(tokens_details, dict)
                else getattr(tokens_details, "reasoning_tokens", None)
            )

    _set_span_attribute(
        span,
        SpanAttributes.LLM_USAGE_REASONING_TOKENS,
        reasoning_tokens or 0,
    )

    if should_emit_events():
        if response.choices is not None:
            for choice in response.choices:
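The dict-or-object fallback above exists because `usage` (and its token details) may arrive either as a plain dict or as an attribute-style object, as the inline comment notes. A hypothetical helper (not in the PR) that captures the same pattern:

```python
def _get_field(obj, name, default=None):
    """Read `name` from a dict-style or attribute-style object."""
    if isinstance(obj, dict):
        return obj.get(name, default)
    return getattr(obj, name, default)

# Both shapes yield the same result:
assert _get_field({"reasoning_tokens": 192}, "reasoning_tokens") == 192
```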
@@ -5,6 +5,7 @@
import traceback
from contextlib import asynccontextmanager
from importlib.metadata import version
from packaging import version as pkg_version

from opentelemetry import context as context_api
from opentelemetry._events import EventLogger
@@ -18,7 +19,15 @@


def is_openai_v1():
-    return _OPENAI_VERSION >= "1.0.0"
+    return pkg_version.parse(_OPENAI_VERSION) >= pkg_version.parse("1.0.0")


def is_reasoning_supported():
    # Reasoning was introduced in the OpenAI API on Dec 17, 2024,
    # as per https://platform.openai.com/docs/changelog.
    # The matching OpenAI library release is 1.58.0,
    # as per https://pypi.org/project/openai/.
    return pkg_version.parse(_OPENAI_VERSION) >= pkg_version.parse("1.58.0")


def is_azure_openai(instance):
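The switch to `packaging.version` matters because plain string comparison orders versions lexicographically. A one-liner demonstrating the bug the change fixes:

```python
from packaging import version as pkg_version

# Character-wise, "5" < "9", so the string check misorders these releases:
assert ("1.58.0" >= "1.9.0") is False
assert pkg_version.parse("1.58.0") >= pkg_version.parse("1.9.0")
```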
@@ -132,6 +132,11 @@ class TracedData(pydantic.BaseModel):
    request_model: Optional[str] = pydantic.Field(default=None)
    response_model: Optional[str] = pydantic.Field(default=None)

    # Reasoning attributes
    request_reasoning_summary: Optional[str] = pydantic.Field(default=None)
    request_reasoning_effort: Optional[str] = pydantic.Field(default=None)
    response_reasoning_effort: Optional[str] = pydantic.Field(default=None)


responses: dict[str, TracedData] = {}
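
To make the three new fields concrete: for a Responses API call made with `reasoning={"effort": "low", "summary": "auto"}`, the wrappers below populate them roughly as follows (illustrative values; note the code reads the response effort back from the request kwargs rather than from the API response):

```python
reasoning = {"effort": "low", "summary": "auto"}  # the `reasoning` kwarg

request_reasoning_summary = reasoning.get("summary")  # -> "auto"
request_reasoning_effort = reasoning.get("effort")    # -> "low"
response_reasoning_effort = reasoning.get("effort")   # -> "low" (taken from the request)
```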

@@ -197,7 +202,46 @@ def set_data_attributes(traced_response: TracedData, span: Span):
                SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS,
                usage.input_tokens_details.cached_tokens,
            )
-        # TODO: add reasoning tokens in output token details

        # Usage - count of reasoning tokens
        reasoning_tokens = None
        # Support both dict-style and object-style `usage`
        tokens_details = (
            usage.get("output_tokens_details") if isinstance(usage, dict)
            else getattr(usage, "output_tokens_details", None)
        )

        if tokens_details:
            reasoning_tokens = (
                tokens_details.get("reasoning_tokens", None) if isinstance(tokens_details, dict)
                else getattr(tokens_details, "reasoning_tokens", None)
            )

        _set_span_attribute(
            span,
            SpanAttributes.LLM_USAGE_REASONING_TOKENS,
            reasoning_tokens or 0,
        )

    # Reasoning attributes
    # Request - reasoning summary
    _set_span_attribute(
        span,
        SpanAttributes.LLM_REQUEST_REASONING_SUMMARY,
        traced_response.request_reasoning_summary or (),
    )
    # Request - reasoning effort
    _set_span_attribute(
        span,
        SpanAttributes.LLM_REQUEST_REASONING_EFFORT,
        traced_response.request_reasoning_effort or (),
    )
    # Response - reasoning effort
    _set_span_attribute(
        span,
        SpanAttributes.LLM_RESPONSE_REASONING_EFFORT,
        traced_response.response_reasoning_effort or (),
    )
    if should_send_prompts():
        prompt_index = 0
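Putting the block above together: for a low-effort reasoning request like the one recorded in the cassettes below, the span would carry attributes along these lines. This is a sketch; the attribute names shown are assumed expansions of the `SpanAttributes` constants, and the token count comes from `usage.output_tokens_details.reasoning_tokens`:

```python
# Assumed attribute names; the authoritative values are the semconv constants.
expected_attributes = {
    "gen_ai.request.reasoning_summary": "auto",
    "gen_ai.request.reasoning_effort": "low",
    "gen_ai.response.reasoning_effort": "low",
    "gen_ai.usage.reasoning_tokens": 192,
}
```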
@@ -416,6 +460,18 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwargs):
"model", existing_data.get("request_model", "")
),
response_model=existing_data.get("response_model", ""),
# Reasoning attributes
request_reasoning_summary=(
kwargs.get("reasoning", {}).get(
"summary", existing_data.get("request_reasoning_summary")
)
),
request_reasoning_effort=(
kwargs.get("reasoning", {}).get(
"effort", existing_data.get("request_reasoning_effort")
)
),
response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"),
)
except Exception:
traced_data = None
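For reference, a call shape (illustrative, not from the diff) that produces the `reasoning` kwargs read above; `effort` and `summary` are the standard keys of the Responses API `reasoning` parameter:

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
response = client.responses.create(
    model="gpt-5-nano",
    input="Count r's in strawberry",
    reasoning={"effort": "low", "summary": "auto"},  # -> kwargs["reasoning"]
)
```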
@@ -467,6 +523,18 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwargs):
            output_text=existing_data.get("output_text", parsed_response_output_text),
            request_model=existing_data.get("request_model", kwargs.get("model")),
            response_model=existing_data.get("response_model", parsed_response.model),
            # Reasoning attributes
            request_reasoning_summary=(
                kwargs.get("reasoning", {}).get(
                    "summary", existing_data.get("request_reasoning_summary")
                )
            ),
            request_reasoning_effort=(
                kwargs.get("reasoning", {}).get(
                    "effort", existing_data.get("request_reasoning_effort")
                )
            ),
            response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"),
        )
        responses[parsed_response.id] = traced_data
    except Exception:
@@ -518,6 +586,18 @@ async def async_responses_get_or_create_wrapper(
            output_text=kwargs.get("output_text", existing_data.get("output_text")),
            request_model=kwargs.get("model", existing_data.get("request_model")),
            response_model=existing_data.get("response_model"),
            # Reasoning attributes
            request_reasoning_summary=(
                kwargs.get("reasoning", {}).get(
                    "summary", existing_data.get("request_reasoning_summary")
                )
            ),
            request_reasoning_effort=(
                kwargs.get("reasoning", {}).get(
                    "effort", existing_data.get("request_reasoning_effort")
                )
            ),
            response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"),
        )
    except Exception:
        traced_data = None
@@ -570,6 +650,18 @@ async def async_responses_get_or_create_wrapper(
            output_text=existing_data.get("output_text", parsed_response_output_text),
            request_model=existing_data.get("request_model", kwargs.get("model")),
            response_model=existing_data.get("response_model", parsed_response.model),
            # Reasoning attributes
            request_reasoning_summary=(
                kwargs.get("reasoning", {}).get(
                    "summary", existing_data.get("request_reasoning_summary")
                )
            ),
            request_reasoning_effort=(
                kwargs.get("reasoning", {}).get(
                    "effort", existing_data.get("request_reasoning_effort")
                )
            ),
            response_reasoning_effort=kwargs.get("reasoning", {}).get("effort"),
        )
        responses[parsed_response.id] = traced_data
    except Exception:
@@ -0,0 +1,81 @@
interactions:
- request:
    body: '{"messages": [{"role": "user", "content": "Count r''s in strawberry"}],
      "model": "gpt-5-nano", "reasoning_effort": "low"}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '120'
      content-type:
      - application/json
      host:
      - traceloop-stg.openai.azure.com
      user-agent:
      - AzureOpenAI/Python 1.99.7
      x-stainless-arch:
      - x64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - Linux
      x-stainless-package-version:
      - 1.99.7
      x-stainless-read-timeout:
      - '600'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.5
    method: POST
    uri: https://traceloop-stg.openai.azure.com/openai/deployments/gpt-5-nano/chat/completions?api-version=2024-02-01
  response:
    body:
      string: '{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"protected_material_code":{"filtered":false,"detected":false},"protected_material_text":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"finish_reason":"stop","index":0,"logprobs":null,"message":{"annotations":[],"content":"3","refusal":null,"role":"assistant"}}],"created":1755601034,"id":"chatcmpl-C6EJeKZdEaC0VeeKH3lWwJBjCTcpd","model":"gpt-5-nano-2025-08-07","object":"chat.completion","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"system_fingerprint":null,"usage":{"completion_tokens":203,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":192,"rejected_prediction_tokens":0},"prompt_tokens":11,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":214}}

        '
    headers:
      Content-Length:
      - '1204'
      Content-Type:
      - application/json
      Date:
      - Tue, 19 Aug 2025 10:57:14 GMT
      Strict-Transport-Security:
      - max-age=31536000; includeSubDomains; preload
      apim-request-id:
      - aebd8320-f701-4e7d-801f-2955f84e3811
      azureml-model-session:
      - d004-20250815200304
      x-accel-buffering:
      - 'no'
      x-content-type-options:
      - nosniff
      x-ms-deployment-name:
      - gpt-5-nano
      x-ms-rai-invoked:
      - 'true'
      x-ms-region:
      - East US 2
      x-ratelimit-limit-requests:
      - '100'
      x-ratelimit-limit-tokens:
      - '100000'
      x-ratelimit-remaining-requests:
      - '99'
      x-ratelimit-remaining-tokens:
      - '99994'
      x-request-id:
      - 7acf5821-70fa-4fab-b202-ba3700578d08
    status:
      code: 200
      message: OK
version: 1
@@ -0,0 +1,106 @@
interactions:
- request:
    body: '{"messages": [{"role": "user", "content": "Count r''s in strawberry"}],
      "model": "gpt-5-nano", "reasoning_effort": "low"}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '120'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.99.7
      x-stainless-arch:
      - x64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - Linux
      x-stainless-package-version:
      - 1.99.7
      x-stainless-read-timeout:
      - '600'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.5
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: !!binary |
        H4sIAAAAAAAAA3SSQY/bIBCF7/4VIy7bSvbKceTGzrHtsadVe2pWFguTQIMBwbhtusp/ryBp7FV3
        Lxz45j3mzfBcADAt2RaYUJzE6E316cPnb2P9havuT3t8iA9K0bHfONXiqf7IyqRwTz9Q0D/VvXCj
        N0ja2QsWATlhcl1t2rbtN11XZzA6iSbJDp6qtrLcuqqpm7aqu6reXMXKaYGRbeF7AQDwnM/UppX4
        m20hW+WbEWPkB2TbWxEAC86kG8Zj1JG4JVbOUDhLaHPn653d2a8KAwIPCKQCItyFOzBIhCGCtrBj
        kQL/9YQhnHYM3nkXdYoZYV1CVwK3Evr398sXAu6nyFNIOxmzANxaRzyrU7bHKznf0uy11VENAXl0
        NnUYyXmW6bkAeMzTmV4EZj640dNA7ojZdrW62LF5JTNsmu5KyRE3C7Duy1f8BonEtYmL+TLBhUI5
        S+dl8ElqtwDFIt3/7bzmfUmu7WGRp2/efGAGQqAnlIMPKLV4GXouC5g+7VtltznnllnE8FMLHEhj
        SLuQuOeTufwlFk+RcBz22h4w+KDzh0rrLs7FXwAAAP//AwBrW0kCUgMAAA==
    headers:
      CF-RAY:
      - 9718d3ff7b437f99-MAA
      Connection:
      - keep-alive
      Content-Encoding:
      - gzip
      Content-Type:
      - application/json
      Date:
      - Tue, 19 Aug 2025 10:04:43 GMT
      Server:
      - cloudflare
      Set-Cookie:
      - REDACTED
      - REDACTED
      Strict-Transport-Security:
      - max-age=31536000; includeSubDomains; preload
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - REDACTED
      openai-processing-ms:
      - '3082'
      openai-project:
      - REDACTED
      openai-version:
      - '2020-10-01'
      x-envoy-upstream-service-time:
      - '3130'
      x-ratelimit-limit-requests:
      - '500'
      x-ratelimit-limit-tokens:
      - '200000'
      x-ratelimit-remaining-requests:
      - '499'
      x-ratelimit-remaining-tokens:
      - '199992'
      x-ratelimit-reset-requests:
      - 120ms
      x-ratelimit-reset-tokens:
      - 2ms
      x-request-id:
      - req_fb455d524b7f4775956fba99734cc8d9
    status:
      code: 200
      message: OK
version: 1