Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

- Add `gen_ai.usage.reasoning.output_tokens` attribute to capture thinking tokens on spans/events when the experimental sem conv flag is set. Add thinking tokens to output tokens. ([#4313](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4313))
- Add `gen_ai.usage.cache_read.input_tokens` attribute to capture cached tokens on spans/events when the experimental sem conv flag is set. ([#4313](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4313))

## Version 0.7b0 (2026-02-20)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,7 @@ def __init__(
self._error_type = None
self._input_tokens = 0
self._cached_tokens = 0
self._thinking_tokens = 0
self._output_tokens = 0
sem_conv_opt_in_mode = _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode(
_OpenTelemetryStabilitySignalType.GEN_AI
Expand Down Expand Up @@ -633,12 +634,21 @@ def _maybe_update_token_counts(self, response: GenerateContentResponse):
cached_tokens = _get_response_property(
response, "usage_metadata.cached_content_token_count"
)
thinking_tokens = _get_response_property(
response, "usage_metadata.thoughts_token_count"
)
if cached_tokens and isinstance(cached_tokens, int):
self._cached_tokens = cached_tokens
if input_tokens and isinstance(input_tokens, int):
self._input_tokens = input_tokens
if output_tokens and isinstance(output_tokens, int):
self._output_tokens = output_tokens
if thinking_tokens and isinstance(thinking_tokens, int):
# Pricing of tokens is the sum of output tokens and thinking tokens:
# https://ai.google.dev/gemini-api/docs/thinking#pricing
# Also the sem conv recommends combining these counts.
self._output_tokens += thinking_tokens
self._thinking_tokens = thinking_tokens

def _maybe_update_error_type(self, response: GenerateContentResponse):
if response.candidates:
Expand Down Expand Up @@ -778,6 +788,14 @@ def _maybe_log_completion_details(
event.attributes[
gen_ai_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS
] = self._cached_tokens
# TODO: replace these strings with the sem conv constant in `gen_ai_attributes` once it becomes available.
span.set_attribute(
"gen_ai.usage.reasoning.output_tokens",
self._thinking_tokens,
)
event.attributes["gen_ai.usage.reasoning.output_tokens"] = (
self._thinking_tokens
)
tool_definitions = tool_definitions or []
self.completion_hook.on_completion(
inputs=input_messages,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,17 +261,25 @@ def test_generated_span_has_vertex_ai_system_when_configured(self):

def test_generated_span_counts_tokens(self):
self.configure_valid_response(
input_tokens=123, output_tokens=456, cached_tokens=50
input_tokens=123,
output_tokens=456,
cached_tokens=50,
thinking_tokens=17,
)
self.generate_content(model="gemini-2.0-flash", contents="Some input")
self.otel.assert_has_span_named("generate_content gemini-2.0-flash")
span = self.otel.get_span_named("generate_content gemini-2.0-flash")
self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 123)
self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 456)
self.assertEqual(
span.attributes["gen_ai.usage.output_tokens"], 456 + 17
)
# New sem conv attributes should not appear when the flag is not set to experimental mode.
self.assertNotIn(
"gen_ai.usage.cache_read.input_tokens", span.attributes
)
self.assertNotIn(
"gen_ai.usage.reasoning.output_tokens", span.attributes
)

@patch.dict(
"os.environ",
Expand Down Expand Up @@ -452,7 +460,9 @@ def test_new_semconv_record_completion_as_log(self):
self.setUp()
with patched_environ, patched_otel_mapping:
self.configure_valid_response(
text=output, cached_tokens=50
text=output,
cached_tokens=50,
thinking_tokens=17,
)
self.generate_content(
model="gemini-2.0-flash",
Expand All @@ -475,6 +485,16 @@ def test_new_semconv_record_completion_as_log(self):
],
50,
)
self.assertEqual(
event.attributes[
"gen_ai.usage.reasoning.output_tokens"
],
17,
)
self.assertEqual(
event.attributes["gen_ai.usage.output_tokens"],
17,
)
assert (
event.attributes[
"gcp.gen_ai.operation.config.response_schema"
Expand Down Expand Up @@ -780,7 +800,9 @@ def test_new_semconv_record_completion_in_span(self):
self.setUp()
with patched_environ, patched_otel_mapping:
self.configure_valid_response(
text="Some response content", cached_tokens=50
text="Some response content",
cached_tokens=50,
thinking_tokens=19,
)
self.generate_content(
model="gemini-2.0-flash",
Expand All @@ -800,6 +822,16 @@ def test_new_semconv_record_completion_in_span(self):
],
50,
)
self.assertEqual(
span.attributes[
"gen_ai.usage.reasoning.output_tokens"
],
19,
)
self.assertEqual(
span.attributes["gen_ai.usage.output_tokens"],
19,
)
if mode in [
ContentCapturingMode.SPAN_ONLY,
ContentCapturingMode.SPAN_AND_EVENT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def create_response(
candidates: Optional[list[genai_types.Candidate]] = None,
text: Optional[str] = None,
input_tokens: Optional[int] = None,
thinking_tokens: Optional[int] = None,
output_tokens: Optional[int] = None,
cached_tokens: Optional[int] = None,
model_version: Optional[str] = None,
Expand Down Expand Up @@ -56,6 +57,8 @@ def create_response(
usage_metadata.candidates_token_count = output_tokens
if cached_tokens is not None:
usage_metadata.cached_content_token_count = cached_tokens
if thinking_tokens is not None:
usage_metadata.thoughts_token_count = thinking_tokens
return genai_types.GenerateContentResponse(
candidates=candidates,
usage_metadata=usage_metadata,
Expand Down
Loading