@@ -6,8 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased

-Add `gen_ai.usage.cache_read.input_tokens` attribute to capture cached tokens on spans/events when the experimental sem conv flag is set. ([#4313](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4313))
- Include thinking tokens in `gen_ai.usage.output_tokens` ([#4206](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4206)).
- Add `gen_ai.usage.reasoning.output_tokens` to span and event attributes ([#4276](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4276)).


## Version 0.7b0 (2026-02-20)
- Fix bug in how tokens are counted when using the streaming `generateContent` method. ([#4152](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4152)).
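Both Unreleased entries only take effect when the experimental GenAI semantic conventions are opted into. A minimal sketch of enabling that mode, assuming the standard `OTEL_SEMCONV_STABILITY_OPT_IN` mechanism that the tests below patch into `os.environ`:

# Sketch only: opt in to the experimental GenAI semconv mode before
# initializing the instrumentation; the value mirrors what the tests
# below patch into os.environ.
import os

os.environ["OTEL_SEMCONV_STABILITY_OPT_IN"] = "gen_ai_latest_experimental"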
@@ -101,6 +101,10 @@
GEN_AI_TOOL_DEFINITIONS = getattr(
gen_ai_attributes, "GEN_AI_TOOL_DEFINITIONS", "gen_ai.tool.definitions"
)
# TODO: Replace with the new semconv once it's defined.
GEN_AI_USAGE_REASONING_OUTPUT_TOKENS = (
"gen_ai.usage.reasoning.output_tokens"
)

# Constant used to make the absence of content more understandable.
_CONTENT_ELIDED = "<elided>"
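Once the attribute lands in `opentelemetry-semantic-conventions`, this constant could presumably reuse the `getattr` fallback pattern of `GEN_AI_TOOL_DEFINITIONS` above. A hypothetical sketch (the attribute name on `gen_ai_attributes` is an assumption):

# Hypothetical future form: prefer the published constant, fall back to
# the literal string until the semconv is defined.
GEN_AI_USAGE_REASONING_OUTPUT_TOKENS = getattr(
    gen_ai_attributes,
    "GEN_AI_USAGE_REASONING_OUTPUT_TOKENS",
    "gen_ai.usage.reasoning.output_tokens",
)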
@@ -526,6 +530,7 @@ def __init__(
self._input_tokens = 0
self._cached_tokens = 0
self._output_tokens = 0
self._thoughts_tokens = 0
sem_conv_opt_in_mode = _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode(
_OpenTelemetryStabilitySignalType.GEN_AI
)
@@ -627,18 +632,30 @@ def _maybe_update_token_counts(self, response: GenerateContentResponse):
input_tokens = _get_response_property(
response, "usage_metadata.prompt_token_count"
)
output_tokens = _get_response_property(
candidates_tokens = _get_response_property(
response, "usage_metadata.candidates_token_count"
)

thoughts_tokens = _get_response_property(
response, "usage_metadata.thoughts_token_count"
)
output_tokens: int = 0
if candidates_tokens and isinstance(candidates_tokens, int):
output_tokens += candidates_tokens
if thoughts_tokens and isinstance(thoughts_tokens, int):
self._thoughts_tokens = thoughts_tokens
output_tokens += self._thoughts_tokens

cached_tokens = _get_response_property(
response, "usage_metadata.cached_content_token_count"
)
if cached_tokens and isinstance(cached_tokens, int):
self._cached_tokens = cached_tokens

if input_tokens and isinstance(input_tokens, int):
self._input_tokens = input_tokens
if output_tokens and isinstance(output_tokens, int):
self._output_tokens = output_tokens

self._output_tokens = output_tokens

def _maybe_update_error_type(self, response: GenerateContentResponse):
if response.candidates:
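The net effect of this hunk: `gen_ai.usage.output_tokens` now reports candidate tokens plus thinking tokens. A standalone sketch of that rule, with `usage` standing in for `response.usage_metadata` (names are illustrative, not part of the diff):

# Illustrative condensation of _maybe_update_token_counts: either count
# may be None or absent on a given response, hence the guarded checks.
def total_output_tokens(usage) -> int:
    total = 0
    if usage.candidates_token_count and isinstance(usage.candidates_token_count, int):
        total += usage.candidates_token_count
    if usage.thoughts_token_count and isinstance(usage.thoughts_token_count, int):
        total += usage.thoughts_token_count
    return total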
@@ -778,6 +795,12 @@ def _maybe_log_completion_details(
event.attributes[
gen_ai_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS
] = self._cached_tokens
span.set_attribute(
GEN_AI_USAGE_REASONING_OUTPUT_TOKENS, self._thoughts_tokens
)
event.attributes[GEN_AI_USAGE_REASONING_OUTPUT_TOKENS] = (
self._thoughts_tokens
)
tool_definitions = tool_definitions or []
self.completion_hook.on_completion(
inputs=input_messages,
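With the experimental mode active, a `generate_content` span then carries usage attributes along these lines (hypothetical snapshot; the values match `test_generated_span_counts_tokens` below):

# Hypothetical span-attribute snapshot for a response with 456 candidate
# tokens, 789 thinking tokens, and 50 cached prompt tokens.
expected_usage_attributes = {
    "gen_ai.usage.input_tokens": 123,
    "gen_ai.usage.output_tokens": 456 + 789,  # candidates + thoughts
    "gen_ai.usage.cache_read.input_tokens": 50,
    "gen_ai.usage.reasoning.output_tokens": 789,
}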
@@ -261,17 +261,25 @@ def test_generated_span_has_vertex_ai_system_when_configured(self):

def test_generated_span_counts_tokens(self):
self.configure_valid_response(
input_tokens=123, output_tokens=456, cached_tokens=50
input_tokens=123,
candidates_tokens=456,
thoughts_tokens=789,
cached_tokens=50,
)
self.generate_content(model="gemini-2.0-flash", contents="Some input")
self.otel.assert_has_span_named("generate_content gemini-2.0-flash")
span = self.otel.get_span_named("generate_content gemini-2.0-flash")
self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 123)
self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 456)
self.assertEqual(
span.attributes["gen_ai.usage.output_tokens"], 456 + 789
)
# New semconv attributes should not appear when the flag is not in experimental mode.
self.assertNotIn(
"gen_ai.usage.cache_read.input_tokens", span.attributes
)
self.assertNotIn(
"gen_ai.usage.reasoning.output_tokens", span.attributes
)

@patch.dict(
"os.environ",
@@ -452,7 +460,7 @@ def test_new_semconv_record_completion_as_log(self):
self.setUp()
with patched_environ, patched_otel_mapping:
self.configure_valid_response(
text=output, cached_tokens=50
text=output, cached_tokens=50, thoughts_tokens=10
)
self.generate_content(
model="gemini-2.0-flash",
@@ -475,6 +483,12 @@ def test_new_semconv_record_completion_as_log(self):
],
50,
)
self.assertEqual(
event.attributes[
"gen_ai.usage.reasoning.output_tokens"
],
10,
)
assert (
event.attributes[
"gcp.gen_ai.operation.config.response_schema"
@@ -780,7 +794,9 @@ def test_new_semconv_record_completion_in_span(self):
self.setUp()
with patched_environ, patched_otel_mapping:
self.configure_valid_response(
text="Some response content", cached_tokens=50
text="Some response content",
cached_tokens=50,
thoughts_tokens=10,
)
self.generate_content(
model="gemini-2.0-flash",
@@ -800,6 +816,12 @@ def test_new_semconv_record_completion_in_span(self):
],
50,
)
self.assertEqual(
span.attributes[
"gen_ai.usage.reasoning.output_tokens"
],
10,
)
if mode in [
ContentCapturingMode.SPAN_ONLY,
ContentCapturingMode.SPAN_AND_EVENT,
@@ -91,16 +91,25 @@ def test_handles_multiple_ressponses(self):

def test_includes_token_counts_in_span_not_aggregated_from_responses(self):
# Tokens should not be aggregated in streaming. Cumulative counts are returned on each response.
self.configure_valid_response(input_tokens=3, output_tokens=5)
self.configure_valid_response(input_tokens=3, output_tokens=5)
self.configure_valid_response(input_tokens=3, output_tokens=5)
self.configure_valid_response(
input_tokens=3, candidates_tokens=5, thoughts_tokens=2
)
self.configure_valid_response(
input_tokens=3, candidates_tokens=5, thoughts_tokens=2
)
self.configure_valid_response(
input_tokens=3, candidates_tokens=5, thoughts_tokens=2
)

self.generate_content(model="gemini-2.0-flash", contents="Some input")

self.otel.assert_has_span_named("generate_content gemini-2.0-flash")
span = self.otel.get_span_named("generate_content gemini-2.0-flash")
self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 3)
self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 5)
self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 7)
self.assertNotIn(
"gen_ai.usage.reasoning.output_tokens", span.attributes
)
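Spelling out the comment at the top of this test: each streaming chunk reports cumulative usage, so the recorder overwrites its counts on every response instead of summing them. A sketch under that assumption:

# Sketch (assumption: streaming usage counts are cumulative per chunk).
# Overwriting on each chunk yields 5 + 2 = 7 output tokens, not 3 * 7.
chunks = [
    {"candidates_token_count": 5, "thoughts_token_count": 2},
    {"candidates_token_count": 5, "thoughts_token_count": 2},
    {"candidates_token_count": 5, "thoughts_token_count": 2},
]
output_tokens = 0
for usage in chunks:
    output_tokens = usage["candidates_token_count"] + usage["thoughts_token_count"]
assert output_tokens == 7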

def test_new_semconv_log_has_extra_genai_attributes(self):
patched_environ = patch.dict(
@@ -117,7 +126,9 @@ def test_new_semconv_log_has_extra_genai_attributes(self):
},
)
with patched_environ, patched_otel_mapping:
self.configure_valid_response(text="Yep, it works!")
self.configure_valid_response(
text="Yep, it works!", thoughts_tokens=10
)
tok = context_api.attach(
context_api.set_value(
GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY,
@@ -135,6 +146,10 @@ def test_new_semconv_log_has_extra_genai_attributes(self):
event = self.otel.get_event_named(
"gen_ai.client.inference.operation.details"
)
self.assertEqual(
event.attributes["gen_ai.usage.reasoning.output_tokens"],
10,
)
assert (
event.attributes["extra_attribute_key"]
== "extra_attribute_value"
@@ -25,7 +25,8 @@ def create_response(
candidates: Optional[list[genai_types.Candidate]] = None,
text: Optional[str] = None,
input_tokens: Optional[int] = None,
output_tokens: Optional[int] = None,
candidates_tokens: Optional[int] = None,
thoughts_tokens: Optional[int] = None,
cached_tokens: Optional[int] = None,
model_version: Optional[str] = None,
usage_metadata: Optional[
@@ -52,8 +53,10 @@ def create_response(
usage_metadata = genai_types.GenerateContentResponseUsageMetadata()
if input_tokens is not None:
usage_metadata.prompt_token_count = input_tokens
if output_tokens is not None:
usage_metadata.candidates_token_count = output_tokens
if candidates_tokens is not None:
usage_metadata.candidates_token_count = candidates_tokens
if thoughts_tokens is not None:
usage_metadata.thoughts_token_count = thoughts_tokens
if cached_tokens is not None:
usage_metadata.cached_content_token_count = cached_tokens
return genai_types.GenerateContentResponse(