diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md index bdc05dc98b..a237a68be8 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +-Add `gen_ai.usage.reasoning.output_tokens` attribute to capture thinking tokens on spans/events when the experimental sem conv flag is set. Add thinking tokens to output tokens. ([#4313](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4313)) -Add `gen_ai.usage.cache_read.input_tokens` attribute to capture cached tokens on spans/events when the experimental sem conv flag is set. ([#4313](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4313)) ## Version 0.7b0 (2026-02-20) diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py index 874bcb8144..302fba0280 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py @@ -525,6 +525,7 @@ def __init__( self._error_type = None self._input_tokens = 0 self._cached_tokens = 0 + self._thinking_tokens = 0 self._output_tokens = 0 sem_conv_opt_in_mode = _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode( _OpenTelemetryStabilitySignalType.GEN_AI @@ -633,12 +634,21 @@ def _maybe_update_token_counts(self, response: GenerateContentResponse): cached_tokens = _get_response_property( response, "usage_metadata.cached_content_token_count" ) + thinking_tokens = _get_response_property( + response, "usage_metadata.thoughts_token_count" + ) if cached_tokens and isinstance(cached_tokens, int): self._cached_tokens = cached_tokens if input_tokens and isinstance(input_tokens, int): self._input_tokens = input_tokens if output_tokens and isinstance(output_tokens, int): self._output_tokens = output_tokens + if thinking_tokens and isinstance(thinking_tokens, int): + # Pricing of tokens is the sum of output tokens and thinking tokens: + # https://ai.google.dev/gemini-api/docs/thinking#pricing + # Also the sem conv recommends combining these counts. + self._output_tokens += thinking_tokens + self._thinking_tokens = thinking_tokens def _maybe_update_error_type(self, response: GenerateContentResponse): if response.candidates: @@ -778,6 +788,14 @@ def _maybe_log_completion_details( event.attributes[ gen_ai_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS ] = self._cached_tokens + # TODO: replace these strings with the sem conv constant in `gen_ai_attributes` once it becomes available. + span.set_attribute( + "gen_ai.usage.reasoning.output_tokens", + self._thinking_tokens, + ) + event.attributes["gen_ai.usage.reasoning.output_tokens"] = ( + self._thinking_tokens + ) tool_definitions = tool_definitions or [] self.completion_hook.on_completion( inputs=input_messages, diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py index 00596d3ce4..8d22b7ebe9 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py @@ -261,17 +261,25 @@ def test_generated_span_has_vertex_ai_system_when_configured(self): def test_generated_span_counts_tokens(self): self.configure_valid_response( - input_tokens=123, output_tokens=456, cached_tokens=50 + input_tokens=123, + output_tokens=456, + cached_tokens=50, + thinking_tokens=17, ) self.generate_content(model="gemini-2.0-flash", contents="Some input") self.otel.assert_has_span_named("generate_content gemini-2.0-flash") span = self.otel.get_span_named("generate_content gemini-2.0-flash") self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 123) - self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 456) + self.assertEqual( + span.attributes["gen_ai.usage.output_tokens"], 456 + 17 + ) # New sem conv should not appear when flag is not experimental mode.. self.assertNotIn( "gen_ai.usage.cache_read.input_tokens", span.attributes ) + self.assertNotIn( + "gen_ai.usage.reasoning.output_tokens", span.attributes + ) @patch.dict( "os.environ", @@ -452,7 +460,9 @@ def test_new_semconv_record_completion_as_log(self): self.setUp() with patched_environ, patched_otel_mapping: self.configure_valid_response( - text=output, cached_tokens=50 + text=output, + cached_tokens=50, + thinking_tokens=17, ) self.generate_content( model="gemini-2.0-flash", @@ -475,6 +485,16 @@ def test_new_semconv_record_completion_as_log(self): ], 50, ) + self.assertEqual( + event.attributes[ + "gen_ai.usage.reasoning.output_tokens" + ], + 17, + ) + self.assertEqual( + event.attributes["gen_ai.usage.output_tokens"], + 17, + ) assert ( event.attributes[ "gcp.gen_ai.operation.config.response_schema" @@ -780,7 +800,9 @@ def test_new_semconv_record_completion_in_span(self): self.setUp() with patched_environ, patched_otel_mapping: self.configure_valid_response( - text="Some response content", cached_tokens=50 + text="Some response content", + cached_tokens=50, + thinking_tokens=19, ) self.generate_content( model="gemini-2.0-flash", @@ -800,6 +822,16 @@ def test_new_semconv_record_completion_in_span(self): ], 50, ) + self.assertEqual( + span.attributes[ + "gen_ai.usage.reasoning.output_tokens" + ], + 19, + ) + self.assertEqual( + span.attributes["gen_ai.usage.output_tokens"], + 19, + ) if mode in [ ContentCapturingMode.SPAN_ONLY, ContentCapturingMode.SPAN_AND_EVENT, diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py index 3c3f81c646..1373577cca 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py @@ -25,6 +25,7 @@ def create_response( candidates: Optional[list[genai_types.Candidate]] = None, text: Optional[str] = None, input_tokens: Optional[int] = None, + thinking_tokens: Optional[int] = None, output_tokens: Optional[int] = None, cached_tokens: Optional[int] = None, model_version: Optional[str] = None, @@ -56,6 +57,8 @@ def create_response( usage_metadata.candidates_token_count = output_tokens if cached_tokens is not None: usage_metadata.cached_content_token_count = cached_tokens + if thinking_tokens is not None: + usage_metadata.thoughts_token_count = thinking_tokens return genai_types.GenerateContentResponse( candidates=candidates, usage_metadata=usage_metadata,