diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md index 4781ae1a93..4bc2789289 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Unreleased +- Fix token over-counting when using the streaming `generateContent` method: each streamed response reports cumulative usage, so the latest counts are now recorded instead of being summed. ([#4152](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4152)) ## Version 0.6b0 (2026-01-27) diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py index 6d2983caf9..692f334109 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py @@ -430,9 +430,9 @@ def _maybe_update_token_counts(self, response: GenerateContentResponse): response, "usage_metadata.candidates_token_count" ) if input_tokens and isinstance(input_tokens, int): - self._input_tokens += input_tokens + self._input_tokens = input_tokens if output_tokens and isinstance(output_tokens, int): - self._output_tokens += output_tokens + self._output_tokens = output_tokens def _maybe_update_error_type(self, response: GenerateContentResponse): if response.candidates: diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py 
b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py index a0b0ec0738..9d702033bb 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py @@ -89,22 +89,18 @@ def test_handles_multiple_ressponses(self): choice_events = self.otel.get_events_named("gen_ai.choice") self.assertEqual(len(choice_events), 2) - def test_includes_token_counts_in_span_aggregated_from_responses(self): - # Configure multiple responses whose input/output tokens should be - # accumulated together when summarizing the end-to-end request. - # - # Input: 1 + 3 + 5 => 4 + 5 => 9 - # Output: 2 + 4 + 6 => 6 + 6 => 12 - self.configure_valid_response(input_tokens=1, output_tokens=2) - self.configure_valid_response(input_tokens=3, output_tokens=4) - self.configure_valid_response(input_tokens=5, output_tokens=6) + def test_includes_token_counts_in_span_not_aggregated_from_responses(self): + # Tokens should not be aggregated in streaming. Cumulative counts are returned on each response. + self.configure_valid_response(input_tokens=3, output_tokens=5) + self.configure_valid_response(input_tokens=3, output_tokens=5) + self.configure_valid_response(input_tokens=3, output_tokens=5) self.generate_content(model="gemini-2.0-flash", contents="Some input") self.otel.assert_has_span_named("generate_content gemini-2.0-flash") span = self.otel.get_span_named("generate_content gemini-2.0-flash") - self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 9) - self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 12) + self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 3) + self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 5) def test_new_semconv_log_has_extra_genai_attributes(self): patched_environ = patch.dict(