traceloop · nirga · Jul 31, 2025 · Jul 16, 2025 · Jul 20, 2025 · Jul 20, 2025
diff --git a/packages/opentelemetry-instrumentation-groq/opentelemetry/instrumentation/groq/__init__.py b/packages/opentelemetry-instrumentation-groq/opentelemetry/instrumentation/groq/__init__.py
@@ -385,14 +385,12 @@ class GroqInstrumentor(BaseInstrumentor):
 
     def __init__(
         self,
-        enrich_token_usage: bool = False,
         exception_logger=None,
         use_legacy_attributes: bool = True,
         get_common_metrics_attributes: Callable[[], dict] = lambda: {},
     ):
         super().__init__()
         Config.exception_logger = exception_logger
-        Config.enrich_token_usage = enrich_token_usage
         Config.get_common_metrics_attributes = get_common_metrics_attributes
         Config.use_legacy_attributes = use_legacy_attributes
 

diff --git a/packages/opentelemetry-instrumentation-groq/opentelemetry/instrumentation/groq/config.py b/packages/opentelemetry-instrumentation-groq/opentelemetry/instrumentation/groq/config.py
@@ -2,7 +2,6 @@
 
 
 class Config:
-    enrich_token_usage = False
     exception_logger = None
     get_common_metrics_attributes: Callable[[], dict] = lambda: {}
     use_legacy_attributes = True
diff --git a/packages/opentelemetry-instrumentation-groq/tests/traces/conftest.py b/packages/opentelemetry-instrumentation-groq/tests/traces/conftest.py
@@ -83,7 +83,7 @@ def async_groq_client():
 
 @pytest.fixture(scope="function")
 def instrument_legacy(reader, tracer_provider, meter_provider):
-    instrumentor = GroqInstrumentor(enrich_token_usage=True)
+    instrumentor = GroqInstrumentor()
     instrumentor.instrument(
         tracer_provider=tracer_provider,
         meter_provider=meter_provider,
@@ -102,7 +102,6 @@ def instrument_with_content(
 
     instrumentor = GroqInstrumentor(
         use_legacy_attributes=False,
-        enrich_token_usage=True,
     )
     instrumentor.instrument(
         tracer_provider=tracer_provider,
@@ -123,7 +122,7 @@ def instrument_with_no_content(
     os.environ.update({TRACELOOP_TRACE_CONTENT: "False"})
 
     instrumentor = GroqInstrumentor(
-        use_legacy_attributes=False, enrich_token_usage=True
+        use_legacy_attributes=False
     )
     instrumentor.instrument(
         tracer_provider=tracer_provider,

diff --git a/...ges/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/__init__.py b/...ges/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/__init__.py
@@ -14,7 +14,6 @@ class OpenAIInstrumentor(BaseInstrumentor):
     def __init__(
         self,
         enrich_assistant: bool = False,
-        enrich_token_usage: bool = False,
         exception_logger=None,
         get_common_metrics_attributes: Callable[[], dict] = lambda: {},
         upload_base64_image: Optional[
@@ -25,7 +24,6 @@ def __init__(
     ):
         super().__init__()
         Config.enrich_assistant = enrich_assistant
-        Config.enrich_token_usage = enrich_token_usage
         Config.exception_logger = exception_logger
         Config.get_common_metrics_attributes = get_common_metrics_attributes
         Config.upload_base64_image = upload_base64_image

diff --git a/...ntelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/__init__.py b/...ntelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/__init__.py
@@ -7,7 +7,6 @@
 from opentelemetry.instrumentation.openai.utils import (
     dont_throw,
     is_openai_v1,
-    should_record_stream_token_usage,
 )
 from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
     GEN_AI_RESPONSE_ID,
@@ -24,8 +23,6 @@
 
 _PYDANTIC_VERSION = version("pydantic")
 
-# tiktoken encodings map for different model, key is model_name, value is tiktoken encoding
-tiktoken_encodings = {}
 
 logger = logging.getLogger(__name__)
 
@@ -355,36 +352,6 @@ def model_as_dict(model):
         return model
 
 
-def get_token_count_from_string(string: str, model_name: str):
-    if not should_record_stream_token_usage():
-        return None
-
-    import tiktoken
-
-    if tiktoken_encodings.get(model_name) is None:
-        try:
-            encoding = tiktoken.encoding_for_model(model_name)
-        except KeyError as ex:
-            # no such model_name in tiktoken
-            logger.warning(
-                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
-            )
-            return None
-        except Exception as ex:
-            # Other exceptions in tiktok
-            logger.warning(
-                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
-            )
-            return None
-
-        tiktoken_encodings[model_name] = encoding
-    else:
-        encoding = tiktoken_encodings.get(model_name)
-
-    token_count = len(encoding.encode(string))
-    return token_count
-
-
 def _token_type(token_type: str):
     if token_type == "prompt_tokens":
         return "input"

diff --git a/...metry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py b/...metry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py
@@ -17,13 +17,11 @@
     _set_span_attribute,
     _set_span_stream_usage,
     _token_type,
-    get_token_count_from_string,
     is_streaming_response,
     metric_shared_attributes,
     model_as_dict,
     propagate_trace_context,
     set_tools_attributes,
-    should_record_stream_token_usage,
 )
 from opentelemetry.instrumentation.openai.shared.config import Config
 from opentelemetry.instrumentation.openai.shared.event_emitter import emit_event
@@ -529,48 +527,17 @@ def _set_completions(span, choices):
 def _set_streaming_token_metrics(
     request_kwargs, complete_response, span, token_counter, shared_attributes
 ):
-    if not should_record_stream_token_usage():
-        return
-
     prompt_usage = -1
     completion_usage = -1
 
-    # First, try to get usage from API response
+    # Take token counts only from the API response; a missing or zero count stays at -1
     if complete_response.get("usage"):
         usage = complete_response["usage"]
         if usage.get("prompt_tokens"):
             prompt_usage = usage["prompt_tokens"]
         if usage.get("completion_tokens"):
             completion_usage = usage["completion_tokens"]
 
-    # If API response doesn't have usage, fallback to tiktoken calculation
-    if prompt_usage == -1 or completion_usage == -1:
-        model_name = (
-            complete_response.get("model") or request_kwargs.get(
-                "model") or "gpt-4"
-        )
-
-        # Calculate prompt tokens if not available from API
-        if prompt_usage == -1 and request_kwargs and request_kwargs.get("messages"):
-            prompt_content = ""
-            for msg in request_kwargs.get("messages"):
-                if msg.get("content"):
-                    prompt_content += msg.get("content")
-            if model_name and should_record_stream_token_usage():
-                prompt_usage = get_token_count_from_string(
-                    prompt_content, model_name)
-
-        # Calculate completion tokens if not available from API
-        if completion_usage == -1 and complete_response.get("choices"):
-            completion_content = ""
-            for choice in complete_response.get("choices"):
-                if choice.get("message") and choice.get("message").get("content"):
-                    completion_content += choice["message"]["content"]
-            if model_name and should_record_stream_token_usage():
-                completion_usage = get_token_count_from_string(
-                    completion_content, model_name
-                )
-
     # span record
     _set_span_stream_usage(span, prompt_usage, completion_usage)
 

diff --git a/...instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py b/...instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py
@@ -8,11 +8,9 @@
     _set_response_attributes,
     _set_span_attribute,
     _set_span_stream_usage,
-    get_token_count_from_string,
     is_streaming_response,
     model_as_dict,
     propagate_trace_context,
-    should_record_stream_token_usage,
 )
 from opentelemetry.instrumentation.openai.shared.config import Config
 from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
@@ -231,35 +229,19 @@ def _emit_streaming_response_events(complete_response):
 
 @dont_throw
 def _set_token_usage(span, request_kwargs, complete_response):
-    # use tiktoken calculate token usage
-    if should_record_stream_token_usage():
-        prompt_usage = -1
-        completion_usage = -1
+    prompt_usage = -1
+    completion_usage = -1
 
-        # prompt_usage
-        if request_kwargs and request_kwargs.get("prompt"):
-            prompt_content = request_kwargs.get("prompt")
-            model_name = complete_response.get("model") or None
+    # Take token counts only from the API response; a missing or zero count stays at -1
+    if complete_response.get("usage"):
+        usage = complete_response["usage"]
+        if usage.get("prompt_tokens"):
+            prompt_usage = usage["prompt_tokens"]
+        if usage.get("completion_tokens"):
+            completion_usage = usage["completion_tokens"]
 
-            if model_name:
-                prompt_usage = get_token_count_from_string(prompt_content, model_name)
-
-        # completion_usage
-        if complete_response.get("choices"):
-            completion_content = ""
-            model_name = complete_response.get("model") or None
-
-            for choice in complete_response.get("choices"):
-                if choice.get("text"):
-                    completion_content += choice.get("text")
-
-            if model_name:
-                completion_usage = get_token_count_from_string(
-                    completion_content, model_name
-                )
-
-        # span record
-        _set_span_stream_usage(span, prompt_usage, completion_usage)
+    # span record
+    _set_span_stream_usage(span, prompt_usage, completion_usage)
 
 
 @dont_throw
@@ -269,6 +251,11 @@ def _accumulate_streaming_response(complete_response, item):
 
     complete_response["model"] = item.get("model")
     complete_response["id"] = item.get("id")
+
+    # Record usage from any chunk that carries it; a later chunk replaces an earlier value
+    if item.get("usage"):
+        complete_response["usage"] = item.get("usage")
+
     for choice in item.get("choices"):
         index = choice.get("index")
         if len(complete_response.get("choices")) <= index:

diff --git a/...pentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/config.py b/...pentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/config.py
@@ -4,7 +4,6 @@
 
 
 class Config:
-    enrich_token_usage = False
     enrich_assistant = False
     exception_logger = None
     get_common_metrics_attributes: Callable[[], dict] = lambda: {}

diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py
@@ -31,10 +31,6 @@ def is_metrics_enabled() -> bool:
     return (os.getenv("TRACELOOP_METRICS_ENABLED") or "true").lower() == "true"
 
 
-def should_record_stream_token_usage():
-    return Config.enrich_token_usage
-
-
 def _with_image_gen_metric_wrapper(func):
     def _with_metric(duration_histogram, exception_counter):
         def wrapper(wrapped, instance, args, kwargs):