Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -385,14 +385,12 @@ class GroqInstrumentor(BaseInstrumentor):

def __init__(
    self,
    enrich_token_usage: bool = False,
    exception_logger=None,
    use_legacy_attributes: bool = True,
    get_common_metrics_attributes: Callable[[], dict] = lambda: {},
):
    """Create the instrumentor and publish its options on ``Config``.

    Nothing is stored on the instance itself: each argument is assigned
    to the same-named attribute of ``Config``, so constructing another
    instrumentor overwrites the values set by an earlier one.

    Args:
        enrich_token_usage: Stored as ``Config.enrich_token_usage``.
        exception_logger: Stored as ``Config.exception_logger``.
        use_legacy_attributes: Stored as
            ``Config.use_legacy_attributes``.
        get_common_metrics_attributes: Zero-argument callable returning
            a dict (per its annotation); stored as
            ``Config.get_common_metrics_attributes``.
    """
    super().__init__()
    Config.exception_logger = exception_logger
    Config.enrich_token_usage = enrich_token_usage
    Config.get_common_metrics_attributes = get_common_metrics_attributes
    Config.use_legacy_attributes = use_legacy_attributes

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@


class Config:
    """Class-level option values read by the instrumentation.

    The attributes below are the defaults; they are plain class
    attributes, so assigning ``Config.<name> = value`` changes the value
    for every reader.
    """

    # Off by default.
    enrich_token_usage = False
    # No exception logger unless one is assigned.
    exception_logger = None
    # Zero-argument callable returning a dict (per the annotation); the
    # default produces a fresh empty dict on every call.
    get_common_metrics_attributes: Callable[[], dict] = lambda: {}
    # On by default.
    use_legacy_attributes = True
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def async_groq_client():

@pytest.fixture(scope="function")
def instrument_legacy(reader, tracer_provider, meter_provider):
instrumentor = GroqInstrumentor(enrich_token_usage=True)
instrumentor = GroqInstrumentor()
instrumentor.instrument(
tracer_provider=tracer_provider,
meter_provider=meter_provider,
Expand All @@ -102,7 +102,6 @@ def instrument_with_content(

instrumentor = GroqInstrumentor(
use_legacy_attributes=False,
enrich_token_usage=True,
)
instrumentor.instrument(
tracer_provider=tracer_provider,
Expand All @@ -123,7 +122,7 @@ def instrument_with_no_content(
os.environ.update({TRACELOOP_TRACE_CONTENT: "False"})

instrumentor = GroqInstrumentor(
use_legacy_attributes=False, enrich_token_usage=True
use_legacy_attributes=False
)
instrumentor.instrument(
tracer_provider=tracer_provider,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ class OpenAIInstrumentor(BaseInstrumentor):
def __init__(
self,
enrich_assistant: bool = False,
enrich_token_usage: bool = False,
exception_logger=None,
get_common_metrics_attributes: Callable[[], dict] = lambda: {},
upload_base64_image: Optional[
Expand All @@ -25,7 +24,6 @@ def __init__(
):
super().__init__()
Config.enrich_assistant = enrich_assistant
Config.enrich_token_usage = enrich_token_usage
Config.exception_logger = exception_logger
Config.get_common_metrics_attributes = get_common_metrics_attributes
Config.upload_base64_image = upload_base64_image
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from opentelemetry.instrumentation.openai.utils import (
dont_throw,
is_openai_v1,
should_record_stream_token_usage,
)
from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
GEN_AI_RESPONSE_ID,
Expand All @@ -24,8 +23,6 @@

_PYDANTIC_VERSION = version("pydantic")

# tiktoken encodings map for different model, key is model_name, value is tiktoken encoding
tiktoken_encodings = {}

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -355,36 +352,6 @@ def model_as_dict(model):
return model


def get_token_count_from_string(string: str, model_name: str):
    """Count the tokens of ``string`` with the tiktoken encoding of a model.

    Encodings are memoized per model name in the module-level
    ``tiktoken_encodings`` dict, so ``tiktoken.encoding_for_model`` is
    called at most once per model that resolves successfully.

    Args:
        string: Text to tokenize.
        model_name: Model name passed to ``tiktoken.encoding_for_model``.

    Returns:
        The number of tokens, or ``None`` when
        ``should_record_stream_token_usage()`` is falsy or no encoding
        could be obtained for ``model_name``.
    """
    if not should_record_stream_token_usage():
        return None

    # Imported only after the guard above, so tiktoken is not loaded when
    # token counting is switched off.
    import tiktoken

    encoding = tiktoken_encodings.get(model_name)
    if encoding is None:
        try:
            encoding = tiktoken.encoding_for_model(model_name)
        except Exception as ex:
            # One handler covers both KeyError (model name unknown to
            # tiktoken) and any other tiktoken failure: the two cases were
            # handled identically, by logging a warning and giving up.
            logger.warning(
                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
            )
            return None

        tiktoken_encodings[model_name] = encoding

    return len(encoding.encode(string))


def _token_type(token_type: str):
if token_type == "prompt_tokens":
return "input"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,11 @@
_set_span_attribute,
_set_span_stream_usage,
_token_type,
get_token_count_from_string,
is_streaming_response,
metric_shared_attributes,
model_as_dict,
propagate_trace_context,
set_tools_attributes,
should_record_stream_token_usage,
)
from opentelemetry.instrumentation.openai.shared.config import Config
from opentelemetry.instrumentation.openai.shared.event_emitter import emit_event
Expand Down Expand Up @@ -529,48 +527,17 @@ def _set_completions(span, choices):
def _set_streaming_token_metrics(
request_kwargs, complete_response, span, token_counter, shared_attributes
):
if not should_record_stream_token_usage():
return

prompt_usage = -1
completion_usage = -1

# First, try to get usage from API response
# Use token usage from API response only
if complete_response.get("usage"):
usage = complete_response["usage"]
if usage.get("prompt_tokens"):
prompt_usage = usage["prompt_tokens"]
if usage.get("completion_tokens"):
completion_usage = usage["completion_tokens"]

# If API response doesn't have usage, fallback to tiktoken calculation
if prompt_usage == -1 or completion_usage == -1:
model_name = (
complete_response.get("model") or request_kwargs.get(
"model") or "gpt-4"
)

# Calculate prompt tokens if not available from API
if prompt_usage == -1 and request_kwargs and request_kwargs.get("messages"):
prompt_content = ""
for msg in request_kwargs.get("messages"):
if msg.get("content"):
prompt_content += msg.get("content")
if model_name and should_record_stream_token_usage():
prompt_usage = get_token_count_from_string(
prompt_content, model_name)

# Calculate completion tokens if not available from API
if completion_usage == -1 and complete_response.get("choices"):
completion_content = ""
for choice in complete_response.get("choices"):
if choice.get("message") and choice.get("message").get("content"):
completion_content += choice["message"]["content"]
if model_name and should_record_stream_token_usage():
completion_usage = get_token_count_from_string(
completion_content, model_name
)

# span record
_set_span_stream_usage(span, prompt_usage, completion_usage)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@
_set_response_attributes,
_set_span_attribute,
_set_span_stream_usage,
get_token_count_from_string,
is_streaming_response,
model_as_dict,
propagate_trace_context,
should_record_stream_token_usage,
)
from opentelemetry.instrumentation.openai.shared.config import Config
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
Expand Down Expand Up @@ -231,35 +229,19 @@ def _emit_streaming_response_events(complete_response):

@dont_throw
def _set_token_usage(span, request_kwargs, complete_response):
# use tiktoken calculate token usage
if should_record_stream_token_usage():
prompt_usage = -1
completion_usage = -1
prompt_usage = -1
completion_usage = -1

# prompt_usage
if request_kwargs and request_kwargs.get("prompt"):
prompt_content = request_kwargs.get("prompt")
model_name = complete_response.get("model") or None
# Use token usage from API response only
if complete_response.get("usage"):
usage = complete_response["usage"]
if usage.get("prompt_tokens"):
prompt_usage = usage["prompt_tokens"]
if usage.get("completion_tokens"):
completion_usage = usage["completion_tokens"]

if model_name:
prompt_usage = get_token_count_from_string(prompt_content, model_name)

# completion_usage
if complete_response.get("choices"):
completion_content = ""
model_name = complete_response.get("model") or None

for choice in complete_response.get("choices"):
if choice.get("text"):
completion_content += choice.get("text")

if model_name:
completion_usage = get_token_count_from_string(
completion_content, model_name
)

# span record
_set_span_stream_usage(span, prompt_usage, completion_usage)
# span record
_set_span_stream_usage(span, prompt_usage, completion_usage)


@dont_throw
Expand All @@ -269,6 +251,11 @@ def _accumulate_streaming_response(complete_response, item):

complete_response["model"] = item.get("model")
complete_response["id"] = item.get("id")

# capture usage information from the stream chunks
if item.get("usage"):
complete_response["usage"] = item.get("usage")

for choice in item.get("choices"):
index = choice.get("index")
if len(complete_response.get("choices")) <= index:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@


class Config:
enrich_token_usage = False
enrich_assistant = False
exception_logger = None
get_common_metrics_attributes: Callable[[], dict] = lambda: {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ def is_metrics_enabled() -> bool:
return (os.getenv("TRACELOOP_METRICS_ENABLED") or "true").lower() == "true"


def should_record_stream_token_usage():
    """Return the current value of ``Config.enrich_token_usage``."""
    return Config.enrich_token_usage


def _with_image_gen_metric_wrapper(func):
def _with_metric(duration_histogram, exception_counter):
def wrapper(wrapped, instance, args, kwargs):
Expand Down
Loading