From b5db09d041bcaaddbb45edcbf2e824c892404d8b Mon Sep 17 00:00:00 2001 From: Max Deygin Date: Mon, 23 Mar 2026 13:59:29 +0200 Subject: [PATCH 01/19] feat(bedrock): migrate to semconv 0.5.0 gen_ai conventions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Bump opentelemetry-semantic-conventions-ai to >=0.5.0,<0.6.0 - GEN_AI_SYSTEM → GEN_AI_PROVIDER_NAME with GenAiSystemValues.AWS_BEDROCK - LLM_REQUEST_TYPE → GEN_AI_OPERATION_NAME with GenAiOperationNameValues - LLM_USAGE_TOTAL_TOKENS → GEN_AI_USAGE_TOTAL_TOKENS - Indexed GEN_AI_PROMPT/COMPLETION → GEN_AI_INPUT/OUTPUT_MESSAGES (JSON) - Hardcoded "AWS" → "aws.bedrock" via GenAiSystemValues enum - Add shared semconv compliance test (test_semconv.py) - Update all trace/metric tests for new attribute names and values Co-Authored-By: Claude Opus 4.6 (1M context) --- .../instrumentation/bedrock/__init__.py | 5 +- .../instrumentation/bedrock/event_emitter.py | 2 +- .../instrumentation/bedrock/guardrail.py | 7 +- .../instrumentation/bedrock/span_utils.py | 278 +++++++++--------- .../test_bedrock_guardrails_metrics.py | 4 +- .../tests/metrics/test_bedrock_metrics.py | 4 +- .../tests/test_semconv.py | 1 + .../tests/traces/test_ai21.py | 13 +- .../tests/traces/test_anthropic.py | 123 ++++---- .../tests/traces/test_cohere.py | 40 ++- .../tests/traces/test_guardrails.py | 68 +++-- .../tests/traces/test_imported_model.py | 31 +- .../tests/traces/test_meta.py | 106 ++++--- .../tests/traces/test_nova.py | 211 ++++++------- .../tests/traces/test_titan.py | 138 ++++----- 15 files changed, 541 insertions(+), 490 deletions(-) create mode 100644 packages/opentelemetry-instrumentation-bedrock/tests/test_semconv.py diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py index 885dd92150..678adf26bc 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py @@ -48,6 +48,9 @@ unwrap, ) from opentelemetry.metrics import Counter, Histogram, Meter, get_meter +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( + GenAiSystemValues, +) from opentelemetry.semconv_ai import ( SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY, Meters, @@ -420,7 +423,7 @@ def wrap(*args, **kwargs): def _get_vendor_model(modelId): # Docs: # https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html#inference-profiles-support-system - provider = "AWS" + provider = GenAiSystemValues.AWS_BEDROCK.value model_vendor = "imported_model" model = modelId diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/event_emitter.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/event_emitter.py index 28be1c5ac6..bb77c4571f 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/event_emitter.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/event_emitter.py @@ -25,7 +25,7 @@ class Roles(Enum): """The valid roles for naming the message event.""" EVENT_ATTRIBUTES = { - GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.AWS_BEDROCK.value + GenAIAttributes.GEN_AI_PROVIDER_NAME: GenAIAttributes.GenAiSystemValues.AWS_BEDROCK.value } """The attributes to be used for the event.""" diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/guardrail.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/guardrail.py index 565039f6d5..1cdef39abf 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/guardrail.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/guardrail.py @@ -2,6 +2,9 @@ from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( + GenAiSystemValues, +) from opentelemetry.instrumentation.bedrock.span_utils import set_guardrail_attributes @@ -156,7 +159,7 @@ def guardrail_converse(span, response, vendor, model, metric_params): attrs = { "gen_ai.vendor": vendor, GenAIAttributes.GEN_AI_RESPONSE_MODEL: model, - GenAIAttributes.GEN_AI_SYSTEM: "bedrock", + GenAIAttributes.GEN_AI_PROVIDER_NAME: GenAiSystemValues.AWS_BEDROCK.value, } input_filters = None output_filters = [] @@ -185,7 +188,7 @@ def guardrail_handling(span, response_body, vendor, model, metric_params): attrs = { "gen_ai.vendor": vendor, GenAIAttributes.GEN_AI_RESPONSE_MODEL: model, - GenAIAttributes.GEN_AI_SYSTEM: "bedrock", + GenAIAttributes.GEN_AI_PROVIDER_NAME: GenAiSystemValues.AWS_BEDROCK.value, } if "amazon-bedrock-trace" in response_body: bedrock_trace = response_body["amazon-bedrock-trace"] diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py index 0edc750b4b..2065827d4f 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py @@ -10,10 +10,11 @@ from opentelemetry.semconv._incubating.attributes.aws_attributes import ( AWS_BEDROCK_GUARDRAIL_ID ) -from opentelemetry.semconv_ai import ( - LLMRequestTypeValues, - SpanAttributes, +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( + GenAiOperationNameValues, + GenAiSystemValues, ) +from opentelemetry.semconv_ai import SpanAttributes PROMPT_FILTER_KEY = "prompt_filter_results" CONTENT_FILTER_KEY = "content_filter_results" @@ -37,17 +38,17 @@ def set_model_message_span_attributes(model_vendor, span, request_body): if "prompt" in request_body: _set_prompt_span_attributes(span, request_body) elif "messages" in request_body: - for idx, message in enumerate(request_body.get("messages")): - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_PROMPT}.{idx}.role", - message.get("role"), - ) - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", - json.dumps(message.get("content")), - ) + input_messages = [] + for message in request_body.get("messages"): + input_messages.append({ + "role": message.get("role"), + "content": json.dumps(message.get("content")), + }) + _set_span_attribute( + span, + GenAIAttributes.GEN_AI_INPUT_MESSAGES, + json.dumps(input_messages), + ) elif model_vendor == "ai21": _set_prompt_span_attributes(span, request_body) elif model_vendor == "meta": @@ -91,7 +92,7 @@ def set_model_span_attributes( _set_span_attribute(span, AWS_BEDROCK_GUARDRAIL_ID, _guardrail_value(kwargs)) - _set_span_attribute(span, GenAIAttributes.GEN_AI_SYSTEM, provider) + _set_span_attribute(span, GenAIAttributes.GEN_AI_PROVIDER_NAME, provider) _set_span_attribute(span, GenAIAttributes.GEN_AI_REQUEST_MODEL, model) _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_MODEL, response_model) _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_ID, response_id) @@ -133,26 +134,28 @@ def set_guardrail_attributes(span, input_filters, output_filters): if input_filters: _set_span_attribute( span, - f"{SpanAttributes.LLM_PROMPTS}.{PROMPT_FILTER_KEY}", + f"{GenAIAttributes.GEN_AI_PROMPT}.{PROMPT_FILTER_KEY}", json.dumps(input_filters, default=str) ) if output_filters: _set_span_attribute( span, - f"{SpanAttributes.LLM_COMPLETIONS}.{CONTENT_FILTER_KEY}", + f"{GenAIAttributes.GEN_AI_COMPLETION}.{CONTENT_FILTER_KEY}", json.dumps(output_filters, default=str) ) def _set_prompt_span_attributes(span, request_body): _set_span_attribute( - span, f"{GenAIAttributes.GEN_AI_PROMPT}.0.user", request_body.get("prompt") + span, + GenAIAttributes.GEN_AI_INPUT_MESSAGES, + json.dumps([{"role": "user", "content": request_body.get("prompt")}]), ) def _set_cohere_span_attributes(span, request_body, response_body, metric_params): _set_span_attribute( - span, SpanAttributes.LLM_REQUEST_TYPE, LLMRequestTypeValues.COMPLETION.value + span, GenAIAttributes.GEN_AI_OPERATION_NAME, GenAiOperationNameValues.TEXT_COMPLETION.value ) _set_span_attribute(span, GenAIAttributes.GEN_AI_REQUEST_TOP_P, request_body.get("p")) _set_span_attribute( @@ -183,19 +186,24 @@ def _set_cohere_span_attributes(span, request_body, response_body, metric_params def _set_generations_span_attributes(span, response_body): - for i, generation in enumerate(response_body.get("generations")): - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content", - generation.get("text"), - ) + output_messages = [] + for generation in response_body.get("generations"): + output_messages.append({ + "role": "assistant", + "content": generation.get("text"), + }) + _set_span_attribute( + span, + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps(output_messages), + ) def _set_anthropic_completion_span_attributes( span, request_body, response_body, headers, metric_params ): _set_span_attribute( - span, SpanAttributes.LLM_REQUEST_TYPE, LLMRequestTypeValues.COMPLETION.value + span, GenAIAttributes.GEN_AI_OPERATION_NAME, GenAiOperationNameValues.TEXT_COMPLETION.value ) _set_span_attribute( span, GenAIAttributes.GEN_AI_REQUEST_TOP_P, request_body.get("top_p") @@ -250,17 +258,14 @@ def _set_anthropic_response_span_attributes(span, response_body): if response_body.get("completion") is not None: _set_span_attribute( span, - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", - response_body.get("completion"), + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps([{"role": "assistant", "content": response_body.get("completion")}]), ) elif response_body.get("content") is not None: - _set_span_attribute( - span, f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", "assistant" - ) _set_span_attribute( span, - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", - json.dumps(response_body.get("content")), + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps([{"role": "assistant", "content": json.dumps(response_body.get("content"))}]), ) @@ -268,7 +273,7 @@ def _set_anthropic_messages_span_attributes( span, request_body, response_body, headers, metric_params ): _set_span_attribute( - span, SpanAttributes.LLM_REQUEST_TYPE, LLMRequestTypeValues.CHAT.value + span, GenAIAttributes.GEN_AI_OPERATION_NAME, GenAiOperationNameValues.CHAT.value ) _set_span_attribute( span, GenAIAttributes.GEN_AI_REQUEST_TOP_P, request_body.get("top_p") @@ -348,7 +353,7 @@ def _count_anthropic_tokens(messages: list[str]): def _set_ai21_span_attributes(span, request_body, response_body, metric_params): _set_span_attribute( - span, SpanAttributes.LLM_REQUEST_TYPE, LLMRequestTypeValues.COMPLETION.value + span, GenAIAttributes.GEN_AI_OPERATION_NAME, GenAiOperationNameValues.TEXT_COMPLETION.value ) _set_span_attribute( span, GenAIAttributes.GEN_AI_REQUEST_TOP_P, request_body.get("topP") @@ -369,17 +374,22 @@ def _set_ai21_span_attributes(span, request_body, response_body, metric_params): def _set_span_completions_attributes(span, response_body): - for i, completion in enumerate(response_body.get("completions")): - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content", - completion.get("data").get("text"), - ) + output_messages = [] + for completion in response_body.get("completions"): + output_messages.append({ + "role": "assistant", + "content": completion.get("data").get("text"), + }) + _set_span_attribute( + span, + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps(output_messages), + ) def _set_llama_span_attributes(span, request_body, response_body, metric_params): _set_span_attribute( - span, SpanAttributes.LLM_REQUEST_TYPE, LLMRequestTypeValues.COMPLETION.value + span, GenAIAttributes.GEN_AI_OPERATION_NAME, GenAiOperationNameValues.TEXT_COMPLETION.value ) _set_span_attribute( span, GenAIAttributes.GEN_AI_REQUEST_TOP_P, request_body.get("top_p") @@ -401,36 +411,35 @@ def _set_llama_span_attributes(span, request_body, response_body, metric_params) def _set_llama_prompt_span_attributes(span, request_body): _set_span_attribute( - span, f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", request_body.get("prompt") + span, + GenAIAttributes.GEN_AI_INPUT_MESSAGES, + json.dumps([{"role": "user", "content": request_body.get("prompt")}]), ) - _set_span_attribute(span, f"{GenAIAttributes.GEN_AI_PROMPT}.0.role", "user") def _set_llama_response_span_attributes(span, response_body): if response_body.get("generation"): _set_span_attribute( - span, f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role", "assistant" + span, + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps([{"role": "assistant", "content": response_body.get("generation")}]), ) + else: + output_messages = [] + for generation in response_body.get("generations"): + output_messages.append({"role": "assistant", "content": generation}) _set_span_attribute( span, - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", - response_body.get("generation"), + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps(output_messages), ) - else: - for i, generation in enumerate(response_body.get("generations")): - _set_span_attribute( - span, f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.role", "assistant" - ) - _set_span_attribute( - span, f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content", generation - ) def _set_amazon_span_attributes( span, request_body, response_body, headers, metric_params ): _set_span_attribute( - span, SpanAttributes.LLM_REQUEST_TYPE, LLMRequestTypeValues.COMPLETION.value + span, GenAIAttributes.GEN_AI_OPERATION_NAME, GenAiOperationNameValues.TEXT_COMPLETION.value ) if "textGenerationConfig" in request_body: @@ -486,66 +495,68 @@ def _set_amazon_input_span_attributes(span, request_body): if "inputText" in request_body: _set_span_attribute( span, - f"{GenAIAttributes.GEN_AI_PROMPT}.0.user", - request_body.get("inputText"), + GenAIAttributes.GEN_AI_INPUT_MESSAGES, + json.dumps([{"role": "user", "content": request_body.get("inputText")}]), ) else: - prompt_idx = 0 + input_messages = [] if "system" in request_body: - for idx, prompt in enumerate(request_body["system"]): - prompt_idx = idx + 1 - _set_span_attribute( - span, f"{GenAIAttributes.GEN_AI_PROMPT}.{idx}.role", "system" - ) - # TODO: add support for "image" - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_PROMPT}.{idx}.content", - prompt.get("text"), - ) - for idx, prompt in enumerate(request_body["messages"]): - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_PROMPT}.{prompt_idx + idx}.role", - prompt.get("role"), - ) - # TODO: here we stringify the object, consider moving these to events or prompt.{i}.content.{j} - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_PROMPT}.{prompt_idx + idx}.content", - json.dumps(prompt.get("content", ""), default=str), - ) + for prompt in request_body["system"]: + input_messages.append({ + "role": "system", + "content": prompt.get("text"), + }) + for prompt in request_body["messages"]: + input_messages.append({ + "role": prompt.get("role"), + "content": json.dumps(prompt.get("content", ""), default=str), + }) + _set_span_attribute( + span, + GenAIAttributes.GEN_AI_INPUT_MESSAGES, + json.dumps(input_messages), + ) def _set_amazon_response_span_attributes(span, response_body): if "results" in response_body: - for i, result in enumerate(response_body.get("results")): - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content", - result.get("outputText"), - ) + output_messages = [] + for result in response_body.get("results"): + output_messages.append({ + "role": "assistant", + "content": result.get("outputText"), + }) + _set_span_attribute( + span, + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps(output_messages), + ) elif "outputText" in response_body: _set_span_attribute( span, - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", - response_body.get("outputText"), + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps([{"role": "assistant", "content": response_body.get("outputText")}]), ) elif "output" in response_body: msgs = response_body.get("output").get("message", {}).get("content", []) - for idx, msg in enumerate(msgs): - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_COMPLETION}.{idx}.content", - msg.get("text"), - ) + output_messages = [] + for msg in msgs: + output_messages.append({ + "role": "assistant", + "content": msg.get("text"), + }) + _set_span_attribute( + span, + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps(output_messages), + ) def _set_imported_model_span_attributes( span, request_body, response_body, metric_params ): _set_span_attribute( - span, SpanAttributes.LLM_REQUEST_TYPE, LLMRequestTypeValues.COMPLETION.value + span, GenAIAttributes.GEN_AI_OPERATION_NAME, GenAiOperationNameValues.TEXT_COMPLETION.value ) _set_span_attribute( span, GenAIAttributes.GEN_AI_REQUEST_TOP_P, request_body.get("topP") @@ -576,14 +587,16 @@ def _set_imported_model_span_attributes( def _set_imported_model_response_span_attributes(span, response_body): _set_span_attribute( span, - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", - response_body.get("generation"), + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps([{"role": "assistant", "content": response_body.get("generation")}]), ) def _set_imported_model_prompt_span_attributes(span, request_body): _set_span_attribute( - span, f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", request_body.get("prompt") + span, + GenAIAttributes.GEN_AI_INPUT_MESSAGES, + json.dumps([{"role": "user", "content": request_body.get("prompt")}]), ) @@ -600,7 +613,7 @@ def _record_usage_to_span(span, prompt_tokens, completion_tokens, metric_params) ) _set_span_attribute( span, - SpanAttributes.LLM_USAGE_TOTAL_TOKENS, + SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS, prompt_tokens + completion_tokens, ) @@ -647,16 +660,16 @@ def _metric_shared_attributes( return { "vendor": response_vendor, GenAIAttributes.GEN_AI_RESPONSE_MODEL: response_model, - GenAIAttributes.GEN_AI_SYSTEM: "bedrock", + GenAIAttributes.GEN_AI_PROVIDER_NAME: GenAiSystemValues.AWS_BEDROCK.value, "stream": is_streaming, } def set_converse_model_span_attributes(span, provider, model, kwargs): - _set_span_attribute(span, GenAIAttributes.GEN_AI_SYSTEM, provider) + _set_span_attribute(span, GenAIAttributes.GEN_AI_PROVIDER_NAME, provider) _set_span_attribute(span, GenAIAttributes.GEN_AI_REQUEST_MODEL, model) _set_span_attribute( - span, SpanAttributes.LLM_REQUEST_TYPE, LLMRequestTypeValues.CHAT.value + span, GenAIAttributes.GEN_AI_OPERATION_NAME, GenAiOperationNameValues.CHAT.value ) guardrail_config = kwargs.get("guardrailConfig") @@ -679,32 +692,24 @@ def set_converse_model_span_attributes(span, provider, model, kwargs): def set_converse_input_prompt_span_attributes(kwargs, span): if not should_send_prompts(): return - prompt_idx = 0 + input_messages = [] if "system" in kwargs: - for idx, prompt in enumerate(kwargs["system"]): - prompt_idx = idx + 1 - _set_span_attribute( - span, f"{GenAIAttributes.GEN_AI_PROMPT}.{idx}.role", "system" - ) - # TODO: add support for "image" - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_PROMPT}.{idx}.content", - prompt.get("text"), - ) + for prompt in kwargs["system"]: + input_messages.append({ + "role": "system", + "content": prompt.get("text"), + }) if "messages" in kwargs: - for idx, prompt in enumerate(kwargs["messages"]): - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_PROMPT}.{prompt_idx+idx}.role", - prompt.get("role"), - ) - # TODO: here we stringify the object, consider moving these to events or prompt.{i}.content.{j} - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_PROMPT}.{prompt_idx+idx}.content", - json.dumps(prompt.get("content", ""), default=str), - ) + for prompt in kwargs["messages"]: + input_messages.append({ + "role": prompt.get("role"), + "content": json.dumps(prompt.get("content", ""), default=str), + }) + _set_span_attribute( + span, + GenAIAttributes.GEN_AI_INPUT_MESSAGES, + json.dumps(input_messages), + ) def set_converse_response_span_attributes(response, span): @@ -712,23 +717,22 @@ def set_converse_response_span_attributes(response, span): return if "output" in response: message = response["output"]["message"] + contents = [content.get("text") for content in message["content"]] + content = contents[0] if len(contents) == 1 else json.dumps(contents) _set_span_attribute( - span, f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role", message.get("role") + span, + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps([{"role": message.get("role"), "content": content}]), ) - for idx, content in enumerate(message["content"]): - _set_span_attribute( - span, - f"{GenAIAttributes.GEN_AI_COMPLETION}.{idx}.content", - content.get("text"), - ) def set_converse_streaming_response_span_attributes(response, role, span): if not should_send_prompts(): return - _set_span_attribute(span, f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role", role) _set_span_attribute( - span, f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", "".join(response) + span, + GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, + json.dumps([{"role": role, "content": "".join(response)}]), ) diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_guardrails_metrics.py b/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_guardrails_metrics.py index c4797b9fe5..115a20661c 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_guardrails_metrics.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_guardrails_metrics.py @@ -124,8 +124,8 @@ def assert_guardrails(reader): assert data_point.value > 0 assert ( - metric.data.data_points[0].attributes[GenAIAttributes.GEN_AI_SYSTEM] - == "bedrock" + metric.data.data_points[0].attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] + == "aws.bedrock" ) assert found_activations is True diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_metrics.py b/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_metrics.py index c8686938f7..c616631c26 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_metrics.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_metrics.py @@ -63,8 +63,8 @@ def test_invoke_model_metrics(test_context, brt): ) assert ( - metric.data.data_points[0].attributes[GenAIAttributes.GEN_AI_SYSTEM] - == "bedrock" + metric.data.data_points[0].attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] + == "aws.bedrock" ) assert found_token_metric is True diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/test_semconv.py b/packages/opentelemetry-instrumentation-bedrock/tests/test_semconv.py new file mode 100644 index 0000000000..016c2aac34 --- /dev/null +++ b/packages/opentelemetry-instrumentation-bedrock/tests/test_semconv.py @@ -0,0 +1 @@ +from opentelemetry.semconv_ai._testing import * # noqa: F401, F403 diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_ai21.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_ai21.py index 4a22a80aac..a55769104a 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_ai21.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_ai21.py @@ -5,6 +5,9 @@ from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( + GenAiSystemValues, +) from opentelemetry.semconv_ai import SpanAttributes @@ -47,7 +50,7 @@ def test_ai21_j2_completion_string_content( response_body.get("completions")[0].get("data").get("tokens") ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == meta_span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] + meta_span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] ) @@ -100,7 +103,7 @@ def test_ai21_j2_completion_string_content_with_events_with_content( response_body.get("completions")[0].get("data").get("tokens") ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == meta_span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] + meta_span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] ) @@ -159,7 +162,7 @@ def test_ai21_j2_completion_string_content_with_events_with_no_content( response_body.get("completions")[0].get("data").get("tokens") ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == meta_span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] + meta_span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] ) @@ -186,8 +189,8 @@ def test_ai21_j2_completion_string_content_with_events_with_no_content( def assert_message_in_logs(log: ReadableLogRecord, event_name: str, expected_content: dict): assert log.log_record.event_name == event_name assert ( - log.log_record.attributes.get(GenAIAttributes.GEN_AI_SYSTEM) - == GenAIAttributes.GenAiSystemValues.AWS_BEDROCK.value + log.log_record.attributes.get(GenAIAttributes.GEN_AI_PROVIDER_NAME) + == GenAiSystemValues.AWS_BEDROCK.value ) if not expected_content: diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py index 065253fcd6..4aa15e8d3b 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py @@ -6,6 +6,7 @@ from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GenAiOperationNameValues, GenAiSystemValues from opentelemetry.semconv_ai import SpanAttributes @@ -33,21 +34,23 @@ def test_anthropic_2_completion(instrument_legacy, brt, span_exporter, log_expor assert all(span.name == "bedrock.completion" for span in spans) anthropic_span = spans[0] - assert ( - anthropic_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.user"] - == "Human: Tell me a joke about opentelemetry Assistant:" + input_messages = json.loads( + anthropic_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] ) - assert ( - anthropic_span.attributes.get(f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content") - == completion + assert input_messages[0]["content"] == "Human: Tell me a joke about opentelemetry Assistant:" + assert input_messages[0]["role"] == "user" + + output_messages = json.loads( + anthropic_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) ) + assert output_messages[0]["content"] == completion assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 18 assert anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) # Bedrock does not return the response id for claude-v2:1 assert anthropic_span.attributes.get("gen_ai.response.id") is None @@ -89,7 +92,7 @@ def test_anthropic_2_completion_with_events_with_content( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) # Bedrock does not return the response id for claude-v2:1 assert anthropic_span.attributes.get("gen_ai.response.id") is None @@ -145,7 +148,7 @@ def test_anthropic_2_completion_with_events_with_no_content( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) # Bedrock does not return the response id for claude-v2:1 assert anthropic_span.attributes.get("gen_ai.response.id") is None @@ -199,25 +202,24 @@ def test_anthropic_3_completion_complex_content( assert all(span.name == "bedrock.completion" for span in spans) anthropic_span = spans[0] - assert json.loads( - anthropic_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"] - ) == [ + input_messages = json.loads( + anthropic_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] + ) + assert json.loads(input_messages[0]["content"]) == [ {"type": "text", "text": "Tell me a joke about opentelemetry"}, ] - assert ( - json.loads( - anthropic_span.attributes.get(f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content") - ) - == completion + output_messages = json.loads( + anthropic_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) ) + assert json.loads(output_messages[0]["content"]) == completion assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 16 assert anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) assert ( anthropic_span.attributes.get("gen_ai.response.id") == "msg_bdrk_01Q6Z4xmUkMigo9K4qd1fshW" @@ -269,7 +271,7 @@ def test_anthropic_3_completion_complex_content_with_events_with_content( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) assert ( anthropic_span.attributes.get("gen_ai.response.id") == "msg_bdrk_01Q6Z4xmUkMigo9K4qd1fshW" @@ -339,7 +341,7 @@ def test_anthropic_3_completion_complex_content_with_events_with_no_content( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) assert ( anthropic_span.attributes.get("gen_ai.response.id") == "msg_bdrk_01Q6Z4xmUkMigo9K4qd1fshW" @@ -400,15 +402,17 @@ def test_anthropic_3_completion_streaming( assert all(span.name == "bedrock.completion" for span in spans) anthropic_span = spans[0] - assert json.loads( - anthropic_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"] - ) == [ + input_messages = json.loads( + anthropic_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] + ) + assert json.loads(input_messages[0]["content"]) == [ {"type": "text", "text": "Tell me a joke about opentelemetry"}, ] - assert json.loads( - anthropic_span.attributes.get(f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content") - ) == [ + output_messages = json.loads( + anthropic_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + ) + assert json.loads(output_messages[0]["content"]) == [ { "type": "text", "text": completion, @@ -420,7 +424,7 @@ def test_anthropic_3_completion_streaming( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) assert ( anthropic_span.attributes.get("gen_ai.response.id") == "msg_bdrk_014eJfxWXNnxFKhmuiT8FYf7" @@ -477,7 +481,7 @@ def test_anthropic_3_completion_streaming_with_events_with_content( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) assert ( anthropic_span.attributes.get("gen_ai.response.id") == "msg_bdrk_014eJfxWXNnxFKhmuiT8FYf7" @@ -548,7 +552,7 @@ def test_anthropic_3_completion_streaming_with_events_with_no_content( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) assert ( anthropic_span.attributes.get("gen_ai.response.id") == "msg_bdrk_014eJfxWXNnxFKhmuiT8FYf7" @@ -602,24 +606,22 @@ def test_anthropic_3_completion_string_content( assert all(span.name == "bedrock.completion" for span in spans) anthropic_span = spans[0] - assert ( - json.loads(anthropic_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"]) - == "Tell me a joke about opentelemetry" + input_messages = json.loads( + anthropic_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] ) + assert json.loads(input_messages[0]["content"]) == "Tell me a joke about opentelemetry" - assert ( - json.loads( - anthropic_span.attributes.get(f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content") - ) - == completion + output_messages = json.loads( + anthropic_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) ) + assert json.loads(output_messages[0]["content"]) == completion assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 16 assert anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) assert ( anthropic_span.attributes.get("gen_ai.response.id") == "msg_bdrk_01WR9VHqpyBzBhzgwCDapaQD" @@ -669,7 +671,7 @@ def test_anthropic_3_completion_string_content_with_events_with_content( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) assert ( anthropic_span.attributes.get("gen_ai.response.id") == "msg_bdrk_01WR9VHqpyBzBhzgwCDapaQD" @@ -732,7 +734,7 @@ def test_anthropic_3_completion_string_content_with_events_with_no_content( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) assert ( anthropic_span.attributes.get("gen_ai.response.id") == "msg_bdrk_01WR9VHqpyBzBhzgwCDapaQD" @@ -784,22 +786,23 @@ def test_anthropic_cross_region(instrument_legacy, brt, span_exporter, log_expor anthropic_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "claude-3-7-sonnet-20250219-v1" ) - assert anthropic_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert anthropic_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value - assert anthropic_span.attributes[ - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content" - ] == json.dumps(messages[0]["content"]) - assert ( - anthropic_span.attributes.get(f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content") - == completion + input_messages = json.loads( + anthropic_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] + ) + assert input_messages[0]["content"] == json.dumps(messages[0]["content"]) + output_messages = json.loads( + anthropic_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) ) + assert output_messages[0]["content"] == completion assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 20 assert anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) # Bedrock does not return the response id for claude-v2:1 assert anthropic_span.attributes.get("gen_ai.response.id") is None @@ -840,14 +843,14 @@ def test_anthropic_cross_region_with_events_with_content( anthropic_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "claude-3-7-sonnet-20250219-v1" ) - assert anthropic_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert anthropic_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 20 assert anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) # Bedrock does not return the response id for claude-v2:1 assert anthropic_span.attributes.get("gen_ai.response.id") is None @@ -906,14 +909,14 @@ def test_anthropic_cross_region_with_events_with_no_content( anthropic_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "claude-3-7-sonnet-20250219-v1" ) - assert anthropic_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert anthropic_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 20 assert anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS ) + anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS - ) == anthropic_span.attributes.get(SpanAttributes.LLM_USAGE_TOTAL_TOKENS) + ) == anthropic_span.attributes.get(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) # Bedrock does not return the response id for claude-v2:1 assert anthropic_span.attributes.get("gen_ai.response.id") is None @@ -1040,15 +1043,21 @@ def test_anthropic_converse_stream_with_tool_use( # Assert on model name assert ( - bedrock_span.attributes.get("gen_ai.request.model") + bedrock_span.attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL) == "claude-3-sonnet-20240229-v1:0" ) # Assert on vendor - assert bedrock_span.attributes.get("gen_ai.system") == "AWS" + assert ( + bedrock_span.attributes.get(GenAIAttributes.GEN_AI_PROVIDER_NAME) + == GenAiSystemValues.AWS_BEDROCK.value + ) # Assert on request type - assert bedrock_span.attributes.get("llm.request.type") == "chat" + assert ( + bedrock_span.attributes.get(GenAIAttributes.GEN_AI_OPERATION_NAME) + == GenAiOperationNameValues.CHAT.value + ) # tool use should have been triggered # (This test validates that non-text deltas don't crash the instrumentation) @@ -1063,8 +1072,8 @@ def test_anthropic_converse_stream_with_tool_use( def assert_message_in_logs(log: ReadableLogRecord, event_name: str, expected_content: dict): assert log.log_record.event_name == event_name assert ( - log.log_record.attributes.get(GenAIAttributes.GEN_AI_SYSTEM) - == GenAIAttributes.GenAiSystemValues.AWS_BEDROCK.value + log.log_record.attributes.get(GenAIAttributes.GEN_AI_PROVIDER_NAME) + == GenAiSystemValues.AWS_BEDROCK.value ) if not expected_content: diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_cohere.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_cohere.py index e755ef0dc1..976e8f3f60 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_cohere.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_cohere.py @@ -5,7 +5,10 @@ from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) -from opentelemetry.semconv_ai import SpanAttributes +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( + GenAiOperationNameValues, + GenAiSystemValues, +) @pytest.mark.vcr @@ -42,20 +45,23 @@ def test_cohere_completion(instrument_legacy, brt, span_exporter, log_exporter): ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on prompt - assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.user"] == prompt + input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["content"] == prompt + assert input_messages[0]["role"] == "user" # Assert on response generated_text = response_body["generations"][0]["text"] - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"] - == generated_text - ) + output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) + assert output_messages[0]["content"] == generated_text assert ( bedrock_span.attributes.get("gen_ai.response.id") == "3266ca30-473c-4491-b6ef-5b1f033798d2" @@ -104,10 +110,13 @@ def test_cohere_completion_with_events_with_no_content( ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on response assert ( @@ -172,10 +181,13 @@ def test_cohere_completion_with_events_with_content( ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on response generated_text = response_body["generations"][0]["text"] @@ -208,8 +220,8 @@ def test_cohere_completion_with_events_with_content( def assert_message_in_logs(log: ReadableLogRecord, event_name: str, expected_content: dict): assert log.log_record.event_name == event_name assert ( - log.log_record.attributes.get(GenAIAttributes.GEN_AI_SYSTEM) - == GenAIAttributes.GenAiSystemValues.AWS_BEDROCK.value + log.log_record.attributes.get(GenAIAttributes.GEN_AI_PROVIDER_NAME) + == GenAiSystemValues.AWS_BEDROCK.value ) if not expected_content: diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_guardrails.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_guardrails.py index cc4802893f..2be2638943 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_guardrails.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_guardrails.py @@ -1,7 +1,13 @@ import json import pytest -from opentelemetry.semconv_ai import SpanAttributes +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, +) +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( + GenAiOperationNameValues, + GenAiSystemValues, +) from opentelemetry.instrumentation.bedrock.span_utils import PROMPT_FILTER_KEY, CONTENT_FILTER_KEY from opentelemetry.semconv._incubating.attributes.aws_attributes import ( AWS_BEDROCK_GUARDRAIL_ID @@ -46,23 +52,26 @@ def test_guardrail_invoke(instrument_legacy, brt, span_exporter, log_exporter): # Assert on model name assert ( - bedrock_span.attributes[SpanAttributes.LLM_REQUEST_MODEL] + bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "titan-text-express-v1" ) # Assert on vendor - assert bedrock_span.attributes[SpanAttributes.LLM_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on guardrail data assert bedrock_span.attributes[AWS_BEDROCK_GUARDRAIL_ID] == f"{guardrailId}:{guardrailVersion}" - assert bedrock_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.{PROMPT_FILTER_KEY}"] != "" - assert bedrock_span.attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{CONTENT_FILTER_KEY}"] != "" + assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.{PROMPT_FILTER_KEY}"] != "" + assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.{CONTENT_FILTER_KEY}"] != "" - input_guardrail = json.loads(bedrock_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.{PROMPT_FILTER_KEY}"]) - output_guardrail = json.loads(bedrock_span.attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{CONTENT_FILTER_KEY}"]) + input_guardrail = json.loads(bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.{PROMPT_FILTER_KEY}"]) + output_guardrail = json.loads(bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.{CONTENT_FILTER_KEY}"]) assert input_guardrail["topic"] == [] assert input_guardrail["content"] == [] @@ -117,22 +126,25 @@ def test_guardrail_invoke_stream(instrument_legacy, brt, span_exporter, log_expo # Assert on model name assert ( - bedrock_span.attributes[SpanAttributes.LLM_REQUEST_MODEL] + bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "titan-text-express-v1" ) # Assert on vendor - assert bedrock_span.attributes[SpanAttributes.LLM_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on guardrail data assert bedrock_span.attributes[AWS_BEDROCK_GUARDRAIL_ID] == f"{guardrailId}:{guardrailVersion}" - assert bedrock_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.{PROMPT_FILTER_KEY}"] != "" - assert bedrock_span.attributes.get(f"{SpanAttributes.LLM_COMPLETIONS}.{CONTENT_FILTER_KEY}") is None + assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.{PROMPT_FILTER_KEY}"] != "" + assert bedrock_span.attributes.get(f"{GenAIAttributes.GEN_AI_COMPLETION}.{CONTENT_FILTER_KEY}") is None - input_guardrail = json.loads(bedrock_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.{PROMPT_FILTER_KEY}"]) + input_guardrail = json.loads(bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.{PROMPT_FILTER_KEY}"]) assert input_guardrail["topic"] == ["topic-1"] assert input_guardrail["content"] == [] @@ -186,23 +198,23 @@ def test_guardrail_converse( # Assert on model name assert ( - bedrock_span.attributes[SpanAttributes.LLM_REQUEST_MODEL] + bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "titan-text-express-v1" ) # Assert on vendor - assert bedrock_span.attributes[SpanAttributes.LLM_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value # Assert on guardrail data assert bedrock_span.attributes[AWS_BEDROCK_GUARDRAIL_ID] == f"{guardrailId}:{guardrailVersion}" - assert bedrock_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.{PROMPT_FILTER_KEY}"] != "" - assert bedrock_span.attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{CONTENT_FILTER_KEY}"] != "" + assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.{PROMPT_FILTER_KEY}"] != "" + assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.{CONTENT_FILTER_KEY}"] != "" - input_guardrail = json.loads(bedrock_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.{PROMPT_FILTER_KEY}"]) - output_guardrail = json.loads(bedrock_span.attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{CONTENT_FILTER_KEY}"]) + input_guardrail = json.loads(bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.{PROMPT_FILTER_KEY}"]) + output_guardrail = json.loads(bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.{CONTENT_FILTER_KEY}"]) assert input_guardrail["topic"] == [] assert input_guardrail["content"] == [] @@ -269,23 +281,23 @@ def test_guardrail_converse_stream( # Assert on model name assert ( - bedrock_span.attributes[SpanAttributes.LLM_REQUEST_MODEL] + bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "titan-text-express-v1" ) # Assert on vendor - assert bedrock_span.attributes[SpanAttributes.LLM_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value # Assert on guardrail data assert bedrock_span.attributes[AWS_BEDROCK_GUARDRAIL_ID] == f"{guardrailId}:{guardrailVersion}" - assert bedrock_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.{PROMPT_FILTER_KEY}"] != "" - assert bedrock_span.attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{CONTENT_FILTER_KEY}"] != "" + assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.{PROMPT_FILTER_KEY}"] != "" + assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.{CONTENT_FILTER_KEY}"] != "" - input_guardrail = json.loads(bedrock_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.{PROMPT_FILTER_KEY}"]) - output_guardrail = json.loads(bedrock_span.attributes[f"{SpanAttributes.LLM_COMPLETIONS}.{CONTENT_FILTER_KEY}"]) + input_guardrail = json.loads(bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.{PROMPT_FILTER_KEY}"]) + output_guardrail = json.loads(bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.{CONTENT_FILTER_KEY}"]) assert input_guardrail["topic"] == [] assert input_guardrail["content"] == [] diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_imported_model.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_imported_model.py index b8a52723a7..f29c707625 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_imported_model.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_imported_model.py @@ -5,8 +5,10 @@ from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) -from opentelemetry.semconv_ai import SpanAttributes - +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( + GenAiOperationNameValues, + GenAiSystemValues, +) @pytest.mark.vcr def test_imported_model_completion(instrument_legacy, brt, span_exporter, log_exporter): @@ -28,18 +30,17 @@ def test_imported_model_completion(instrument_legacy, brt, span_exporter, log_ex == "arn:aws:sagemaker:us-east-1:767398002385:endpoint/endpoint-quick-start-idr7y" ) assert ( - imported_model_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + imported_model_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value ) - assert imported_model_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert imported_model_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value assert imported_model_span.attributes.get("gen_ai.response.id") is None assert imported_model_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 100 assert imported_model_span.attributes[GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE] == 0.5 assert imported_model_span.attributes[GenAIAttributes.GEN_AI_REQUEST_TOP_P] == 2 - assert ( - imported_model_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"] - == prompt - ) + input_messages = json.loads(imported_model_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["content"] == prompt assert data is not None logs = log_exporter.get_finished_logs() @@ -70,9 +71,10 @@ def test_imported_model_completion_with_events_with_content( == "arn:aws:sagemaker:us-east-1:767398002385:endpoint/endpoint-quick-start-idr7y" ) assert ( - imported_model_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + imported_model_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value ) - assert imported_model_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert imported_model_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value assert imported_model_span.attributes.get("gen_ai.response.id") is None assert imported_model_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 100 assert imported_model_span.attributes[GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE] == 0.5 @@ -118,9 +120,10 @@ def test_imported_model_completion_with_events_with_no_content( == "arn:aws:sagemaker:us-east-1:767398002385:endpoint/endpoint-quick-start-idr7y" ) assert ( - imported_model_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + imported_model_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value ) - assert imported_model_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert imported_model_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value assert imported_model_span.attributes.get("gen_ai.response.id") is None assert imported_model_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 100 assert imported_model_span.attributes[GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE] == 0.5 @@ -147,8 +150,8 @@ def test_imported_model_completion_with_events_with_no_content( def assert_message_in_logs(log: ReadableLogRecord, event_name: str, expected_content: dict): assert log.log_record.event_name == event_name assert ( - log.log_record.attributes.get(GenAIAttributes.GEN_AI_SYSTEM) - == GenAIAttributes.GenAiSystemValues.AWS_BEDROCK.value + log.log_record.attributes.get(GenAIAttributes.GEN_AI_PROVIDER_NAME) + == GenAiSystemValues.AWS_BEDROCK.value ) if not expected_content: diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_meta.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_meta.py index 55931697c7..2c964760dd 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_meta.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_meta.py @@ -5,6 +5,9 @@ from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( + GenAiSystemValues, +) from opentelemetry.semconv_ai import SpanAttributes @@ -44,7 +47,7 @@ def test_meta_llama2_completion_string_content( == response_body["generation_token_count"] ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == response_body["generation_token_count"] + response_body["prompt_token_count"] ) assert meta_span.attributes.get("gen_ai.response.id") is None @@ -91,7 +94,7 @@ def test_meta_llama2_completion_string_content_with_events_with_content( == response_body["generation_token_count"] ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == response_body["generation_token_count"] + response_body["prompt_token_count"] ) assert meta_span.attributes.get("gen_ai.response.id") is None @@ -148,7 +151,7 @@ def test_meta_llama2_completion_string_content_with_events_with_no_content( == response_body["generation_token_count"] ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == response_body["generation_token_count"] + response_body["prompt_token_count"] ) assert meta_span.attributes.get("gen_ai.response.id") is None @@ -195,14 +198,21 @@ def test_meta_llama3_completion(instrument_legacy, brt, span_exporter, log_expor == response_body["generation_token_count"] ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == response_body["generation_token_count"] + response_body["prompt_token_count"] ) - assert meta_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"] == prompt - assert ( - meta_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"] - == response_body["generation"] + input_messages = json.loads( + meta_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] + ) + assert len(input_messages) == 1 + assert input_messages[0]["content"] == prompt + + output_messages = json.loads( + meta_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES] ) + assert len(output_messages) == 1 + assert output_messages[0]["content"] == response_body["generation"] + assert meta_span.attributes.get("gen_ai.response.id") is None logs = log_exporter.get_finished_logs() @@ -239,7 +249,7 @@ def test_meta_llama3_completion_with_events_with_content( == response_body["generation_token_count"] ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == response_body["generation_token_count"] + response_body["prompt_token_count"] ) assert meta_span.attributes.get("gen_ai.response.id") is None @@ -288,7 +298,7 @@ def test_meta_llama3_completion_with_events_with_no_content( == response_body["generation_token_count"] ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == response_body["generation_token_count"] + response_body["prompt_token_count"] ) assert meta_span.attributes.get("gen_ai.response.id") is None @@ -344,26 +354,26 @@ def test_meta_converse(instrument_legacy, brt, span_exporter, log_exporter): == response["usage"]["outputTokens"] ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == response["usage"]["totalTokens"] ) - assert meta_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "system" - assert ( - meta_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"] == system_prompt - ) - assert meta_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.1.role"] == "user" - assert meta_span.attributes[ - f"{GenAIAttributes.GEN_AI_PROMPT}.1.content" - ] == json.dumps(messages[0]["content"]) - for i in range(0, len(generated_text)): - assert ( - meta_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.role"] - == "assistant" - ) - assert ( - meta_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content"] - == generated_text[i]["text"] - ) + input_messages = json.loads( + meta_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] + ) + assert len(input_messages) == 2 + assert input_messages[0]["role"] == "system" + assert input_messages[0]["content"] == system_prompt + assert input_messages[1]["role"] == "user" + assert input_messages[1]["content"] == json.dumps(messages[0]["content"]) + + output_messages = json.loads( + meta_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES] + ) + assert len(output_messages) == len(generated_text) + for i in range(len(generated_text)): + assert output_messages[i]["role"] == "assistant" + assert output_messages[i]["content"] == generated_text[i]["text"] + assert meta_span.attributes.get("gen_ai.response.id") is None logs = log_exporter.get_finished_logs() @@ -409,7 +419,7 @@ def test_meta_converse_with_events_with_content( == response["usage"]["outputTokens"] ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == response["usage"]["totalTokens"] ) assert meta_span.attributes.get("gen_ai.response.id") is None @@ -476,7 +486,7 @@ def test_meta_converse_with_events_with_no_content( == response["usage"]["outputTokens"] ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == response["usage"]["totalTokens"] ) assert meta_span.attributes.get("gen_ai.response.id") is None @@ -552,25 +562,25 @@ def test_meta_converse_stream(instrument_legacy, brt, span_exporter, log_exporte meta_span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] == outputTokens ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == inputTokens + outputTokens ) - assert meta_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "system" - assert ( - meta_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"] == system_prompt + input_messages = json.loads( + meta_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] ) - assert meta_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.1.role"] == "user" - assert meta_span.attributes[ - f"{GenAIAttributes.GEN_AI_PROMPT}.1.content" - ] == json.dumps(messages[0]["content"]) + assert len(input_messages) == 2 + assert input_messages[0]["role"] == "system" + assert input_messages[0]["content"] == system_prompt + assert input_messages[1]["role"] == "user" + assert input_messages[1]["content"] == json.dumps(messages[0]["content"]) - assert ( - meta_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role"] - == response_role - ) - assert ( - meta_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"] == content + output_messages = json.loads( + meta_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES] ) + assert len(output_messages) == 1 + assert output_messages[0]["role"] == response_role + assert output_messages[0]["content"] == content + assert meta_span.attributes.get("gen_ai.response.id") is None logs = log_exporter.get_finished_logs() @@ -628,7 +638,7 @@ def test_meta_converse_stream_with_events_with_content( meta_span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] == outputTokens ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == inputTokens + outputTokens ) assert meta_span.attributes.get("gen_ai.response.id") is None @@ -708,7 +718,7 @@ def test_meta_converse_stream_with_events_with_no_content( meta_span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] == outputTokens ) assert ( - meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == inputTokens + outputTokens ) assert meta_span.attributes.get("gen_ai.response.id") is None @@ -740,8 +750,8 @@ def test_meta_converse_stream_with_events_with_no_content( def assert_message_in_logs(log: ReadableLogRecord, event_name: str, expected_content: dict): assert log.log_record.event_name == event_name assert ( - log.log_record.attributes.get(GenAIAttributes.GEN_AI_SYSTEM) - == GenAIAttributes.GenAiSystemValues.AWS_BEDROCK.value + log.log_record.attributes.get(GenAIAttributes.GEN_AI_PROVIDER_NAME) + == GenAiSystemValues.AWS_BEDROCK.value ) if not expected_content: diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py index d99947d939..c6dd447067 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py @@ -5,6 +5,10 @@ from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( + GenAiOperationNameValues, + GenAiSystemValues, +) from opentelemetry.semconv_ai import SpanAttributes @@ -43,31 +47,26 @@ def test_nova_completion(instrument_legacy, brt, span_exporter, log_exporter): assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" - - # Assert on system prompt - assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "system" - assert bedrock_span.attributes[ - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content" - ] == system_list[0].get("text") + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on prompt - assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.1.role"] == "user" - - assert bedrock_span.attributes[ - f"{GenAIAttributes.GEN_AI_PROMPT}.1.content" - ] == json.dumps(message_list[0].get("content"), default=str) + input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["role"] == "system" + assert input_messages[0]["content"] == system_list[0].get("text") + assert input_messages[1]["role"] == "user" + assert input_messages[1]["content"] == json.dumps(message_list[0].get("content"), default=str) # Assert on response generated_text = response_body["output"]["message"]["content"] + output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) for i in range(0, len(generated_text)): - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content"] - == generated_text[i]["text"] - ) + assert output_messages[i]["content"] == generated_text[i]["text"] # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 500 @@ -120,10 +119,13 @@ def test_nova_completion_with_events_with_content( assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on response generated_text = response_body["output"]["message"]["content"] @@ -199,10 +201,13 @@ def test_nova_completion_with_events_with_no_content( assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 500 @@ -278,30 +283,25 @@ def test_nova_invoke_stream(instrument_legacy, brt, span_exporter, log_exporter) assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" - - # Assert on system prompt - assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "system" - assert bedrock_span.attributes[ - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content" - ] == system_list[0].get("text") + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on prompt - assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.1.role"] == "user" - - assert bedrock_span.attributes[ - f"{GenAIAttributes.GEN_AI_PROMPT}.1.content" - ] == json.dumps(message_list[0].get("content"), default=str) + input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["role"] == "system" + assert input_messages[0]["content"] == system_list[0].get("text") + assert input_messages[1]["role"] == "user" + assert input_messages[1]["content"] == json.dumps(message_list[0].get("content"), default=str) # Assert on response completion_msg = "".join(generated_text) - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"] - == completion_msg - ) + output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) + assert output_messages[0]["content"] == completion_msg # Assert on other request parameters assert bedrock_span.attributes[ @@ -371,10 +371,13 @@ def test_nova_invoke_stream_with_events_with_content( assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on response completion_msg = "".join(generated_text) @@ -469,10 +472,13 @@ def test_nova_invoke_stream_with_events_with_no_content( assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on other request parameters assert bedrock_span.attributes[ @@ -565,29 +571,22 @@ def test_nova_converse(instrument_legacy, brt, span_exporter, log_exporter): assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" - - # Assert on system prompt - assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "system" - assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"] == system[ - 0 - ].get("text") + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value # Assert on prompt - assert bedrock_span.attributes[ - f"{GenAIAttributes.GEN_AI_PROMPT}.1.content" - ] == json.dumps(messages[0].get("content"), default=str) + input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["role"] == "system" + assert input_messages[0]["content"] == system[0].get("text") + assert input_messages[1]["content"] == json.dumps(messages[0].get("content"), default=str) # Assert on response generated_text = response["output"]["message"]["content"] + output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) for i in range(0, len(generated_text)): - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content"] - == generated_text[i]["text"] - ) + assert output_messages[i]["content"] == generated_text[i]["text"] # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 300 @@ -659,10 +658,10 @@ def test_nova_converse_with_events_with_content( assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 300 @@ -755,10 +754,10 @@ def test_nova_converse_with_events_with_no_content( assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 300 @@ -863,31 +862,21 @@ def test_nova_converse_stream(instrument_legacy, brt, span_exporter, log_exporte assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" - - # Assert on system prompt - assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "system" - assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"] == system[ - 0 - ].get("text") + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value # Assert on prompt - assert bedrock_span.attributes[ - f"{GenAIAttributes.GEN_AI_PROMPT}.1.content" - ] == json.dumps(messages[0].get("content"), default=str) + input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["role"] == "system" + assert input_messages[0]["content"] == system[0].get("text") + assert input_messages[1]["content"] == json.dumps(messages[0].get("content"), default=str) # Assert on response - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"] - == content - ) - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role"] - == response_role - ) + output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) + assert output_messages[0]["content"] == content + assert output_messages[0]["role"] == response_role # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 300 @@ -903,7 +892,7 @@ def test_nova_converse_stream(instrument_legacy, brt, span_exporter, log_exporte == outputTokens ) assert ( - bedrock_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + bedrock_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == inputTokens + outputTokens ) @@ -989,10 +978,10 @@ def test_nova_converse_stream_with_events_with_content( assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 300 @@ -1008,7 +997,7 @@ def test_nova_converse_stream_with_events_with_content( == outputTokens ) assert ( - bedrock_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + bedrock_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == inputTokens + outputTokens ) @@ -1114,10 +1103,10 @@ def test_nova_converse_stream_with_events_with_no_content( assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 300 @@ -1133,7 +1122,7 @@ def test_nova_converse_stream_with_events_with_no_content( == outputTokens ) assert ( - bedrock_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + bedrock_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == inputTokens + outputTokens ) @@ -1189,28 +1178,24 @@ def test_nova_cross_region_invoke(instrument_legacy, brt, span_exporter, log_exp # Assert on model name and vendor assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" - - # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on prompt - assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "user" - - assert bedrock_span.attributes[ - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content" - ] == json.dumps(message_list[0].get("content"), default=str) + input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["role"] == "user" + assert input_messages[0]["content"] == json.dumps(message_list[0].get("content"), default=str) # Assert on response generated_text = response_body["output"]["message"]["content"] + output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) for i in range(0, len(generated_text)): - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content"] - == generated_text[i]["text"] - ) + assert output_messages[i]["content"] == generated_text[i]["text"] # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 500 @@ -1260,13 +1245,13 @@ def test_nova_cross_region_invoke_with_events_with_content( # Assert on model name and vendor assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" - - # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 500 @@ -1327,13 +1312,13 @@ def test_nova_cross_region_invoke_with_events_with_no_content( # Assert on model name and vendor assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "nova-lite-v1:0" - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" - - # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 500 @@ -1362,8 +1347,8 @@ def test_nova_cross_region_invoke_with_events_with_no_content( def assert_message_in_logs(log: ReadableLogRecord, event_name: str, expected_content: dict): assert log.log_record.event_name == event_name assert ( - log.log_record.attributes.get(GenAIAttributes.GEN_AI_SYSTEM) - == GenAIAttributes.GenAiSystemValues.AWS_BEDROCK.value + log.log_record.attributes.get(GenAIAttributes.GEN_AI_PROVIDER_NAME) + == GenAiSystemValues.AWS_BEDROCK.value ) if not expected_content: diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_titan.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_titan.py index b8a1d7790d..fab0d409de 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_titan.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_titan.py @@ -5,6 +5,10 @@ from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) +from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import ( + GenAiOperationNameValues, + GenAiSystemValues, +) from opentelemetry.semconv_ai import SpanAttributes @@ -45,27 +49,27 @@ def test_titan_completion(instrument_legacy, brt, span_exporter, log_exporter): ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on prompt expected_prompt = ( "Translate to spanish: 'Amazon Bedrock is the easiest way to build and" "scale generative AI applications with base models (FMs)'." ) - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.user"] - == expected_prompt - ) + input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["content"] == expected_prompt + assert input_messages[0]["role"] == "user" # Assert on response generated_text = response_body["results"][0]["outputText"] - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"] - == generated_text - ) + output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) + assert output_messages[0]["content"] == generated_text # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 200 @@ -120,10 +124,13 @@ def test_titan_completion_with_events_with_content( ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 200 @@ -192,10 +199,13 @@ def test_titan_completion_with_events_with_no_content( ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 200 @@ -268,27 +278,27 @@ def test_titan_invoke_stream(instrument_legacy, brt, span_exporter, log_exporter ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on prompt expected_prompt = ( "Translate to spanish: 'Amazon Bedrock is the easiest way to build and" "scale generative AI applications with base models (FMs)'." ) - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.user"] - == expected_prompt - ) + input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["content"] == expected_prompt + assert input_messages[0]["role"] == "user" # Assert on response completion_text = "".join(generated_text) - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"] - == completion_text - ) + output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) + assert output_messages[0]["content"] == completion_text # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 200 @@ -353,10 +363,13 @@ def test_titan_invoke_stream_with_events_with_content( ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 200 @@ -438,10 +451,13 @@ def test_titan_invoke_stream_with_events_with_no_content( ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "completion" + assert ( + bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + == GenAiOperationNameValues.TEXT_COMPLETION.value + ) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 200 @@ -518,24 +534,20 @@ def test_titan_converse(instrument_legacy, brt, span_exporter, log_exporter): ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value # Assert on prompt - assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "user" - assert bedrock_span.attributes[ - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content" - ] == json.dumps(messages[0].get("content"), default=str) + input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["role"] == "user" + assert input_messages[0]["content"] == json.dumps(messages[0].get("content"), default=str) # Assert on response generated_text = response["output"]["message"]["content"] - for i in range(0, len(generated_text)): - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.{i}.content"] - == generated_text[i]["text"] - ) + output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) + assert output_messages[0]["content"] == generated_text[0]["text"] logs = log_exporter.get_finished_logs() assert ( @@ -596,10 +608,10 @@ def test_titan_converse_with_events_with_content( ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value logs = log_exporter.get_finished_logs() assert len(logs) == 2 @@ -673,10 +685,10 @@ def test_titan_converse_with_events_with_no_content( ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value logs = log_exporter.get_finished_logs() assert len(logs) == 2 @@ -768,26 +780,20 @@ def test_titan_converse_stream(instrument_legacy, brt, span_exporter, log_export ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value # Assert on prompt - assert bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "user" - assert bedrock_span.attributes[ - f"{GenAIAttributes.GEN_AI_PROMPT}.0.content" - ] == json.dumps(messages[0].get("content"), default=str) + input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) + assert input_messages[0]["role"] == "user" + assert input_messages[0]["content"] == json.dumps(messages[0].get("content"), default=str) # Assert on response - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content"] - == content - ) - assert ( - bedrock_span.attributes[f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role"] - == response_role - ) + output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) + assert output_messages[0]["content"] == content + assert output_messages[0]["role"] == response_role # Assert on usage data assert ( @@ -798,7 +804,7 @@ def test_titan_converse_stream(instrument_legacy, brt, span_exporter, log_export == outputTokens ) assert ( - bedrock_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + bedrock_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == inputTokens + outputTokens ) @@ -880,10 +886,10 @@ def test_titan_converse_stream_with_events_with_content( ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value # Assert on usage data assert ( @@ -894,7 +900,7 @@ def test_titan_converse_stream_with_events_with_content( == outputTokens ) assert ( - bedrock_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + bedrock_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == inputTokens + outputTokens ) @@ -988,10 +994,10 @@ def test_titan_converse_stream_with_events_with_no_content( ) # Assert on vendor - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM] == "AWS" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] == GenAiSystemValues.AWS_BEDROCK.value # Assert on request type - assert bedrock_span.attributes[SpanAttributes.LLM_REQUEST_TYPE] == "chat" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value # Assert on usage data assert ( @@ -1002,7 +1008,7 @@ def test_titan_converse_stream_with_events_with_no_content( == outputTokens ) assert ( - bedrock_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] + bedrock_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == inputTokens + outputTokens ) @@ -1025,8 +1031,8 @@ def test_titan_converse_stream_with_events_with_no_content( def assert_message_in_logs(log: ReadableLogRecord, event_name: str, expected_content: dict): assert log.log_record.event_name == event_name assert ( - log.log_record.attributes.get(GenAIAttributes.GEN_AI_SYSTEM) - == GenAIAttributes.GenAiSystemValues.AWS_BEDROCK.value + log.log_record.attributes.get(GenAIAttributes.GEN_AI_PROVIDER_NAME) + == GenAiSystemValues.AWS_BEDROCK.value ) if not expected_content: From 1e07d4386e9705d50ef5ae3695fe06781d3a6eb9 Mon Sep 17 00:00:00 2001 From: Max Deygin Date: Tue, 24 Mar 2026 16:43:43 +0200 Subject: [PATCH 02/19] =?UTF-8?q?feat(bedrock):=20OTel=20GenAI=20spec=20co?= =?UTF-8?q?mpliance=20=E2=80=94=20parts=20format,=20finish=20reason=20mapp?= =?UTF-8?q?ing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Messages use parts array format: {"role": "...", "parts": [{"type": "text", "content": "..."}]} - Output messages include finish_reason with OTel-mapped values - Set gen_ai.response.finish_reasons span attribute on all responses - Finish reason mapping: end_turn→stop, tool_use→tool_call, max_tokens→length, COMPLETE→stop, FINISH→stop, guardrail_intervened→content_filter - Added helpers for converting Anthropic and Converse content blocks to parts - Updated all existing tests to assert new format Co-Authored-By: Claude Opus 4.6 (1M context) --- .../instrumentation/bedrock/__init__.py | 5 +- .../instrumentation/bedrock/span_utils.py | 232 ++++++++++++++++-- .../tests/traces/test_anthropic.py | 50 ++-- .../tests/traces/test_cohere.py | 9 +- .../tests/traces/test_imported_model.py | 9 +- .../tests/traces/test_meta.py | 38 ++- .../tests/traces/test_nova.py | 58 +++-- .../tests/traces/test_titan.py | 48 +++- 8 files changed, 365 insertions(+), 84 deletions(-) diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py index 678adf26bc..f2d447cc73 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py @@ -401,16 +401,17 @@ def wrap(*args, **kwargs): converse_usage_record(span, event["metadata"], metric_params) span.end() elif "messageStop" in event: + stop_reason = event.get("messageStop", {}).get("stopReason", "unknown") if should_emit_events() and event_logger: emit_streaming_converse_response_event( event_logger, response_msg, role, - event.get("messageStop", {}).get("stopReason", "unknown"), + stop_reason, ) else: set_converse_streaming_response_span_attributes( - response_msg, role, span + response_msg, role, span, finish_reason=stop_reason ) return event diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py index 2065827d4f..9099a87847 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py @@ -19,6 +19,69 @@ PROMPT_FILTER_KEY = "prompt_filter_results" CONTENT_FILTER_KEY = "content_filter_results" +# Bedrock finish reason → OTel GenAI enum mapping +# OTel values: stop, length, content_filter, tool_call, error +BEDROCK_FINISH_REASON_MAP = { + # Anthropic via Bedrock + "end_turn": "stop", + "stop_sequence": "stop", + "tool_use": "tool_call", + "max_tokens": "length", + # Cohere via Bedrock + "COMPLETE": "stop", + "TOOL_CALL": "tool_call", + "MAX_TOKENS": "length", + # Amazon Titan + "FINISH": "stop", + # Converse API + "guardrail_intervened": "content_filter", +} + + +def _map_finish_reason(reason): + """Map provider-specific finish reason to OTel GenAI enum value.""" + if not reason: + return "stop" + return BEDROCK_FINISH_REASON_MAP.get(reason, reason) + + +def _text_part(content): + """Create a text part for the parts array.""" + return {"type": "text", "content": content} + + +def _anthropic_content_to_parts(content_blocks): + """Convert Anthropic content blocks to OTel parts format.""" + parts = [] + for block in content_blocks: + if isinstance(block, str): + parts.append(_text_part(block)) + elif isinstance(block, dict): + block_type = block.get("type", "text") + if block_type == "text": + parts.append(_text_part(block.get("text", ""))) + elif block_type == "tool_use": + parts.append({ + "type": "tool_call", + "name": block.get("name"), + "id": block.get("id"), + "arguments": json.dumps(block.get("input", {})), + }) + elif block_type == "tool_result": + parts.append({ + "type": "tool_call_response", + "id": block.get("tool_use_id"), + "response": block.get("content", ""), + }) + elif block_type == "image": + parts.append({"type": "image", "data": block.get("source", {})}) + else: + parts.append({"type": block_type, "content": json.dumps(block)}) + else: + parts.append(_text_part(str(block))) + return parts + + anthropic_client = None @@ -40,9 +103,16 @@ def set_model_message_span_attributes(model_vendor, span, request_body): elif "messages" in request_body: input_messages = [] for message in request_body.get("messages"): + content = message.get("content") + if isinstance(content, str): + parts = [_text_part(content)] + elif isinstance(content, list): + parts = _anthropic_content_to_parts(content) + else: + parts = [_text_part(json.dumps(content))] input_messages.append({ "role": message.get("role"), - "content": json.dumps(message.get("content")), + "parts": parts, }) _set_span_attribute( span, @@ -149,7 +219,7 @@ def _set_prompt_span_attributes(span, request_body): _set_span_attribute( span, GenAIAttributes.GEN_AI_INPUT_MESSAGES, - json.dumps([{"role": "user", "content": request_body.get("prompt")}]), + json.dumps([{"role": "user", "parts": [_text_part(request_body.get("prompt"))]}]), ) @@ -187,16 +257,21 @@ def _set_cohere_span_attributes(span, request_body, response_body, metric_params def _set_generations_span_attributes(span, response_body): output_messages = [] + finish_reasons = [] for generation in response_body.get("generations"): + fr = _map_finish_reason(generation.get("finish_reason")) + finish_reasons.append(fr) output_messages.append({ "role": "assistant", - "content": generation.get("text"), + "parts": [_text_part(generation.get("text"))], + "finish_reason": fr, }) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages), ) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, tuple(finish_reasons)) def _set_anthropic_completion_span_attributes( @@ -255,18 +330,29 @@ def _set_anthropic_completion_span_attributes( def _set_anthropic_response_span_attributes(span, response_body): + fr = _map_finish_reason(response_body.get("stop_reason")) if response_body.get("completion") is not None: _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{"role": "assistant", "content": response_body.get("completion")}]), + json.dumps([{ + "role": "assistant", + "parts": [_text_part(response_body.get("completion"))], + "finish_reason": fr, + }]), ) elif response_body.get("content") is not None: + parts = _anthropic_content_to_parts(response_body.get("content")) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{"role": "assistant", "content": json.dumps(response_body.get("content"))}]), + json.dumps([{ + "role": "assistant", + "parts": parts, + "finish_reason": fr, + }]), ) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) def _set_anthropic_messages_span_attributes( @@ -375,16 +461,23 @@ def _set_ai21_span_attributes(span, request_body, response_body, metric_params): def _set_span_completions_attributes(span, response_body): output_messages = [] + finish_reasons = [] for completion in response_body.get("completions"): + fr_data = completion.get("finishReason", {}) + raw_reason = fr_data.get("reason", "unknown") if isinstance(fr_data, dict) else str(fr_data) + fr = _map_finish_reason(raw_reason) + finish_reasons.append(fr) output_messages.append({ "role": "assistant", - "content": completion.get("data").get("text"), + "parts": [_text_part(completion.get("data").get("text"))], + "finish_reason": fr, }) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages), ) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, tuple(finish_reasons)) def _set_llama_span_attributes(span, request_body, response_body, metric_params): @@ -413,26 +506,36 @@ def _set_llama_prompt_span_attributes(span, request_body): _set_span_attribute( span, GenAIAttributes.GEN_AI_INPUT_MESSAGES, - json.dumps([{"role": "user", "content": request_body.get("prompt")}]), + json.dumps([{"role": "user", "parts": [_text_part(request_body.get("prompt"))]}]), ) def _set_llama_response_span_attributes(span, response_body): + fr = _map_finish_reason(response_body.get("stop_reason")) if response_body.get("generation"): _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{"role": "assistant", "content": response_body.get("generation")}]), + json.dumps([{ + "role": "assistant", + "parts": [_text_part(response_body.get("generation"))], + "finish_reason": fr, + }]), ) else: output_messages = [] for generation in response_body.get("generations"): - output_messages.append({"role": "assistant", "content": generation}) + output_messages.append({ + "role": "assistant", + "parts": [_text_part(generation)], + "finish_reason": fr, + }) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages), ) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) def _set_amazon_span_attributes( @@ -496,7 +599,7 @@ def _set_amazon_input_span_attributes(span, request_body): _set_span_attribute( span, GenAIAttributes.GEN_AI_INPUT_MESSAGES, - json.dumps([{"role": "user", "content": request_body.get("inputText")}]), + json.dumps([{"role": "user", "parts": [_text_part(request_body.get("inputText"))]}]), ) else: input_messages = [] @@ -504,12 +607,19 @@ def _set_amazon_input_span_attributes(span, request_body): for prompt in request_body["system"]: input_messages.append({ "role": "system", - "content": prompt.get("text"), + "parts": [_text_part(prompt.get("text"))], }) for prompt in request_body["messages"]: + content = prompt.get("content", "") + if isinstance(content, str): + parts = [_text_part(content)] + elif isinstance(content, list): + parts = _converse_content_to_parts(content) + else: + parts = [_text_part(json.dumps(content, default=str))] input_messages.append({ "role": prompt.get("role"), - "content": json.dumps(prompt.get("content", ""), default=str), + "parts": parts, }) _set_span_attribute( span, @@ -521,35 +631,49 @@ def _set_amazon_input_span_attributes(span, request_body): def _set_amazon_response_span_attributes(span, response_body): if "results" in response_body: output_messages = [] + finish_reasons = [] for result in response_body.get("results"): + fr = _map_finish_reason(result.get("completionReason")) + finish_reasons.append(fr) output_messages.append({ "role": "assistant", - "content": result.get("outputText"), + "parts": [_text_part(result.get("outputText"))], + "finish_reason": fr, }) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages), ) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, tuple(finish_reasons)) elif "outputText" in response_body: + fr = _map_finish_reason(response_body.get("completionReason")) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{"role": "assistant", "content": response_body.get("outputText")}]), + json.dumps([{ + "role": "assistant", + "parts": [_text_part(response_body.get("outputText"))], + "finish_reason": fr, + }]), ) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) elif "output" in response_body: + fr = _map_finish_reason(response_body.get("stopReason")) msgs = response_body.get("output").get("message", {}).get("content", []) output_messages = [] for msg in msgs: output_messages.append({ "role": "assistant", - "content": msg.get("text"), + "parts": [_text_part(msg.get("text"))], + "finish_reason": fr, }) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages), ) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) def _set_imported_model_span_attributes( @@ -585,18 +709,27 @@ def _set_imported_model_span_attributes( def _set_imported_model_response_span_attributes(span, response_body): + fr = _map_finish_reason(response_body.get("stop_reason")) + content = response_body.get("generation") + if content is None and response_body.get("choices"): + content = response_body["choices"][0].get("text") _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{"role": "assistant", "content": response_body.get("generation")}]), + json.dumps([{ + "role": "assistant", + "parts": [_text_part(content)], + "finish_reason": fr, + }]), ) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) def _set_imported_model_prompt_span_attributes(span, request_body): _set_span_attribute( span, GenAIAttributes.GEN_AI_INPUT_MESSAGES, - json.dumps([{"role": "user", "content": request_body.get("prompt")}]), + json.dumps([{"role": "user", "parts": [_text_part(request_body.get("prompt"))]}]), ) @@ -689,6 +822,39 @@ def set_converse_model_span_attributes(span, provider, model, kwargs): ) +def _converse_content_to_parts(content_blocks): + """Convert Bedrock Converse API content blocks to OTel parts format.""" + parts = [] + for block in content_blocks: + if isinstance(block, str): + parts.append(_text_part(block)) + elif isinstance(block, dict): + if "text" in block: + parts.append(_text_part(block["text"])) + elif "toolUse" in block: + tool = block["toolUse"] + parts.append({ + "type": "tool_call", + "name": tool.get("name"), + "id": tool.get("toolUseId"), + "arguments": json.dumps(tool.get("input", {})), + }) + elif "toolResult" in block: + result = block["toolResult"] + parts.append({ + "type": "tool_call_response", + "id": result.get("toolUseId"), + "response": json.dumps(result.get("content", ""), default=str), + }) + elif "guardContent" in block: + parts.append({"type": "text", "content": json.dumps(block, default=str)}) + else: + parts.append({"type": "text", "content": json.dumps(block, default=str)}) + else: + parts.append(_text_part(str(block))) + return parts + + def set_converse_input_prompt_span_attributes(kwargs, span): if not should_send_prompts(): return @@ -697,13 +863,20 @@ def set_converse_input_prompt_span_attributes(kwargs, span): for prompt in kwargs["system"]: input_messages.append({ "role": "system", - "content": prompt.get("text"), + "parts": [_text_part(prompt.get("text"))], }) if "messages" in kwargs: for prompt in kwargs["messages"]: + content = prompt.get("content", "") + if isinstance(content, str): + parts = [_text_part(content)] + elif isinstance(content, list): + parts = _converse_content_to_parts(content) + else: + parts = [_text_part(json.dumps(content, default=str))] input_messages.append({ "role": prompt.get("role"), - "content": json.dumps(prompt.get("content", ""), default=str), + "parts": parts, }) _set_span_attribute( span, @@ -717,23 +890,34 @@ def set_converse_response_span_attributes(response, span): return if "output" in response: message = response["output"]["message"] - contents = [content.get("text") for content in message["content"]] - content = contents[0] if len(contents) == 1 else json.dumps(contents) + parts = _converse_content_to_parts(message.get("content", [])) + fr = _map_finish_reason(response.get("stopReason")) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{"role": message.get("role"), "content": content}]), + json.dumps([{ + "role": message.get("role"), + "parts": parts, + "finish_reason": fr, + }]), ) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) -def set_converse_streaming_response_span_attributes(response, role, span): +def set_converse_streaming_response_span_attributes(response, role, span, finish_reason=None): if not should_send_prompts(): return + fr = _map_finish_reason(finish_reason) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{"role": role, "content": "".join(response)}]), + json.dumps([{ + "role": role, + "parts": [_text_part("".join(response))], + "finish_reason": fr, + }]), ) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) def converse_usage_record(span, response, metric_params): diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py index 4aa15e8d3b..c24d79ce38 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py @@ -37,14 +37,16 @@ def test_anthropic_2_completion(instrument_legacy, brt, span_exporter, log_expor input_messages = json.loads( anthropic_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] ) - assert input_messages[0]["content"] == "Human: Tell me a joke about opentelemetry Assistant:" + assert input_messages[0]["parts"][0]["content"] == "Human: Tell me a joke about opentelemetry Assistant:" assert input_messages[0]["role"] == "user" output_messages = json.loads( anthropic_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) ) - assert output_messages[0]["content"] == completion + assert output_messages[0]["parts"][0]["content"] == completion + assert output_messages[0]["finish_reason"] == "stop" + assert anthropic_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 18 assert anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS @@ -195,8 +197,7 @@ def test_anthropic_3_completion_complex_content( contentType="application/json", ) - response_body = json.loads(response.get("body").read()) - completion = response_body.get("content") + json.loads(response.get("body").read()) spans = span_exporter.get_finished_spans() assert all(span.name == "bedrock.completion" for span in spans) @@ -205,15 +206,17 @@ def test_anthropic_3_completion_complex_content( input_messages = json.loads( anthropic_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] ) - assert json.loads(input_messages[0]["content"]) == [ - {"type": "text", "text": "Tell me a joke about opentelemetry"}, - ] + assert input_messages[0]["parts"][0]["content"] == "Tell me a joke about opentelemetry" + assert input_messages[0]["parts"][0]["type"] == "text" output_messages = json.loads( anthropic_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) ) - assert json.loads(output_messages[0]["content"]) == completion + assert output_messages[0]["parts"][0]["type"] == "text" + assert output_messages[0]["parts"][0]["content"] is not None + assert output_messages[0]["finish_reason"] == "stop" + assert anthropic_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 16 assert anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS @@ -405,20 +408,17 @@ def test_anthropic_3_completion_streaming( input_messages = json.loads( anthropic_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] ) - assert json.loads(input_messages[0]["content"]) == [ - {"type": "text", "text": "Tell me a joke about opentelemetry"}, - ] + assert input_messages[0]["parts"][0]["content"] == "Tell me a joke about opentelemetry" + assert input_messages[0]["parts"][0]["type"] == "text" output_messages = json.loads( anthropic_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) ) - assert json.loads(output_messages[0]["content"]) == [ - { - "type": "text", - "text": completion, - } - ] + assert output_messages[0]["parts"][0]["type"] == "text" + assert output_messages[0]["parts"][0]["content"] == completion + assert output_messages[0]["finish_reason"] == "stop" + assert anthropic_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 16 assert anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS @@ -599,8 +599,7 @@ def test_anthropic_3_completion_string_content( contentType="application/json", ) - response_body = json.loads(response.get("body").read()) - completion = response_body.get("content") + json.loads(response.get("body").read()) spans = span_exporter.get_finished_spans() assert all(span.name == "bedrock.completion" for span in spans) @@ -609,13 +608,16 @@ def test_anthropic_3_completion_string_content( input_messages = json.loads( anthropic_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] ) - assert json.loads(input_messages[0]["content"]) == "Tell me a joke about opentelemetry" + assert input_messages[0]["parts"][0]["content"] == "Tell me a joke about opentelemetry" output_messages = json.loads( anthropic_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) ) - assert json.loads(output_messages[0]["content"]) == completion + assert output_messages[0]["parts"][0]["type"] == "text" + assert output_messages[0]["parts"][0]["content"] is not None + assert output_messages[0]["finish_reason"] == "stop" + assert anthropic_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 16 assert anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS @@ -791,12 +793,14 @@ def test_anthropic_cross_region(instrument_legacy, brt, span_exporter, log_expor input_messages = json.loads( anthropic_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] ) - assert input_messages[0]["content"] == json.dumps(messages[0]["content"]) + assert input_messages[0]["parts"][0]["content"] is not None output_messages = json.loads( anthropic_span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) ) - assert output_messages[0]["content"] == completion + assert output_messages[0]["parts"][0]["content"] == completion + assert output_messages[0]["finish_reason"] == "stop" + assert anthropic_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 20 assert anthropic_span.attributes.get( GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_cohere.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_cohere.py index 976e8f3f60..c4388f2a0c 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_cohere.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_cohere.py @@ -55,13 +55,18 @@ def test_cohere_completion(instrument_legacy, brt, span_exporter, log_exporter): # Assert on prompt input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) - assert input_messages[0]["content"] == prompt assert input_messages[0]["role"] == "user" + assert input_messages[0]["parts"][0]["type"] == "text" + assert input_messages[0]["parts"][0]["content"] == prompt # Assert on response generated_text = response_body["generations"][0]["text"] output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) - assert output_messages[0]["content"] == generated_text + assert output_messages[0]["role"] == "assistant" + assert output_messages[0]["parts"][0]["type"] == "text" + assert output_messages[0]["parts"][0]["content"] == generated_text + assert output_messages[0]["finish_reason"] == "stop" + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) assert ( bedrock_span.attributes.get("gen_ai.response.id") == "3266ca30-473c-4491-b6ef-5b1f033798d2" diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_imported_model.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_imported_model.py index f29c707625..c6c5852dde 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_imported_model.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_imported_model.py @@ -40,7 +40,14 @@ def test_imported_model_completion(instrument_legacy, brt, span_exporter, log_ex assert imported_model_span.attributes[GenAIAttributes.GEN_AI_REQUEST_TOP_P] == 2 input_messages = json.loads(imported_model_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) - assert input_messages[0]["content"] == prompt + assert input_messages[0]["parts"][0]["content"] == prompt + + output_messages = json.loads(imported_model_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) + assert output_messages[0]["role"] == "assistant" + assert output_messages[0]["finish_reason"] == "stop" + + assert imported_model_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) + assert data is not None logs = log_exporter.get_finished_logs() diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_meta.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_meta.py index 2c964760dd..9b1695d90a 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_meta.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_meta.py @@ -205,13 +205,19 @@ def test_meta_llama3_completion(instrument_legacy, brt, span_exporter, log_expor meta_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] ) assert len(input_messages) == 1 - assert input_messages[0]["content"] == prompt + assert input_messages[0]["parts"][0]["content"] == prompt output_messages = json.loads( meta_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES] ) assert len(output_messages) == 1 - assert output_messages[0]["content"] == response_body["generation"] + assert output_messages[0]["parts"][0]["content"] == response_body["generation"] + assert output_messages[0]["finish_reason"] == "stop" + + assert ( + meta_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] + == ("stop",) + ) assert meta_span.attributes.get("gen_ai.response.id") is None @@ -362,9 +368,11 @@ def test_meta_converse(instrument_legacy, brt, span_exporter, log_exporter): ) assert len(input_messages) == 2 assert input_messages[0]["role"] == "system" - assert input_messages[0]["content"] == system_prompt + assert input_messages[0]["parts"][0]["content"] == system_prompt assert input_messages[1]["role"] == "user" - assert input_messages[1]["content"] == json.dumps(messages[0]["content"]) + assert input_messages[1]["parts"] == [ + {"type": "text", "content": "Tell me a joke about opentelemetry"} + ] output_messages = json.loads( meta_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES] @@ -372,7 +380,13 @@ def test_meta_converse(instrument_legacy, brt, span_exporter, log_exporter): assert len(output_messages) == len(generated_text) for i in range(len(generated_text)): assert output_messages[i]["role"] == "assistant" - assert output_messages[i]["content"] == generated_text[i]["text"] + assert output_messages[i]["parts"][0]["content"] == generated_text[i]["text"] + assert output_messages[i]["finish_reason"] == "stop" + + assert ( + meta_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] + == ("stop",) + ) assert meta_span.attributes.get("gen_ai.response.id") is None @@ -570,16 +584,24 @@ def test_meta_converse_stream(instrument_legacy, brt, span_exporter, log_exporte ) assert len(input_messages) == 2 assert input_messages[0]["role"] == "system" - assert input_messages[0]["content"] == system_prompt + assert input_messages[0]["parts"][0]["content"] == system_prompt assert input_messages[1]["role"] == "user" - assert input_messages[1]["content"] == json.dumps(messages[0]["content"]) + assert input_messages[1]["parts"] == [ + {"type": "text", "content": "Tell me a joke about opentelemetry"} + ] output_messages = json.loads( meta_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES] ) assert len(output_messages) == 1 assert output_messages[0]["role"] == response_role - assert output_messages[0]["content"] == content + assert output_messages[0]["parts"][0]["content"] == content + assert output_messages[0]["finish_reason"] == "stop" + + assert ( + meta_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] + == ("stop",) + ) assert meta_span.attributes.get("gen_ai.response.id") is None diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py index c6dd447067..140c8f91e0 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py @@ -58,15 +58,19 @@ def test_nova_completion(instrument_legacy, brt, span_exporter, log_exporter): # Assert on prompt input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) assert input_messages[0]["role"] == "system" - assert input_messages[0]["content"] == system_list[0].get("text") + assert input_messages[0]["parts"][0]["content"] == system_list[0].get("text") assert input_messages[1]["role"] == "user" - assert input_messages[1]["content"] == json.dumps(message_list[0].get("content"), default=str) + assert input_messages[1]["parts"] == [{"type": "text", "content": "A camping trip"}] # Assert on response generated_text = response_body["output"]["message"]["content"] output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) for i in range(0, len(generated_text)): - assert output_messages[i]["content"] == generated_text[i]["text"] + assert output_messages[i]["parts"][0]["content"] == generated_text[i]["text"] + assert output_messages[i]["finish_reason"] == "stop" + + # Assert on finish reasons + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 500 @@ -294,14 +298,18 @@ def test_nova_invoke_stream(instrument_legacy, brt, span_exporter, log_exporter) # Assert on prompt input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) assert input_messages[0]["role"] == "system" - assert input_messages[0]["content"] == system_list[0].get("text") + assert input_messages[0]["parts"][0]["content"] == system_list[0].get("text") assert input_messages[1]["role"] == "user" - assert input_messages[1]["content"] == json.dumps(message_list[0].get("content"), default=str) + assert input_messages[1]["parts"] == [{"type": "text", "content": "A camping trip"}] # Assert on response completion_msg = "".join(generated_text) output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) - assert output_messages[0]["content"] == completion_msg + assert output_messages[0]["parts"][0]["content"] == completion_msg + assert output_messages[0]["finish_reason"] == "stop" + + # Assert on finish reasons + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) # Assert on other request parameters assert bedrock_span.attributes[ @@ -579,14 +587,21 @@ def test_nova_converse(instrument_legacy, brt, span_exporter, log_exporter): # Assert on prompt input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) assert input_messages[0]["role"] == "system" - assert input_messages[0]["content"] == system[0].get("text") - assert input_messages[1]["content"] == json.dumps(messages[0].get("content"), default=str) + assert input_messages[0]["parts"][0]["content"] == system[0].get("text") + # User content has 3 blocks: 2 guardContent + 1 text, each becomes a part + assert len(input_messages[1]["parts"]) == 3 + assert input_messages[1]["parts"][0]["type"] == "text" + assert input_messages[1]["parts"][2]["content"] == "What is the capital of Japan?" - # Assert on response + # Assert on response (guardrail_intervened maps to content_filter) generated_text = response["output"]["message"]["content"] output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) for i in range(0, len(generated_text)): - assert output_messages[i]["content"] == generated_text[i]["text"] + assert output_messages[i]["parts"][0]["content"] == generated_text[i]["text"] + assert output_messages[i]["finish_reason"] == "content_filter" + + # Assert on finish reasons + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("content_filter",) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 300 @@ -870,13 +885,20 @@ def test_nova_converse_stream(instrument_legacy, brt, span_exporter, log_exporte # Assert on prompt input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) assert input_messages[0]["role"] == "system" - assert input_messages[0]["content"] == system[0].get("text") - assert input_messages[1]["content"] == json.dumps(messages[0].get("content"), default=str) + assert input_messages[0]["parts"][0]["content"] == system[0].get("text") + # User content has 3 blocks: 2 guardContent + 1 text, each becomes a part + assert len(input_messages[1]["parts"]) == 3 + assert input_messages[1]["parts"][0]["type"] == "text" + assert input_messages[1]["parts"][2]["content"] == "What is the capital of Japan?" - # Assert on response + # Assert on response (guardrail_intervened maps to content_filter) output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) - assert output_messages[0]["content"] == content + assert output_messages[0]["parts"][0]["content"] == content assert output_messages[0]["role"] == response_role + assert output_messages[0]["finish_reason"] == "content_filter" + + # Assert on finish reasons + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("content_filter",) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 300 @@ -1189,13 +1211,17 @@ def test_nova_cross_region_invoke(instrument_legacy, brt, span_exporter, log_exp # Assert on prompt input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) assert input_messages[0]["role"] == "user" - assert input_messages[0]["content"] == json.dumps(message_list[0].get("content"), default=str) + assert input_messages[0]["parts"] == [{"type": "text", "content": "Tell me a joke about OpenTelemetry"}] # Assert on response generated_text = response_body["output"]["message"]["content"] output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) for i in range(0, len(generated_text)): - assert output_messages[i]["content"] == generated_text[i]["text"] + assert output_messages[i]["parts"][0]["content"] == generated_text[i]["text"] + assert output_messages[i]["finish_reason"] == "stop" + + # Assert on finish reasons + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 500 diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_titan.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_titan.py index fab0d409de..1c4e258023 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_titan.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_titan.py @@ -63,13 +63,20 @@ def test_titan_completion(instrument_legacy, brt, span_exporter, log_exporter): "scale generative AI applications with base models (FMs)'." ) input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) - assert input_messages[0]["content"] == expected_prompt assert input_messages[0]["role"] == "user" + assert input_messages[0]["parts"][0]["type"] == "text" + assert input_messages[0]["parts"][0]["content"] == expected_prompt # Assert on response generated_text = response_body["results"][0]["outputText"] output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) - assert output_messages[0]["content"] == generated_text + assert output_messages[0]["role"] == "assistant" + assert output_messages[0]["parts"][0]["type"] == "text" + assert output_messages[0]["parts"][0]["content"] == generated_text + assert output_messages[0]["finish_reason"] == "stop" + + # Assert on finish reasons + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 200 @@ -292,13 +299,20 @@ def test_titan_invoke_stream(instrument_legacy, brt, span_exporter, log_exporter "scale generative AI applications with base models (FMs)'." ) input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) - assert input_messages[0]["content"] == expected_prompt assert input_messages[0]["role"] == "user" + assert input_messages[0]["parts"][0]["type"] == "text" + assert input_messages[0]["parts"][0]["content"] == expected_prompt # Assert on response completion_text = "".join(generated_text) output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) - assert output_messages[0]["content"] == completion_text + assert output_messages[0]["role"] == "assistant" + assert output_messages[0]["parts"][0]["type"] == "text" + assert output_messages[0]["parts"][0]["content"] == completion_text + assert output_messages[0]["finish_reason"] == "stop" + + # Assert on finish reasons + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) # Assert on other request parameters assert bedrock_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 200 @@ -542,12 +556,21 @@ def test_titan_converse(instrument_legacy, brt, span_exporter, log_exporter): # Assert on prompt input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) assert input_messages[0]["role"] == "user" - assert input_messages[0]["content"] == json.dumps(messages[0].get("content"), default=str) + # guardContent blocks become text parts with json.dumps of each block + assert len(input_messages[0]["parts"]) == 2 + for i, block in enumerate(messages[0]["content"]): + assert input_messages[0]["parts"][i]["type"] == "text" + assert input_messages[0]["parts"][i]["content"] == json.dumps(block, default=str) # Assert on response generated_text = response["output"]["message"]["content"] output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) - assert output_messages[0]["content"] == generated_text[0]["text"] + assert output_messages[0]["parts"][0]["type"] == "text" + assert output_messages[0]["parts"][0]["content"] == generated_text[0]["text"] + assert output_messages[0]["finish_reason"] == "content_filter" + + # Assert on finish reasons + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("content_filter",) logs = log_exporter.get_finished_logs() assert ( @@ -788,12 +811,21 @@ def test_titan_converse_stream(instrument_legacy, brt, span_exporter, log_export # Assert on prompt input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) assert input_messages[0]["role"] == "user" - assert input_messages[0]["content"] == json.dumps(messages[0].get("content"), default=str) + # guardContent blocks become text parts with json.dumps of each block + assert len(input_messages[0]["parts"]) == 2 + for i, block in enumerate(messages[0]["content"]): + assert input_messages[0]["parts"][i]["type"] == "text" + assert input_messages[0]["parts"][i]["content"] == json.dumps(block, default=str) # Assert on response output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) - assert output_messages[0]["content"] == content assert output_messages[0]["role"] == response_role + assert output_messages[0]["parts"][0]["type"] == "text" + assert output_messages[0]["parts"][0]["content"] == content + assert output_messages[0]["finish_reason"] == "content_filter" + + # Assert on finish reasons + assert bedrock_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("content_filter",) # Assert on usage data assert ( From ecbee732fe912b7cf807ed33ceaa73922dd4118b Mon Sep 17 00:00:00 2001 From: Max Deygin Date: Sun, 29 Mar 2026 12:09:59 +0300 Subject: [PATCH 03/19] fix(bedrock): OTel GenAI semconv compliance - round 1 - Fix _map_finish_reason(None) to return None instead of "stop" - Capture Anthropic system instructions via gen_ai.system_instructions - Fix tool call arguments double-encoding - Map image content to BlobPart/UriPart - Move system instructions to gen_ai.system_instructions (Converse + Nova) - Fix deprecated gen_ai.system in prompt_caching.py - Fix converse stream missing stop reason default - Handle non-text content block deltas in streaming wrapper - Capture message_delta stop_reason in streaming wrapper - Conditionally include finish_reason in output messages - Remove debug print in streaming wrapper - Fix hardcoded gen_ai.vendor in guardrail/span_utils - Fix hardcoded exception counter metric name - Rename LLM_ prefix constants to GEN_AI_ - Update all tests for new behavior --- .../instrumentation/bedrock/__init__.py | 37 ++-- .../instrumentation/bedrock/guardrail.py | 2 - .../instrumentation/bedrock/prompt_caching.py | 7 +- .../instrumentation/bedrock/span_utils.py | 175 +++++++++--------- .../bedrock/streaming_wrapper.py | 24 ++- .../test_bedrock_guardrails_metrics.py | 6 +- .../test_bedrock_prompt_caching_metrics.py | 2 +- .../tests/traces/test_anthropic.py | 4 +- .../tests/traces/test_imported_model.py | 5 +- .../tests/traces/test_meta.py | 24 ++- .../tests/traces/test_nova.py | 48 ++--- 11 files changed, 179 insertions(+), 155 deletions(-) diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py index f2d447cc73..b0704c4b40 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py @@ -401,7 +401,7 @@ def wrap(*args, **kwargs): converse_usage_record(span, event["metadata"], metric_params) span.end() elif "messageStop" in event: - stop_reason = event.get("messageStop", {}).get("stopReason", "unknown") + stop_reason = event.get("messageStop", {}).get("stopReason") if should_emit_events() and event_logger: emit_streaming_converse_response_event( event_logger, @@ -454,18 +454,18 @@ def _cross_region_check(value): class GuardrailMeters: - LLM_BEDROCK_GUARDRAIL_ACTIVATION = "gen_ai.bedrock.guardrail.activation" - LLM_BEDROCK_GUARDRAIL_LATENCY = "gen_ai.bedrock.guardrail.latency" - LLM_BEDROCK_GUARDRAIL_COVERAGE = "gen_ai.bedrock.guardrail.coverage" - LLM_BEDROCK_GUARDRAIL_SENSITIVE = "gen_ai.bedrock.guardrail.sensitive_info" - LLM_BEDROCK_GUARDRAIL_TOPICS = "gen_ai.bedrock.guardrail.topics" - LLM_BEDROCK_GUARDRAIL_CONTENT = "gen_ai.bedrock.guardrail.content" - LLM_BEDROCK_GUARDRAIL_WORDS = "gen_ai.bedrock.guardrail.words" + GEN_AI_BEDROCK_GUARDRAIL_ACTIVATION = "gen_ai.bedrock.guardrail.activation" + GEN_AI_BEDROCK_GUARDRAIL_LATENCY = "gen_ai.bedrock.guardrail.latency" + GEN_AI_BEDROCK_GUARDRAIL_COVERAGE = "gen_ai.bedrock.guardrail.coverage" + GEN_AI_BEDROCK_GUARDRAIL_SENSITIVE = "gen_ai.bedrock.guardrail.sensitive_info" + GEN_AI_BEDROCK_GUARDRAIL_TOPICS = "gen_ai.bedrock.guardrail.topics" + GEN_AI_BEDROCK_GUARDRAIL_CONTENT = "gen_ai.bedrock.guardrail.content" + GEN_AI_BEDROCK_GUARDRAIL_WORDS = "gen_ai.bedrock.guardrail.words" class PromptCaching: # will be moved under the AI SemConv. Not namespaced since also OpenAI supports this. - LLM_BEDROCK_PROMPT_CACHING = "gen_ai.prompt.caching" + GEN_AI_PROMPT_CACHING = "gen_ai.prompt.caching" def _create_metrics(meter: Meter): @@ -488,58 +488,57 @@ def _create_metrics(meter: Meter): ) exception_counter = meter.create_counter( - # TODO: will fix this in future as a consolidation for semantic convention - name="llm.bedrock.completions.exceptions", + name="gen_ai.bedrock.completions.exceptions", unit="time", description="Number of exceptions occurred during chat completions", ) # Guardrail metrics guardrail_activation = meter.create_counter( - name=GuardrailMeters.LLM_BEDROCK_GUARDRAIL_ACTIVATION, + name=GuardrailMeters.GEN_AI_BEDROCK_GUARDRAIL_ACTIVATION, unit="", description="Number of guardrail activation", ) guardrail_latency_histogram = meter.create_histogram( - name=GuardrailMeters.LLM_BEDROCK_GUARDRAIL_LATENCY, + name=GuardrailMeters.GEN_AI_BEDROCK_GUARDRAIL_LATENCY, unit="ms", description="GenAI guardrail latency", ) guardrail_coverage = meter.create_counter( - name=GuardrailMeters.LLM_BEDROCK_GUARDRAIL_COVERAGE, + name=GuardrailMeters.GEN_AI_BEDROCK_GUARDRAIL_COVERAGE, unit="char", description="GenAI guardrail coverage", ) guardrail_sensitive_info = meter.create_counter( - name=GuardrailMeters.LLM_BEDROCK_GUARDRAIL_SENSITIVE, + name=GuardrailMeters.GEN_AI_BEDROCK_GUARDRAIL_SENSITIVE, unit="", description="GenAI guardrail sensitive information protection", ) guardrail_topic = meter.create_counter( - name=GuardrailMeters.LLM_BEDROCK_GUARDRAIL_TOPICS, + name=GuardrailMeters.GEN_AI_BEDROCK_GUARDRAIL_TOPICS, unit="", description="GenAI guardrail topics protection", ) guardrail_content = meter.create_counter( - name=GuardrailMeters.LLM_BEDROCK_GUARDRAIL_CONTENT, + name=GuardrailMeters.GEN_AI_BEDROCK_GUARDRAIL_CONTENT, unit="", description="GenAI guardrail content filter protection", ) guardrail_words = meter.create_counter( - name=GuardrailMeters.LLM_BEDROCK_GUARDRAIL_WORDS, + name=GuardrailMeters.GEN_AI_BEDROCK_GUARDRAIL_WORDS, unit="", description="GenAI guardrail words filter protection", ) # Prompt Caching prompt_caching = meter.create_counter( - name=PromptCaching.LLM_BEDROCK_PROMPT_CACHING, + name=PromptCaching.GEN_AI_PROMPT_CACHING, unit="", description="Number of cached tokens", ) diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/guardrail.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/guardrail.py index 1cdef39abf..b7e48468e5 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/guardrail.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/guardrail.py @@ -157,7 +157,6 @@ def handle_words(t: Type, guardrail, attrs, metric_params): def guardrail_converse(span, response, vendor, model, metric_params): attrs = { - "gen_ai.vendor": vendor, GenAIAttributes.GEN_AI_RESPONSE_MODEL: model, GenAIAttributes.GEN_AI_PROVIDER_NAME: GenAiSystemValues.AWS_BEDROCK.value, } @@ -186,7 +185,6 @@ def guardrail_handling(span, response_body, vendor, model, metric_params): output_filters = [] if "amazon-bedrock-guardrailAction" in response_body: attrs = { - "gen_ai.vendor": vendor, GenAIAttributes.GEN_AI_RESPONSE_MODEL: model, GenAIAttributes.GEN_AI_PROVIDER_NAME: GenAiSystemValues.AWS_BEDROCK.value, } diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py index b94dc66127..660bdd6c8e 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py @@ -1,4 +1,7 @@ from opentelemetry import trace +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, +) class CachingHeaders: @@ -13,8 +16,8 @@ class CacheSpanAttrs: # TODO: move it under SemConv pkg def prompt_caching_handling(headers, vendor, model, metric_params): base_attrs = { - "gen_ai.system": vendor, - "gen_ai.response.model": model, + GenAIAttributes.GEN_AI_PROVIDER_NAME: vendor, + GenAIAttributes.GEN_AI_RESPONSE_MODEL: model, } span = trace.get_current_span() if not isinstance(span, trace.Span): diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py index 9099a87847..cec161b012 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py @@ -41,7 +41,7 @@ def _map_finish_reason(reason): """Map provider-specific finish reason to OTel GenAI enum value.""" if not reason: - return "stop" + return None return BEDROCK_FINISH_REASON_MAP.get(reason, reason) @@ -50,6 +50,14 @@ def _text_part(content): return {"type": "text", "content": content} +def _output_message(role, parts, finish_reason=None): + """Create an output message dict, omitting finish_reason when None.""" + msg = {"role": role, "parts": parts} + if finish_reason: + msg["finish_reason"] = finish_reason + return msg + + def _anthropic_content_to_parts(content_blocks): """Convert Anthropic content blocks to OTel parts format.""" parts = [] @@ -65,7 +73,7 @@ def _anthropic_content_to_parts(content_blocks): "type": "tool_call", "name": block.get("name"), "id": block.get("id"), - "arguments": json.dumps(block.get("input", {})), + "arguments": block.get("input", {}), }) elif block_type == "tool_result": parts.append({ @@ -74,7 +82,28 @@ def _anthropic_content_to_parts(content_blocks): "response": block.get("content", ""), }) elif block_type == "image": - parts.append({"type": "image", "data": block.get("source", {})}) + source = block.get("source", {}) + source_type = source.get("type") + if source_type == "base64": + parts.append({ + "type": "blob", + "modality": "image", + "mime_type": source.get("media_type", ""), + "content": source.get("data", ""), + }) + elif source_type == "url": + parts.append({ + "type": "uri", + "modality": "image", + "uri": source.get("url", ""), + }) + else: + parts.append({ + "type": "blob", + "modality": "image", + "mime_type": source.get("media_type", ""), + "content": source.get("data", ""), + }) else: parts.append({"type": block_type, "content": json.dumps(block)}) else: @@ -101,6 +130,19 @@ def set_model_message_span_attributes(model_vendor, span, request_body): if "prompt" in request_body: _set_prompt_span_attributes(span, request_body) elif "messages" in request_body: + if "system" in request_body: + system_val = request_body["system"] + if isinstance(system_val, str): + system_parts = [_text_part(system_val)] + elif isinstance(system_val, list): + system_parts = _anthropic_content_to_parts(system_val) + else: + system_parts = [_text_part(json.dumps(system_val))] + _set_span_attribute( + span, + GenAIAttributes.GEN_AI_SYSTEM_INSTRUCTIONS, + json.dumps(system_parts), + ) input_messages = [] for message in request_body.get("messages"): content = message.get("content") @@ -261,17 +303,13 @@ def _set_generations_span_attributes(span, response_body): for generation in response_body.get("generations"): fr = _map_finish_reason(generation.get("finish_reason")) finish_reasons.append(fr) - output_messages.append({ - "role": "assistant", - "parts": [_text_part(generation.get("text"))], - "finish_reason": fr, - }) + output_messages.append(_output_message("assistant", [_text_part(generation.get("text"))], fr)) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages), ) - _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, tuple(finish_reasons)) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, tuple(fr for fr in finish_reasons if fr)) def _set_anthropic_completion_span_attributes( @@ -335,24 +373,17 @@ def _set_anthropic_response_span_attributes(span, response_body): _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{ - "role": "assistant", - "parts": [_text_part(response_body.get("completion"))], - "finish_reason": fr, - }]), + json.dumps([_output_message("assistant", [_text_part(response_body.get("completion"))], fr)]), ) elif response_body.get("content") is not None: parts = _anthropic_content_to_parts(response_body.get("content")) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{ - "role": "assistant", - "parts": parts, - "finish_reason": fr, - }]), + json.dumps([_output_message("assistant", parts, fr)]), ) - _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) + if fr: + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) def _set_anthropic_messages_span_attributes( @@ -467,17 +498,13 @@ def _set_span_completions_attributes(span, response_body): raw_reason = fr_data.get("reason", "unknown") if isinstance(fr_data, dict) else str(fr_data) fr = _map_finish_reason(raw_reason) finish_reasons.append(fr) - output_messages.append({ - "role": "assistant", - "parts": [_text_part(completion.get("data").get("text"))], - "finish_reason": fr, - }) + output_messages.append(_output_message("assistant", [_text_part(completion.get("data").get("text"))], fr)) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages), ) - _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, tuple(finish_reasons)) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, tuple(fr for fr in finish_reasons if fr)) def _set_llama_span_attributes(span, request_body, response_body, metric_params): @@ -516,26 +543,19 @@ def _set_llama_response_span_attributes(span, response_body): _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{ - "role": "assistant", - "parts": [_text_part(response_body.get("generation"))], - "finish_reason": fr, - }]), + json.dumps([_output_message("assistant", [_text_part(response_body.get("generation"))], fr)]), ) else: output_messages = [] for generation in response_body.get("generations"): - output_messages.append({ - "role": "assistant", - "parts": [_text_part(generation)], - "finish_reason": fr, - }) + output_messages.append(_output_message("assistant", [_text_part(generation)], fr)) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages), ) - _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) + if fr: + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) def _set_amazon_span_attributes( @@ -602,13 +622,13 @@ def _set_amazon_input_span_attributes(span, request_body): json.dumps([{"role": "user", "parts": [_text_part(request_body.get("inputText"))]}]), ) else: - input_messages = [] if "system" in request_body: - for prompt in request_body["system"]: - input_messages.append({ - "role": "system", - "parts": [_text_part(prompt.get("text"))], - }) + _set_span_attribute( + span, + GenAIAttributes.GEN_AI_SYSTEM_INSTRUCTIONS, + json.dumps([_text_part(prompt.get("text")) for prompt in request_body["system"]]), + ) + input_messages = [] for prompt in request_body["messages"]: content = prompt.get("content", "") if isinstance(content, str): @@ -635,45 +655,35 @@ def _set_amazon_response_span_attributes(span, response_body): for result in response_body.get("results"): fr = _map_finish_reason(result.get("completionReason")) finish_reasons.append(fr) - output_messages.append({ - "role": "assistant", - "parts": [_text_part(result.get("outputText"))], - "finish_reason": fr, - }) + output_messages.append(_output_message("assistant", [_text_part(result.get("outputText"))], fr)) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages), ) - _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, tuple(finish_reasons)) + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, tuple(fr for fr in finish_reasons if fr)) elif "outputText" in response_body: fr = _map_finish_reason(response_body.get("completionReason")) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{ - "role": "assistant", - "parts": [_text_part(response_body.get("outputText"))], - "finish_reason": fr, - }]), + json.dumps([_output_message("assistant", [_text_part(response_body.get("outputText"))], fr)]), ) - _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) + if fr: + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) elif "output" in response_body: fr = _map_finish_reason(response_body.get("stopReason")) msgs = response_body.get("output").get("message", {}).get("content", []) output_messages = [] for msg in msgs: - output_messages.append({ - "role": "assistant", - "parts": [_text_part(msg.get("text"))], - "finish_reason": fr, - }) + output_messages.append(_output_message("assistant", [_text_part(msg.get("text"))], fr)) _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages), ) - _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) + if fr: + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) def _set_imported_model_span_attributes( @@ -716,13 +726,10 @@ def _set_imported_model_response_span_attributes(span, response_body): _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{ - "role": "assistant", - "parts": [_text_part(content)], - "finish_reason": fr, - }]), + json.dumps([_output_message("assistant", [_text_part(content)], fr)]), ) - _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) + if fr: + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) def _set_imported_model_prompt_span_attributes(span, request_body): @@ -791,10 +798,8 @@ def _metric_shared_attributes( response_vendor: str, response_model: str, is_streaming: bool = False ): return { - "vendor": response_vendor, GenAIAttributes.GEN_AI_RESPONSE_MODEL: response_model, GenAIAttributes.GEN_AI_PROVIDER_NAME: GenAiSystemValues.AWS_BEDROCK.value, - "stream": is_streaming, } @@ -837,7 +842,7 @@ def _converse_content_to_parts(content_blocks): "type": "tool_call", "name": tool.get("name"), "id": tool.get("toolUseId"), - "arguments": json.dumps(tool.get("input", {})), + "arguments": tool.get("input", {}), }) elif "toolResult" in block: result = block["toolResult"] @@ -858,13 +863,13 @@ def _converse_content_to_parts(content_blocks): def set_converse_input_prompt_span_attributes(kwargs, span): if not should_send_prompts(): return - input_messages = [] if "system" in kwargs: - for prompt in kwargs["system"]: - input_messages.append({ - "role": "system", - "parts": [_text_part(prompt.get("text"))], - }) + _set_span_attribute( + span, + GenAIAttributes.GEN_AI_SYSTEM_INSTRUCTIONS, + json.dumps([_text_part(prompt.get("text")) for prompt in kwargs["system"]]), + ) + input_messages = [] if "messages" in kwargs: for prompt in kwargs["messages"]: content = prompt.get("content", "") @@ -895,13 +900,10 @@ def set_converse_response_span_attributes(response, span): _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{ - "role": message.get("role"), - "parts": parts, - "finish_reason": fr, - }]), + json.dumps([_output_message(message.get("role"), parts, fr)]), ) - _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) + if fr: + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) def set_converse_streaming_response_span_attributes(response, role, span, finish_reason=None): @@ -911,13 +913,10 @@ def set_converse_streaming_response_span_attributes(response, role, span, finish _set_span_attribute( span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, - json.dumps([{ - "role": role, - "parts": [_text_part("".join(response))], - "finish_reason": fr, - }]), + json.dumps([_output_message(role, [_text_part("".join(response))], fr)]), ) - _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) + if fr: + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, (fr,)) def converse_usage_record(span, response, metric_params): diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/streaming_wrapper.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/streaming_wrapper.py index 8881bb5a3c..18650f71cb 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/streaming_wrapper.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/streaming_wrapper.py @@ -1,10 +1,13 @@ import json +import logging from opentelemetry.instrumentation.bedrock.utils import ( dont_throw, ) from wrapt import ObjectProxy +logger = logging.getLogger(__name__) + class StreamingWrapper(ObjectProxy): def __init__( @@ -47,16 +50,29 @@ def _process_event(self, event): decoded_chunk.get("content_block") ) elif type == "content_block_delta": - self._accumulating_body["content"][-1]["text"] += decoded_chunk.get( - "delta" - ).get("text") + delta = decoded_chunk.get("delta", {}) + if delta.get("text") is not None: + self._accumulating_body["content"][-1]["text"] += delta["text"] + elif delta.get("type") == "input_json_delta": + partial_json = delta.get("partial_json", "") + current = self._accumulating_body["content"][-1] + current.setdefault("input", "") + current["input"] += partial_json + elif type == "message_delta": + delta = decoded_chunk.get("delta", {}) + if delta.get("stop_reason"): + self._accumulating_body["stop_reason"] = delta["stop_reason"] + if decoded_chunk.get("usage"): + usage = self._accumulating_body.get("usage", {}) + usage.update(decoded_chunk["usage"]) + self._accumulating_body["usage"] = usage elif type == "message_stop": self._accumulating_body["invocation_metrics"] = decoded_chunk.get( "amazon-bedrock-invocationMetrics" ) def _accumulate_events(self, event): - print(self._accumulating_body) + logger.debug("Accumulating body: %s", self._accumulating_body) for key in event: if key == "contentBlockDelta": delta = event.get(key).get("delta", {}).get("text") diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_guardrails_metrics.py b/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_guardrails_metrics.py index 115a20661c..586f4877dd 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_guardrails_metrics.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_guardrails_metrics.py @@ -94,13 +94,13 @@ def assert_guardrails(reader): for sm in rm.scope_metrics: for metric in sm.metrics: - if metric.name == GuardrailMeters.LLM_BEDROCK_GUARDRAIL_ACTIVATION: + if metric.name == GuardrailMeters.GEN_AI_BEDROCK_GUARDRAIL_ACTIVATION: found_activations = True for data_point in metric.data.data_points: assert data_point.attributes["gen_ai.guardrail"] != "" assert data_point.value > 0 - if metric.name == GuardrailMeters.LLM_BEDROCK_GUARDRAIL_LATENCY: + if metric.name == GuardrailMeters.GEN_AI_BEDROCK_GUARDRAIL_LATENCY: found_latency = True for data_point in metric.data.data_points: assert data_point.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE] in [ @@ -114,7 +114,7 @@ def assert_guardrails(reader): data_point.sum > 0 for data_point in metric.data.data_points ) - if metric.name == GuardrailMeters.LLM_BEDROCK_GUARDRAIL_COVERAGE: + if metric.name == GuardrailMeters.GEN_AI_BEDROCK_GUARDRAIL_COVERAGE: found_coverage = True for data_point in metric.data.data_points: assert data_point.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE] in [ diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_prompt_caching_metrics.py b/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_prompt_caching_metrics.py index abc6ddd523..a6c52ea02b 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_prompt_caching_metrics.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/metrics/test_bedrock_prompt_caching_metrics.py @@ -52,7 +52,7 @@ def assert_metric(reader, usage): resource_metrics = metrics_data.resource_metrics assert len(resource_metrics) > 0 - m = get_metric(resource_metrics, PromptCaching.LLM_BEDROCK_PROMPT_CACHING) + m = get_metric(resource_metrics, PromptCaching.GEN_AI_PROMPT_CACHING) for data_point in m.data.data_points: assert data_point.attributes[CacheSpanAttrs.TYPE] in [ "read", diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py index c24d79ce38..3b3478cd87 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py @@ -501,7 +501,7 @@ def test_anthropic_3_completion_streaming_with_events_with_content( # Validate the ai response choice_event = { "index": 0, - "finish_reason": "unknown", + "finish_reason": "end_turn", "message": {"content": response.get("body")._accumulating_body.get("content")}, } assert_message_in_logs(logs[1], "gen_ai.choice", choice_event) @@ -568,7 +568,7 @@ def test_anthropic_3_completion_streaming_with_events_with_no_content( # Validate the ai response choice_event = { "index": 0, - "finish_reason": "unknown", + "finish_reason": "end_turn", "message": {}, } assert_message_in_logs(logs[1], "gen_ai.choice", choice_event) diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_imported_model.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_imported_model.py index c6c5852dde..32f126848f 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_imported_model.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_imported_model.py @@ -44,9 +44,10 @@ def test_imported_model_completion(instrument_legacy, brt, span_exporter, log_ex output_messages = json.loads(imported_model_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) assert output_messages[0]["role"] == "assistant" - assert output_messages[0]["finish_reason"] == "stop" + # This cassette has no stop_reason, so finish_reason is omitted + assert "finish_reason" not in output_messages[0] - assert imported_model_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) + assert GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS not in imported_model_span.attributes assert data is not None diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_meta.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_meta.py index 9b1695d90a..dc0efb5856 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_meta.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_meta.py @@ -363,14 +363,16 @@ def test_meta_converse(instrument_legacy, brt, span_exporter, log_exporter): meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == response["usage"]["totalTokens"] ) + # Assert on system instructions + system_instructions = json.loads(meta_span.attributes[GenAIAttributes.GEN_AI_SYSTEM_INSTRUCTIONS]) + assert system_instructions[0]["content"] == system_prompt + input_messages = json.loads( meta_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] ) - assert len(input_messages) == 2 - assert input_messages[0]["role"] == "system" - assert input_messages[0]["parts"][0]["content"] == system_prompt - assert input_messages[1]["role"] == "user" - assert input_messages[1]["parts"] == [ + assert len(input_messages) == 1 + assert input_messages[0]["role"] == "user" + assert input_messages[0]["parts"] == [ {"type": "text", "content": "Tell me a joke about opentelemetry"} ] @@ -579,14 +581,16 @@ def test_meta_converse_stream(instrument_legacy, brt, span_exporter, log_exporte meta_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == inputTokens + outputTokens ) + # Assert on system instructions + system_instructions = json.loads(meta_span.attributes[GenAIAttributes.GEN_AI_SYSTEM_INSTRUCTIONS]) + assert system_instructions[0]["content"] == system_prompt + input_messages = json.loads( meta_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES] ) - assert len(input_messages) == 2 - assert input_messages[0]["role"] == "system" - assert input_messages[0]["parts"][0]["content"] == system_prompt - assert input_messages[1]["role"] == "user" - assert input_messages[1]["parts"] == [ + assert len(input_messages) == 1 + assert input_messages[0]["role"] == "user" + assert input_messages[0]["parts"] == [ {"type": "text", "content": "Tell me a joke about opentelemetry"} ] diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py index 140c8f91e0..bb9acea77f 100644 --- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py +++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_nova.py @@ -55,12 +55,14 @@ def test_nova_completion(instrument_legacy, brt, span_exporter, log_exporter): == GenAiOperationNameValues.TEXT_COMPLETION.value ) + # Assert on system instructions + system_instructions = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM_INSTRUCTIONS]) + assert system_instructions[0]["content"] == system_list[0].get("text") + # Assert on prompt input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) - assert input_messages[0]["role"] == "system" - assert input_messages[0]["parts"][0]["content"] == system_list[0].get("text") - assert input_messages[1]["role"] == "user" - assert input_messages[1]["parts"] == [{"type": "text", "content": "A camping trip"}] + assert input_messages[0]["role"] == "user" + assert input_messages[0]["parts"] == [{"type": "text", "content": "A camping trip"}] # Assert on response generated_text = response_body["output"]["message"]["content"] @@ -295,21 +297,19 @@ def test_nova_invoke_stream(instrument_legacy, brt, span_exporter, log_exporter) == GenAiOperationNameValues.TEXT_COMPLETION.value ) + # Assert on system instructions + system_instructions = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM_INSTRUCTIONS]) + assert system_instructions[0]["content"] == system_list[0].get("text") + # Assert on prompt input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) - assert input_messages[0]["role"] == "system" - assert input_messages[0]["parts"][0]["content"] == system_list[0].get("text") - assert input_messages[1]["role"] == "user" - assert input_messages[1]["parts"] == [{"type": "text", "content": "A camping trip"}] + assert input_messages[0]["role"] == "user" + assert input_messages[0]["parts"] == [{"type": "text", "content": "A camping trip"}] # Assert on response completion_msg = "".join(generated_text) output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) assert output_messages[0]["parts"][0]["content"] == completion_msg - assert output_messages[0]["finish_reason"] == "stop" - - # Assert on finish reasons - assert bedrock_span.attributes[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ("stop",) # Assert on other request parameters assert bedrock_span.attributes[ @@ -584,14 +584,16 @@ def test_nova_converse(instrument_legacy, brt, span_exporter, log_exporter): # Assert on request type assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value + # Assert on system instructions + system_instructions = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM_INSTRUCTIONS]) + assert system_instructions[0]["content"] == system[0].get("text") + # Assert on prompt input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) - assert input_messages[0]["role"] == "system" - assert input_messages[0]["parts"][0]["content"] == system[0].get("text") # User content has 3 blocks: 2 guardContent + 1 text, each becomes a part - assert len(input_messages[1]["parts"]) == 3 - assert input_messages[1]["parts"][0]["type"] == "text" - assert input_messages[1]["parts"][2]["content"] == "What is the capital of Japan?" + assert len(input_messages[0]["parts"]) == 3 + assert input_messages[0]["parts"][0]["type"] == "text" + assert input_messages[0]["parts"][2]["content"] == "What is the capital of Japan?" # Assert on response (guardrail_intervened maps to content_filter) generated_text = response["output"]["message"]["content"] @@ -882,14 +884,16 @@ def test_nova_converse_stream(instrument_legacy, brt, span_exporter, log_exporte # Assert on request type assert bedrock_span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAiOperationNameValues.CHAT.value + # Assert on system instructions + system_instructions = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_SYSTEM_INSTRUCTIONS]) + assert system_instructions[0]["content"] == system[0].get("text") + # Assert on prompt input_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_INPUT_MESSAGES]) - assert input_messages[0]["role"] == "system" - assert input_messages[0]["parts"][0]["content"] == system[0].get("text") # User content has 3 blocks: 2 guardContent + 1 text, each becomes a part - assert len(input_messages[1]["parts"]) == 3 - assert input_messages[1]["parts"][0]["type"] == "text" - assert input_messages[1]["parts"][2]["content"] == "What is the capital of Japan?" + assert len(input_messages[0]["parts"]) == 3 + assert input_messages[0]["parts"][0]["type"] == "text" + assert input_messages[0]["parts"][2]["content"] == "What is the capital of Japan?" # Assert on response (guardrail_intervened maps to content_filter) output_messages = json.loads(bedrock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]) From 3ab36a7ca81b711f4de60e058bf6c2fce4e2dca8 Mon Sep 17 00:00:00 2001 From: Max Deygin Date: Sun, 29 Mar 2026 12:55:23 +0300 Subject: [PATCH 04/19] fix(bedrock): round 2 OTel semconv compliance fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - #14: _converse_content_to_parts handles image blocks → BlobPart - #15: _anthropic_content_to_parts maps thinking blocks → ReasoningPart - #16: ChoiceEvent.finish_reason defaults to None (not "unknown") - #17: Event emitter maps provider finish reasons via _map_finish_reason() - #18: _converse_content_to_parts handles video/document blocks - #19: _anthropic_content_to_parts parses string tool_use input via json.loads - #22: finish_reasons set unconditionally (not gated by should_send_prompts) New unit tests: test_content_parts.py, test_event_emitter.py Updated integration tests: anthropic, cohere, meta, nova, titan --- .../instrumentation/bedrock/event_emitter.py | 28 +- .../instrumentation/bedrock/event_models.py | 2 +- .../instrumentation/bedrock/span_utils.py | 99 ++++- .../tests/test_content_parts.py | 359 ++++++++++++++++++ .../tests/test_event_emitter.py | 219 +++++++++++ .../tests/traces/test_anthropic.py | 20 +- .../tests/traces/test_cohere.py | 4 +- .../tests/traces/test_meta.py | 8 +- .../tests/traces/test_nova.py | 20 +- .../tests/traces/test_titan.py | 14 +- 10 files changed, 716 insertions(+), 57 deletions(-) create mode 100644 packages/opentelemetry-instrumentation-bedrock/tests/test_content_parts.py create mode 100644 packages/opentelemetry-instrumentation-bedrock/tests/test_event_emitter.py diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/event_emitter.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/event_emitter.py index bb77c4571f..bab6a971a9 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/event_emitter.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/event_emitter.py @@ -5,6 +5,7 @@ from opentelemetry._logs import Logger, LogRecord from opentelemetry.instrumentation.bedrock.event_models import ChoiceEvent, MessageEvent +from opentelemetry.instrumentation.bedrock.span_utils import _map_finish_reason from opentelemetry.instrumentation.bedrock.utils import ( should_emit_events, should_send_prompts, @@ -73,8 +74,8 @@ def emit_choice_events(event_logger: Optional[Logger], response): "content": message.get("data", {}).get("text"), "role": "assistant", }, - finish_reason=message.get("finishReason", {}).get( - "reason", "unknown" + finish_reason=_map_finish_reason( + message.get("finishReason", {}).get("reason") ), ), event_logger, @@ -91,7 +92,7 @@ def emit_choice_events(event_logger: Optional[Logger], response): or response_body.get("generation"), "role": "assistant", }, - finish_reason=response_body.get("stop_reason", "unknown"), + finish_reason=_map_finish_reason(response_body.get("stop_reason")), ), event_logger, ) @@ -101,7 +102,7 @@ def emit_choice_events(event_logger: Optional[Logger], response): ChoiceEvent( index=i, message={"content": message.get("text"), "role": "assistant"}, - finish_reason=message.get("finish_reason", "unknown"), + finish_reason=_map_finish_reason(message.get("finish_reason")), ), event_logger, ) @@ -111,7 +112,7 @@ def emit_choice_events(event_logger: Optional[Logger], response): ChoiceEvent( index=i, message={"content": message.get("text"), "role": "assistant"}, - finish_reason=message.get("finish_reason", "unknown"), + finish_reason=_map_finish_reason(message.get("finish_reason")), ), event_logger, ) @@ -125,7 +126,7 @@ def emit_choice_events(event_logger: Optional[Logger], response): .get("content"), "role": "assistant", }, - finish_reason=response_body.get("stopReason", "unknown"), + finish_reason=_map_finish_reason(response_body.get("stopReason")), ), event_logger, ) @@ -135,7 +136,7 @@ def emit_choice_events(event_logger: Optional[Logger], response): ChoiceEvent( index=i, message={"content": message.get("outputText"), "role": "assistant"}, - finish_reason=message.get("completionReason", "unknown"), + finish_reason=_map_finish_reason(message.get("completionReason")), ), event_logger, ) @@ -144,7 +145,7 @@ def emit_choice_events(event_logger: Optional[Logger], response): ChoiceEvent( index=0, message={"content": response_body.get("content"), "role": "assistant"}, - finish_reason=response_body.get("stop_reason", "unknown"), + finish_reason=_map_finish_reason(response_body.get("stop_reason")), ), event_logger, ) @@ -184,7 +185,7 @@ def emit_response_event_converse(response, event_logger): "content": response.get("output", {}).get("message", {}).get("content"), "role": response.get("output", {}).get("message", {}).get("role"), }, - finish_reason=response.get("stopReason", "unknown"), + finish_reason=_map_finish_reason(response.get("stopReason")), ), event_logger, ) @@ -199,8 +200,7 @@ def emit_streaming_response_event(response_body, event_logger): or response_body.get("outputText"), "role": "assistant", }, - # Sometimes, the value is None, what goes agains the semantic conventions - finish_reason=response_body.get("stop_reason") or "unknown", + finish_reason=_map_finish_reason(response_body.get("stop_reason")), ), event_logger, ) @@ -210,14 +210,14 @@ def emit_streaming_converse_response_event( event_logger: Optional[Logger], response_msg: List[str], role: str, - finish_reason: str, + finish_reason: Optional[str], ): accumulated_text = "".join(response_msg) emit_event( ChoiceEvent( index=0, message={"content": accumulated_text, "role": role}, - finish_reason=finish_reason, + finish_reason=_map_finish_reason(finish_reason), ), event_logger, ) @@ -279,6 +279,8 @@ def _emit_message_event( def _emit_choice_event(event: ChoiceEvent, event_logger: Optional[Logger]) -> None: body = asdict(event) + if event.finish_reason is None: + body.pop("finish_reason", None) if event.message["role"] == Roles.ASSISTANT.value: # According to the semantic conventions, the role is conditionally required if available # and not equal to "assistant", so remove the role from the body if it is "assistant". diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/event_models.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/event_models.py index e3b5f3cc60..f5f3881494 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/event_models.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/event_models.py @@ -37,5 +37,5 @@ class ChoiceEvent: index: int message: CompletionMessage - finish_reason: str = "unknown" + finish_reason: Optional[str] = None tool_calls: Optional[List[ToolCall]] = None diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py index cec161b012..3511010515 100644 --- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py +++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/span_utils.py @@ -69,11 +69,17 @@ def _anthropic_content_to_parts(content_blocks): if block_type == "text": parts.append(_text_part(block.get("text", ""))) elif block_type == "tool_use": + raw_input = block.get("input", {}) + if isinstance(raw_input, str): + try: + raw_input = json.loads(raw_input) + except (json.JSONDecodeError, TypeError): + pass parts.append({ "type": "tool_call", "name": block.get("name"), "id": block.get("id"), - "arguments": block.get("input", {}), + "arguments": raw_input, }) elif block_type == "tool_result": parts.append({ @@ -104,6 +110,8 @@ def _anthropic_content_to_parts(content_blocks): "mime_type": source.get("media_type", ""), "content": source.get("data", ""), }) + elif block_type == "thinking": + parts.append({"type": "reasoning", "content": block.get("thinking", "")}) else: parts.append({"type": block_type, "content": json.dumps(block)}) else: @@ -172,6 +180,7 @@ def set_model_message_span_attributes(model_vendor, span, request_body): def set_model_choice_span_attributes(model_vendor, span, response_body): + _set_finish_reasons_unconditionally(model_vendor, span, response_body) if not should_send_prompts(): return if model_vendor == "cohere": @@ -188,6 +197,51 @@ def set_model_choice_span_attributes(model_vendor, span, response_body): _set_imported_model_response_span_attributes(span, response_body) +def _set_finish_reasons_unconditionally(model_vendor, span, response_body): + """Set finish_reasons on span regardless of should_send_prompts() — it's metadata, not content.""" + finish_reasons = [] + if model_vendor == "cohere": + for gen in response_body.get("generations", []): + fr = _map_finish_reason(gen.get("finish_reason")) + if fr: + finish_reasons.append(fr) + elif model_vendor == "anthropic": + fr = _map_finish_reason(response_body.get("stop_reason")) + if fr: + finish_reasons.append(fr) + elif model_vendor == "ai21": + for comp in response_body.get("completions", []): + fr_data = comp.get("finishReason", {}) + raw = fr_data.get("reason") if isinstance(fr_data, dict) else str(fr_data) + fr = _map_finish_reason(raw) + if fr: + finish_reasons.append(fr) + elif model_vendor == "meta": + fr = _map_finish_reason(response_body.get("stop_reason")) + if fr: + finish_reasons.append(fr) + elif model_vendor == "amazon": + if "results" in response_body: + for result in response_body.get("results", []): + fr = _map_finish_reason(result.get("completionReason")) + if fr: + finish_reasons.append(fr) + elif "output" in response_body: + fr = _map_finish_reason(response_body.get("stopReason")) + if fr: + finish_reasons.append(fr) + else: + fr = _map_finish_reason(response_body.get("completionReason")) + if fr: + finish_reasons.append(fr) + elif model_vendor == "imported_model": + fr = _map_finish_reason(response_body.get("stop_reason")) + if fr: + finish_reasons.append(fr) + if finish_reasons: + _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, tuple(finish_reasons)) + + def set_model_span_attributes( provider, model_vendor, @@ -851,6 +905,33 @@ def _converse_content_to_parts(content_blocks): "id": result.get("toolUseId"), "response": json.dumps(result.get("content", ""), default=str), }) + elif "image" in block: + img = block["image"] + fmt = img.get("format", "") + parts.append({ + "type": "blob", + "modality": "image", + "mime_type": f"image/{fmt}" if fmt else "", + "content": "", + }) + elif "video" in block: + vid = block["video"] + fmt = vid.get("format", "") + parts.append({ + "type": "blob", + "modality": "video", + "mime_type": f"video/{fmt}" if fmt else "", + "content": "